Bridge++  Version 1.4.4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fft_xyz_1dim.cpp
Go to the documentation of this file.
1 
14 #ifdef USE_FFTWLIB
15 
16 #include "fft_xyz_1dim.h"
17 
18 #ifdef USE_FACTORY
19 namespace {
20  FFT *create_object()
21  {
22  return new FFT_xyz_1dim();
23  }
24 
25 
26  bool init = FFT::Factory::Register("FFT_xyz_1dim", create_object);
27 }
28 #endif
29 
30 const std::string FFT_xyz_1dim::class_name = "FFT_xyz_1dim";
31 
32 //====================================================================
33 void FFT_xyz_1dim::set_parameters(const Parameters& params)
34 {
35  const std::string str_vlevel = params.get_string("verbose_level");
36 
37  m_vl = vout.set_verbose_level(str_vlevel);
38 
39  //- fetch and check input parameters
40  std::string str_fft_direction;
41 
42  int err = 0;
43  err += params.fetch_string("FFT_direction", str_fft_direction);
44 
45  if (err) {
46  vout.crucial(m_vl, "Error at %s: input parameter not found.\n", class_name.c_str());
47  exit(EXIT_FAILURE);
48  }
49 
50  set_parameters(str_fft_direction);
51 }
52 
53 
54 //====================================================================
55 void FFT_xyz_1dim::set_parameters(const std::string str_fft_direction)
56 {
57  //- print input parameters
58  vout.general(m_vl, "%s:\n", class_name.c_str());
59  vout.general(m_vl, " FFT_direction = %s\n", str_fft_direction.c_str());
60 
61  //- range check
62 
63  //- store values
64  if (str_fft_direction == "Forward") {
65  m_is_forward = true;
66  } else if (str_fft_direction == "Backward") {
67  m_is_forward = false;
68  } else {
69  vout.crucial(m_vl, "Error at %s: unsupported FFT direction \"%s\"\n", class_name.c_str(), str_fft_direction.c_str());
70  exit(EXIT_FAILURE);
71  }
72 }
73 
74 
75 //====================================================================
76 void FFT_xyz_1dim::fft(Field& field)
77 {
78  const int Ndim = CommonParameters::Ndim();
79 
80  //- global lattice size
81  std::vector<int> Lsize(Ndim - 1);
82 
83  Lsize[0] = CommonParameters::Lx();
84  Lsize[1] = CommonParameters::Ly();
85  Lsize[2] = CommonParameters::Lz();
86 
87  const int Lt = CommonParameters::Lt();
88  const int Lxyz = Lsize[0] * Lsize[1] * Lsize[2];
89 
90  //- local lattice size
91  std::vector<int> Nsize(Ndim - 1);
92  Nsize[0] = CommonParameters::Nx();
93  Nsize[1] = CommonParameters::Ny();
94  Nsize[2] = CommonParameters::Nz();
95 
96  const int Nin = field.nin();
97  const int Nex = field.nex();
98 
99  std::vector<int> NPE(Ndim);
100  NPE[0] = CommonParameters::NPEx();
101  NPE[1] = CommonParameters::NPEy();
102  NPE[2] = CommonParameters::NPEz();
103  NPE[3] = CommonParameters::NPEt();
104 
105  const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
106  const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
107  const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
108 
109  if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
110  vout.crucial(m_vl, "Error at %s: FFTW supports parallelization only in 1 direction of x,y,z.\n",
111  class_name.c_str());
112  exit(EXIT_FAILURE);
113  }
114 
115  bool is_allocated = false;
116  int Lsize_Nsize_prev = 0;
117 
118 #ifdef USE_OPENMP
119  int threads_ok = fftw_init_threads();
120 #endif
121 #ifdef USE_MPI
122  fftw_mpi_init();
123 #endif
124 
125 
126  //- xyz loop
127  for (int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
128  //- allocate m_in,out = m_in,out[Lsize]
129  if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
130  if (is_allocated) {
131  fftw_free(m_in);
132  fftw_free(m_out);
133  fftw_destroy_plan(m_plan);
134  }
135 
136  is_allocated = false;
137  }
138  Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
139 
140  if (!is_allocated) {
141  if (Lsize[i_dim] == Nsize[i_dim]) {
142  size_t fftw_size = sizeof(fftw_complex) * Lsize[i_dim];
143  m_in = (fftw_complex *)fftw_malloc(fftw_size);
144  m_out = (fftw_complex *)fftw_malloc(fftw_size);
145 
146  if (!m_in || !m_out) {
147  vout.crucial(m_vl, "Error at %s: failed to allocate memory %d [Byte].\n",
148  class_name.c_str(), (int)fftw_size);
149  exit(EXIT_FAILURE);
150  }
151  } else {
152 #ifdef USE_MPI
153  ptrdiff_t Lsize_p = Lsize[i_dim];
154  ptrdiff_t fftw_size_p;
155 
156  if (m_is_forward) {
157  fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
159  FFTW_FORWARD, FFTW_ESTIMATE,
160  &m_Nsize_in_p, &m_start_in_p,
161  &m_Nsize_out_p, &m_start_out_p);
162  } else {
163  fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
165  FFTW_BACKWARD, FFTW_ESTIMATE,
166  &m_Nsize_in_p, &m_start_in_p,
167  &m_Nsize_out_p, &m_start_out_p);
168  }
169 
170  m_in = fftw_alloc_complex(fftw_size_p);
171  m_out = fftw_alloc_complex(fftw_size_p);
172 
173  if (!m_in || !m_out) {
174  vout.crucial(m_vl, "Error at %s: failed to allocate memory %d [Byte].\n",
175  class_name.c_str(), (int)fftw_size_p);
176  exit(EXIT_FAILURE);
177  }
178 #endif
179  }
180 
181  is_allocated = true;
182  }
183 
184 
185  //- setup FFTW plan
186 #ifdef USE_OPENMP
188  fftw_plan_with_nthreads(Nthread);
189 #endif
190 
191  if (Lsize[i_dim] == Nsize[i_dim]) {
192  if (m_is_forward) {
193  m_plan = fftw_plan_dft_1d(Lsize[i_dim],
194  m_in, m_out,
195  FFTW_FORWARD, FFTW_ESTIMATE);
196  } else {
197  m_plan = fftw_plan_dft_1d(Lsize[i_dim],
198  m_in, m_out,
199  FFTW_BACKWARD, FFTW_ESTIMATE);
200  }
201  } else {
202 #ifdef USE_MPI
203  ptrdiff_t Lsize_p = Lsize[i_dim];
204 
205  if (m_is_forward) {
206  m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
207  m_in, m_out,
209  FFTW_FORWARD, FFTW_ESTIMATE);
210  } else {
211  m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
212  m_in, m_out,
214  FFTW_BACKWARD, FFTW_ESTIMATE);
215  }
216 #endif
217  }
218 
219 
220  // #### Execution main part ####
221  //- Nin is devided by 2, because of complex(i.e. real and imag)
222  for (int in2 = 0; in2 < Nin / 2; ++in2) {
223  for (int t_global = 0; t_global < Lt; t_global++) {
224  for (int ex = 0; ex < Nex; ++ex) {
225  if (i_dim == 0) {
226  for (int z = 0; z < Nsize[2]; z++) {
227  for (int y = 0; y < Nsize[1]; y++) {
228  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
229  int isite = m_index.site(xyz_local, y, z, t_global);
230  int i_real = 2 * in2;
231  int i_imag = 2 * in2 + 1;
232 
233  m_in[xyz_local][0] = field.cmp(i_real, isite, ex);
234  m_in[xyz_local][1] = field.cmp(i_imag, isite, ex);
235  }
236 
237 
238  fftw_execute(m_plan);
239 
240 
241  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
242  int isite = m_index.site(xyz_local, y, z, t_global);
243  int i_real = 2 * in2;
244  int i_imag = 2 * in2 + 1;
245 
246  field.set(i_real, isite, ex, m_out[xyz_local][0]);
247  field.set(i_imag, isite, ex, m_out[xyz_local][1]);
248  }
249  }
250  }
251  } else if (i_dim == 1) {
252  for (int x = 0; x < Nsize[0]; x++) {
253  for (int z = 0; z < Nsize[2]; z++) {
254  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
255  int isite = m_index.site(x, xyz_local, z, t_global);
256  int i_real = 2 * in2;
257  int i_imag = 2 * in2 + 1;
258 
259  m_in[xyz_local][0] = field.cmp(i_real, isite, ex);
260  m_in[xyz_local][1] = field.cmp(i_imag, isite, ex);
261  }
262 
263 
264  fftw_execute(m_plan);
265 
266 
267  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
268  int isite = m_index.site(x, xyz_local, z, t_global);
269  int i_real = 2 * in2;
270  int i_imag = 2 * in2 + 1;
271 
272  field.set(i_real, isite, ex, m_out[xyz_local][0]);
273  field.set(i_imag, isite, ex, m_out[xyz_local][1]);
274  }
275  }
276  }
277  } else if (i_dim == 2) {
278  for (int y = 0; y < Nsize[1]; y++) {
279  for (int x = 0; x < Nsize[0]; x++) {
280  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
281  int isite = m_index.site(x, y, xyz_local, t_global);
282  int i_real = 2 * in2;
283  int i_imag = 2 * in2 + 1;
284 
285  m_in[xyz_local][0] = field.cmp(i_real, isite, ex);
286  m_in[xyz_local][1] = field.cmp(i_imag, isite, ex);
287  }
288 
289 
290  fftw_execute(m_plan);
291 
292 
293  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
294  int isite = m_index.site(x, y, xyz_local, t_global);
295  int i_real = 2 * in2;
296  int i_imag = 2 * in2 + 1;
297 
298  field.set(i_real, isite, ex, m_out[xyz_local][0]);
299  field.set(i_imag, isite, ex, m_out[xyz_local][1]);
300  }
301  }
302  }
303  }
304  //- end of if( i_dim == 0 ){
305  }
306  }
307  }
308  //- end of outer loops
309  }
310  //- end of for (int i_dim = 0; i_dim < Ndim - 1; ++i_dim)
311 
312  //- normailzation for FFTW_BACKWARD
313  if (!m_is_forward) {
314  scal(field, 1.0 / Lxyz);
315  }
316 
317  //- tidy up
318  if (is_allocated) {
319  fftw_free(m_in);
320  fftw_free(m_out);
321  fftw_destroy_plan(m_plan);
322  }
323 }
324 
325 
326 //====================================================================
327 void FFT_xyz_1dim::fft(Field& field_out, const Field& field_in)
328 {
329  const int Ndim = CommonParameters::Ndim();
330 
331  //- global lattice size
332  std::vector<int> Lsize(Ndim - 1);
333 
334  Lsize[0] = CommonParameters::Lx();
335  Lsize[1] = CommonParameters::Ly();
336  Lsize[2] = CommonParameters::Lz();
337 
338  const int Lt = CommonParameters::Lt();
339  const int Lxyz = Lsize[0] * Lsize[1] * Lsize[2];
340 
341  //- local lattice size
342  std::vector<int> Nsize(Ndim - 1);
343  Nsize[0] = CommonParameters::Nx();
344  Nsize[1] = CommonParameters::Ny();
345  Nsize[2] = CommonParameters::Nz();
346 
347  const int Nin = field_in.nin();
348  const int Nex = field_in.nex();
349 
350  std::vector<int> NPE(Ndim);
351  NPE[0] = CommonParameters::NPEx();
352  NPE[1] = CommonParameters::NPEy();
353  NPE[2] = CommonParameters::NPEz();
354  NPE[3] = CommonParameters::NPEt();
355 
356  const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
357  const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
358  const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
359 
360  if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
361  vout.crucial(m_vl, "Error at %s: FFTW supports parallelization only in 1 direction.\n",
362  class_name.c_str());
363  exit(EXIT_FAILURE);
364  }
365 
366  bool is_allocated = false;
367  int Lsize_Nsize_prev = 0;
368 
369  field_out = field_in;
370 
371 #ifdef USE_OPENMP
372  int threads_ok = fftw_init_threads();
373 #endif
374 #ifdef USE_MPI
375  fftw_mpi_init();
376 #endif
377 
378 
379  //- xyz loop
380  for (int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
381  //- allocate m_in,out = m_in,out[Lsize]
382  if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
383  if (is_allocated) {
384  fftw_free(m_in);
385  fftw_free(m_out);
386  fftw_destroy_plan(m_plan);
387  }
388 
389  is_allocated = false;
390  }
391  Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
392 
393  if (!is_allocated) {
394  if (Lsize[i_dim] == Nsize[i_dim]) {
395  size_t fftw_size = sizeof(fftw_complex) * Lsize[i_dim];
396  m_in = (fftw_complex *)fftw_malloc(fftw_size);
397  m_out = (fftw_complex *)fftw_malloc(fftw_size);
398 
399  if (!m_in || !m_out) {
400  vout.crucial(m_vl, "Error at %s: failed to allocate memory %d [Byte].\n",
401  class_name.c_str(), (int)fftw_size);
402  exit(EXIT_FAILURE);
403  }
404  } else {
405 #ifdef USE_MPI
406  ptrdiff_t Lsize_p = Lsize[i_dim];
407  ptrdiff_t fftw_size_p;
408 
409  if (m_is_forward) {
410  fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
412  FFTW_FORWARD, FFTW_ESTIMATE,
413  &m_Nsize_in_p, &m_start_in_p,
414  &m_Nsize_out_p, &m_start_out_p);
415  } else {
416  fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
418  FFTW_BACKWARD, FFTW_ESTIMATE,
419  &m_Nsize_in_p, &m_start_in_p,
420  &m_Nsize_out_p, &m_start_out_p);
421  }
422 
423  m_in = fftw_alloc_complex(fftw_size_p);
424  m_out = fftw_alloc_complex(fftw_size_p);
425 
426  if (!m_in || !m_out) {
427  vout.crucial(m_vl, "Error at %s: failed to allocate memory %d [Byte].\n",
428  class_name.c_str(), (int)fftw_size_p);
429  exit(EXIT_FAILURE);
430  }
431 #endif
432  }
433 
434  is_allocated = true;
435  }
436 
437 
438  //- setup FFTW plan
439 #ifdef USE_OPENMP
441  fftw_plan_with_nthreads(Nthread);
442 #endif
443 
444  if (Lsize[i_dim] == Nsize[i_dim]) {
445  if (m_is_forward) {
446  m_plan = fftw_plan_dft_1d(Lsize[i_dim],
447  m_in, m_out,
448  FFTW_FORWARD, FFTW_ESTIMATE);
449  } else {
450  m_plan = fftw_plan_dft_1d(Lsize[i_dim],
451  m_in, m_out,
452  FFTW_BACKWARD, FFTW_ESTIMATE);
453  }
454  } else {
455 #ifdef USE_MPI
456  ptrdiff_t Lsize_p = Lsize[i_dim];
457 
458  if (m_is_forward) {
459  m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
460  m_in, m_out,
462  FFTW_FORWARD, FFTW_ESTIMATE);
463  } else {
464  m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
465  m_in, m_out,
467  FFTW_BACKWARD, FFTW_ESTIMATE);
468  }
469 #endif
470  }
471 
472 
473  // #### Execution main part ####
474  //- Nin is devided by 2, because of complex(i.e. real and imag)
475  for (int in2 = 0; in2 < Nin / 2; ++in2) {
476  for (int t_global = 0; t_global < Lt; t_global++) {
477  for (int ex = 0; ex < Nex; ++ex) {
478 
479  if (i_dim == 0) {
480  for (int z = 0; z < Nsize[2]; z++) {
481  for (int y = 0; y < Nsize[1]; y++) {
482  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
483  int isite = m_index.site(xyz_local, y, z, t_global);
484  int i_real = 2 * in2;
485  int i_imag = 2 * in2 + 1;
486 
487  m_in[xyz_local][0] = field_out.cmp(i_real, isite, ex);
488  m_in[xyz_local][1] = field_out.cmp(i_imag, isite, ex);
489  }
490 
491 
492  fftw_execute(m_plan);
493 
494 
495  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
496  int isite = m_index.site(xyz_local, y, z, t_global);
497  int i_real = 2 * in2;
498  int i_imag = 2 * in2 + 1;
499 
500  field_out.set(i_real, isite, ex, m_out[xyz_local][0]);
501  field_out.set(i_imag, isite, ex, m_out[xyz_local][1]);
502  }
503  }
504  }
505 
506  } else if (i_dim == 1) {
507  for (int x = 0; x < Nsize[0]; x++) {
508  for (int z = 0; z < Nsize[2]; z++) {
509  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
510  int isite = m_index.site(x, xyz_local, z, t_global);
511  int i_real = 2 * in2;
512  int i_imag = 2 * in2 + 1;
513 
514  m_in[xyz_local][0] = field_out.cmp(i_real, isite, ex);
515  m_in[xyz_local][1] = field_out.cmp(i_imag, isite, ex);
516  }
517 
518 
519  fftw_execute(m_plan);
520 
521 
522  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
523  int isite = m_index.site(x, xyz_local, z, t_global);
524  int i_real = 2 * in2;
525  int i_imag = 2 * in2 + 1;
526 
527  field_out.set(i_real, isite, ex, m_out[xyz_local][0]);
528  field_out.set(i_imag, isite, ex, m_out[xyz_local][1]);
529  }
530  }
531  }
532 
533  } else if (i_dim == 2) {
534  for (int y = 0; y < Nsize[1]; y++) {
535  for (int x = 0; x < Nsize[0]; x++) {
536  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
537  int isite = m_index.site(x, y, xyz_local, t_global);
538  int i_real = 2 * in2;
539  int i_imag = 2 * in2 + 1;
540 
541  m_in[xyz_local][0] = field_out.cmp(i_real, isite, ex);
542  m_in[xyz_local][1] = field_out.cmp(i_imag, isite, ex);
543  }
544 
545 
546  fftw_execute(m_plan);
547 
548 
549  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
550  int isite = m_index.site(x, y, xyz_local, t_global);
551  int i_real = 2 * in2;
552  int i_imag = 2 * in2 + 1;
553 
554  field_out.set(i_real, isite, ex, m_out[xyz_local][0]);
555  field_out.set(i_imag, isite, ex, m_out[xyz_local][1]);
556  }
557  }
558  }
559  }
560  //- end of if( i_dim == 0 ){
561  }
562  }
563  }
564  //- end of outer loops
565  }
566  //- end of for (int i_dim = 0; i_dim < Ndim - 1; ++i_dim)
567 
568  //- normailzation for FFTW_BACKWARD
569  if (!m_is_forward) {
570  scal(field_out, 1.0 / Lxyz);
571  }
572 
573  //- tidy up
574  if (is_allocated) {
575  fftw_free(m_in);
576  fftw_free(m_out);
577  fftw_destroy_plan(m_plan);
578  }
579 }
580 
581 
582 //==========================================================
583 //==================================================END=====
584 #endif
void scal(Field &x, const double a)
scal(x, a): x = a * x
Definition: field.cpp:282
BridgeIO vout
Definition: bridgeIO.cpp:495
static int get_num_threads()
returns available number of threads.
static int NPEy()
static int NPEt()
void set(const int jin, const int site, const int jex, double v)
Definition: field.h:164
void general(const char *format,...)
Definition: bridgeIO.cpp:195
Container of Field-type object.
Definition: field.h:39
double cmp(const int jin, const int site, const int jex) const
Definition: field.h:132
Class for parameters.
Definition: parameters.h:46
int fetch_string(const string &key, string &value) const
Definition: parameters.cpp:262
int nin() const
Definition: field.h:115
int nex() const
Definition: field.h:117
static int NPEx()
void crucial(const char *format,...)
Definition: bridgeIO.cpp:178
static int NPEz()
string get_string(const string &key) const
Definition: parameters.cpp:116
static VerboseLevel set_verbose_level(const std::string &str)
Definition: bridgeIO.cpp:131