Bridge++  Version 1.5.4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fft_xyz_1dim.cpp
Go to the documentation of this file.
1 
14 #ifdef USE_FFTWLIB
15 
16 #include "fft_xyz_1dim.h"
17 
18 #ifdef USE_FACTORY_AUTOREGISTER
19 namespace {
20  bool init = FFT_xyz_1dim::register_factory();
21 }
22 #endif
23 
24 const std::string FFT_xyz_1dim::class_name = "FFT_xyz_1dim";
25 
26 //====================================================================
27 void FFT_xyz_1dim::set_parameters(const Parameters& params)
28 {
29  const std::string str_vlevel = params.get_string("verbose_level");
30 
31  m_vl = vout.set_verbose_level(str_vlevel);
32 
33  //- fetch and check input parameters
34  std::string str_fft_direction;
35 
36  int err = 0;
37  err += params.fetch_string("FFT_direction", str_fft_direction);
38 
39  if (err) {
40  vout.crucial(m_vl, "Error at %s: input parameter not found.\n", class_name.c_str());
41  exit(EXIT_FAILURE);
42  }
43 
44  set_parameters(str_fft_direction);
45 }
46 
47 
48 //====================================================================
49 void FFT_xyz_1dim::set_parameters(const std::string& str_fft_direction)
50 {
51  //- print input parameters
52  vout.general(m_vl, "%s:\n", class_name.c_str());
53  vout.general(m_vl, " FFT_direction = %s\n", str_fft_direction.c_str());
54 
55  //- range check
56 
57  //- store values
58  if (str_fft_direction == "Forward") {
59  m_is_forward = true;
60  } else if (str_fft_direction == "Backward") {
61  m_is_forward = false;
62  } else {
63  vout.crucial(m_vl, "Error at %s: unsupported FFT direction \"%s\"\n", class_name.c_str(), str_fft_direction.c_str());
64  exit(EXIT_FAILURE);
65  }
66 }
67 
68 
69 //====================================================================
70 void FFT_xyz_1dim::fft(Field& field)
71 {
72  const int Ndim = CommonParameters::Ndim();
73 
74  //- global lattice size
75  std::vector<int> Lsize(Ndim - 1);
76 
77  Lsize[0] = CommonParameters::Lx();
78  Lsize[1] = CommonParameters::Ly();
79  Lsize[2] = CommonParameters::Lz();
80 
81  const int Lt = CommonParameters::Lt();
82  const int Lxyz = Lsize[0] * Lsize[1] * Lsize[2];
83 
84  //- local lattice size
85  std::vector<int> Nsize(Ndim - 1);
86  Nsize[0] = CommonParameters::Nx();
87  Nsize[1] = CommonParameters::Ny();
88  Nsize[2] = CommonParameters::Nz();
89 
90  const int Nin = field.nin();
91  const int Nex = field.nex();
92 
93  std::vector<int> NPE(Ndim);
94  NPE[0] = CommonParameters::NPEx();
95  NPE[1] = CommonParameters::NPEy();
96  NPE[2] = CommonParameters::NPEz();
97  NPE[3] = CommonParameters::NPEt();
98 
99  const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
100  const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
101  const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
102 
103  if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
104  vout.crucial(m_vl, "Error at %s: FFTW supports parallelization only in 1 direction of x,y,z.\n",
105  class_name.c_str());
106  exit(EXIT_FAILURE);
107  }
108 
109  bool is_allocated = false;
110  int Lsize_Nsize_prev = 0;
111 
112 #ifdef USE_OPENMP
113  int threads_ok = fftw_init_threads();
114 #endif
115 #ifdef USE_MPI
116  fftw_mpi_init();
117 #endif
118 
119 
120  //- xyz loop
121  for (int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
122  //- allocate m_in,out = m_in,out[Lsize]
123  if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
124  if (is_allocated) {
125  fftw_free(m_in);
126  fftw_free(m_out);
127  fftw_destroy_plan(m_plan);
128  }
129 
130  is_allocated = false;
131  }
132  Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
133 
134  if (!is_allocated) {
135  if (Lsize[i_dim] == Nsize[i_dim]) {
136  size_t fftw_size = sizeof(fftw_complex) * Lsize[i_dim];
137  m_in = (fftw_complex *)fftw_malloc(fftw_size);
138  m_out = (fftw_complex *)fftw_malloc(fftw_size);
139 
140  if (!m_in || !m_out) {
141  vout.crucial(m_vl, "Error at %s: failed to allocate memory %d [Byte].\n",
142  class_name.c_str(), (int)fftw_size);
143  exit(EXIT_FAILURE);
144  }
145  } else {
146 #ifdef USE_MPI
147  ptrdiff_t Lsize_p = Lsize[i_dim];
148  ptrdiff_t fftw_size_p;
149 
150  if (m_is_forward) {
151  fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
153  FFTW_FORWARD, FFTW_ESTIMATE,
154  &m_Nsize_in_p, &m_start_in_p,
155  &m_Nsize_out_p, &m_start_out_p);
156  } else {
157  fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
159  FFTW_BACKWARD, FFTW_ESTIMATE,
160  &m_Nsize_in_p, &m_start_in_p,
161  &m_Nsize_out_p, &m_start_out_p);
162  }
163 
164  m_in = fftw_alloc_complex(fftw_size_p);
165  m_out = fftw_alloc_complex(fftw_size_p);
166 
167  if (!m_in || !m_out) {
168  vout.crucial(m_vl, "Error at %s: failed to allocate memory %d [Byte].\n",
169  class_name.c_str(), (int)fftw_size_p);
170  exit(EXIT_FAILURE);
171  }
172 #endif
173  }
174 
175  is_allocated = true;
176  }
177 
178 
179  //- setup FFTW plan
180 #ifdef USE_OPENMP
182  fftw_plan_with_nthreads(Nthread);
183 #endif
184 
185  if (Lsize[i_dim] == Nsize[i_dim]) {
186  if (m_is_forward) {
187  m_plan = fftw_plan_dft_1d(Lsize[i_dim],
188  m_in, m_out,
189  FFTW_FORWARD, FFTW_ESTIMATE);
190  } else {
191  m_plan = fftw_plan_dft_1d(Lsize[i_dim],
192  m_in, m_out,
193  FFTW_BACKWARD, FFTW_ESTIMATE);
194  }
195  } else {
196 #ifdef USE_MPI
197  ptrdiff_t Lsize_p = Lsize[i_dim];
198 
199  if (m_is_forward) {
200  m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
201  m_in, m_out,
203  FFTW_FORWARD, FFTW_ESTIMATE);
204  } else {
205  m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
206  m_in, m_out,
208  FFTW_BACKWARD, FFTW_ESTIMATE);
209  }
210 #endif
211  }
212 
213 
214  // #### Execution main part ####
215  //- Nin is devided by 2, because of complex(i.e. real and imag)
216  for (int in2 = 0; in2 < Nin / 2; ++in2) {
217  for (int t_global = 0; t_global < Lt; t_global++) {
218  for (int ex = 0; ex < Nex; ++ex) {
219  if (i_dim == 0) {
220  for (int z = 0; z < Nsize[2]; z++) {
221  for (int y = 0; y < Nsize[1]; y++) {
222  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
223  int isite = m_index.site(xyz_local, y, z, t_global);
224  int i_real = 2 * in2;
225  int i_imag = 2 * in2 + 1;
226 
227  m_in[xyz_local][0] = field.cmp(i_real, isite, ex);
228  m_in[xyz_local][1] = field.cmp(i_imag, isite, ex);
229  }
230 
231 
232  fftw_execute(m_plan);
233 
234 
235  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
236  int isite = m_index.site(xyz_local, y, z, t_global);
237  int i_real = 2 * in2;
238  int i_imag = 2 * in2 + 1;
239 
240  field.set(i_real, isite, ex, m_out[xyz_local][0]);
241  field.set(i_imag, isite, ex, m_out[xyz_local][1]);
242  }
243  }
244  }
245  } else if (i_dim == 1) {
246  for (int x = 0; x < Nsize[0]; x++) {
247  for (int z = 0; z < Nsize[2]; z++) {
248  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
249  int isite = m_index.site(x, xyz_local, z, t_global);
250  int i_real = 2 * in2;
251  int i_imag = 2 * in2 + 1;
252 
253  m_in[xyz_local][0] = field.cmp(i_real, isite, ex);
254  m_in[xyz_local][1] = field.cmp(i_imag, isite, ex);
255  }
256 
257 
258  fftw_execute(m_plan);
259 
260 
261  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
262  int isite = m_index.site(x, xyz_local, z, t_global);
263  int i_real = 2 * in2;
264  int i_imag = 2 * in2 + 1;
265 
266  field.set(i_real, isite, ex, m_out[xyz_local][0]);
267  field.set(i_imag, isite, ex, m_out[xyz_local][1]);
268  }
269  }
270  }
271  } else if (i_dim == 2) {
272  for (int y = 0; y < Nsize[1]; y++) {
273  for (int x = 0; x < Nsize[0]; x++) {
274  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
275  int isite = m_index.site(x, y, xyz_local, t_global);
276  int i_real = 2 * in2;
277  int i_imag = 2 * in2 + 1;
278 
279  m_in[xyz_local][0] = field.cmp(i_real, isite, ex);
280  m_in[xyz_local][1] = field.cmp(i_imag, isite, ex);
281  }
282 
283 
284  fftw_execute(m_plan);
285 
286 
287  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
288  int isite = m_index.site(x, y, xyz_local, t_global);
289  int i_real = 2 * in2;
290  int i_imag = 2 * in2 + 1;
291 
292  field.set(i_real, isite, ex, m_out[xyz_local][0]);
293  field.set(i_imag, isite, ex, m_out[xyz_local][1]);
294  }
295  }
296  }
297  }
298  //- end of if( i_dim == 0 ){
299  }
300  }
301  }
302  //- end of outer loops
303  }
304  //- end of for (int i_dim = 0; i_dim < Ndim - 1; ++i_dim)
305 
306  //- normailzation for FFTW_BACKWARD
307  if (!m_is_forward) {
308  scal(field, 1.0 / Lxyz);
309  }
310 
311  //- tidy up
312  if (is_allocated) {
313  fftw_free(m_in);
314  fftw_free(m_out);
315  fftw_destroy_plan(m_plan);
316  }
317 }
318 
319 
320 //====================================================================
321 void FFT_xyz_1dim::fft(Field& field_out, const Field& field_in)
322 {
323  const int Ndim = CommonParameters::Ndim();
324 
325  //- global lattice size
326  std::vector<int> Lsize(Ndim - 1);
327 
328  Lsize[0] = CommonParameters::Lx();
329  Lsize[1] = CommonParameters::Ly();
330  Lsize[2] = CommonParameters::Lz();
331 
332  const int Lt = CommonParameters::Lt();
333  const int Lxyz = Lsize[0] * Lsize[1] * Lsize[2];
334 
335  //- local lattice size
336  std::vector<int> Nsize(Ndim - 1);
337  Nsize[0] = CommonParameters::Nx();
338  Nsize[1] = CommonParameters::Ny();
339  Nsize[2] = CommonParameters::Nz();
340 
341  const int Nin = field_in.nin();
342  const int Nex = field_in.nex();
343 
344  std::vector<int> NPE(Ndim);
345  NPE[0] = CommonParameters::NPEx();
346  NPE[1] = CommonParameters::NPEy();
347  NPE[2] = CommonParameters::NPEz();
348  NPE[3] = CommonParameters::NPEt();
349 
350  const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
351  const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
352  const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
353 
354  if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
355  vout.crucial(m_vl, "Error at %s: FFTW supports parallelization only in 1 direction.\n",
356  class_name.c_str());
357  exit(EXIT_FAILURE);
358  }
359 
360  bool is_allocated = false;
361  int Lsize_Nsize_prev = 0;
362 
363  field_out = field_in;
364 
365 #ifdef USE_OPENMP
366  int threads_ok = fftw_init_threads();
367 #endif
368 #ifdef USE_MPI
369  fftw_mpi_init();
370 #endif
371 
372 
373  //- xyz loop
374  for (int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
375  //- allocate m_in,out = m_in,out[Lsize]
376  if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
377  if (is_allocated) {
378  fftw_free(m_in);
379  fftw_free(m_out);
380  fftw_destroy_plan(m_plan);
381  }
382 
383  is_allocated = false;
384  }
385  Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
386 
387  if (!is_allocated) {
388  if (Lsize[i_dim] == Nsize[i_dim]) {
389  size_t fftw_size = sizeof(fftw_complex) * Lsize[i_dim];
390  m_in = (fftw_complex *)fftw_malloc(fftw_size);
391  m_out = (fftw_complex *)fftw_malloc(fftw_size);
392 
393  if (!m_in || !m_out) {
394  vout.crucial(m_vl, "Error at %s: failed to allocate memory %d [Byte].\n",
395  class_name.c_str(), (int)fftw_size);
396  exit(EXIT_FAILURE);
397  }
398  } else {
399 #ifdef USE_MPI
400  ptrdiff_t Lsize_p = Lsize[i_dim];
401  ptrdiff_t fftw_size_p;
402 
403  if (m_is_forward) {
404  fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
406  FFTW_FORWARD, FFTW_ESTIMATE,
407  &m_Nsize_in_p, &m_start_in_p,
408  &m_Nsize_out_p, &m_start_out_p);
409  } else {
410  fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
412  FFTW_BACKWARD, FFTW_ESTIMATE,
413  &m_Nsize_in_p, &m_start_in_p,
414  &m_Nsize_out_p, &m_start_out_p);
415  }
416 
417  m_in = fftw_alloc_complex(fftw_size_p);
418  m_out = fftw_alloc_complex(fftw_size_p);
419 
420  if (!m_in || !m_out) {
421  vout.crucial(m_vl, "Error at %s: failed to allocate memory %d [Byte].\n",
422  class_name.c_str(), (int)fftw_size_p);
423  exit(EXIT_FAILURE);
424  }
425 #endif
426  }
427 
428  is_allocated = true;
429  }
430 
431 
432  //- setup FFTW plan
433 #ifdef USE_OPENMP
435  fftw_plan_with_nthreads(Nthread);
436 #endif
437 
438  if (Lsize[i_dim] == Nsize[i_dim]) {
439  if (m_is_forward) {
440  m_plan = fftw_plan_dft_1d(Lsize[i_dim],
441  m_in, m_out,
442  FFTW_FORWARD, FFTW_ESTIMATE);
443  } else {
444  m_plan = fftw_plan_dft_1d(Lsize[i_dim],
445  m_in, m_out,
446  FFTW_BACKWARD, FFTW_ESTIMATE);
447  }
448  } else {
449 #ifdef USE_MPI
450  ptrdiff_t Lsize_p = Lsize[i_dim];
451 
452  if (m_is_forward) {
453  m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
454  m_in, m_out,
456  FFTW_FORWARD, FFTW_ESTIMATE);
457  } else {
458  m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
459  m_in, m_out,
461  FFTW_BACKWARD, FFTW_ESTIMATE);
462  }
463 #endif
464  }
465 
466 
467  // #### Execution main part ####
468  //- Nin is devided by 2, because of complex(i.e. real and imag)
469  for (int in2 = 0; in2 < Nin / 2; ++in2) {
470  for (int t_global = 0; t_global < Lt; t_global++) {
471  for (int ex = 0; ex < Nex; ++ex) {
472  if (i_dim == 0) {
473  for (int z = 0; z < Nsize[2]; z++) {
474  for (int y = 0; y < Nsize[1]; y++) {
475  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
476  int isite = m_index.site(xyz_local, y, z, t_global);
477  int i_real = 2 * in2;
478  int i_imag = 2 * in2 + 1;
479 
480  m_in[xyz_local][0] = field_out.cmp(i_real, isite, ex);
481  m_in[xyz_local][1] = field_out.cmp(i_imag, isite, ex);
482  }
483 
484 
485  fftw_execute(m_plan);
486 
487 
488  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
489  int isite = m_index.site(xyz_local, y, z, t_global);
490  int i_real = 2 * in2;
491  int i_imag = 2 * in2 + 1;
492 
493  field_out.set(i_real, isite, ex, m_out[xyz_local][0]);
494  field_out.set(i_imag, isite, ex, m_out[xyz_local][1]);
495  }
496  }
497  }
498  } else if (i_dim == 1) {
499  for (int x = 0; x < Nsize[0]; x++) {
500  for (int z = 0; z < Nsize[2]; z++) {
501  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
502  int isite = m_index.site(x, xyz_local, z, t_global);
503  int i_real = 2 * in2;
504  int i_imag = 2 * in2 + 1;
505 
506  m_in[xyz_local][0] = field_out.cmp(i_real, isite, ex);
507  m_in[xyz_local][1] = field_out.cmp(i_imag, isite, ex);
508  }
509 
510 
511  fftw_execute(m_plan);
512 
513 
514  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
515  int isite = m_index.site(x, xyz_local, z, t_global);
516  int i_real = 2 * in2;
517  int i_imag = 2 * in2 + 1;
518 
519  field_out.set(i_real, isite, ex, m_out[xyz_local][0]);
520  field_out.set(i_imag, isite, ex, m_out[xyz_local][1]);
521  }
522  }
523  }
524  } else if (i_dim == 2) {
525  for (int y = 0; y < Nsize[1]; y++) {
526  for (int x = 0; x < Nsize[0]; x++) {
527  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
528  int isite = m_index.site(x, y, xyz_local, t_global);
529  int i_real = 2 * in2;
530  int i_imag = 2 * in2 + 1;
531 
532  m_in[xyz_local][0] = field_out.cmp(i_real, isite, ex);
533  m_in[xyz_local][1] = field_out.cmp(i_imag, isite, ex);
534  }
535 
536 
537  fftw_execute(m_plan);
538 
539 
540  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
541  int isite = m_index.site(x, y, xyz_local, t_global);
542  int i_real = 2 * in2;
543  int i_imag = 2 * in2 + 1;
544 
545  field_out.set(i_real, isite, ex, m_out[xyz_local][0]);
546  field_out.set(i_imag, isite, ex, m_out[xyz_local][1]);
547  }
548  }
549  }
550  }
551  //- end of if( i_dim == 0 ){
552  }
553  }
554  }
555  //- end of outer loops
556  }
557  //- end of for (int i_dim = 0; i_dim < Ndim - 1; ++i_dim)
558 
559  //- normailzation for FFTW_BACKWARD
560  if (!m_is_forward) {
561  scal(field_out, 1.0 / Lxyz);
562  }
563 
564  //- tidy up
565  if (is_allocated) {
566  fftw_free(m_in);
567  fftw_free(m_out);
568  fftw_destroy_plan(m_plan);
569  }
570 }
571 
572 
573 //====================================================================
574 void FFT_xyz_1dim::fft(Field& field_out, const Field& field_in, const Direction dir)
575 {
576  // save state
577  bool backup_fwbw = m_is_forward;
578 
579  // find direction and set
580  if (dir == FORWARD) {
581  m_is_forward = true;
582  } else if (dir == BACKWARD) {
583  m_is_forward = false;
584  } else {
585  vout.crucial(m_vl, "%s: unknown direction %d. failed.\n", class_name.c_str(), dir);
586  exit(EXIT_FAILURE);
587  }
588 
589  // delegate to another method
590  fft(field_out, field_in);
591 
592  // restore state
593  m_is_forward = backup_fwbw;
594 }
595 
596 
597 //==========================================================
598 //==================================================END=====
599 #endif
void scal(Field &x, const double a)
scal(x, a): x = a * x
Definition: field.cpp:433
BridgeIO vout
Definition: bridgeIO.cpp:503
static int get_num_threads()
returns available number of threads.
static int NPEy()
void set(const int jin, const int site, const int jex, double v)
Definition: field.h:175
void general(const char *format,...)
Definition: bridgeIO.cpp:197
Container of Field-type object.
Definition: field.h:45
double cmp(const int jin, const int site, const int jex) const
Definition: field.h:143
Class for parameters.
Definition: parameters.h:46
int fetch_string(const string &key, string &value) const
Definition: parameters.cpp:378
int nin() const
Definition: field.h:126
static MPI_Comm & world()
retrieves current communicator.
int nex() const
Definition: field.h:128
static int NPEx()
void crucial(const char *format,...)
Definition: bridgeIO.cpp:178
static int NPEz()
Direction
Definition: bridge_defs.h:24
string get_string(const string &key) const
Definition: parameters.cpp:221
static VerboseLevel set_verbose_level(const std::string &str)
Definition: bridgeIO.cpp:131