Bridge++  Ver. 1.3.x
fft_xyz_1dim.cpp
Go to the documentation of this file.
1 
14 #ifdef USE_FFTWLIB
15 
16 #include "fft_xyz_1dim.h"
17 
18 const std::string FFT_xyz_1dim::class_name = "FFT_xyz_1dim";
19 
20 //====================================================================
21 void FFT_xyz_1dim::FFT(Field& field, const bool is_forward)
22 {
23  const int Ndim = CommonParameters::Ndim();
24 
25  //- global lattice size
26  std::vector<int> Lsize(Ndim - 1);
27 
28  Lsize[0] = CommonParameters::Lx();
29  Lsize[1] = CommonParameters::Ly();
30  Lsize[2] = CommonParameters::Lz();
31 
32  const int Lt = CommonParameters::Lt();
33 
34  //- local lattice size
35  std::vector<int> Nsize(Ndim - 1);
36  Nsize[0] = CommonParameters::Nx();
37  Nsize[1] = CommonParameters::Ny();
38  Nsize[2] = CommonParameters::Nz();
39 
40  const int Nin = field.nin();
41  const int Nex = field.nex();
42 
43  std::vector<int> NPE(Ndim);
44  NPE[0] = CommonParameters::NPEx();
45  NPE[1] = CommonParameters::NPEy();
46  NPE[2] = CommonParameters::NPEz();
47  NPE[3] = CommonParameters::NPEt();
48 
49  const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
50  const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
51  const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
52 
53  if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
54  vout.crucial(m_vl, "%s: FFTW supports parallelization only in 1 direction of x,y,z.\n",
55  class_name.c_str());
56  exit(EXIT_FAILURE);
57  }
58 
59  bool is_allocated = false;
60  int Lsize_Nsize_prev = 0;
61 
62 
63 #ifdef USE_OPENMP
64  int threads_ok = fftw_init_threads();
65 #endif
66 #ifdef USE_MPI
67  fftw_mpi_init();
68 #endif
69 
70 
71  //- xyz loop
72  for (int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
73  //- allocate m_in,out = m_in,out[Lsize]
74  if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
75  if (is_allocated) {
76  fftw_free(m_in);
77  fftw_free(m_out);
78  fftw_destroy_plan(m_plan);
79  }
80 
81  is_allocated = false;
82  }
83  Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
84 
85  if (!is_allocated) {
86  if (Lsize[i_dim] == Nsize[i_dim]) {
87  size_t fftw_size = sizeof(fftw_complex) * Lsize[i_dim];
88  m_in = (fftw_complex *)fftw_malloc(fftw_size);
89  m_out = (fftw_complex *)fftw_malloc(fftw_size);
90 
91  if (!m_in || !m_out) {
92  vout.crucial(m_vl, "%s: failed to allocate memory %d [Byte].\n",
93  class_name.c_str(), (int)fftw_size);
94  exit(EXIT_FAILURE);
95  }
96  } else {
97 #ifdef USE_MPI
98  ptrdiff_t Lsize_p = Lsize[i_dim];
99  ptrdiff_t fftw_size_p;
100 
101  if (is_forward) {
102  fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
104  FFTW_FORWARD, FFTW_ESTIMATE,
105  &m_Nsize_in_p, &m_start_in_p,
106  &m_Nsize_out_p, &m_start_out_p);
107  } else {
108  fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
110  FFTW_BACKWARD, FFTW_ESTIMATE,
111  &m_Nsize_in_p, &m_start_in_p,
112  &m_Nsize_out_p, &m_start_out_p);
113  }
114 
115  m_in = fftw_alloc_complex(fftw_size_p);
116  m_out = fftw_alloc_complex(fftw_size_p);
117 
118  if (!m_in || !m_out) {
119  vout.crucial(m_vl, "%s: failed to allocate memory %d [Byte].\n",
120  class_name.c_str(), (int)fftw_size_p);
121  exit(EXIT_FAILURE);
122  }
123 #endif
124  }
125 
126  is_allocated = true;
127  }
128 
129 
130  //- setup FFTW plan
131 #ifdef USE_OPENMP
133  fftw_plan_with_nthreads(Nthread);
134 #endif
135 
136  if (Lsize[i_dim] == Nsize[i_dim]) {
137  if (is_forward) {
138  m_plan = fftw_plan_dft_1d(Lsize[i_dim],
139  m_in, m_out,
140  FFTW_FORWARD, FFTW_ESTIMATE);
141  } else {
142  m_plan = fftw_plan_dft_1d(Lsize[i_dim],
143  m_in, m_out,
144  FFTW_BACKWARD, FFTW_ESTIMATE);
145  }
146  } else {
147 #ifdef USE_MPI
148  ptrdiff_t Lsize_p = Lsize[i_dim];
149 
150  if (is_forward) {
151  m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
152  m_in, m_out,
154  FFTW_FORWARD, FFTW_ESTIMATE);
155  } else {
156  m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
157  m_in, m_out,
159  FFTW_BACKWARD, FFTW_ESTIMATE);
160  }
161 #endif
162  }
163 
164 
165  // #### Execution main part ####
166  //- Nin is devided by 2, because of complex(i.e. real and imag)
167  for (int in2 = 0; in2 < Nin / 2; ++in2) {
168  for (int t_global = 0; t_global < Lt; t_global++) {
169  for (int ex = 0; ex < Nex; ++ex) {
170  if (i_dim == 0) {
171  for (int z = 0; z < Nsize[(i_dim + 2) % (Ndim - 1)]; z++) {
172  for (int y = 0; y < Nsize[(i_dim + 1) % (Ndim - 1)]; y++) {
173  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
174  int isite = m_index.site(xyz_local, y, z, t_global);
175  int i_real = 2 * in2;
176  int i_imag = 2 * in2 + 1;
177 
178  m_in[xyz_local][0] = field.cmp(i_real, isite, ex);
179  m_in[xyz_local][1] = field.cmp(i_imag, isite, ex);
180  }
181 
182 
183  fftw_execute(m_plan);
184 
185 
186  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
187  int isite = m_index.site(xyz_local, y, z, t_global);
188  int i_real = 2 * in2;
189  int i_imag = 2 * in2 + 1;
190 
191  field.set(i_real, isite, ex, m_out[xyz_local][0]);
192  field.set(i_imag, isite, ex, m_out[xyz_local][1]);
193  }
194  }
195  }
196  } else if (i_dim == 1) {
197  for (int x = 0; x < Nsize[(i_dim + 2) % (Ndim - 1)]; x++) {
198  for (int z = 0; z < Nsize[(i_dim + 1) % (Ndim - 1)]; z++) {
199  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
200  int isite = m_index.site(x, xyz_local, z, t_global);
201  int i_real = 2 * in2;
202  int i_imag = 2 * in2 + 1;
203 
204  m_in[xyz_local][0] = field.cmp(i_real, isite, ex);
205  m_in[xyz_local][1] = field.cmp(i_imag, isite, ex);
206  }
207 
208 
209  fftw_execute(m_plan);
210 
211 
212  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
213  int isite = m_index.site(x, xyz_local, z, t_global);
214  int i_real = 2 * in2;
215  int i_imag = 2 * in2 + 1;
216 
217  field.set(i_real, isite, ex, m_out[xyz_local][0]);
218  field.set(i_imag, isite, ex, m_out[xyz_local][1]);
219  }
220  }
221  }
222  } else if (i_dim == 2) {
223  for (int y = 0; y < Nsize[(i_dim + 2) % (Ndim - 1)]; y++) {
224  for (int x = 0; x < Nsize[(i_dim + 1) % (Ndim - 1)]; x++) {
225  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
226  int isite = m_index.site(x, y, xyz_local, t_global);
227  int i_real = 2 * in2;
228  int i_imag = 2 * in2 + 1;
229 
230  m_in[xyz_local][0] = field.cmp(i_real, isite, ex);
231  m_in[xyz_local][1] = field.cmp(i_imag, isite, ex);
232  }
233 
234 
235  fftw_execute(m_plan);
236 
237 
238  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
239  int isite = m_index.site(x, y, xyz_local, t_global);
240  int i_real = 2 * in2;
241  int i_imag = 2 * in2 + 1;
242 
243  field.set(i_real, isite, ex, m_out[xyz_local][0]);
244  field.set(i_imag, isite, ex, m_out[xyz_local][1]);
245  }
246  }
247  }
248  }
249  //- end of if( i_dim == 0 ){
250  }
251  }
252  }
253  //- end of outer loops
254  }
255  //- end of for (int i_dim = 0; i_dim < Ndim - 1; ++i_dim)
256 
257 
258  //- tidy up
259  if (is_allocated) {
260  fftw_free(m_in);
261  fftw_free(m_out);
262  fftw_destroy_plan(m_plan);
263  }
264 }
265 
266 
267 //====================================================================
268 void FFT_xyz_1dim::FFT(Field& field_out, const Field& field_in, const bool is_forward)
269 {
270  const int Ndim = CommonParameters::Ndim();
271 
272  //- global lattice size
273  std::vector<int> Lsize(Ndim - 1);
274 
275  Lsize[0] = CommonParameters::Lx();
276  Lsize[1] = CommonParameters::Ly();
277  Lsize[2] = CommonParameters::Lz();
278 
279  const int Lt = CommonParameters::Lt();
280 
281  //- local lattice size
282  std::vector<int> Nsize(Ndim - 1);
283  Nsize[0] = CommonParameters::Nx();
284  Nsize[1] = CommonParameters::Ny();
285  Nsize[2] = CommonParameters::Nz();
286 
287  const int Nin = field_in.nin();
288  const int Nex = field_in.nex();
289 
290  std::vector<int> NPE(Ndim);
291  NPE[0] = CommonParameters::NPEx();
292  NPE[1] = CommonParameters::NPEy();
293  NPE[2] = CommonParameters::NPEz();
294  NPE[3] = CommonParameters::NPEt();
295 
296  const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
297  const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
298  const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
299 
300  if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
301  vout.crucial(m_vl, "%s: FFTW supports parallelization only in 1 direction.\n",
302  class_name.c_str());
303  exit(EXIT_FAILURE);
304  }
305 
306  bool is_allocated = false;
307  int Lsize_Nsize_prev = 0;
308 
309 
310 #ifdef USE_OPENMP
311  int threads_ok = fftw_init_threads();
312 #endif
313 #ifdef USE_MPI
314  fftw_mpi_init();
315 #endif
316 
317 
318  //- xyz loop
319  for (int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
320  //- allocate m_in,out = m_in,out[Lsize]
321  if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
322  if (is_allocated) {
323  fftw_free(m_in);
324  fftw_free(m_out);
325  fftw_destroy_plan(m_plan);
326  }
327 
328  is_allocated = false;
329  }
330  Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
331 
332  if (!is_allocated) {
333  if (Lsize[i_dim] == Nsize[i_dim]) {
334  size_t fftw_size = sizeof(fftw_complex) * Lsize[i_dim];
335  m_in = (fftw_complex *)fftw_malloc(fftw_size);
336  m_out = (fftw_complex *)fftw_malloc(fftw_size);
337 
338  if (!m_in || !m_out) {
339  vout.crucial(m_vl, "%s: failed to allocate memory %d [Byte].\n",
340  class_name.c_str(), (int)fftw_size);
341  exit(EXIT_FAILURE);
342  }
343  } else {
344 #ifdef USE_MPI
345  ptrdiff_t Lsize_p = Lsize[i_dim];
346  ptrdiff_t fftw_size_p;
347 
348  if (is_forward) {
349  fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
351  FFTW_FORWARD, FFTW_ESTIMATE,
352  &m_Nsize_in_p, &m_start_in_p,
353  &m_Nsize_out_p, &m_start_out_p);
354  } else {
355  fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
357  FFTW_BACKWARD, FFTW_ESTIMATE,
358  &m_Nsize_in_p, &m_start_in_p,
359  &m_Nsize_out_p, &m_start_out_p);
360  }
361 
362  m_in = fftw_alloc_complex(fftw_size_p);
363  m_out = fftw_alloc_complex(fftw_size_p);
364 
365  if (!m_in || !m_out) {
366  vout.crucial(m_vl, "%s: failed to allocate memory %d [Byte].\n",
367  class_name.c_str(), (int)fftw_size_p);
368  exit(EXIT_FAILURE);
369  }
370 #endif
371  }
372 
373  is_allocated = true;
374  }
375 
376 
377  //- setup FFTW plan
378 #ifdef USE_OPENMP
380  fftw_plan_with_nthreads(Nthread);
381 #endif
382 
383  if (Lsize[i_dim] == Nsize[i_dim]) {
384  if (is_forward) {
385  m_plan = fftw_plan_dft_1d(Lsize[i_dim],
386  m_in, m_out,
387  FFTW_FORWARD, FFTW_ESTIMATE);
388  } else {
389  m_plan = fftw_plan_dft_1d(Lsize[i_dim],
390  m_in, m_out,
391  FFTW_BACKWARD, FFTW_ESTIMATE);
392  }
393  } else {
394 #ifdef USE_MPI
395  ptrdiff_t Lsize_p = Lsize[i_dim];
396 
397  if (is_forward) {
398  m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
399  m_in, m_out,
401  FFTW_FORWARD, FFTW_ESTIMATE);
402  } else {
403  m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
404  m_in, m_out,
406  FFTW_BACKWARD, FFTW_ESTIMATE);
407  }
408 #endif
409  }
410 
411 
412  // #### Execution main part ####
413  //- Nin is devided by 2, because of complex(i.e. real and imag)
414  for (int in2 = 0; in2 < Nin / 2; ++in2) {
415  for (int t_global = 0; t_global < Lt; t_global++) {
416  for (int ex = 0; ex < Nex; ++ex) {
417  if (i_dim == 0) {
418  for (int z = 0; z < Nsize[(i_dim + 2) % (Ndim - 1)]; z++) {
419  for (int y = 0; y < Nsize[(i_dim + 1) % (Ndim - 1)]; y++) {
420  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
421  int isite = m_index.site(xyz_local, y, z, t_global);
422  int i_real = 2 * in2;
423  int i_imag = 2 * in2 + 1;
424 
425  m_in[xyz_local][0] = field_in.cmp(i_real, isite, ex);
426  m_in[xyz_local][1] = field_in.cmp(i_imag, isite, ex);
427  }
428 
429 
430  fftw_execute(m_plan);
431 
432 
433  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
434  int isite = m_index.site(xyz_local, y, z, t_global);
435  int i_real = 2 * in2;
436  int i_imag = 2 * in2 + 1;
437 
438  field_out.set(i_real, isite, ex, m_out[xyz_local][0]);
439  field_out.set(i_imag, isite, ex, m_out[xyz_local][1]);
440  }
441  }
442  }
443  } else if (i_dim == 1) {
444  for (int x = 0; x < Nsize[(i_dim + 2) % (Ndim - 1)]; x++) {
445  for (int z = 0; z < Nsize[(i_dim + 1) % (Ndim - 1)]; z++) {
446  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
447  int isite = m_index.site(x, xyz_local, z, t_global);
448  int i_real = 2 * in2;
449  int i_imag = 2 * in2 + 1;
450 
451  m_in[xyz_local][0] = field_in.cmp(i_real, isite, ex);
452  m_in[xyz_local][1] = field_in.cmp(i_imag, isite, ex);
453  }
454 
455 
456  fftw_execute(m_plan);
457 
458 
459  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
460  int isite = m_index.site(x, xyz_local, z, t_global);
461  int i_real = 2 * in2;
462  int i_imag = 2 * in2 + 1;
463 
464  field_out.set(i_real, isite, ex, m_out[xyz_local][0]);
465  field_out.set(i_imag, isite, ex, m_out[xyz_local][1]);
466  }
467  }
468  }
469  } else if (i_dim == 2) {
470  for (int y = 0; y < Nsize[(i_dim + 2) % (Ndim - 1)]; y++) {
471  for (int x = 0; x < Nsize[(i_dim + 1) % (Ndim - 1)]; x++) {
472  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
473  int isite = m_index.site(x, y, xyz_local, t_global);
474  int i_real = 2 * in2;
475  int i_imag = 2 * in2 + 1;
476 
477  m_in[xyz_local][0] = field_in.cmp(i_real, isite, ex);
478  m_in[xyz_local][1] = field_in.cmp(i_imag, isite, ex);
479  }
480 
481 
482  fftw_execute(m_plan);
483 
484 
485  for (int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
486  int isite = m_index.site(x, y, xyz_local, t_global);
487  int i_real = 2 * in2;
488  int i_imag = 2 * in2 + 1;
489 
490  field_out.set(i_real, isite, ex, m_out[xyz_local][0]);
491  field_out.set(i_imag, isite, ex, m_out[xyz_local][1]);
492  }
493  }
494  }
495  }
496  //- end of if( i_dim == 0 ){
497  }
498  }
499  }
500  //- end of outer loops
501  }
502  //- end of for (int i_dim = 0; i_dim < Ndim - 1; ++i_dim)
503 
504 
505  //- tidy up
506  if (is_allocated) {
507  fftw_free(m_in);
508  fftw_free(m_out);
509  fftw_destroy_plan(m_plan);
510  }
511 }
512 
513 
514 //==========================================================
515 //==================================================END=====
516 #endif
BridgeIO vout
Definition: bridgeIO.cpp:278
static int get_num_threads()
returns available number of threads.
static int NPEy()
static int NPEt()
void set(const int jin, const int site, const int jex, double v)
Definition: field.h:155
Container of Field-type object.
Definition: field.h:39
double cmp(const int jin, const int site, const int jex) const
Definition: field.h:123
int nin() const
Definition: field.h:115
int nex() const
Definition: field.h:117
static int NPEx()
void crucial(const char *format,...)
Definition: bridgeIO.cpp:48
static int NPEz()