18 #ifdef USE_FACTORY_AUTOREGISTER
20 bool init = FFT_xyz_1dim::register_factory();
24 const std::string FFT_xyz_1dim::class_name =
"FFT_xyz_1dim";
27 void FFT_xyz_1dim::set_parameters(
const Parameters& params)
35 std::string str_fft_direction;
38 err += params.
fetch_string(
"FFT_direction", str_fft_direction);
41 vout.
crucial(m_vl,
"Error at %s: input parameter not found.\n", class_name.c_str());
45 set_parameters(str_fft_direction);
50 void FFT_xyz_1dim::get_parameters(
Parameters& params)
const
52 params.
set_string(
"FFT_direction", m_is_forward ?
"Forward" :
"Backward");
59 void FFT_xyz_1dim::set_parameters(
const std::string& str_fft_direction)
63 vout.
general(m_vl,
" FFT_direction = %s\n", str_fft_direction.c_str());
68 if (str_fft_direction ==
"Forward") {
70 }
else if (str_fft_direction ==
"Backward") {
73 vout.
crucial(m_vl,
"Error at %s: unsupported FFT direction \"%s\"\n", class_name.c_str(), str_fft_direction.c_str());
80 void FFT_xyz_1dim::fft(
Field& field)
85 std::vector<int> Lsize(Ndim - 1);
92 const int Lxyz = Lsize[0] * Lsize[1] * Lsize[2];
95 std::vector<int> Nsize(Ndim - 1);
100 const int Nin = field.
nin();
101 const int Nex = field.
nex();
103 std::vector<int> NPE(Ndim);
109 const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
110 const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
111 const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
113 if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
114 vout.
crucial(m_vl,
"Error at %s: FFTW supports parallelization only in 1 direction of x,y,z.\n",
119 bool is_allocated =
false;
120 int Lsize_Nsize_prev = 0;
123 int threads_ok = fftw_init_threads();
131 for (
int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
133 if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
137 fftw_destroy_plan(m_plan);
140 is_allocated =
false;
142 Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
145 if (Lsize[i_dim] == Nsize[i_dim]) {
146 size_t fftw_size =
sizeof(fftw_complex) * Lsize[i_dim];
147 m_in = (fftw_complex *)fftw_malloc(fftw_size);
148 m_out = (fftw_complex *)fftw_malloc(fftw_size);
150 if (!m_in || !m_out) {
151 vout.
crucial(m_vl,
"Error at %s: failed to allocate memory %d [Byte].\n",
152 class_name.c_str(), (int)fftw_size);
157 ptrdiff_t Lsize_p = Lsize[i_dim];
158 ptrdiff_t fftw_size_p;
161 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
162 Communicator_impl::world(),
163 FFTW_FORWARD, FFTW_ESTIMATE,
164 &m_Nsize_in_p, &m_start_in_p,
165 &m_Nsize_out_p, &m_start_out_p);
167 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
168 Communicator_impl::world(),
169 FFTW_BACKWARD, FFTW_ESTIMATE,
170 &m_Nsize_in_p, &m_start_in_p,
171 &m_Nsize_out_p, &m_start_out_p);
174 m_in = fftw_alloc_complex(fftw_size_p);
175 m_out = fftw_alloc_complex(fftw_size_p);
177 if (!m_in || !m_out) {
178 vout.
crucial(m_vl,
"Error at %s: failed to allocate memory %d [Byte].\n",
179 class_name.c_str(), (
int)fftw_size_p);
192 fftw_plan_with_nthreads(Nthread);
195 if (Lsize[i_dim] == Nsize[i_dim]) {
197 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
199 FFTW_FORWARD, FFTW_ESTIMATE);
201 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
203 FFTW_BACKWARD, FFTW_ESTIMATE);
207 ptrdiff_t Lsize_p = Lsize[i_dim];
210 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
212 Communicator_impl::world(),
213 FFTW_FORWARD, FFTW_ESTIMATE);
215 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
217 Communicator_impl::world(),
218 FFTW_BACKWARD, FFTW_ESTIMATE);
226 for (
int in2 = 0; in2 < Nin / 2; ++in2) {
227 for (
int t_global = 0; t_global < Lt; t_global++) {
228 for (
int ex = 0; ex < Nex; ++ex) {
230 for (
int z = 0; z < Nsize[2]; z++) {
231 for (
int y = 0; y < Nsize[1]; y++) {
232 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
233 int isite = m_index.site(xyz_local, y, z, t_global);
234 int i_real = 2 * in2;
235 int i_imag = 2 * in2 + 1;
237 m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
238 m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
242 fftw_execute(m_plan);
245 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
246 int isite = m_index.site(xyz_local, y, z, t_global);
247 int i_real = 2 * in2;
248 int i_imag = 2 * in2 + 1;
250 field.
set(i_real, isite, ex, m_out[xyz_local][0]);
251 field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
255 }
else if (i_dim == 1) {
256 for (
int x = 0; x < Nsize[0]; x++) {
257 for (
int z = 0; z < Nsize[2]; z++) {
258 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
259 int isite = m_index.site(x, xyz_local, z, t_global);
260 int i_real = 2 * in2;
261 int i_imag = 2 * in2 + 1;
263 m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
264 m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
268 fftw_execute(m_plan);
271 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
272 int isite = m_index.site(x, xyz_local, z, t_global);
273 int i_real = 2 * in2;
274 int i_imag = 2 * in2 + 1;
276 field.
set(i_real, isite, ex, m_out[xyz_local][0]);
277 field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
281 }
else if (i_dim == 2) {
282 for (
int y = 0; y < Nsize[1]; y++) {
283 for (
int x = 0; x < Nsize[0]; x++) {
284 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
285 int isite = m_index.site(x, y, xyz_local, t_global);
286 int i_real = 2 * in2;
287 int i_imag = 2 * in2 + 1;
289 m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
290 m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
294 fftw_execute(m_plan);
297 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
298 int isite = m_index.site(x, y, xyz_local, t_global);
299 int i_real = 2 * in2;
300 int i_imag = 2 * in2 + 1;
302 field.
set(i_real, isite, ex, m_out[xyz_local][0]);
303 field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
318 scal(field, 1.0 / Lxyz);
325 fftw_destroy_plan(m_plan);
331 void FFT_xyz_1dim::fft(
Field& field_out,
const Field& field_in)
336 std::vector<int> Lsize(Ndim - 1);
343 const int Lxyz = Lsize[0] * Lsize[1] * Lsize[2];
346 std::vector<int> Nsize(Ndim - 1);
351 const int Nin = field_in.
nin();
352 const int Nex = field_in.
nex();
354 std::vector<int> NPE(Ndim);
360 const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
361 const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
362 const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
364 if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
365 vout.
crucial(m_vl,
"Error at %s: FFTW supports parallelization only in 1 direction.\n",
370 bool is_allocated =
false;
371 int Lsize_Nsize_prev = 0;
373 field_out = field_in;
376 int threads_ok = fftw_init_threads();
384 for (
int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
386 if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
390 fftw_destroy_plan(m_plan);
393 is_allocated =
false;
395 Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
398 if (Lsize[i_dim] == Nsize[i_dim]) {
399 size_t fftw_size =
sizeof(fftw_complex) * Lsize[i_dim];
400 m_in = (fftw_complex *)fftw_malloc(fftw_size);
401 m_out = (fftw_complex *)fftw_malloc(fftw_size);
403 if (!m_in || !m_out) {
404 vout.
crucial(m_vl,
"Error at %s: failed to allocate memory %d [Byte].\n",
405 class_name.c_str(), (int)fftw_size);
410 ptrdiff_t Lsize_p = Lsize[i_dim];
411 ptrdiff_t fftw_size_p;
414 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
415 Communicator_impl::world(),
416 FFTW_FORWARD, FFTW_ESTIMATE,
417 &m_Nsize_in_p, &m_start_in_p,
418 &m_Nsize_out_p, &m_start_out_p);
420 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
421 Communicator_impl::world(),
422 FFTW_BACKWARD, FFTW_ESTIMATE,
423 &m_Nsize_in_p, &m_start_in_p,
424 &m_Nsize_out_p, &m_start_out_p);
427 m_in = fftw_alloc_complex(fftw_size_p);
428 m_out = fftw_alloc_complex(fftw_size_p);
430 if (!m_in || !m_out) {
431 vout.
crucial(m_vl,
"Error at %s: failed to allocate memory %d [Byte].\n",
432 class_name.c_str(), (
int)fftw_size_p);
445 fftw_plan_with_nthreads(Nthread);
448 if (Lsize[i_dim] == Nsize[i_dim]) {
450 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
452 FFTW_FORWARD, FFTW_ESTIMATE);
454 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
456 FFTW_BACKWARD, FFTW_ESTIMATE);
460 ptrdiff_t Lsize_p = Lsize[i_dim];
463 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
465 Communicator_impl::world(),
466 FFTW_FORWARD, FFTW_ESTIMATE);
468 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
470 Communicator_impl::world(),
471 FFTW_BACKWARD, FFTW_ESTIMATE);
479 for (
int in2 = 0; in2 < Nin / 2; ++in2) {
480 for (
int t_global = 0; t_global < Lt; t_global++) {
481 for (
int ex = 0; ex < Nex; ++ex) {
483 for (
int z = 0; z < Nsize[2]; z++) {
484 for (
int y = 0; y < Nsize[1]; y++) {
485 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
486 int isite = m_index.site(xyz_local, y, z, t_global);
487 int i_real = 2 * in2;
488 int i_imag = 2 * in2 + 1;
490 m_in[xyz_local][0] = field_out.
cmp(i_real, isite, ex);
491 m_in[xyz_local][1] = field_out.
cmp(i_imag, isite, ex);
495 fftw_execute(m_plan);
498 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
499 int isite = m_index.site(xyz_local, y, z, t_global);
500 int i_real = 2 * in2;
501 int i_imag = 2 * in2 + 1;
503 field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
504 field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
508 }
else if (i_dim == 1) {
509 for (
int x = 0; x < Nsize[0]; x++) {
510 for (
int z = 0; z < Nsize[2]; z++) {
511 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
512 int isite = m_index.site(x, xyz_local, z, t_global);
513 int i_real = 2 * in2;
514 int i_imag = 2 * in2 + 1;
516 m_in[xyz_local][0] = field_out.
cmp(i_real, isite, ex);
517 m_in[xyz_local][1] = field_out.
cmp(i_imag, isite, ex);
521 fftw_execute(m_plan);
524 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
525 int isite = m_index.site(x, xyz_local, z, t_global);
526 int i_real = 2 * in2;
527 int i_imag = 2 * in2 + 1;
529 field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
530 field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
534 }
else if (i_dim == 2) {
535 for (
int y = 0; y < Nsize[1]; y++) {
536 for (
int x = 0; x < Nsize[0]; x++) {
537 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
538 int isite = m_index.site(x, y, xyz_local, t_global);
539 int i_real = 2 * in2;
540 int i_imag = 2 * in2 + 1;
542 m_in[xyz_local][0] = field_out.
cmp(i_real, isite, ex);
543 m_in[xyz_local][1] = field_out.
cmp(i_imag, isite, ex);
547 fftw_execute(m_plan);
550 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
551 int isite = m_index.site(x, y, xyz_local, t_global);
552 int i_real = 2 * in2;
553 int i_imag = 2 * in2 + 1;
555 field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
556 field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
571 scal(field_out, 1.0 / Lxyz);
578 fftw_destroy_plan(m_plan);
587 bool backup_fwbw = m_is_forward;
590 if (dir == FORWARD) {
592 }
else if (dir == BACKWARD) {
593 m_is_forward =
false;
595 vout.
crucial(m_vl,
"%s: unknown direction %d. failed.\n", class_name.c_str(), dir);
600 fft(field_out, field_in);
603 m_is_forward = backup_fwbw;