18 const std::string FFT_xyz_1dim::class_name =
"FFT_xyz_1dim";
21 void FFT_xyz_1dim::FFT(
Field& field,
const bool is_forward)
26 std::vector<int> Lsize(Ndim - 1);
35 std::vector<int> Nsize(Ndim - 1);
40 const int Nin = field.
nin();
41 const int Nex = field.
nex();
43 std::vector<int> NPE(Ndim);
49 const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
50 const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
51 const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
53 if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
54 vout.
crucial(m_vl,
"%s: FFTW supports parallelization only in 1 direction of x,y,z.\n",
59 bool is_allocated =
false;
60 int Lsize_Nsize_prev = 0;
64 int threads_ok = fftw_init_threads();
72 for (
int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
74 if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
78 fftw_destroy_plan(m_plan);
83 Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
86 if (Lsize[i_dim] == Nsize[i_dim]) {
87 size_t fftw_size =
sizeof(fftw_complex) * Lsize[i_dim];
88 m_in = (fftw_complex *)fftw_malloc(fftw_size);
89 m_out = (fftw_complex *)fftw_malloc(fftw_size);
91 if (!m_in || !m_out) {
92 vout.
crucial(m_vl,
"%s: failed to allocate memory %d [Byte].\n",
93 class_name.c_str(), (int)fftw_size);
98 ptrdiff_t Lsize_p = Lsize[i_dim];
99 ptrdiff_t fftw_size_p;
102 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
104 FFTW_FORWARD, FFTW_ESTIMATE,
105 &m_Nsize_in_p, &m_start_in_p,
106 &m_Nsize_out_p, &m_start_out_p);
108 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
110 FFTW_BACKWARD, FFTW_ESTIMATE,
111 &m_Nsize_in_p, &m_start_in_p,
112 &m_Nsize_out_p, &m_start_out_p);
115 m_in = fftw_alloc_complex(fftw_size_p);
116 m_out = fftw_alloc_complex(fftw_size_p);
118 if (!m_in || !m_out) {
119 vout.
crucial(m_vl,
"%s: failed to allocate memory %d [Byte].\n",
120 class_name.c_str(), (int)fftw_size_p);
133 fftw_plan_with_nthreads(Nthread);
136 if (Lsize[i_dim] == Nsize[i_dim]) {
138 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
140 FFTW_FORWARD, FFTW_ESTIMATE);
142 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
144 FFTW_BACKWARD, FFTW_ESTIMATE);
148 ptrdiff_t Lsize_p = Lsize[i_dim];
151 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
154 FFTW_FORWARD, FFTW_ESTIMATE);
156 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
159 FFTW_BACKWARD, FFTW_ESTIMATE);
167 for (
int in2 = 0; in2 < Nin / 2; ++in2) {
168 for (
int t_global = 0; t_global < Lt; t_global++) {
169 for (
int ex = 0; ex < Nex; ++ex) {
171 for (
int z = 0; z < Nsize[(i_dim + 2) % (Ndim - 1)]; z++) {
172 for (
int y = 0; y < Nsize[(i_dim + 1) % (Ndim - 1)]; y++) {
173 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
174 int isite = m_index.site(xyz_local, y, z, t_global);
175 int i_real = 2 * in2;
176 int i_imag = 2 * in2 + 1;
178 m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
179 m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
183 fftw_execute(m_plan);
186 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
187 int isite = m_index.site(xyz_local, y, z, t_global);
188 int i_real = 2 * in2;
189 int i_imag = 2 * in2 + 1;
191 field.
set(i_real, isite, ex, m_out[xyz_local][0]);
192 field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
196 }
else if (i_dim == 1) {
197 for (
int x = 0; x < Nsize[(i_dim + 2) % (Ndim - 1)]; x++) {
198 for (
int z = 0; z < Nsize[(i_dim + 1) % (Ndim - 1)]; z++) {
199 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
200 int isite = m_index.site(x, xyz_local, z, t_global);
201 int i_real = 2 * in2;
202 int i_imag = 2 * in2 + 1;
204 m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
205 m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
209 fftw_execute(m_plan);
212 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
213 int isite = m_index.site(x, xyz_local, z, t_global);
214 int i_real = 2 * in2;
215 int i_imag = 2 * in2 + 1;
217 field.
set(i_real, isite, ex, m_out[xyz_local][0]);
218 field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
222 }
else if (i_dim == 2) {
223 for (
int y = 0; y < Nsize[(i_dim + 2) % (Ndim - 1)]; y++) {
224 for (
int x = 0; x < Nsize[(i_dim + 1) % (Ndim - 1)]; x++) {
225 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
226 int isite = m_index.site(x, y, xyz_local, t_global);
227 int i_real = 2 * in2;
228 int i_imag = 2 * in2 + 1;
230 m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
231 m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
235 fftw_execute(m_plan);
238 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
239 int isite = m_index.site(x, y, xyz_local, t_global);
240 int i_real = 2 * in2;
241 int i_imag = 2 * in2 + 1;
243 field.
set(i_real, isite, ex, m_out[xyz_local][0]);
244 field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
262 fftw_destroy_plan(m_plan);
268 void FFT_xyz_1dim::FFT(
Field& field_out,
const Field& field_in,
const bool is_forward)
273 std::vector<int> Lsize(Ndim - 1);
282 std::vector<int> Nsize(Ndim - 1);
287 const int Nin = field_in.
nin();
288 const int Nex = field_in.
nex();
290 std::vector<int> NPE(Ndim);
296 const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
297 const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
298 const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
300 if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
301 vout.
crucial(m_vl,
"%s: FFTW supports parallelization only in 1 direction.\n",
306 bool is_allocated =
false;
307 int Lsize_Nsize_prev = 0;
311 int threads_ok = fftw_init_threads();
319 for (
int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
321 if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
325 fftw_destroy_plan(m_plan);
328 is_allocated =
false;
330 Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
333 if (Lsize[i_dim] == Nsize[i_dim]) {
334 size_t fftw_size =
sizeof(fftw_complex) * Lsize[i_dim];
335 m_in = (fftw_complex *)fftw_malloc(fftw_size);
336 m_out = (fftw_complex *)fftw_malloc(fftw_size);
338 if (!m_in || !m_out) {
339 vout.
crucial(m_vl,
"%s: failed to allocate memory %d [Byte].\n",
340 class_name.c_str(), (int)fftw_size);
345 ptrdiff_t Lsize_p = Lsize[i_dim];
346 ptrdiff_t fftw_size_p;
349 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
351 FFTW_FORWARD, FFTW_ESTIMATE,
352 &m_Nsize_in_p, &m_start_in_p,
353 &m_Nsize_out_p, &m_start_out_p);
355 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
357 FFTW_BACKWARD, FFTW_ESTIMATE,
358 &m_Nsize_in_p, &m_start_in_p,
359 &m_Nsize_out_p, &m_start_out_p);
362 m_in = fftw_alloc_complex(fftw_size_p);
363 m_out = fftw_alloc_complex(fftw_size_p);
365 if (!m_in || !m_out) {
366 vout.
crucial(m_vl,
"%s: failed to allocate memory %d [Byte].\n",
367 class_name.c_str(), (int)fftw_size_p);
380 fftw_plan_with_nthreads(Nthread);
383 if (Lsize[i_dim] == Nsize[i_dim]) {
385 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
387 FFTW_FORWARD, FFTW_ESTIMATE);
389 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
391 FFTW_BACKWARD, FFTW_ESTIMATE);
395 ptrdiff_t Lsize_p = Lsize[i_dim];
398 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
401 FFTW_FORWARD, FFTW_ESTIMATE);
403 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
406 FFTW_BACKWARD, FFTW_ESTIMATE);
414 for (
int in2 = 0; in2 < Nin / 2; ++in2) {
415 for (
int t_global = 0; t_global < Lt; t_global++) {
416 for (
int ex = 0; ex < Nex; ++ex) {
418 for (
int z = 0; z < Nsize[(i_dim + 2) % (Ndim - 1)]; z++) {
419 for (
int y = 0; y < Nsize[(i_dim + 1) % (Ndim - 1)]; y++) {
420 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
421 int isite = m_index.site(xyz_local, y, z, t_global);
422 int i_real = 2 * in2;
423 int i_imag = 2 * in2 + 1;
425 m_in[xyz_local][0] = field_in.
cmp(i_real, isite, ex);
426 m_in[xyz_local][1] = field_in.
cmp(i_imag, isite, ex);
430 fftw_execute(m_plan);
433 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
434 int isite = m_index.site(xyz_local, y, z, t_global);
435 int i_real = 2 * in2;
436 int i_imag = 2 * in2 + 1;
438 field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
439 field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
443 }
else if (i_dim == 1) {
444 for (
int x = 0; x < Nsize[(i_dim + 2) % (Ndim - 1)]; x++) {
445 for (
int z = 0; z < Nsize[(i_dim + 1) % (Ndim - 1)]; z++) {
446 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
447 int isite = m_index.site(x, xyz_local, z, t_global);
448 int i_real = 2 * in2;
449 int i_imag = 2 * in2 + 1;
451 m_in[xyz_local][0] = field_in.
cmp(i_real, isite, ex);
452 m_in[xyz_local][1] = field_in.
cmp(i_imag, isite, ex);
456 fftw_execute(m_plan);
459 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
460 int isite = m_index.site(x, xyz_local, z, t_global);
461 int i_real = 2 * in2;
462 int i_imag = 2 * in2 + 1;
464 field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
465 field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
469 }
else if (i_dim == 2) {
470 for (
int y = 0; y < Nsize[(i_dim + 2) % (Ndim - 1)]; y++) {
471 for (
int x = 0; x < Nsize[(i_dim + 1) % (Ndim - 1)]; x++) {
472 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
473 int isite = m_index.site(x, y, xyz_local, t_global);
474 int i_real = 2 * in2;
475 int i_imag = 2 * in2 + 1;
477 m_in[xyz_local][0] = field_in.
cmp(i_real, isite, ex);
478 m_in[xyz_local][1] = field_in.
cmp(i_imag, isite, ex);
482 fftw_execute(m_plan);
485 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
486 int isite = m_index.site(x, y, xyz_local, t_global);
487 int i_real = 2 * in2;
488 int i_imag = 2 * in2 + 1;
490 field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
491 field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
509 fftw_destroy_plan(m_plan);
static int get_num_threads()
returns available number of threads.
void set(const int jin, const int site, const int jex, double v)
Container of Field-type object.
double cmp(const int jin, const int site, const int jex) const
void crucial(const char *format,...)