18 #ifdef USE_FACTORY_AUTOREGISTER
20 bool init = FFT_xyz_1dim::register_factory();
24 const std::string FFT_xyz_1dim::class_name =
"FFT_xyz_1dim";
27 void FFT_xyz_1dim::set_parameters(
const Parameters& params)
29 const std::string str_vlevel = params.
get_string(
"verbose_level");
34 std::string str_fft_direction;
37 err += params.
fetch_string(
"FFT_direction", str_fft_direction);
40 vout.
crucial(m_vl,
"Error at %s: input parameter not found.\n", class_name.c_str());
44 set_parameters(str_fft_direction);
49 void FFT_xyz_1dim::set_parameters(
const std::string& str_fft_direction)
53 vout.
general(m_vl,
" FFT_direction = %s\n", str_fft_direction.c_str());
58 if (str_fft_direction ==
"Forward") {
60 }
else if (str_fft_direction ==
"Backward") {
63 vout.
crucial(m_vl,
"Error at %s: unsupported FFT direction \"%s\"\n", class_name.c_str(), str_fft_direction.c_str());
70 void FFT_xyz_1dim::fft(
Field& field)
75 std::vector<int> Lsize(Ndim - 1);
82 const int Lxyz = Lsize[0] * Lsize[1] * Lsize[2];
85 std::vector<int> Nsize(Ndim - 1);
90 const int Nin = field.
nin();
91 const int Nex = field.
nex();
93 std::vector<int> NPE(Ndim);
99 const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
100 const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
101 const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
103 if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
104 vout.
crucial(m_vl,
"Error at %s: FFTW supports parallelization only in 1 direction of x,y,z.\n",
109 bool is_allocated =
false;
110 int Lsize_Nsize_prev = 0;
113 int threads_ok = fftw_init_threads();
121 for (
int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
123 if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
127 fftw_destroy_plan(m_plan);
130 is_allocated =
false;
132 Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
135 if (Lsize[i_dim] == Nsize[i_dim]) {
136 size_t fftw_size =
sizeof(fftw_complex) * Lsize[i_dim];
137 m_in = (fftw_complex *)fftw_malloc(fftw_size);
138 m_out = (fftw_complex *)fftw_malloc(fftw_size);
140 if (!m_in || !m_out) {
141 vout.
crucial(m_vl,
"Error at %s: failed to allocate memory %d [Byte].\n",
142 class_name.c_str(), (int)fftw_size);
147 ptrdiff_t Lsize_p = Lsize[i_dim];
148 ptrdiff_t fftw_size_p;
151 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
153 FFTW_FORWARD, FFTW_ESTIMATE,
154 &m_Nsize_in_p, &m_start_in_p,
155 &m_Nsize_out_p, &m_start_out_p);
157 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
159 FFTW_BACKWARD, FFTW_ESTIMATE,
160 &m_Nsize_in_p, &m_start_in_p,
161 &m_Nsize_out_p, &m_start_out_p);
164 m_in = fftw_alloc_complex(fftw_size_p);
165 m_out = fftw_alloc_complex(fftw_size_p);
167 if (!m_in || !m_out) {
168 vout.
crucial(m_vl,
"Error at %s: failed to allocate memory %d [Byte].\n",
169 class_name.c_str(), (int)fftw_size_p);
182 fftw_plan_with_nthreads(Nthread);
185 if (Lsize[i_dim] == Nsize[i_dim]) {
187 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
189 FFTW_FORWARD, FFTW_ESTIMATE);
191 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
193 FFTW_BACKWARD, FFTW_ESTIMATE);
197 ptrdiff_t Lsize_p = Lsize[i_dim];
200 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
203 FFTW_FORWARD, FFTW_ESTIMATE);
205 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
208 FFTW_BACKWARD, FFTW_ESTIMATE);
216 for (
int in2 = 0; in2 < Nin / 2; ++in2) {
217 for (
int t_global = 0; t_global < Lt; t_global++) {
218 for (
int ex = 0; ex < Nex; ++ex) {
220 for (
int z = 0; z < Nsize[2]; z++) {
221 for (
int y = 0; y < Nsize[1]; y++) {
222 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
223 int isite = m_index.site(xyz_local, y, z, t_global);
224 int i_real = 2 * in2;
225 int i_imag = 2 * in2 + 1;
227 m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
228 m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
232 fftw_execute(m_plan);
235 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
236 int isite = m_index.site(xyz_local, y, z, t_global);
237 int i_real = 2 * in2;
238 int i_imag = 2 * in2 + 1;
240 field.
set(i_real, isite, ex, m_out[xyz_local][0]);
241 field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
245 }
else if (i_dim == 1) {
246 for (
int x = 0; x < Nsize[0]; x++) {
247 for (
int z = 0; z < Nsize[2]; z++) {
248 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
249 int isite = m_index.site(x, xyz_local, z, t_global);
250 int i_real = 2 * in2;
251 int i_imag = 2 * in2 + 1;
253 m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
254 m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
258 fftw_execute(m_plan);
261 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
262 int isite = m_index.site(x, xyz_local, z, t_global);
263 int i_real = 2 * in2;
264 int i_imag = 2 * in2 + 1;
266 field.
set(i_real, isite, ex, m_out[xyz_local][0]);
267 field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
271 }
else if (i_dim == 2) {
272 for (
int y = 0; y < Nsize[1]; y++) {
273 for (
int x = 0; x < Nsize[0]; x++) {
274 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
275 int isite = m_index.site(x, y, xyz_local, t_global);
276 int i_real = 2 * in2;
277 int i_imag = 2 * in2 + 1;
279 m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
280 m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
284 fftw_execute(m_plan);
287 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
288 int isite = m_index.site(x, y, xyz_local, t_global);
289 int i_real = 2 * in2;
290 int i_imag = 2 * in2 + 1;
292 field.
set(i_real, isite, ex, m_out[xyz_local][0]);
293 field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
308 scal(field, 1.0 / Lxyz);
315 fftw_destroy_plan(m_plan);
321 void FFT_xyz_1dim::fft(
Field& field_out,
const Field& field_in)
326 std::vector<int> Lsize(Ndim - 1);
333 const int Lxyz = Lsize[0] * Lsize[1] * Lsize[2];
336 std::vector<int> Nsize(Ndim - 1);
341 const int Nin = field_in.
nin();
342 const int Nex = field_in.
nex();
344 std::vector<int> NPE(Ndim);
350 const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
351 const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
352 const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
354 if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
355 vout.
crucial(m_vl,
"Error at %s: FFTW supports parallelization only in 1 direction.\n",
360 bool is_allocated =
false;
361 int Lsize_Nsize_prev = 0;
363 field_out = field_in;
366 int threads_ok = fftw_init_threads();
374 for (
int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
376 if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
380 fftw_destroy_plan(m_plan);
383 is_allocated =
false;
385 Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
388 if (Lsize[i_dim] == Nsize[i_dim]) {
389 size_t fftw_size =
sizeof(fftw_complex) * Lsize[i_dim];
390 m_in = (fftw_complex *)fftw_malloc(fftw_size);
391 m_out = (fftw_complex *)fftw_malloc(fftw_size);
393 if (!m_in || !m_out) {
394 vout.
crucial(m_vl,
"Error at %s: failed to allocate memory %d [Byte].\n",
395 class_name.c_str(), (int)fftw_size);
400 ptrdiff_t Lsize_p = Lsize[i_dim];
401 ptrdiff_t fftw_size_p;
404 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
406 FFTW_FORWARD, FFTW_ESTIMATE,
407 &m_Nsize_in_p, &m_start_in_p,
408 &m_Nsize_out_p, &m_start_out_p);
410 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
412 FFTW_BACKWARD, FFTW_ESTIMATE,
413 &m_Nsize_in_p, &m_start_in_p,
414 &m_Nsize_out_p, &m_start_out_p);
417 m_in = fftw_alloc_complex(fftw_size_p);
418 m_out = fftw_alloc_complex(fftw_size_p);
420 if (!m_in || !m_out) {
421 vout.
crucial(m_vl,
"Error at %s: failed to allocate memory %d [Byte].\n",
422 class_name.c_str(), (int)fftw_size_p);
435 fftw_plan_with_nthreads(Nthread);
438 if (Lsize[i_dim] == Nsize[i_dim]) {
440 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
442 FFTW_FORWARD, FFTW_ESTIMATE);
444 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
446 FFTW_BACKWARD, FFTW_ESTIMATE);
450 ptrdiff_t Lsize_p = Lsize[i_dim];
453 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
456 FFTW_FORWARD, FFTW_ESTIMATE);
458 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
461 FFTW_BACKWARD, FFTW_ESTIMATE);
469 for (
int in2 = 0; in2 < Nin / 2; ++in2) {
470 for (
int t_global = 0; t_global < Lt; t_global++) {
471 for (
int ex = 0; ex < Nex; ++ex) {
473 for (
int z = 0; z < Nsize[2]; z++) {
474 for (
int y = 0; y < Nsize[1]; y++) {
475 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
476 int isite = m_index.site(xyz_local, y, z, t_global);
477 int i_real = 2 * in2;
478 int i_imag = 2 * in2 + 1;
480 m_in[xyz_local][0] = field_out.
cmp(i_real, isite, ex);
481 m_in[xyz_local][1] = field_out.
cmp(i_imag, isite, ex);
485 fftw_execute(m_plan);
488 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
489 int isite = m_index.site(xyz_local, y, z, t_global);
490 int i_real = 2 * in2;
491 int i_imag = 2 * in2 + 1;
493 field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
494 field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
498 }
else if (i_dim == 1) {
499 for (
int x = 0; x < Nsize[0]; x++) {
500 for (
int z = 0; z < Nsize[2]; z++) {
501 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
502 int isite = m_index.site(x, xyz_local, z, t_global);
503 int i_real = 2 * in2;
504 int i_imag = 2 * in2 + 1;
506 m_in[xyz_local][0] = field_out.
cmp(i_real, isite, ex);
507 m_in[xyz_local][1] = field_out.
cmp(i_imag, isite, ex);
511 fftw_execute(m_plan);
514 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
515 int isite = m_index.site(x, xyz_local, z, t_global);
516 int i_real = 2 * in2;
517 int i_imag = 2 * in2 + 1;
519 field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
520 field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
524 }
else if (i_dim == 2) {
525 for (
int y = 0; y < Nsize[1]; y++) {
526 for (
int x = 0; x < Nsize[0]; x++) {
527 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
528 int isite = m_index.site(x, y, xyz_local, t_global);
529 int i_real = 2 * in2;
530 int i_imag = 2 * in2 + 1;
532 m_in[xyz_local][0] = field_out.
cmp(i_real, isite, ex);
533 m_in[xyz_local][1] = field_out.
cmp(i_imag, isite, ex);
537 fftw_execute(m_plan);
540 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
541 int isite = m_index.site(x, y, xyz_local, t_global);
542 int i_real = 2 * in2;
543 int i_imag = 2 * in2 + 1;
545 field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
546 field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
561 scal(field_out, 1.0 / Lxyz);
568 fftw_destroy_plan(m_plan);
577 bool backup_fwbw = m_is_forward;
580 if (dir == FORWARD) {
582 }
else if (dir == BACKWARD) {
583 m_is_forward =
false;
585 vout.
crucial(m_vl,
"%s: unknown direction %d. failed.\n", class_name.c_str(), dir);
590 fft(field_out, field_in);
593 m_is_forward = backup_fwbw;
void scal(Field &x, const double a)
scal(x, a): x = a * x
static int get_num_threads()
returns available number of threads.
void set(const int jin, const int site, const int jex, double v)
void general(const char *format,...)
Container of Field-type object.
double cmp(const int jin, const int site, const int jex) const
int fetch_string(const string &key, string &value) const
static MPI_Comm & world()
retrieves current communicator.
void crucial(const char *format,...)
string get_string(const string &key) const
static VerboseLevel set_verbose_level(const std::string &str)