22 return new FFT_xyz_1dim();
26 bool init = FFT::Factory::Register(
"FFT_xyz_1dim", create_object);
30 const std::string FFT_xyz_1dim::class_name =
"FFT_xyz_1dim";
33 void FFT_xyz_1dim::set_parameters(
const Parameters& params)
35 const std::string str_vlevel = params.
get_string(
"verbose_level");
40 std::string str_fft_direction;
43 err += params.
fetch_string(
"FFT_direction", str_fft_direction);
46 vout.
crucial(m_vl,
"Error at %s: input parameter not found.\n", class_name.c_str());
50 set_parameters(str_fft_direction);
55 void FFT_xyz_1dim::set_parameters(
const std::string str_fft_direction)
59 vout.
general(m_vl,
" FFT_direction = %s\n", str_fft_direction.c_str());
64 if (str_fft_direction ==
"Forward") {
66 }
else if (str_fft_direction ==
"Backward") {
69 vout.
crucial(m_vl,
"Error at %s: unsupported FFT direction \"%s\"\n", class_name.c_str(), str_fft_direction.c_str());
76 void FFT_xyz_1dim::fft(
Field& field)
81 std::vector<int> Lsize(Ndim - 1);
88 const int Lxyz = Lsize[0] * Lsize[1] * Lsize[2];
91 std::vector<int> Nsize(Ndim - 1);
96 const int Nin = field.
nin();
97 const int Nex = field.
nex();
99 std::vector<int> NPE(Ndim);
105 const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
106 const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
107 const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
109 if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
110 vout.
crucial(m_vl,
"Error at %s: FFTW supports parallelization only in 1 direction of x,y,z.\n",
115 bool is_allocated =
false;
116 int Lsize_Nsize_prev = 0;
119 int threads_ok = fftw_init_threads();
127 for (
int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
129 if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
133 fftw_destroy_plan(m_plan);
136 is_allocated =
false;
138 Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
141 if (Lsize[i_dim] == Nsize[i_dim]) {
142 size_t fftw_size =
sizeof(fftw_complex) * Lsize[i_dim];
143 m_in = (fftw_complex *)fftw_malloc(fftw_size);
144 m_out = (fftw_complex *)fftw_malloc(fftw_size);
146 if (!m_in || !m_out) {
147 vout.
crucial(m_vl,
"Error at %s: failed to allocate memory %d [Byte].\n",
148 class_name.c_str(), (int)fftw_size);
153 ptrdiff_t Lsize_p = Lsize[i_dim];
154 ptrdiff_t fftw_size_p;
157 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
159 FFTW_FORWARD, FFTW_ESTIMATE,
160 &m_Nsize_in_p, &m_start_in_p,
161 &m_Nsize_out_p, &m_start_out_p);
163 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
165 FFTW_BACKWARD, FFTW_ESTIMATE,
166 &m_Nsize_in_p, &m_start_in_p,
167 &m_Nsize_out_p, &m_start_out_p);
170 m_in = fftw_alloc_complex(fftw_size_p);
171 m_out = fftw_alloc_complex(fftw_size_p);
173 if (!m_in || !m_out) {
174 vout.
crucial(m_vl,
"Error at %s: failed to allocate memory %d [Byte].\n",
175 class_name.c_str(), (int)fftw_size_p);
188 fftw_plan_with_nthreads(Nthread);
191 if (Lsize[i_dim] == Nsize[i_dim]) {
193 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
195 FFTW_FORWARD, FFTW_ESTIMATE);
197 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
199 FFTW_BACKWARD, FFTW_ESTIMATE);
203 ptrdiff_t Lsize_p = Lsize[i_dim];
206 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
209 FFTW_FORWARD, FFTW_ESTIMATE);
211 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
214 FFTW_BACKWARD, FFTW_ESTIMATE);
222 for (
int in2 = 0; in2 < Nin / 2; ++in2) {
223 for (
int t_global = 0; t_global < Lt; t_global++) {
224 for (
int ex = 0; ex < Nex; ++ex) {
226 for (
int z = 0; z < Nsize[2]; z++) {
227 for (
int y = 0; y < Nsize[1]; y++) {
228 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
229 int isite = m_index.site(xyz_local, y, z, t_global);
230 int i_real = 2 * in2;
231 int i_imag = 2 * in2 + 1;
233 m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
234 m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
238 fftw_execute(m_plan);
241 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
242 int isite = m_index.site(xyz_local, y, z, t_global);
243 int i_real = 2 * in2;
244 int i_imag = 2 * in2 + 1;
246 field.
set(i_real, isite, ex, m_out[xyz_local][0]);
247 field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
251 }
else if (i_dim == 1) {
252 for (
int x = 0; x < Nsize[0]; x++) {
253 for (
int z = 0; z < Nsize[2]; z++) {
254 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
255 int isite = m_index.site(x, xyz_local, z, t_global);
256 int i_real = 2 * in2;
257 int i_imag = 2 * in2 + 1;
259 m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
260 m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
264 fftw_execute(m_plan);
267 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
268 int isite = m_index.site(x, xyz_local, z, t_global);
269 int i_real = 2 * in2;
270 int i_imag = 2 * in2 + 1;
272 field.
set(i_real, isite, ex, m_out[xyz_local][0]);
273 field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
277 }
else if (i_dim == 2) {
278 for (
int y = 0; y < Nsize[1]; y++) {
279 for (
int x = 0; x < Nsize[0]; x++) {
280 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
281 int isite = m_index.site(x, y, xyz_local, t_global);
282 int i_real = 2 * in2;
283 int i_imag = 2 * in2 + 1;
285 m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
286 m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
290 fftw_execute(m_plan);
293 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
294 int isite = m_index.site(x, y, xyz_local, t_global);
295 int i_real = 2 * in2;
296 int i_imag = 2 * in2 + 1;
298 field.
set(i_real, isite, ex, m_out[xyz_local][0]);
299 field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
314 scal(field, 1.0 / Lxyz);
321 fftw_destroy_plan(m_plan);
327 void FFT_xyz_1dim::fft(
Field& field_out,
const Field& field_in)
332 std::vector<int> Lsize(Ndim - 1);
339 const int Lxyz = Lsize[0] * Lsize[1] * Lsize[2];
342 std::vector<int> Nsize(Ndim - 1);
347 const int Nin = field_in.
nin();
348 const int Nex = field_in.
nex();
350 std::vector<int> NPE(Ndim);
356 const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
357 const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
358 const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
360 if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
361 vout.
crucial(m_vl,
"Error at %s: FFTW supports parallelization only in 1 direction.\n",
366 bool is_allocated =
false;
367 int Lsize_Nsize_prev = 0;
369 field_out = field_in;
372 int threads_ok = fftw_init_threads();
380 for (
int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
382 if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
386 fftw_destroy_plan(m_plan);
389 is_allocated =
false;
391 Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
394 if (Lsize[i_dim] == Nsize[i_dim]) {
395 size_t fftw_size =
sizeof(fftw_complex) * Lsize[i_dim];
396 m_in = (fftw_complex *)fftw_malloc(fftw_size);
397 m_out = (fftw_complex *)fftw_malloc(fftw_size);
399 if (!m_in || !m_out) {
400 vout.
crucial(m_vl,
"Error at %s: failed to allocate memory %d [Byte].\n",
401 class_name.c_str(), (int)fftw_size);
406 ptrdiff_t Lsize_p = Lsize[i_dim];
407 ptrdiff_t fftw_size_p;
410 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
412 FFTW_FORWARD, FFTW_ESTIMATE,
413 &m_Nsize_in_p, &m_start_in_p,
414 &m_Nsize_out_p, &m_start_out_p);
416 fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
418 FFTW_BACKWARD, FFTW_ESTIMATE,
419 &m_Nsize_in_p, &m_start_in_p,
420 &m_Nsize_out_p, &m_start_out_p);
423 m_in = fftw_alloc_complex(fftw_size_p);
424 m_out = fftw_alloc_complex(fftw_size_p);
426 if (!m_in || !m_out) {
427 vout.
crucial(m_vl,
"Error at %s: failed to allocate memory %d [Byte].\n",
428 class_name.c_str(), (int)fftw_size_p);
441 fftw_plan_with_nthreads(Nthread);
444 if (Lsize[i_dim] == Nsize[i_dim]) {
446 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
448 FFTW_FORWARD, FFTW_ESTIMATE);
450 m_plan = fftw_plan_dft_1d(Lsize[i_dim],
452 FFTW_BACKWARD, FFTW_ESTIMATE);
456 ptrdiff_t Lsize_p = Lsize[i_dim];
459 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
462 FFTW_FORWARD, FFTW_ESTIMATE);
464 m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
467 FFTW_BACKWARD, FFTW_ESTIMATE);
475 for (
int in2 = 0; in2 < Nin / 2; ++in2) {
476 for (
int t_global = 0; t_global < Lt; t_global++) {
477 for (
int ex = 0; ex < Nex; ++ex) {
480 for (
int z = 0; z < Nsize[2]; z++) {
481 for (
int y = 0; y < Nsize[1]; y++) {
482 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
483 int isite = m_index.site(xyz_local, y, z, t_global);
484 int i_real = 2 * in2;
485 int i_imag = 2 * in2 + 1;
487 m_in[xyz_local][0] = field_out.
cmp(i_real, isite, ex);
488 m_in[xyz_local][1] = field_out.
cmp(i_imag, isite, ex);
492 fftw_execute(m_plan);
495 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
496 int isite = m_index.site(xyz_local, y, z, t_global);
497 int i_real = 2 * in2;
498 int i_imag = 2 * in2 + 1;
500 field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
501 field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
506 }
else if (i_dim == 1) {
507 for (
int x = 0; x < Nsize[0]; x++) {
508 for (
int z = 0; z < Nsize[2]; z++) {
509 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
510 int isite = m_index.site(x, xyz_local, z, t_global);
511 int i_real = 2 * in2;
512 int i_imag = 2 * in2 + 1;
514 m_in[xyz_local][0] = field_out.
cmp(i_real, isite, ex);
515 m_in[xyz_local][1] = field_out.
cmp(i_imag, isite, ex);
519 fftw_execute(m_plan);
522 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
523 int isite = m_index.site(x, xyz_local, z, t_global);
524 int i_real = 2 * in2;
525 int i_imag = 2 * in2 + 1;
527 field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
528 field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
533 }
else if (i_dim == 2) {
534 for (
int y = 0; y < Nsize[1]; y++) {
535 for (
int x = 0; x < Nsize[0]; x++) {
536 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
537 int isite = m_index.site(x, y, xyz_local, t_global);
538 int i_real = 2 * in2;
539 int i_imag = 2 * in2 + 1;
541 m_in[xyz_local][0] = field_out.
cmp(i_real, isite, ex);
542 m_in[xyz_local][1] = field_out.
cmp(i_imag, isite, ex);
546 fftw_execute(m_plan);
549 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
550 int isite = m_index.site(x, y, xyz_local, t_global);
551 int i_real = 2 * in2;
552 int i_imag = 2 * in2 + 1;
554 field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
555 field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
570 scal(field_out, 1.0 / Lxyz);
577 fftw_destroy_plan(m_plan);
void scal(Field &x, const double a)
scal(x, a): x = a * x
static int get_num_threads()
returns available number of threads.
void set(const int jin, const int site, const int jex, double v)
void general(const char *format,...)
Container of Field-type object.
double cmp(const int jin, const int site, const int jex) const
int fetch_string(const string &key, string &value) const
void crucial(const char *format,...)
string get_string(const string &key) const
static VerboseLevel set_verbose_level(const std::string &str)