18 const std::string FFT_xyz_1dim::class_name = 
"FFT_xyz_1dim";
 
   21 void FFT_xyz_1dim::FFT(
Field& field, 
const bool is_forward)
 
   26   std::vector<int> Lsize(Ndim - 1);
 
   35   std::vector<int> Nsize(Ndim - 1);
 
   40   const int Nin = field.
nin();
 
   41   const int Nex = field.
nex();
 
   43   std::vector<int> NPE(Ndim);
 
   49   const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
 
   50   const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
 
   51   const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
 
   53   if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
 
   54     vout.
crucial(m_vl, 
"%s: FFTW supports parallelization only in 1 direction of x,y,z.\n",
 
   59   bool is_allocated     = 
false;
 
   60   int  Lsize_Nsize_prev = 0;
 
   64   int threads_ok = fftw_init_threads();
 
   72   for (
int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
 
   74     if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
 
   78         fftw_destroy_plan(m_plan);
 
   83     Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
 
   86       if (Lsize[i_dim] == Nsize[i_dim]) {
 
   87         size_t fftw_size = 
sizeof(fftw_complex) * Lsize[i_dim];
 
   88         m_in  = (fftw_complex *)fftw_malloc(fftw_size);
 
   89         m_out = (fftw_complex *)fftw_malloc(fftw_size);
 
   91         if (!m_in || !m_out) {
 
   92           vout.
crucial(m_vl, 
"%s: failed to allocate memory %d [Byte].\n",
 
   93                        class_name.c_str(), (int)fftw_size);
 
   98         ptrdiff_t Lsize_p = Lsize[i_dim];
 
   99         ptrdiff_t fftw_size_p;
 
  102           fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
 
  104                                                FFTW_FORWARD, FFTW_ESTIMATE,
 
  105                                                &m_Nsize_in_p, &m_start_in_p,
 
  106                                                &m_Nsize_out_p, &m_start_out_p);
 
  108           fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
 
  110                                                FFTW_BACKWARD, FFTW_ESTIMATE,
 
  111                                                &m_Nsize_in_p, &m_start_in_p,
 
  112                                                &m_Nsize_out_p, &m_start_out_p);
 
  115         m_in  = fftw_alloc_complex(fftw_size_p);
 
  116         m_out = fftw_alloc_complex(fftw_size_p);
 
  118         if (!m_in || !m_out) {
 
  119           vout.
crucial(m_vl, 
"%s: failed to allocate memory %d [Byte].\n",
 
  120                        class_name.c_str(), (int)fftw_size_p);
 
  133     fftw_plan_with_nthreads(Nthread);
 
  136     if (Lsize[i_dim] == Nsize[i_dim]) {
 
  138         m_plan = fftw_plan_dft_1d(Lsize[i_dim],
 
  140                                   FFTW_FORWARD, FFTW_ESTIMATE);
 
  142         m_plan = fftw_plan_dft_1d(Lsize[i_dim],
 
  144                                   FFTW_BACKWARD, FFTW_ESTIMATE);
 
  148       ptrdiff_t Lsize_p = Lsize[i_dim];
 
  151         m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
 
  154                                       FFTW_FORWARD, FFTW_ESTIMATE);
 
  156         m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
 
  159                                       FFTW_BACKWARD, FFTW_ESTIMATE);
 
  167     for (
int in2 = 0; in2 < Nin / 2; ++in2) {
 
  168       for (
int t_global = 0; t_global < Lt; t_global++) {
 
  169         for (
int ex = 0; ex < Nex; ++ex) {
 
  171             for (
int z = 0; z < Nsize[(i_dim + 2) % (Ndim - 1)]; z++) {
 
  172               for (
int y = 0; y < Nsize[(i_dim + 1) % (Ndim - 1)]; y++) {
 
  173                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  174                   int isite  = m_index.site(xyz_local, y, z, t_global);
 
  175                   int i_real = 2 * in2;
 
  176                   int i_imag = 2 * in2 + 1;
 
  178                   m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
 
  179                   m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
 
  183                 fftw_execute(m_plan);
 
  186                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  187                   int isite  = m_index.site(xyz_local, y, z, t_global);
 
  188                   int i_real = 2 * in2;
 
  189                   int i_imag = 2 * in2 + 1;
 
  191                   field.
set(i_real, isite, ex, m_out[xyz_local][0]);
 
  192                   field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
 
  196           } 
else if (i_dim == 1) {
 
  197             for (
int x = 0; x < Nsize[(i_dim + 2) % (Ndim - 1)]; x++) {
 
  198               for (
int z = 0; z < Nsize[(i_dim + 1) % (Ndim - 1)]; z++) {
 
  199                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  200                   int isite  = m_index.site(x, xyz_local, z, t_global);
 
  201                   int i_real = 2 * in2;
 
  202                   int i_imag = 2 * in2 + 1;
 
  204                   m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
 
  205                   m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
 
  209                 fftw_execute(m_plan);
 
  212                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  213                   int isite  = m_index.site(x, xyz_local, z, t_global);
 
  214                   int i_real = 2 * in2;
 
  215                   int i_imag = 2 * in2 + 1;
 
  217                   field.
set(i_real, isite, ex, m_out[xyz_local][0]);
 
  218                   field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
 
  222           } 
else if (i_dim == 2) {
 
  223             for (
int y = 0; y < Nsize[(i_dim + 2) % (Ndim - 1)]; y++) {
 
  224               for (
int x = 0; x < Nsize[(i_dim + 1) % (Ndim - 1)]; x++) {
 
  225                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  226                   int isite  = m_index.site(x, y, xyz_local, t_global);
 
  227                   int i_real = 2 * in2;
 
  228                   int i_imag = 2 * in2 + 1;
 
  230                   m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
 
  231                   m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
 
  235                 fftw_execute(m_plan);
 
  238                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  239                   int isite  = m_index.site(x, y, xyz_local, t_global);
 
  240                   int i_real = 2 * in2;
 
  241                   int i_imag = 2 * in2 + 1;
 
  243                   field.
set(i_real, isite, ex, m_out[xyz_local][0]);
 
  244                   field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
 
  262     fftw_destroy_plan(m_plan);
 
  268 void FFT_xyz_1dim::FFT(
Field& field_out, 
const Field& field_in, 
const bool is_forward)
 
  273   std::vector<int> Lsize(Ndim - 1);
 
  282   std::vector<int> Nsize(Ndim - 1);
 
  287   const int Nin = field_in.
nin();
 
  288   const int Nex = field_in.
nex();
 
  290   std::vector<int> NPE(Ndim);
 
  296   const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
 
  297   const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
 
  298   const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
 
  300   if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
 
  301     vout.
crucial(m_vl, 
"%s: FFTW supports parallelization only in 1 direction.\n",
 
  306   bool is_allocated     = 
false;
 
  307   int  Lsize_Nsize_prev = 0;
 
  311   int threads_ok = fftw_init_threads();
 
  319   for (
int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
 
  321     if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
 
  325         fftw_destroy_plan(m_plan);
 
  328       is_allocated = 
false;
 
  330     Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
 
  333       if (Lsize[i_dim] == Nsize[i_dim]) {
 
  334         size_t fftw_size = 
sizeof(fftw_complex) * Lsize[i_dim];
 
  335         m_in  = (fftw_complex *)fftw_malloc(fftw_size);
 
  336         m_out = (fftw_complex *)fftw_malloc(fftw_size);
 
  338         if (!m_in || !m_out) {
 
  339           vout.
crucial(m_vl, 
"%s: failed to allocate memory %d [Byte].\n",
 
  340                        class_name.c_str(), (int)fftw_size);
 
  345         ptrdiff_t Lsize_p = Lsize[i_dim];
 
  346         ptrdiff_t fftw_size_p;
 
  349           fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
 
  351                                                FFTW_FORWARD, FFTW_ESTIMATE,
 
  352                                                &m_Nsize_in_p, &m_start_in_p,
 
  353                                                &m_Nsize_out_p, &m_start_out_p);
 
  355           fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
 
  357                                                FFTW_BACKWARD, FFTW_ESTIMATE,
 
  358                                                &m_Nsize_in_p, &m_start_in_p,
 
  359                                                &m_Nsize_out_p, &m_start_out_p);
 
  362         m_in  = fftw_alloc_complex(fftw_size_p);
 
  363         m_out = fftw_alloc_complex(fftw_size_p);
 
  365         if (!m_in || !m_out) {
 
  366           vout.
crucial(m_vl, 
"%s: failed to allocate memory %d [Byte].\n",
 
  367                        class_name.c_str(), (int)fftw_size_p);
 
  380     fftw_plan_with_nthreads(Nthread);
 
  383     if (Lsize[i_dim] == Nsize[i_dim]) {
 
  385         m_plan = fftw_plan_dft_1d(Lsize[i_dim],
 
  387                                   FFTW_FORWARD, FFTW_ESTIMATE);
 
  389         m_plan = fftw_plan_dft_1d(Lsize[i_dim],
 
  391                                   FFTW_BACKWARD, FFTW_ESTIMATE);
 
  395       ptrdiff_t Lsize_p = Lsize[i_dim];
 
  398         m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
 
  401                                       FFTW_FORWARD, FFTW_ESTIMATE);
 
  403         m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
 
  406                                       FFTW_BACKWARD, FFTW_ESTIMATE);
 
  414     for (
int in2 = 0; in2 < Nin / 2; ++in2) {
 
  415       for (
int t_global = 0; t_global < Lt; t_global++) {
 
  416         for (
int ex = 0; ex < Nex; ++ex) {
 
  418             for (
int z = 0; z < Nsize[(i_dim + 2) % (Ndim - 1)]; z++) {
 
  419               for (
int y = 0; y < Nsize[(i_dim + 1) % (Ndim - 1)]; y++) {
 
  420                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  421                   int isite  = m_index.site(xyz_local, y, z, t_global);
 
  422                   int i_real = 2 * in2;
 
  423                   int i_imag = 2 * in2 + 1;
 
  425                   m_in[xyz_local][0] = field_in.
cmp(i_real, isite, ex);
 
  426                   m_in[xyz_local][1] = field_in.
cmp(i_imag, isite, ex);
 
  430                 fftw_execute(m_plan);
 
  433                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  434                   int isite  = m_index.site(xyz_local, y, z, t_global);
 
  435                   int i_real = 2 * in2;
 
  436                   int i_imag = 2 * in2 + 1;
 
  438                   field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
 
  439                   field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
 
  443           } 
else if (i_dim == 1) {
 
  444             for (
int x = 0; x < Nsize[(i_dim + 2) % (Ndim - 1)]; x++) {
 
  445               for (
int z = 0; z < Nsize[(i_dim + 1) % (Ndim - 1)]; z++) {
 
  446                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  447                   int isite  = m_index.site(x, xyz_local, z, t_global);
 
  448                   int i_real = 2 * in2;
 
  449                   int i_imag = 2 * in2 + 1;
 
  451                   m_in[xyz_local][0] = field_in.
cmp(i_real, isite, ex);
 
  452                   m_in[xyz_local][1] = field_in.
cmp(i_imag, isite, ex);
 
  456                 fftw_execute(m_plan);
 
  459                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  460                   int isite  = m_index.site(x, xyz_local, z, t_global);
 
  461                   int i_real = 2 * in2;
 
  462                   int i_imag = 2 * in2 + 1;
 
  464                   field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
 
  465                   field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
 
  469           } 
else if (i_dim == 2) {
 
  470             for (
int y = 0; y < Nsize[(i_dim + 2) % (Ndim - 1)]; y++) {
 
  471               for (
int x = 0; x < Nsize[(i_dim + 1) % (Ndim - 1)]; x++) {
 
  472                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  473                   int isite  = m_index.site(x, y, xyz_local, t_global);
 
  474                   int i_real = 2 * in2;
 
  475                   int i_imag = 2 * in2 + 1;
 
  477                   m_in[xyz_local][0] = field_in.
cmp(i_real, isite, ex);
 
  478                   m_in[xyz_local][1] = field_in.
cmp(i_imag, isite, ex);
 
  482                 fftw_execute(m_plan);
 
  485                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  486                   int isite  = m_index.site(x, y, xyz_local, t_global);
 
  487                   int i_real = 2 * in2;
 
  488                   int i_imag = 2 * in2 + 1;
 
  490                   field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
 
  491                   field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
 
  509     fftw_destroy_plan(m_plan);
 
static int get_num_threads()
returns available number of threads. 
 
void set(const int jin, const int site, const int jex, double v)
 
Container of Field-type object. 
 
double cmp(const int jin, const int site, const int jex) const 
 
void crucial(const char *format,...)