22     return new FFT_xyz_1dim();
 
   26   bool init = FFT::Factory::Register(
"FFT_xyz_1dim", create_object);
 
   30 const std::string FFT_xyz_1dim::class_name = 
"FFT_xyz_1dim";
 
   33 void FFT_xyz_1dim::set_parameters(
const Parameters& params)
 
   35   const std::string str_vlevel = params.
get_string(
"verbose_level");
 
   40   std::string str_fft_direction;
 
   43   err += params.
fetch_string(
"FFT_direction", str_fft_direction);
 
   46     vout.
crucial(m_vl, 
"Error at %s: input parameter not found.\n", class_name.c_str());
 
   50   set_parameters(str_fft_direction);
 
   55 void FFT_xyz_1dim::set_parameters(
const std::string str_fft_direction)
 
   59   vout.
general(m_vl, 
"  FFT_direction = %s\n", str_fft_direction.c_str());
 
   64   if (str_fft_direction == 
"Forward") {
 
   66   } 
else if (str_fft_direction == 
"Backward") {
 
   69     vout.
crucial(m_vl, 
"Error at %s: unsupported FFT direction \"%s\"\n", class_name.c_str(), str_fft_direction.c_str());
 
   76 void FFT_xyz_1dim::fft(
Field& field)
 
   81   std::vector<int> Lsize(Ndim - 1);
 
   88   const int Lxyz = Lsize[0] * Lsize[1] * Lsize[2];
 
   91   std::vector<int> Nsize(Ndim - 1);
 
   96   const int Nin = field.
nin();
 
   97   const int Nex = field.
nex();
 
   99   std::vector<int> NPE(Ndim);
 
  105   const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
 
  106   const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
 
  107   const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
 
  109   if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
 
  110     vout.
crucial(m_vl, 
"Error at %s: FFTW supports parallelization only in 1 direction of x,y,z.\n",
 
  115   bool is_allocated     = 
false;
 
  116   int  Lsize_Nsize_prev = 0;
 
  119   int threads_ok = fftw_init_threads();
 
  127   for (
int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
 
  129     if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
 
  133         fftw_destroy_plan(m_plan);
 
  136       is_allocated = 
false;
 
  138     Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
 
  141       if (Lsize[i_dim] == Nsize[i_dim]) {
 
  142         size_t fftw_size = 
sizeof(fftw_complex) * Lsize[i_dim];
 
  143         m_in  = (fftw_complex *)fftw_malloc(fftw_size);
 
  144         m_out = (fftw_complex *)fftw_malloc(fftw_size);
 
  146         if (!m_in || !m_out) {
 
  147           vout.
crucial(m_vl, 
"Error at %s: failed to allocate memory %d [Byte].\n",
 
  148                        class_name.c_str(), (int)fftw_size);
 
  153         ptrdiff_t Lsize_p = Lsize[i_dim];
 
  154         ptrdiff_t fftw_size_p;
 
  157           fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
 
  159                                                FFTW_FORWARD, FFTW_ESTIMATE,
 
  160                                                &m_Nsize_in_p, &m_start_in_p,
 
  161                                                &m_Nsize_out_p, &m_start_out_p);
 
  163           fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
 
  165                                                FFTW_BACKWARD, FFTW_ESTIMATE,
 
  166                                                &m_Nsize_in_p, &m_start_in_p,
 
  167                                                &m_Nsize_out_p, &m_start_out_p);
 
  170         m_in  = fftw_alloc_complex(fftw_size_p);
 
  171         m_out = fftw_alloc_complex(fftw_size_p);
 
  173         if (!m_in || !m_out) {
 
  174           vout.
crucial(m_vl, 
"Error at %s: failed to allocate memory %d [Byte].\n",
 
  175                        class_name.c_str(), (int)fftw_size_p);
 
  188     fftw_plan_with_nthreads(Nthread);
 
  191     if (Lsize[i_dim] == Nsize[i_dim]) {
 
  193         m_plan = fftw_plan_dft_1d(Lsize[i_dim],
 
  195                                   FFTW_FORWARD, FFTW_ESTIMATE);
 
  197         m_plan = fftw_plan_dft_1d(Lsize[i_dim],
 
  199                                   FFTW_BACKWARD, FFTW_ESTIMATE);
 
  203       ptrdiff_t Lsize_p = Lsize[i_dim];
 
  206         m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
 
  209                                       FFTW_FORWARD, FFTW_ESTIMATE);
 
  211         m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
 
  214                                       FFTW_BACKWARD, FFTW_ESTIMATE);
 
  222     for (
int in2 = 0; in2 < Nin / 2; ++in2) {
 
  223       for (
int t_global = 0; t_global < Lt; t_global++) {
 
  224         for (
int ex = 0; ex < Nex; ++ex) {
 
  226             for (
int z = 0; z < Nsize[2]; z++) {
 
  227               for (
int y = 0; y < Nsize[1]; y++) {
 
  228                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  229                   int isite  = m_index.site(xyz_local, y, z, t_global);
 
  230                   int i_real = 2 * in2;
 
  231                   int i_imag = 2 * in2 + 1;
 
  233                   m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
 
  234                   m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
 
  238                 fftw_execute(m_plan);
 
  241                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  242                   int isite  = m_index.site(xyz_local, y, z, t_global);
 
  243                   int i_real = 2 * in2;
 
  244                   int i_imag = 2 * in2 + 1;
 
  246                   field.
set(i_real, isite, ex, m_out[xyz_local][0]);
 
  247                   field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
 
  251           } 
else if (i_dim == 1) {
 
  252             for (
int x = 0; x < Nsize[0]; x++) {
 
  253               for (
int z = 0; z < Nsize[2]; z++) {
 
  254                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  255                   int isite  = m_index.site(x, xyz_local, z, t_global);
 
  256                   int i_real = 2 * in2;
 
  257                   int i_imag = 2 * in2 + 1;
 
  259                   m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
 
  260                   m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
 
  264                 fftw_execute(m_plan);
 
  267                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  268                   int isite  = m_index.site(x, xyz_local, z, t_global);
 
  269                   int i_real = 2 * in2;
 
  270                   int i_imag = 2 * in2 + 1;
 
  272                   field.
set(i_real, isite, ex, m_out[xyz_local][0]);
 
  273                   field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
 
  277           } 
else if (i_dim == 2) {
 
  278             for (
int y = 0; y < Nsize[1]; y++) {
 
  279               for (
int x = 0; x < Nsize[0]; x++) {
 
  280                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  281                   int isite  = m_index.site(x, y, xyz_local, t_global);
 
  282                   int i_real = 2 * in2;
 
  283                   int i_imag = 2 * in2 + 1;
 
  285                   m_in[xyz_local][0] = field.
cmp(i_real, isite, ex);
 
  286                   m_in[xyz_local][1] = field.
cmp(i_imag, isite, ex);
 
  290                 fftw_execute(m_plan);
 
  293                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  294                   int isite  = m_index.site(x, y, xyz_local, t_global);
 
  295                   int i_real = 2 * in2;
 
  296                   int i_imag = 2 * in2 + 1;
 
  298                   field.
set(i_real, isite, ex, m_out[xyz_local][0]);
 
  299                   field.
set(i_imag, isite, ex, m_out[xyz_local][1]);
 
  314     scal(field, 1.0 / Lxyz);
 
  321     fftw_destroy_plan(m_plan);
 
  327 void FFT_xyz_1dim::fft(
Field& field_out, 
const Field& field_in)
 
  332   std::vector<int> Lsize(Ndim - 1);
 
  339   const int Lxyz = Lsize[0] * Lsize[1] * Lsize[2];
 
  342   std::vector<int> Nsize(Ndim - 1);
 
  347   const int Nin = field_in.
nin();
 
  348   const int Nex = field_in.
nex();
 
  350   std::vector<int> NPE(Ndim);
 
  356   const int NPE_xyt = NPE[0] * NPE[1] * NPE[3];
 
  357   const int NPE_xzt = NPE[0] * NPE[2] * NPE[3];
 
  358   const int NPE_yzt = NPE[1] * NPE[2] * NPE[3];
 
  360   if ((NPE_xyt != 1) && (NPE_xzt != 1) && (NPE_yzt != 1)) {
 
  361     vout.
crucial(m_vl, 
"Error at %s: FFTW supports parallelization only in 1 direction.\n",
 
  366   bool is_allocated     = 
false;
 
  367   int  Lsize_Nsize_prev = 0;
 
  369   field_out = field_in;
 
  372   int threads_ok = fftw_init_threads();
 
  380   for (
int i_dim = 0; i_dim < Ndim - 1; ++i_dim) {
 
  382     if (Lsize_Nsize_prev != Lsize[i_dim] * Nsize[i_dim]) {
 
  386         fftw_destroy_plan(m_plan);
 
  389       is_allocated = 
false;
 
  391     Lsize_Nsize_prev = Lsize[i_dim] * Nsize[i_dim];
 
  394       if (Lsize[i_dim] == Nsize[i_dim]) {
 
  395         size_t fftw_size = 
sizeof(fftw_complex) * Lsize[i_dim];
 
  396         m_in  = (fftw_complex *)fftw_malloc(fftw_size);
 
  397         m_out = (fftw_complex *)fftw_malloc(fftw_size);
 
  399         if (!m_in || !m_out) {
 
  400           vout.
crucial(m_vl, 
"Error at %s: failed to allocate memory %d [Byte].\n",
 
  401                        class_name.c_str(), (int)fftw_size);
 
  406         ptrdiff_t Lsize_p = Lsize[i_dim];
 
  407         ptrdiff_t fftw_size_p;
 
  410           fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
 
  412                                                FFTW_FORWARD, FFTW_ESTIMATE,
 
  413                                                &m_Nsize_in_p, &m_start_in_p,
 
  414                                                &m_Nsize_out_p, &m_start_out_p);
 
  416           fftw_size_p = fftw_mpi_local_size_1d(Lsize_p,
 
  418                                                FFTW_BACKWARD, FFTW_ESTIMATE,
 
  419                                                &m_Nsize_in_p, &m_start_in_p,
 
  420                                                &m_Nsize_out_p, &m_start_out_p);
 
  423         m_in  = fftw_alloc_complex(fftw_size_p);
 
  424         m_out = fftw_alloc_complex(fftw_size_p);
 
  426         if (!m_in || !m_out) {
 
  427           vout.
crucial(m_vl, 
"Error at %s: failed to allocate memory %d [Byte].\n",
 
  428                        class_name.c_str(), (int)fftw_size_p);
 
  441     fftw_plan_with_nthreads(Nthread);
 
  444     if (Lsize[i_dim] == Nsize[i_dim]) {
 
  446         m_plan = fftw_plan_dft_1d(Lsize[i_dim],
 
  448                                   FFTW_FORWARD, FFTW_ESTIMATE);
 
  450         m_plan = fftw_plan_dft_1d(Lsize[i_dim],
 
  452                                   FFTW_BACKWARD, FFTW_ESTIMATE);
 
  456       ptrdiff_t Lsize_p = Lsize[i_dim];
 
  459         m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
 
  462                                       FFTW_FORWARD, FFTW_ESTIMATE);
 
  464         m_plan = fftw_mpi_plan_dft_1d(Lsize_p,
 
  467                                       FFTW_BACKWARD, FFTW_ESTIMATE);
 
  475     for (
int in2 = 0; in2 < Nin / 2; ++in2) {
 
  476       for (
int t_global = 0; t_global < Lt; t_global++) {
 
  477         for (
int ex = 0; ex < Nex; ++ex) {
 
  480             for (
int z = 0; z < Nsize[2]; z++) {
 
  481               for (
int y = 0; y < Nsize[1]; y++) {
 
  482                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  483                   int isite  = m_index.site(xyz_local, y, z, t_global);
 
  484                   int i_real = 2 * in2;
 
  485                   int i_imag = 2 * in2 + 1;
 
  487                   m_in[xyz_local][0] = field_out.
cmp(i_real, isite, ex);
 
  488                   m_in[xyz_local][1] = field_out.
cmp(i_imag, isite, ex);
 
  492                 fftw_execute(m_plan);
 
  495                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  496                   int isite  = m_index.site(xyz_local, y, z, t_global);
 
  497                   int i_real = 2 * in2;
 
  498                   int i_imag = 2 * in2 + 1;
 
  500                   field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
 
  501                   field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
 
  506           } 
else if (i_dim == 1) {
 
  507             for (
int x = 0; x < Nsize[0]; x++) {
 
  508               for (
int z = 0; z < Nsize[2]; z++) {
 
  509                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  510                   int isite  = m_index.site(x, xyz_local, z, t_global);
 
  511                   int i_real = 2 * in2;
 
  512                   int i_imag = 2 * in2 + 1;
 
  514                   m_in[xyz_local][0] = field_out.
cmp(i_real, isite, ex);
 
  515                   m_in[xyz_local][1] = field_out.
cmp(i_imag, isite, ex);
 
  519                 fftw_execute(m_plan);
 
  522                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  523                   int isite  = m_index.site(x, xyz_local, z, t_global);
 
  524                   int i_real = 2 * in2;
 
  525                   int i_imag = 2 * in2 + 1;
 
  527                   field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
 
  528                   field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
 
  533           } 
else if (i_dim == 2) {
 
  534             for (
int y = 0; y < Nsize[1]; y++) {
 
  535               for (
int x = 0; x < Nsize[0]; x++) {
 
  536                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  537                   int isite  = m_index.site(x, y, xyz_local, t_global);
 
  538                   int i_real = 2 * in2;
 
  539                   int i_imag = 2 * in2 + 1;
 
  541                   m_in[xyz_local][0] = field_out.
cmp(i_real, isite, ex);
 
  542                   m_in[xyz_local][1] = field_out.
cmp(i_imag, isite, ex);
 
  546                 fftw_execute(m_plan);
 
  549                 for (
int xyz_local = 0; xyz_local < Nsize[i_dim]; xyz_local++) {
 
  550                   int isite  = m_index.site(x, y, xyz_local, t_global);
 
  551                   int i_real = 2 * in2;
 
  552                   int i_imag = 2 * in2 + 1;
 
  554                   field_out.
set(i_real, isite, ex, m_out[xyz_local][0]);
 
  555                   field_out.
set(i_imag, isite, ex, m_out[xyz_local][1]);
 
  570     scal(field_out, 1.0 / Lxyz);
 
  577     fftw_destroy_plan(m_plan);
 
void scal(Field &x, const double a)
scal(x, a): x = a * x 
 
static int get_num_threads()
returns available number of threads. 
 
void set(const int jin, const int site, const int jex, double v)
 
void general(const char *format,...)
 
Container of Field-type object. 
 
double cmp(const int jin, const int site, const int jex) const 
 
int fetch_string(const string &key, string &value) const 
 
void crucial(const char *format,...)
 
string get_string(const string &key) const 
 
static VerboseLevel set_verbose_level(const std::string &str)