template<typename INDEX, typename AFIELD>
void gramschmidt_in_block_impl(std::vector<AFIELD>& v,
                               const INDEX& block_index,

  vout.detailed("gramschmidt_in_block is called (general version with smaller memory): sizeof(real_t)=%d\n",
                (int)sizeof(real_t));

    double norm2 = v[0].norm2();

  int nvector = v.size();

  int coarse_nvol = block_index.coarse_nvol();

  set_threadtask(ith, nth, is, ns, coarse_nvol);

  for (int i = 0; i < nvector; i++) {

#pragma omp barrier                   // work on the fine lattice, done
    for (int j = 0; j < i; j++) {

      block_dotc(block_prod, v[j], v[i], block_index);

#pragma omp barrier                   // work on the blocks, done
#pragma omp barrier                   // work on the fine lattice, done
#pragma omp barrier                   // work on the blocks, done
    for (int k = is; k < ns; k++) {
      double n2 = block_norm[k];
      block_norm[k] = 1.0 / sqrt(n2);

    for (int j = 0; j < i; ++j) {

      block_dotc(block_prod, v[j], tmp1, block_index);

#pragma omp barrier                   // work on the blocks, done
#pragma omp barrier                   // work on the fine lattice, done
#pragma omp barrier                   // work on the blocks, done
    for (int k = is; k < ns; ++k) {
      double n2 = block_norm[k];
      block_norm[k] = 1.0 / sqrt(n2);

    double norm2 = v[0].norm2();
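  // Descriptive note on the fragments above: within each coarse block, v[i] is
  // orthogonalized against the already processed v[0..i-1] by subtracting the
  // block-local projections obtained from block_dotc(), and is then normalized
  // block by block with the inverse square roots stored in block_norm[].
  // The projection/normalization pair appears twice per vector, i.e. a
  // re-orthogonalization pass is presumably done for numerical stability; the
  // elided lines in between hold the corresponding per-block update of the
  // fine-lattice field.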
 
template<typename INDEX, class AFIELD1, class AFIELD2>
void make_coarse_vector_impl(AFIELD1& coarse_vector, const AFIELD2& fine_vector,
                             const std::vector<AFIELD2>& testvectors,

  const int num_vectors = testvectors.size();
  assert(coarse_vector.nin() == 4 * num_vectors);

  int ith, nth, is, ns;
  set_threadtask(ith, nth, is, ns, block_index.coarse_nvol());
  int coarse_Nx = block_index.coarse_lattice_size(0);
  int coarse_Ny = block_index.coarse_lattice_size(1);
  int coarse_Nz = block_index.coarse_lattice_size(2);
  int coarse_Nt = block_index.coarse_lattice_size(3);

  for (int i = 0; i < num_vectors; i++) {

    copy(tmp2, testvectors[i]);

    axpy(tmp2, -1.0, tmp1);
    axpy(tmp1, +1.0, testvectors[i]);

    block_dotc(inner_prod, tmp2, fine_vector, block_index);
    for (int block_id = is; block_id < ns; block_id++) {
      int index_re = index.idx_SPr(i, 0, block_id, 0);
      int index_im = index.idx_SPi(i, 0, block_id, 0);
      coarse_vector.set(index_re, 0.5 * real(inner_prod[block_id]));
      coarse_vector.set(index_im, 0.5 * imag(inner_prod[block_id]));

    block_dotc(inner_prod, tmp1, fine_vector, block_index);
    for (int block_id = is; block_id < ns; block_id++) {
      int index_re = index.idx_SPr(i, 1, block_id, 0);
      int index_im = index.idx_SPi(i, 1, block_id, 0);
      coarse_vector.set(index_re, 0.5 * real(inner_prod[block_id]));
      coarse_vector.set(index_im, 0.5 * imag(inner_prod[block_id]));
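  // Descriptive note: with tmp1 presumably holding gamma5 * testvectors[i]
  // (computed on an elided line just above), the two axpy calls build
  //   tmp2 = (1 - gamma5) * testvectors[i],   tmp1 = (1 + gamma5) * testvectors[i],
  // so together with the explicit factor 0.5 the coarse components are
  //   coarse(i, 0, block) = 0.5 * < (1 - gamma5) w_i , v_fine >_block,
  //   coarse(i, 1, block) = 0.5 * < (1 + gamma5) w_i , v_fine >_block,
  // i.e. block-wise inner products of the chirally projected test vectors with
  // the fine vector, P_{+/-} = (1 +/- gamma5)/2.  The coarse field therefore
  // carries 2 * num_vectors complex (= 4 * num_vectors real) components per
  // coarse site, which is what the assert above checks.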
 
template<typename INDEX, class AFIELD1, class AFIELD2>
void make_fine_vector_impl(AFIELD2& fine_vector, const AFIELD1& coarse_vector,
                           const std::vector<AFIELD2>& testvectors,

  const int num_vectors = testvectors.size();
  assert(coarse_vector.nin() == 4 * num_vectors);

  int ith, nth, is, ns;
  set_threadtask(ith, nth, is, ns, block_index.coarse_nvol());
  int coarse_Nx = block_index.coarse_lattice_size(0);
  int coarse_Ny = block_index.coarse_lattice_size(1);
  int coarse_Nz = block_index.coarse_lattice_size(2);
  int coarse_Nt = block_index.coarse_lattice_size(3);

  fine_vector.set(0.0);

  for (int i = 0; i < num_vectors; i++) {

    copy(tmp2, testvectors[i]);

    axpy(tmp2, -1.0, tmp1);
    axpy(tmp1, +1.0, testvectors[i]);

    for (int block_id = is; block_id < ns; block_id++) {
      int    index_re = index.idx_SPr(i, 0, block_id, 0);
      int    index_im = index.idx_SPi(i, 0, block_id, 0);
      real_t re       = coarse_vector.cmp(index_re);
      real_t im       = coarse_vector.cmp(index_im);
      complex_array[block_id] = complex_t(re, im);

    for (int block_id = is; block_id < ns; block_id++) {
      int    index_re = index.idx_SPr(i, 1, block_id, 0);
      int    index_im = index.idx_SPi(i, 1, block_id, 0);
      real_t re       = coarse_vector.cmp(index_re);
      real_t im       = coarse_vector.cmp(index_im);
      complex_array[block_id] = complex_t(re, im);
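  // Descriptive note: this is the prolongation direction.  For each test vector
  // and each chirality the coarse coefficients are gathered into complex_array[]
  // (one complex number per coarse block); the elided lines after each loop
  // presumably accumulate these per-block coefficients times the chirally
  // projected test vectors into fine_vector, block by block, which is why
  // fine_vector is cleared with set(0.0) beforehand.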
 
template<class AFIELD>
void set_testvectors_impl_global(std::vector<AFIELD>& testvectors,
                                 AFopr<AFIELD> *afopr, Field& vec_tmp)

  vout.general("using global random numbers for the initial testvectors\n");

  int nvec = testvectors.size();

  for (int i = 0; i < nvec; i++) {

        afopr->convert(testvectors[i], vec_tmp);

        convert(index_f, testvectors[i], vec_tmp);
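      // Descriptive note: each test vector is first filled with globally seeded
      // random numbers in the Field buffer vec_tmp (elided above) and is then
      // copied into the accelerator field, either through the operator's own
      // layout conversion (afopr->convert) or through the generic index-based
      // convert(), presumably depending on whether the fine operator requires
      // its own internal field layout.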
 
template<class AFIELD>
void set_testvectors_impl_global(std::vector<AFIELD>& testvectors, AFopr<AFIELD> *afopr)

  std::string class_name = __func__;

  int    nin  = testvectors[0].nin();
  size_t nvol = testvectors[0].nvol();
  Field  vec_tmp(nin, nvol, 1);

    set_testvectors_impl_global(testvectors, afopr, vec_tmp);
 
template<class AFIELD>
void set_testvectors_impl_local(std::vector<AFIELD>& testvectors,
                                AFopr<AFIELD> *afopr, Field& vec_tmp)

  vout.general("using local random numbers for the initial testvectors\n");

  int    nvec = testvectors.size();
  int    nin  = testvectors[0].nin();
  size_t nvol = testvectors[0].nvol();
  assert(nin == vec_tmp.nin() && nvol == vec_tmp.nvol());

    const int seed_base = 1395704;

    int myrank = Communicator::nodeid();
    int is0, ns0, ith, nth;
    set_threadtask(ith, nth, is0, ns0, nvol);
    int seed = seed_base + nth * myrank + ith;

    size_t is = is0 * nin;
    size_t ns = ns0 * nin;
    for (int i = 0; i < nvec; i++) {
      double *pv = vec_tmp.ptr(0);
      for (size_t s = is; s < ns; s++) {
        pv[s] = random.get();

        afopr->convert(testvectors[i], vec_tmp);

        convert(index_f, testvectors[i], vec_tmp);
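      // Descriptive note on the seeding: with nth threads per rank the choice
      //   seed = seed_base + nth * myrank + ith
      // gives every (MPI rank, thread) pair its own seed (assuming the same
      // thread count on every rank), so each thread fills its contiguous slice
      // [is, ns) of the local Field with an independent random stream; this is
      // what distinguishes the "local" variant from the globally seeded one
      // above.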
 
template<class AFIELD>
void set_testvectors_impl_local(std::vector<AFIELD>& testvectors, AFopr<AFIELD> *afopr)

  std::string class_name = __func__;

  int    nin  = testvectors[0].nin();
  size_t nvol = testvectors[0].nvol();
  Field  vec_tmp(nin, nvol, 1);

    set_testvectors_impl_local(testvectors, afopr, vec_tmp);
 
template<class AFIELD1, class AFIELD2>

  m_tmp1.reset(m_nin, m_fine_nvol, 1);
  m_tmp2.reset(m_nin, m_fine_nvol, 1);
  m_field_tmp.reset(m_nin, m_fine_nvol, 1);

  int coarse_nvol = m_block_index.coarse_nvol();
  m_real_array.resize(coarse_nvol);
  m_complex_array.resize(coarse_nvol);
  m_coarse_array.resize(coarse_nvol * 2 * m_nvec);
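  // Descriptive note on the work areas: m_tmp1, m_tmp2 and m_field_tmp live on
  // the fine lattice, while the arrays are sized per coarse block.
  // m_coarse_array holds coarse_nvol complex numbers for each of the
  // 2 * m_nvec coarse degrees of freedom (m_nvec test vectors times two
  // chiralities), laid out as
  //   m_coarse_array[block + coarse_nvol * (2*ivec + chirality)].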
 
template<class AFIELD1, class AFIELD2>

  assert(w.size() == m_nvec);
  for (int ivec = 0; ivec < m_nvec; ++ivec) {
    copy(m_testvectors[ivec], w[ivec]);
 
template<class AFIELD1, class AFIELD2>

  set_testvectors_impl_local(m_testvectors, m_afopr_fine, m_field_tmp);

  vout.detailed(m_vl, "%s: time for set_testvector = %e sec\n",
                class_name.c_str(), elapsed_time);
 
template<class AFIELD1, class AFIELD2>

  const AFIELD1& coarse_vector) const

  const int coarse_nvol = m_block_index.coarse_nvol();
  const int num_vectors = m_testvectors.size();

  const int Nxc = m_block_index.coarse_lattice_size(0);
  const int Nyc = m_block_index.coarse_lattice_size(1);
  const int Nzc = m_block_index.coarse_lattice_size(2);
  const int Ntc = m_block_index.coarse_lattice_size(3);

    Nxc, Nyc, Nzc, Ntc, num_vectors, 2);

  int ith, nth, is_coarse, ns_coarse;
  set_threadtask(ith, nth, is_coarse, ns_coarse, coarse_nvol);

  for (int ivec = 0; ivec < num_vectors; ++ivec) {
    for (int block = is_coarse; block < ns_coarse; ++block) {
      int    index_re1 = index_c.idx_SPr(ivec, 0, block, 0);
      int    index_im1 = index_c.idx_SPi(ivec, 0, block, 0);
      real_t re1       = coarse_vector.cmp(index_re1);
      real_t im1       = coarse_vector.cmp(index_im1);

      int    index_re2 = index_c.idx_SPr(ivec, 1, block, 0);
      int    index_im2 = index_c.idx_SPi(ivec, 1, block, 0);
      real_t re2       = coarse_vector.cmp(index_re2);
      real_t im2       = coarse_vector.cmp(index_im2);

      m_coarse_array[block + coarse_nvol * (2 * ivec)]
        = complex_t(re1, im1);
      m_coarse_array[block + coarse_nvol * (2 * ivec + 1)]
        = complex_t(re2, im2);
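  // Descriptive note: set_coarse_array unpacks the coarse field (addressed via
  // the coarse site index object built from Nxc, Nyc, Nzc, Ntc, num_vectors and
  // 2 chiralities) into the flat complex work array,
  //   m_coarse_array[block + coarse_nvol * (2*ivec + chirality)],
  // which is the layout used by make_fine_vector / make_coarse_vector below;
  // set_coarse_vector performs the inverse packing.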
 
template<class AFIELD1, class AFIELD2>

  AFIELD1& coarse_vector) const

  const int coarse_nvol = m_block_index.coarse_nvol();
  const int num_vectors = m_testvectors.size();

  const int Nxc = m_block_index.coarse_lattice_size(0);
  const int Nyc = m_block_index.coarse_lattice_size(1);
  const int Nzc = m_block_index.coarse_lattice_size(2);
  const int Ntc = m_block_index.coarse_lattice_size(3);

    Nxc, Nyc, Nzc, Ntc, num_vectors, 2);

  int ith, nth, is_coarse, ns_coarse;
  set_threadtask(ith, nth, is_coarse, ns_coarse, coarse_nvol);

  for (int ivec = 0; ivec < num_vectors; ++ivec) {
    for (int block = is_coarse; block < ns_coarse; ++block) {
      real_t re1       = real(array1[block + coarse_nvol * (2 * ivec)]);
      real_t im1       = imag(array1[block + coarse_nvol * (2 * ivec)]);
      int    index_re1 = index_c.idx_SPr(ivec, 0, block, 0);
      int    index_im1 = index_c.idx_SPi(ivec, 0, block, 0);
      coarse_vector.set(index_re1, re1);
      coarse_vector.set(index_im1, im1);

      real_t re2       = real(array1[block + coarse_nvol * (2 * ivec + 1)]);
      real_t im2       = imag(array1[block + coarse_nvol * (2 * ivec + 1)]);
      int    index_re2 = index_c.idx_SPr(ivec, 1, block, 0);
      int    index_im2 = index_c.idx_SPi(ivec, 1, block, 0);
      coarse_vector.set(index_re2, re2);
      coarse_vector.set(index_im2, im2);

  coarse_vector.scal(real_t(0.5));
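  // Descriptive note: the single scal(0.5) at the end supplies the factor 1/2
  // of the chiral projectors P_{+/-} = (1 +/- gamma5)/2 for the whole coarse
  // field at once, instead of multiplying each block inner product by 0.5
  // element by element as the generic make_coarse_vector_impl above does.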
 
template<class AFIELD1, class AFIELD2>

  vout.general("%s: make_fine_vector is called\n", class_name.c_str());

  const int num_vectors = m_testvectors.size();
  assert(coarse_vector.nin() == 4 * num_vectors);

  const int coarse_nvol = m_block_index.coarse_nvol();

  set_coarse_array(coarse_vector);

  fine_vector.set(0.0);

  for (int ivec = 0; ivec < num_vectors; ++ivec) {

    m_afopr_fine->mult_gm5(m_tmp1, m_testvectors[ivec]);

    complex_t *array1 = &m_coarse_array[coarse_nvol * (2 * ivec)];

    complex_t *array2 = &m_coarse_array[coarse_nvol * (2 * ivec + 1)];
 
template<class AFIELD1, class AFIELD2>

  vout.general("%s: make_coarse_vector is called\n", class_name.c_str());

  const int num_vectors = m_testvectors.size();
  assert(coarse_vector.nin() == 4 * num_vectors);

  const int coarse_nvol = m_block_index.coarse_nvol();

  for (int ivec = 0; ivec < num_vectors; ++ivec) {
    copy(m_tmp1, m_testvectors[ivec]);
    m_afopr_fine->mult_gm5(m_tmp2, m_testvectors[ivec]);

    axpy(m_tmp1, -1.0, m_tmp2);               // m_tmp1 = (1 - gamma5) * w_ivec
    axpy(m_tmp2, +1.0, m_testvectors[ivec]);  // m_tmp2 = (1 + gamma5) * w_ivec

    complex_t *array1 = &m_coarse_array[coarse_nvol * (2 * ivec)];
    block_dotc(array1, m_tmp1, fine_vector, m_block_index);

    complex_t *array2 = &m_coarse_array[coarse_nvol * (2 * ivec + 1)];
    block_dotc(array2, m_tmp2, fine_vector, m_block_index);

  set_coarse_vector(coarse_vector);
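  // Descriptive note: this is the restriction direction.  For each test vector
  // w_ivec the block-wise inner products of (1 - gamma5) w_ivec and
  // (1 + gamma5) w_ivec with the fine vector are stored in m_coarse_array, and
  // set_coarse_vector then packs them into the coarse field and applies the
  // remaining factor 1/2 of the chiral projectors.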
 
template<class AFIELD1, class AFIELD2>

  gramschmidt(m_testvectors);
 
template<class AFIELD1, class AFIELD2>

  std::vector<AFIELD2>& vectors) const

  vout.general("%s: gramschmidt is called\n", class_name.c_str());

  gramschmidt_in_block_impl(vectors, m_afopr_fine, m_block_index,

                            &(m_real_array[0]), &(m_complex_array[0]));

  gramschmidt_in_block_impl(vectors, m_afopr_fine, m_block_index,

                            &(m_real_array[0]), &(m_complex_array[0]));
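  // Descriptive note: the block Gram-Schmidt is applied twice in succession;
  // repeating the orthogonalization is the standard remedy for the loss of
  // orthogonality of a single classical Gram-Schmidt pass in finite precision,
  // so the test vectors come out orthonormal within each block to working
  // accuracy.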
 
template<class AFIELD1, class AFIELD2>

  std::vector<AFIELD2>& vectors) const

  vout.crucial("%s: gramschmidt_double is called\n", class_name.c_str());