template<typename INDEX, typename AFIELD>
void gramschmidt_in_block_impl(std::vector<AFIELD>& v,
                               AFopr<AFIELD> *afopr,
                               const INDEX& block_index,
                               real_t *block_norm,
                               complex_t *block_prod)
// NOTE: only the v and block_index parameters survive in this excerpt; the
// others are assumptions reconstructed from the call site in gramschmidt().
// The work vector tmp1 used below is set up in elided lines.
{
  vout.detailed("gramschmidt_in_block called (general version with smaller memory): sizeof(real_t)=%d\n",
                (int)sizeof(real_t));

  double norm2 = v[0].norm2();  // initial norm of the first vector, kept for monitoring

  int nvector     = v.size();
  int coarse_nvol = block_index.coarse_nvol();

  int ith, nth, is, ns;
  set_threadtask(ith, nth, is, ns, coarse_nvol);
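  // set_threadtask() assigns this thread the block range [is, ns) out of the
  // coarse_nvol coarse blocks; all per-block loops below run over that range.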
  for (int i = 0; i < nvector; i++) {
#pragma omp barrier // work on the fine lattice, done

    // first pass: project out the already orthonormalized vectors v[0..i-1]
    for (int j = 0; j < i; j++) {
      block_dotc(block_prod, v[j], v[i], block_index);
#pragma omp barrier // work on the blocks, done
      // (subtraction of the projection from v[i] elided in this excerpt)
#pragma omp barrier // work on the fine lattice, done
    }
#pragma omp barrier // work on the blocks, done

    // turn the blockwise squared norms into normalization factors
    for (int k = is; k < ns; k++) {
      double n2 = block_norm[k];
      block_norm[k] = 1.0 / sqrt(n2);
    }
    // second pass: re-orthogonalization against tmp1 for numerical stability
    // (tmp1 holds the current working vector; its setup is elided here)
    for (int j = 0; j < i; ++j) {
      block_dotc(block_prod, v[j], tmp1, block_index);
#pragma omp barrier // work on blocks, done
#pragma omp barrier // work on the fine lattice, done
    }
#pragma omp barrier // work on the blocks, done

    for (int k = is; k < ns; ++k) {
      double n2 = block_norm[k];
      block_norm[k] = 1.0 / sqrt(n2);
    }
  }

  double norm2_final = v[0].norm2();  // renamed: the original redeclared norm2 in the same scope
}
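//====================================================================
// make_coarse_vector_impl: restriction of a fine vector onto the coarse
// lattice. Each test vector is split into its two chiral halves
// (1 +- gamma_5) v_i, and the blockwise inner products of those halves
// with the fine vector become the coarse degrees of freedom: two complex
// numbers per block per test vector (hence the nin == 4 * num_vectors
// assertion below).
//====================================================================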
template<typename INDEX, class AFIELD1, class AFIELD2>
void make_coarse_vector_impl(AFIELD1& coarse_vector,
                             const AFIELD2& fine_vector,
                             const std::vector<AFIELD2>& testvectors,
                             const INDEX& block_index)
// NOTE: block_index is an assumed parameter; the work vectors tmp1 and tmp2,
// the inner_prod array, and the coarse-lattice index "index" used below are
// set up in lines elided from this excerpt.
{
  const int num_vectors = testvectors.size();
  assert(coarse_vector.nin() == 4 * num_vectors);

  int ith, nth, is, ns;
  set_threadtask(ith, nth, is, ns, block_index.coarse_nvol());
  int coarse_Nx = block_index.coarse_lattice_size(0);
  int coarse_Ny = block_index.coarse_lattice_size(1);
  int coarse_Nz = block_index.coarse_lattice_size(2);
  int coarse_Nt = block_index.coarse_lattice_size(3);
  for (int i = 0; i < num_vectors; i++) {
    // chiral split: tmp1 = (1 + gamma_5) v_i, tmp2 = (1 - gamma_5) v_i,
    // assuming tmp1 holds gamma_5 v_i from an elided mult_gm5 call
    copy(tmp2, testvectors[i]);
    axpy(tmp2, -1.0, tmp1);
    axpy(tmp1, +1.0, testvectors[i]);

    // chirality 0: blockwise inner products of tmp2 with the fine vector
    block_dotc(inner_prod, tmp2, fine_vector, block_index);
    for (int block_id = is; block_id < ns; block_id++) {
      int index_re = index.idx_SPr(i, 0, block_id, 0);
      int index_im = index.idx_SPi(i, 0, block_id, 0);
      coarse_vector.set(index_re, 0.5 * real(inner_prod[block_id]));
      coarse_vector.set(index_im, 0.5 * imag(inner_prod[block_id]));
    }

    // chirality 1: blockwise inner products of tmp1 with the fine vector
    block_dotc(inner_prod, tmp1, fine_vector, block_index);
    for (int block_id = is; block_id < ns; block_id++) {
      int index_re = index.idx_SPr(i, 1, block_id, 0);
      int index_im = index.idx_SPi(i, 1, block_id, 0);
      coarse_vector.set(index_re, 0.5 * real(inner_prod[block_id]));
      coarse_vector.set(index_im, 0.5 * imag(inner_prod[block_id]));
    }
  }
}
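//====================================================================
// make_fine_vector_impl: prolongation, the transpose of the restriction
// above. The coarse components are read back per block and per chirality
// and accumulated onto the fine vector, with the chirally split test
// vectors as the interpolation basis.
//====================================================================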
template<typename INDEX, class AFIELD1, class AFIELD2>
void make_fine_vector_impl(AFIELD2& fine_vector,
                           const AFIELD1& coarse_vector,
                           const std::vector<AFIELD2>& testvectors,
                           const INDEX& block_index)
// NOTE: as above, block_index is an assumed parameter; tmp1, tmp2,
// complex_array, and the index "index" come from lines elided here.
{
  const int num_vectors = testvectors.size();
  assert(coarse_vector.nin() == 4 * num_vectors);

  int ith, nth, is, ns;
  set_threadtask(ith, nth, is, ns, block_index.coarse_nvol());
  int coarse_Nx = block_index.coarse_lattice_size(0);
  int coarse_Ny = block_index.coarse_lattice_size(1);
  int coarse_Nz = block_index.coarse_lattice_size(2);
  int coarse_Nt = block_index.coarse_lattice_size(3);

  fine_vector.set(0.0);
  for (int i = 0; i < num_vectors; i++) {
    // chiral split as in make_coarse_vector_impl() above
    copy(tmp2, testvectors[i]);
    axpy(tmp2, -1.0, tmp1);
    axpy(tmp1, +1.0, testvectors[i]);

    // unpack the chirality-0 coarse components of this test vector
    for (int block_id = is; block_id < ns; block_id++) {
      int index_re = index.idx_SPr(i, 0, block_id, 0);
      int index_im = index.idx_SPi(i, 0, block_id, 0);
      real_t re = coarse_vector.cmp(index_re);
      real_t im = coarse_vector.cmp(index_im);
      complex_array[block_id] = complex_t(re, im);
    }
    // (blockwise accumulation onto fine_vector elided in this excerpt)

    // unpack the chirality-1 coarse components
    for (int block_id = is; block_id < ns; block_id++) {
      int index_re = index.idx_SPr(i, 1, block_id, 0);
      int index_im = index.idx_SPi(i, 1, block_id, 0);
      real_t re = coarse_vector.cmp(index_re);
      real_t im = coarse_vector.cmp(index_im);
      complex_array[block_id] = complex_t(re, im);
    }
    // (blockwise accumulation onto fine_vector elided in this excerpt)
  }
}
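//====================================================================
// Initialization of the test vectors with random numbers. The "global"
// variant below draws numbers in a way that does not depend on how the
// lattice is distributed over ranks and threads; the "local" variant
// further down instead seeds one generator per (rank, thread) pair.
//====================================================================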
template<class AFIELD>
void set_testvectors_impl_global(std::vector<AFIELD>& testvectors,
                                 AFopr<AFIELD> *afopr,
                                 Field& vec_tmp)
{
  vout.general("using global random numbers for the initial testvectors\n");

  int nvec = testvectors.size();

  for (int i = 0; i < nvec; i++) {
    // (drawing the global random numbers into vec_tmp is elided here)

    // convert the Field into the AFIELD layout; the if/else is an assumption
    // reconstructed from the two convert() calls that survive in this excerpt
    if (afopr->needs_convert()) {
      afopr->convert(testvectors[i], vec_tmp);
    } else {
      convert(index_f, testvectors[i], vec_tmp);  // index_f: fine-lattice index, declaration elided
    }
  }
}
template<class AFIELD>
void set_testvectors_impl_global(std::vector<AFIELD>& testvectors,
                                 AFopr<AFIELD> *afopr)
{
  std::string class_name = __func__;

  int nin = testvectors[0].nin();
  size_t nvol = testvectors[0].nvol();
  Field vec_tmp(nin, nvol, 1);

  set_testvectors_impl_global(testvectors, afopr, vec_tmp);
}
template<class AFIELD>
void set_testvectors_impl_local(std::vector<AFIELD>& testvectors,
                                AFopr<AFIELD> *afopr,
                                Field& vec_tmp)
{
  vout.general("using local random numbers for the initial testvectors\n");

  int nvec = testvectors.size();
  int nin = testvectors[0].nin();
  size_t nvol = testvectors[0].nvol();
  assert(nin == vec_tmp.nin() && nvol == vec_tmp.nvol());

  const int seed_base = 1395704;

  // every (rank, thread) pair gets its own seed, so each thread can fill
  // its slice of the field independently
  int myrank = Communicator::nodeid();
  int is0, ns0, ith, nth;
  set_threadtask(ith, nth, is0, ns0, nvol);
  int seed = seed_base + nth * myrank + ith;
  // (construction of the thread-local generator "random" from seed is
  //  elided in this excerpt)

  // convert the per-thread site range into a range of real components
  size_t is = is0 * nin;
  size_t ns = ns0 * nin;
  for (int i = 0; i < nvec; i++) {
    double *pv = vec_tmp.ptr(0);
    for (size_t s = is; s < ns; s++) {
      pv[s] = random.get();  // fill this thread's slice with local random numbers
    }

    // layout conversion as in set_testvectors_impl_global(); the branch is
    // an assumption reconstructed from the surviving convert() calls
    if (afopr->needs_convert()) {
      afopr->convert(testvectors[i], vec_tmp);
    } else {
      convert(index_f, testvectors[i], vec_tmp);
    }
  }
}
template<class AFIELD>
void set_testvectors_impl_local(std::vector<AFIELD>& testvectors,
                                AFopr<AFIELD> *afopr)
{
  std::string class_name = __func__;

  int nin = testvectors[0].nin();
  size_t nvol = testvectors[0].nvol();
  Field vec_tmp(nin, nvol, 1);

  set_testvectors_impl_local(testvectors, afopr, vec_tmp);
}
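//====================================================================
// The definitions below are member functions of the fine/coarse converter
// class template<class AFIELD1, class AFIELD2> (AFIELD1: coarse field
// type, AFIELD2: fine field type). The class name is elided throughout
// this excerpt; member names marked "assumed" are reconstructed from call
// sites and log messages.
//====================================================================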
template<class AFIELD1, class AFIELD2>
void init()  // name assumed; the original signature is elided in this excerpt
{
  m_tmp1.reset(m_nin, m_fine_nvol, 1);
  m_tmp2.reset(m_nin, m_fine_nvol, 1);
  m_field_tmp.reset(m_nin, m_fine_nvol, 1);

  int coarse_nvol = m_block_index.coarse_nvol();
  m_real_array.resize(coarse_nvol);
  m_complex_array.resize(coarse_nvol);
  m_coarse_array.resize(coarse_nvol * 2 * m_nvec);  // 2 chiralities per test vector
}
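// m_real_array and m_complex_array allocated above serve as the per-block
// workspaces (blockwise norms and inner products) that are handed to
// gramschmidt_in_block_impl() in gramschmidt() below.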
template<class AFIELD1, class AFIELD2>
void set_testvectors(const std::vector<AFIELD2>& w)  // signature assumed from the body
{
  assert(w.size() == m_nvec);
  for (int ivec = 0; ivec < m_nvec; ++ivec) {
    copy(m_testvectors[ivec], w[ivec]);
  }
}
template<class AFIELD1, class AFIELD2>
void set_testvectors()  // name assumed from the log message below
{
  set_testvectors_impl_local(m_testvectors, m_afopr_fine, m_field_tmp);

  // (timer handling elided; elapsed_time comes from those lines)
  vout.detailed(m_vl, "%s: time for set_testvector = %e sec\n",
                class_name.c_str(), elapsed_time);
}
template<class AFIELD1, class AFIELD2>
void set_coarse_array(const AFIELD1& coarse_vector) const
{
  const int coarse_nvol = m_block_index.coarse_nvol();
  const int num_vectors = m_testvectors.size();

  const int Nxc = m_block_index.coarse_lattice_size(0);
  const int Nyc = m_block_index.coarse_lattice_size(1);
  const int Nzc = m_block_index.coarse_lattice_size(2);
  const int Ntc = m_block_index.coarse_lattice_size(3);

  // coarse-lattice index with num_vectors "colors" and 2 chiralities
  /* index type elided in this excerpt */ index_c(Nxc, Nyc, Nzc, Ntc, num_vectors, 2);

  int ith, nth, is_coarse, ns_coarse;
  set_threadtask(ith, nth, is_coarse, ns_coarse, coarse_nvol);

  for (int ivec = 0; ivec < num_vectors; ++ivec) {
    for (int block = is_coarse; block < ns_coarse; ++block) {
      int    index_re1 = index_c.idx_SPr(ivec, 0, block, 0);
      int    index_im1 = index_c.idx_SPi(ivec, 0, block, 0);
      real_t re1 = coarse_vector.cmp(index_re1);
      real_t im1 = coarse_vector.cmp(index_im1);

      int    index_re2 = index_c.idx_SPr(ivec, 1, block, 0);
      int    index_im2 = index_c.idx_SPi(ivec, 1, block, 0);
      real_t re2 = coarse_vector.cmp(index_re2);
      real_t im2 = coarse_vector.cmp(index_im2);

      // the right-hand sides are assumptions: the original assignments are
      // split across lines elided from this excerpt
      m_coarse_array[block + coarse_nvol * (2 * ivec)]
        = complex_t(re1, im1);
      m_coarse_array[block + coarse_nvol * (2 * ivec + 1)]
        = complex_t(re2, im2);
    }
  }
}
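// Packing convention used above and below:
//   m_coarse_array[block + coarse_nvol * (2 * ivec + chirality)]
// i.e. the block index runs fastest, then the chirality (0/1), then the
// test-vector index ivec.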
template<class AFIELD1, class AFIELD2>
void set_coarse_vector(AFIELD1& coarse_vector) const
{
  const int coarse_nvol = m_block_index.coarse_nvol();
  const int num_vectors = m_testvectors.size();

  const int Nxc = m_block_index.coarse_lattice_size(0);
  const int Nyc = m_block_index.coarse_lattice_size(1);
  const int Nzc = m_block_index.coarse_lattice_size(2);
  const int Ntc = m_block_index.coarse_lattice_size(3);

  /* index type elided in this excerpt */ index_c(Nxc, Nyc, Nzc, Ntc, num_vectors, 2);

  // array1 points at the packed coarse data; its declaration is elided, and
  // the following is an assumption consistent with its uses below
  const complex_t *array1 = &m_coarse_array[0];

  int ith, nth, is_coarse, ns_coarse;
  set_threadtask(ith, nth, is_coarse, ns_coarse, coarse_nvol);

  for (int ivec = 0; ivec < num_vectors; ++ivec) {
    for (int block = is_coarse; block < ns_coarse; ++block) {
      real_t re1 = real(array1[block + coarse_nvol * (2 * ivec)]);
      real_t im1 = imag(array1[block + coarse_nvol * (2 * ivec)]);
      int index_re1 = index_c.idx_SPr(ivec, 0, block, 0);
      int index_im1 = index_c.idx_SPi(ivec, 0, block, 0);
      coarse_vector.set(index_re1, re1);
      coarse_vector.set(index_im1, im1);

      real_t re2 = real(array1[block + coarse_nvol * (2 * ivec + 1)]);
      real_t im2 = imag(array1[block + coarse_nvol * (2 * ivec + 1)]);
      int index_re2 = index_c.idx_SPr(ivec, 1, block, 0);
      int index_im2 = index_c.idx_SPi(ivec, 1, block, 0);
      coarse_vector.set(index_re2, re2);
      coarse_vector.set(index_im2, im2);
    }
  }

  // the chiral projectors are (1 +- gamma_5)/2: apply the factor 1/2 once here
  coarse_vector.scal(real_t(0.5));
}
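// set_coarse_array() and set_coarse_vector() are inverse packings of the
// same data, up to the overall factor 0.5 applied at the end of
// set_coarse_vector().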
template<class AFIELD1, class AFIELD2>
void make_fine_vector(AFIELD2& fine_vector, const AFIELD1& coarse_vector)
// name from the log message below; the parameter list is an assumption
{
  vout.general("%s: make_fine_vector is called\n", class_name.c_str());

  const int num_vectors = m_testvectors.size();
  assert(coarse_vector.nin() == 4 * num_vectors);

  const int coarse_nvol = m_block_index.coarse_nvol();

  // unpack the coarse field into m_coarse_array
  set_coarse_array(coarse_vector);

  fine_vector.set(0.0);

  for (int ivec = 0; ivec < num_vectors; ++ivec) {
    m_afopr_fine->mult_gm5(m_tmp1, m_testvectors[ivec]);
    // (chiral combination of m_tmp1 with m_testvectors[ivec] elided here)

    // chirality-0 coefficients of this test vector
    complex_t *array1 = &m_coarse_array[coarse_nvol * (2 * ivec)];
    // (blockwise accumulation onto fine_vector elided in this excerpt)

    // chirality-1 coefficients
    complex_t *array2 = &m_coarse_array[coarse_nvol * (2 * ivec + 1)];
    // (blockwise accumulation onto fine_vector elided in this excerpt)
  }
}
template<class AFIELD1, class AFIELD2>
void make_coarse_vector(AFIELD1& coarse_vector, const AFIELD2& fine_vector)
// name from the log message below; the parameter list is an assumption
{
  vout.general("%s: make_coarse_vector is called\n", class_name.c_str());

  const int num_vectors = m_testvectors.size();
  assert(coarse_vector.nin() == 4 * num_vectors);

  const int coarse_nvol = m_block_index.coarse_nvol();

  for (int ivec = 0; ivec < num_vectors; ++ivec) {
    // chiral split: m_tmp1 = (1 - gamma_5) v, m_tmp2 = (1 + gamma_5) v
    copy(m_tmp1, m_testvectors[ivec]);
    m_afopr_fine->mult_gm5(m_tmp2, m_testvectors[ivec]);
    axpy(m_tmp1, -1.0, m_tmp2);
    axpy(m_tmp2, +1.0, m_testvectors[ivec]);

    // chirality 0: blockwise inner products against the fine vector
    complex_t *array1 = &m_coarse_array[coarse_nvol * (2 * ivec)];
    block_dotc(array1, m_tmp1, fine_vector, m_block_index);

    // chirality 1
    complex_t *array2 = &m_coarse_array[coarse_nvol * (2 * ivec + 1)];
    block_dotc(array2, m_tmp2, fine_vector, m_block_index);
  }

  // pack m_coarse_array into the coarse field (includes the factor 1/2)
  set_coarse_vector(coarse_vector);
}
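// make_coarse_vector() and make_fine_vector() form the restriction /
// prolongation pair of the multigrid setup; both use the same chirally
// split test vectors.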
template<class AFIELD1, class AFIELD2>
void gramschmidt()  // name assumed: orthonormalizes the stored test vectors
{
  gramschmidt(m_testvectors);
}
template<class AFIELD1, class AFIELD2>
void gramschmidt(std::vector<AFIELD2>& vectors) const
{
  vout.general("%s: gramschmidt is called\n", class_name.c_str());

  // apply the blockwise Gram-Schmidt twice: the second pass removes the
  // loss of orthogonality a single pass leaves in finite precision
  gramschmidt_in_block_impl(vectors, m_afopr_fine, m_block_index,
                            &(m_real_array[0]), &(m_complex_array[0]));
  gramschmidt_in_block_impl(vectors, m_afopr_fine, m_block_index,
                            &(m_real_array[0]), &(m_complex_array[0]));
}
template<class AFIELD1, class AFIELD2>
void gramschmidt_double(std::vector<AFIELD2>& vectors) const
// name assumed from the log message
{
  vout.crucial("%s: gramschmidt_double is called\n", class_name.c_str());
  // (the body of this function continues beyond the present excerpt)