Bridge++  Ver. 2.0.2
afopr_Clover_dd-tmpl.h
Go to the documentation of this file.
1 
11 
12 template<typename AFIELD>
13 const std::string AFopr_Clover_dd<AFIELD>::class_name = "AFopr_Clover_dd";
14 
15 //====================================================================
// Construction-time setup. Reads the verbose level and gamma-matrix type from
// `params`, caches the lattice extents, sizes the boundary buffers, sets up the
// communication channels, creates the helper Fopr_CloverTerm, forwards the
// parameters to set_parameters() and resets the gauge/clover/work fields.
// NOTE(review): the signature line (listing line 17) is missing from this
// extract; the body reads a `params` object, presumably a const Parameters& —
// confirm against the header.
16 template<typename AFIELD>
18 {
20 
21  int req_comm = 1; // set 1 if communication forced any time
22  //int req_comm = 0; // set 0 if communication called in necessary
23 
24  std::string vlevel;
  // The fetch result is negated: a zero return is taken as "key found" and
  // the level is parsed; otherwise the global default level is used.
25  if (!params.fetch_string("verbose_level", vlevel)) {
26  m_vl = vout.set_verbose_level(vlevel);
27  } else {
28  m_vl = CommonParameters::Vlevel();
29  }
30 
31  vout.general(m_vl, "%s: construction\n", class_name.c_str());
32 
33  m_repr = "Dirac"; // now only the Dirac repr is available.
34 
  // A "gamma_matrix_type" entry is optional, but if present it must be "Dirac".
35  std::string repr;
36  if (!params.fetch_string("gamma_matrix_type", repr)) {
37  if (repr != "Dirac") {
38  vout.crucial(" Error at %s: unsupported gamma-matrix type: %s\n",
39  class_name.c_str(), repr.c_str());
40  exit(EXIT_FAILURE);
41  }
42  }
43 
44  m_Nc = CommonParameters::Nc();
45  if (m_Nc != 3) {
46  vout.crucial("%s: only applicable to Nc = 3\n",
47  class_name.c_str());
48  exit(EXIT_FAILURE);
49  }
50 
51  m_Nd = CommonParameters::Nd();
52  m_Nvc = 2 * m_Nc;
53  m_Ndf = 2 * m_Nc * m_Nc;
54 
55  m_Nx = CommonParameters::Nx();
56  m_Ny = CommonParameters::Ny();
57  m_Nz = CommonParameters::Nz();
58  m_Nt = CommonParameters::Nt();
59  m_Nst = CommonParameters::Nvol();
60  m_Ndim = CommonParameters::Ndim();
61 
  // Extents counted in SIMD-vector units (integer division by the vector
  // lengths VLENX, VLENY, VLEN).
62  m_Nxv = m_Nx / VLENX;
63  m_Nyv = m_Ny / VLENY;
64  m_Nstv = m_Nst / VLEN;
65 
66  vout.general(m_vl, " VLENX = %2d Nxv = %d\n", VLENX, m_Nxv);
67  vout.general(m_vl, " VLENY = %2d Nyv = %d\n", VLENY, m_Nyv);
68  vout.general(m_vl, " VLEN = %2d Nstv = %d\n", VLEN, m_Nstv);
69 
70  m_Nsize[0] = m_Nxv;
71  m_Nsize[1] = m_Nyv;
72  m_Nsize[2] = m_Nz;
73  m_Nsize[3] = m_Nt;
74 
  // do_comm[mu] is switched off only when req_comm == 0 and direction mu has
  // a single process; with req_comm == 1 (set above) every direction stays on.
  // do_comm_any ends up as the number of communicating directions.
75  do_comm_any = 0;
76  for (int mu = 0; mu < m_Ndim; ++mu) {
77  do_comm[mu] = 1;
78  if ((req_comm == 0) && (Communicator::npe(mu) == 1)) do_comm[mu] = 0;
79  do_comm_any += do_comm[mu];
  // NOTE(review): this message is printed without m_vl, unlike the other
  // vout.general calls in this routine — confirm that is intended.
80  vout.general(" do_comm[%d] = %d\n", mu, do_comm[mu]);
81  }
82 
  // Boundary buffer length per direction: m_Nvc * (m_Nd / 2) values for each
  // site of the face orthogonal to that direction.
83  m_bdsize.resize(m_Ndim);
84  int Nd2 = m_Nd / 2;
85  m_bdsize[0] = m_Nvc * Nd2 * m_Ny * m_Nz * m_Nt;
86  m_bdsize[1] = m_Nvc * Nd2 * m_Nx * m_Nz * m_Nt;
87  m_bdsize[2] = m_Nvc * Nd2 * m_Nx * m_Ny * m_Nt;
88  m_bdsize[3] = m_Nvc * Nd2 * m_Nx * m_Ny * m_Nz;
89 
90  setup_channels();
91 
93 
  // The clover-term helper gets its own copy of the parameters so that the
  // gamma-matrix type can be overridden when CHIRAL_ROTATION is defined.
94  Parameters params_ct = params;
95 
96 #ifdef CHIRAL_ROTATION
97  params_ct.set_string("gamma_matrix_type", "Chiral");
98  // m_fopr_csw = new Fopr_CloverTerm("Chiral");
99 #else
100  // m_fopr_csw = new Fopr_CloverTerm(m_repr);
101  // constructed here because Chiral repr is not implemented.
102 #endif
103  m_fopr_csw = new Fopr_CloverTerm(params_ct);
104 
105  set_parameters(params);
106 
108 
109  // gauge configuration.
110  m_U.reset(NDF, m_Nst, m_Ndim);
111 
112  // gauge configuration with block condition.
113  m_Ublock.reset(NDF, m_Nst, m_Ndim);
114 
115  // working vectors.
116  int NinF = 2 * m_Nc * m_Nd;
117  m_v2.reset(NinF, m_Nst, 1);
118 
119  int Ndm2 = m_Nd * m_Nd / 2;
120 
  // Both preprocessor branches currently reset m_T to the same size.
121 #ifdef CHIRAL_ROTATION
122  m_T.reset(NDF * Ndm2, m_Nst, 1); // to do: reduce the size to 1/2
123 #else
124  m_T.reset(NDF * Ndm2, m_Nst, 1);
125 #endif
126 
127  vout.general(m_vl, "%s: construction finished.\n",
128  class_name.c_str());
129 }
130 
131 
132 //====================================================================
// Initialises the per-direction send/receive channels (up and down) with a
// buffer of m_bdsize[mu] real_t values, and registers the channels of every
// communicating direction in chset_send / chset_recv.
// NOTE(review): the signature line (listing line 134) is missing from this
// extract; the construction code calls this routine as setup_channels().
133 template<typename AFIELD>
135 {
136  chsend_up.resize(m_Ndim);
137  chrecv_up.resize(m_Ndim);
138  chsend_dn.resize(m_Ndim);
139  chrecv_dn.resize(m_Ndim);
140 
141  for (int mu = 0; mu < m_Ndim; ++mu) {
  // Buffer size in bytes.
142  size_t Nvsize = m_bdsize[mu] * sizeof(real_t);
143 
144  chsend_dn[mu].send_init(Nvsize, mu, -1);
145  chsend_up[mu].send_init(Nvsize, mu, 1);
146 #ifdef USE_MPI
147  chrecv_up[mu].recv_init(Nvsize, mu, 1);
148  chrecv_dn[mu].recv_init(Nvsize, mu, -1);
149 #else
  // Without USE_MPI each receive channel is handed the buffer of the
  // opposite send channel (recv_up <- send_dn, recv_dn <- send_up).
150  void *buf_up = (void *)chsend_dn[mu].ptr();
151  chrecv_up[mu].recv_init(Nvsize, mu, 1, buf_up);
152  void *buf_dn = (void *)chsend_up[mu].ptr();
153  chrecv_dn[mu].recv_init(Nvsize, mu, -1, buf_dn);
154 #endif
155 
  // Only communicating directions join the channel sets that are
  // started and waited on as a group.
156  if (do_comm[mu] == 1) {
157  chset_send.append(chsend_up[mu]);
158  chset_send.append(chsend_dn[mu]);
159  chset_recv.append(chrecv_up[mu]);
160  chset_recv.append(chrecv_dn[mu]);
161  }
162  }
163 }
164 
165 
166 //====================================================================
// Clean-up: releases the Fopr_CloverTerm helper that the construction code
// allocated with new.
// NOTE(review): the signature line (listing line 168) and listing line 170
// are missing from this extract, so the routine's name is not visible here.
167 template<typename AFIELD>
169 {
171 
172  delete m_fopr_csw;
173 }
174 
175 
176 //====================================================================
// Parameter-object overload of set_parameters: extracts kappa, cSW, the
// boundary condition and the block size from `params`, passes them to the
// value-based overload, and forwards the parameters to the clover-term helper.
// Terminates the program if any of the four entries is missing.
// NOTE(review): the signature line (listing line 178) is missing from this
// extract.
177 template<typename AFIELD>
179 {
180  const string str_vlevel = params.get_string("verbose_level");
181  m_vl = vout.set_verbose_level(str_vlevel);
182 
183  //- fetch and check input parameters
184  double kappa, csw;
185  std::vector<int> bc;
186  std::vector<int> block_size;
187 
  // Each fetch result is added to err; any nonzero total is treated as
  // "at least one parameter not found".
188  int err = 0;
189  err += params.fetch_double("hopping_parameter", kappa);
190  err += params.fetch_double("clover_coefficient", csw);
191  err += params.fetch_int_vector("boundary_condition", bc);
192  err += params.fetch_int_vector("block_size", block_size);
193  if (err) {
194  vout.crucial(m_vl, "Error at %s: input parameter not found.\n",
195  class_name.c_str());
196  exit(EXIT_FAILURE);
197  }
198 
199  set_parameters(real_t(kappa), real_t(csw), bc, block_size);
200 
  // The helper receives a copy so that the gamma-matrix type can be
  // overridden when CHIRAL_ROTATION is defined.
201  Parameters params_csw = params;
202 #ifdef CHIRAL_ROTATION
203  params_csw.set_string("gamma_matrix_type", "Chiral");
204 #endif
205  m_fopr_csw->set_parameters(params_csw);
206 }
207 
208 
209 //====================================================================
// Value-based overload of set_parameters: stores kappa, cSW, the boundary
// condition and the block size, validates the block size against the local
// lattice and the SIMD vector lengths, and computes the parity offset m_Ieo.
// Member writes are done by thread 0 only, bracketed by OpenMP barriers.
// NOTE(review): the first signature line (listing line 211) is missing from
// this extract; only the parameter list is visible.
// NOTE(review): only bc.size() is asserted; block_size is indexed up to
// m_Ndim - 1 without a size check.
210 template<typename AFIELD>
212  const real_t CKs,
213  const real_t csw,
214  const std::vector<int> bc,
215  const std::vector<int> block_size)
216 {
217  assert(bc.size() == m_Ndim);
218 
219 #pragma omp barrier
220 
221  int ith = ThreadManager::get_thread_id();
222 
223  if (ith == 0) {
224  m_CKs = CKs;
225  m_csw = csw;
226  m_boundary.resize(m_Ndim);
227  m_block_size.resize(m_Ndim);
228  for (int mu = 0; mu < m_Ndim; ++mu) {
229  m_boundary[mu] = bc[mu];
230  m_block_size[mu] = block_size[mu];
231  }
232  }
233 #pragma omp barrier
234 
235  vout.general(m_vl, "%s: set parameters\n", class_name.c_str());
236  vout.general(m_vl, " gamma-matrix type = %s\n", m_repr.c_str());
237  vout.general(m_vl, " kappa = %8.4f\n", m_CKs);
238  vout.general(m_vl, " cSW = %12.8f\n", m_csw);
239  for (int mu = 0; mu < m_Ndim; ++mu) {
240  vout.general(m_vl, " boundary[%d] = %2d\n", mu, m_boundary[mu]);
241  }
242  for (int mu = 0; mu < m_Ndim; ++mu) {
243  vout.general(m_vl, " block_size[%d] = %2d\n", mu, m_block_size[mu]);
244  }
245 
  // Every local extent must be an exact multiple of the block size in that
  // direction; the remainders are non-negative, so their sum is zero only if
  // all of them are zero.
246  int rem = m_Nx % m_block_size[0]
247  + m_Ny % m_block_size[1]
248  + m_Nz % m_block_size[2]
249  + m_Nt % m_block_size[3];
250  if (rem != 0) {
251  vout.crucial(m_vl, "%s: block_size is irelevant.\n",
252  class_name.c_str());
253  exit(EXIT_FAILURE);
254  }
255 
256  if (ith == 0) {
257  // note that m_block_sizev is simple array
258  m_block_sizev[0] = m_block_size[0] / VLENX;
259  m_block_sizev[1] = m_block_size[1] / VLENY;
260  m_block_sizev[2] = m_block_size[2];
261  m_block_sizev[3] = m_block_size[3];
262  }
263 #pragma omp barrier
264 
  // Multiplying back detects a block size that is not a multiple of the
  // vector length (the integer division above would have truncated).
265  if ((m_block_sizev[0] * VLENX != m_block_size[0]) ||
266  (m_block_sizev[1] * VLENY != m_block_size[1])) {
267  vout.crucial(m_vl, "%s: bad blocksize in XY: must be divided by"
268  " VLENX=%d, VLENY=%d but are %d, %d\n",
269  class_name.c_str(), VLENX, VLENY,
270  m_block_size[0], m_block_size[1]);
271  exit(EXIT_FAILURE);
272  }
273 
  // m_Ieo: parity (mod 2) of the sum over directions of
  // (blocks per process) * (process coordinate); the ieo-based routine
  // further down adds it to its ieo argument.
274  if (ith == 0) {
275  int NBx = m_Nx / m_block_size[0];
276  int NBy = m_Ny / m_block_size[1];
277  int NBz = m_Nz / m_block_size[2];
278  int NBt = m_Nt / m_block_size[3];
279  int ipex = Communicator::ipe(0);
280  int ipey = Communicator::ipe(1);
281  int ipez = Communicator::ipe(2);
282  int ipet = Communicator::ipe(3);
283  m_Ieo = (NBx * ipex + NBy * ipey + NBz * ipez + NBt * ipet) % 2;
284  }
285 #pragma omp barrier
286 }
287 
288 
289 //====================================================================
// Writes the current operator settings (kappa, cSW, boundary condition, block
// size, gamma-matrix type and verbose level) into `params`, using the same
// keys that set_parameters reads.
// NOTE(review): the signature line (listing line 291) is missing from this
// extract, so the routine's name and the exact type of `params` are not
// visible here.
290 template<typename AFIELD>
292 {
293  params.set_double("hopping_parameter", double(m_CKs));
294  params.set_double("clover_coefficient", double(m_csw));
295  params.set_int_vector("boundary_condition", m_boundary);
296  params.set_int_vector("block_size", m_block_size);
297  params.set_string("gamma_matrix_type", m_repr);
298 
299  params.set_string("verbose_level", vout.get_verbose_level(m_vl));
300 }
301 
302 
303 //====================================================================
// Entry point for setting the gauge configuration `u`. Chooses between the
// direct implementation and the variant that opens its own OpenMP parallel
// region, depending on the number of threads currently reported.
// NOTE(review): the signature line (listing line 305) is missing from this
// extract; the log message names the routine set_config.
304 template<typename AFIELD>
306 {
307  int nth = ThreadManager::get_num_threads();
308 
309  vout.detailed(m_vl, "%s: set_config is called: num_threads = %d\n",
310  class_name.c_str(), nth);
311 
  // More than one thread reported: call the implementation directly.
  // Otherwise go through set_config_omp(), which wraps the same
  // implementation in "#pragma omp parallel".
312  if (nth > 1) {
313  set_config_impl(u);
314  } else {
315  set_config_omp(u);
316  }
317 
318  vout.detailed(m_vl, "%s: set_config finished\n", class_name.c_str());
319 }
320 
321 
322 //====================================================================
// Runs set_config_impl(u) inside a newly opened OpenMP parallel region.
// NOTE(review): the signature line (listing line 324) is missing from this
// extract; the log message names the routine set_config_omp.
323 template<typename AFIELD>
325 {
326  vout.detailed(m_vl, " set_config_omp is called.\n");
327 
328 #pragma omp parallel
329  {
330  set_config_impl(u);
331  }
332 }
333 
334 
335 //====================================================================
// Per-thread implementation of the configuration setup: records the pointer
// `u`, converts the gauge field into m_U, applies the boundary condition,
// copies it into m_Ublock and applies the block condition there, then rebuilds
// the clover term.
// NOTE(review): the signature line (listing line 337) and listing line 347,
// which presumably declares `index`, are missing from this extract.
336 template<typename AFIELD>
338 {
339 #pragma omp barrier
340 
341  int ith = ThreadManager::get_thread_id();
342 
343  if (ith == 0) m_conf = u;
344 
  // NOTE(review): m_fopr_csw->set_config(u) is called here and again after
  // set_block_config() below — confirm whether both calls are required.
345  m_fopr_csw->set_config(u);
346 
348 
349  convert_gauge(index, m_U, *u);
350 
351  QXS_Gauge::set_boundary(m_U, m_boundary);
352 
353  copy(m_Ublock, m_U);
354 #pragma omp barrier
355 
356  set_block_config(m_Ublock);
357 
358  m_fopr_csw->set_config(u);
359 
  // The clover matrix m_T is built by the routine matching the compile-time
  // representation switch.
360 #ifdef CHIRAL_ROTATION
361  set_csw_chrot();
362 #else
363  set_csw();
364 #endif
365 }
366 
367 
368 //====================================================================
// Applies the block condition to the gauge field U: for each direction mu,
// all NDF components of the link at a site are set to zero when the site's
// coordinate in mu is the last one of a block, i.e. (coord + 1) is a multiple
// of m_block_size[mu]. Sites are divided among threads by set_threadtask_mult.
// NOTE(review): the signature line (listing line 370) and listing line 374,
// which presumably declares `index`, are missing from this extract; the caller
// invokes this as set_block_config(m_Ublock).
369 template<typename AFIELD>
371 {
372 #pragma omp barrier
373 
375 
376  int ith, nth, is, ns;
377  set_threadtask_mult(ith, nth, is, ns, m_Nst);
378 
379  for (int site = is; site < ns; ++site) {
  // Decompose the linear site index with x running fastest.
380  int ix = site % m_Nx;
381  int iy = (site / m_Nx) % m_Ny;
382  int iz = (site / (m_Nx * m_Ny)) % m_Nz;
383  int it = site / (m_Nx * m_Ny * m_Nz);
384 
385  int mu = 0;
386  if ((ix + 1) % m_block_size[mu] == 0) {
387  for (int in = 0; in < NDF; ++in) {
388  int i = index.idx_G(in, site, mu);
389  U.set(i, real_t(0.0));
390  }
391  }
392 
393  mu = 1;
394  if ((iy + 1) % m_block_size[mu] == 0) {
395  for (int in = 0; in < NDF; ++in) {
396  int i = index.idx_G(in, site, mu);
397  U.set(i, real_t(0.0));
398  }
399  }
400 
401  mu = 2;
402  if ((iz + 1) % m_block_size[mu] == 0) {
403  for (int in = 0; in < NDF; ++in) {
404  int i = index.idx_G(in, site, mu);
405  U.set(i, real_t(0.0));
406  }
407  }
408 
409  mu = 3;
410  if ((it + 1) % m_block_size[mu] == 0) {
411  for (int in = 0; in < NDF; ++in) {
412  int i = index.idx_G(in, site, mu);
413  U.set(i, real_t(0.0));
414  }
415  }
416  }
417 
418 #pragma omp barrier
419 }
420 
421 
422 //====================================================================
// Builds the clover matrix m_T for the Dirac representation. For every
// (id, ic) with id < m_Nd / 2 a unit source is placed in m_w1, m_fopr_csw is
// applied in mode "D", and the four spin components of the result m_w2 are
// copied into m_T (spin components 2 and 3 with flipped sign). Finally m_T is
// scaled by 1 / m_CKs.
// NOTE(review): the signature line (listing line 424) and listing line 436,
// which presumably declares `index`, are missing from this extract; the
// declarations of m_w1 / m_w2 are not visible here either.
423 template<typename AFIELD>
425 {
426  // The Dirac representation is assumed.
427  // Current implementation makes use of the corelib Fopr_CloverTerm
428  // that requires change of the gamma-matrix convention from
429  // Bridge++ to QXS (BQCD) by multiplying gamma_4 before and after
430  // m_fopr_csw->mult().
431  // For numerical efficiency, proper implementation is necessary.
432  // [08 Mar 2021 H.Matsufuru]
433 
434 #pragma omp barrier
435 
437 
438  const int Nin = NDF * ND * 2;
439 
440  m_fopr_csw->set_mode("D");
441 
442  int ith, nth, is, ns;
443  set_threadtask_mult(ith, nth, is, ns, m_Nst);
444 
445  for (int id = 0; id < m_Nd / 2; ++id) {
446  for (int ic = 0; ic < m_Nc; ++ic) {
447  m_w1.set(0.0);
448 #pragma omp barrier
449 
  // Unit source: real part 1 at (colour ic, spin id) on this thread's sites.
450  for (int site = is; site < ns; ++site) {
451  m_w1.set_r(ic, id, site, 0, 1.0);
452  }
453 #pragma omp barrier
454 
455  m_fopr_csw->mult(m_w2, m_w1);
456 #pragma omp barrier
457 
  // Result spin components 0, 1, 2, 3 are stored at offsets id + 0,
  // id + 4, id + 2 and id + 6 respectively in the innermost index.
458  for (int site = is; site < ns; ++site) {
459  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
460  real_t vt_r = m_w2.cmp_r(ic2, 0, site, 0);
461  real_t vt_i = m_w2.cmp_i(ic2, 0, site, 0);
462  int in = ic2 + NC * (ic + NC * (id + 0));
463  int idx_r = index.idx(2 * in, Nin, site, 0);
464  int idx_i = index.idx(2 * in + 1, Nin, site, 0);
465  m_T.set(idx_r, vt_r);
466  m_T.set(idx_i, vt_i);
467  }
468 
469  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
470  real_t vt_r = m_w2.cmp_r(ic2, 1, site, 0);
471  real_t vt_i = m_w2.cmp_i(ic2, 1, site, 0);
472  int in = ic2 + NC * (ic + NC * (id + 4));
473  int idx_r = index.idx(2 * in, Nin, site, 0);
474  int idx_i = index.idx(2 * in + 1, Nin, site, 0);
475  m_T.set(idx_r, vt_r);
476  m_T.set(idx_i, vt_i);
477  }
478 
  // Spin components 2 and 3 are stored with the opposite sign.
479  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
480  real_t vt_r = -m_w2.cmp_r(ic2, 2, site, 0);
481  real_t vt_i = -m_w2.cmp_i(ic2, 2, site, 0);
482  int in = ic2 + NC * (ic + NC * (id + 2));
483  int idx_r = index.idx(2 * in, Nin, site, 0);
484  int idx_i = index.idx(2 * in + 1, Nin, site, 0);
485  m_T.set(idx_r, vt_r);
486  m_T.set(idx_i, vt_i);
487  }
488 
489  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
490  real_t vt_r = -m_w2.cmp_r(ic2, 3, site, 0);
491  real_t vt_i = -m_w2.cmp_i(ic2, 3, site, 0);
492  int in = ic2 + NC * (ic + NC * (id + 6));
493  int idx_r = index.idx(2 * in, Nin, site, 0);
494  int idx_i = index.idx(2 * in + 1, Nin, site, 0);
495  m_T.set(idx_r, vt_r);
496  m_T.set(idx_i, vt_i);
497  }
498  } // site loop
499 #pragma omp barrier
500  }
501  }
502 #pragma omp barrier
503 
504  real_t kappaR = 1.0 / m_CKs;
505  scal(m_T, kappaR);
506 
507 #pragma omp barrier
508 }
509 
510 
511 //====================================================================
// Builds the clover matrix m_T for the chiral-rotation variant. For every
// (id, ic) with id < m_Nd / 2 a source with unit entries at spin id and
// id + 2 is placed in m_w1, m_fopr_csw is applied in mode "D", and half of
// each result component is stored into m_T at the position given by the
// lookup table `idx` (negative table entries are not stored). Finally m_T is
// scaled by 1 / m_CKs.
// NOTE(review): the signature line (listing line 513) and listing line 522,
// which presumably declares `index`, are missing from this extract.
512 template<typename AFIELD>
514 {
515  // This method set the clover term matrix assuming the chiral
516  // representation for gamma-matrix in Bridge++ convention.
517  // The conversion from the Bridge++ to QWS convention and
518  // from Dirac to chiral representations are assumed to be cared
519  // in BridgeQXS functions.
520  // [17 Jul 2021 H.Matsufuru]
521 
523 
524  const int Nin = NDF * ND * 2;
525 
526  m_fopr_csw->set_mode("D");
527 
528  int ith, nth, is, ns;
529  set_threadtask_mult(ith, nth, is, ns, m_Nst);
530 
  // Table of 36 (real, imaginary) index pairs, addressed as idx[2 * ij] and
  // idx[2 * ij + 1] with ij = 6 * i + j; only entries >= 0 are used below.
  // NOTE(review): the single -5 entry behaves like -1 because only ">= 0" is
  // tested — confirm it is not a typo for -1.
531  //11 22 33 44 55 66 12 34 56 23 45 13 24 35 46 15 26 14 36 25 16
532  constexpr int idx[72] = {
533  0, -1, 6, 7, 16, 17, 28, 29, 24, 25, 34, 35,
534  -1, -1, 1, -1, 12, 13, 18, 19, 32, 33, 26, 27,
535  -1, -1, -1, -5, 2, -1, 8, 9, 20, 21, 30, 31,
536  -1, -1, -1, -1, -1, -1, 3, -1, 14, 15, 22, 23,
537  -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, 10, 11,
538  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1,
539  };
540 
541  for (int id = 0; id < m_Nd / 2; ++id) {
542  for (int ic = 0; ic < m_Nc; ++ic) {
543  m_w1.set(0.0);
544 #pragma omp barrier
545 
546  for (int site = is; site < ns; ++site) {
547  m_w1.set_r(ic, id, site, 0, 1.0);
548  m_w1.set_r(ic, id + 2, site, 0, 1.0);
549  }
550 #pragma omp barrier
551 
552  m_fopr_csw->mult(m_w2, m_w1);
553 #pragma omp barrier
554 
555  for (int site = is; site < ns; ++site) {
556  for (int id2 = 0; id2 < m_Nd; ++id2) {
557  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
558  real_t vt_r = 0.5 * m_w2.cmp_r(ic2, id2, site, 0);
559  real_t vt_i = 0.5 * m_w2.cmp_i(ic2, id2, site, 0);
  // Row i and column j of a (2 * m_Nc)-wide block; id2 / 2 selects which of
  // the two blocks (offset 36) the entry belongs to.
560  int i = ic2 + m_Nc * (id2 % 2);
561  int j = ic + m_Nc * id;
562  int ij = m_Nc * 2 * i + j;
563  int in_r = idx[2 * ij];
564  int in_i = idx[2 * ij + 1];
565  if (in_r >= 0) {
566  in_r += 36 * (id2 / 2);
567  int idx_r = index.idx(in_r, Nin, site, 0);
568  m_T.set(idx_r, vt_r);
569  }
570  if (in_i >= 0) {
571  in_i += 36 * (id2 / 2);
572  int idx_i = index.idx(in_i, Nin, site, 0);
573  m_T.set(idx_i, vt_i);
574  }
575  }
576  }
577  } // site loop
578 #pragma omp barrier
579  }
580  }
581 #pragma omp barrier
582 
583  real_t kappaR = 1.0 / m_CKs;
584  scal(m_T, kappaR);
585 
586 #pragma omp barrier
587 }
588 
589 
590 //====================================================================
// Converts the field `w` into the work field m_v2 with convert_spinor and
// then applies mult_gm4, writing the result to `v`.
// NOTE(review): the signature line (listing line 592) and listing line 594,
// which presumably declares `index_lex`, are missing from this extract, so
// the routine's name and argument types are not visible here.
591 template<typename AFIELD>
593 {
595  convert_spinor(index_lex, m_v2, w);
596 
597  mult_gm4(v, m_v2);
598 }
599 
600 
601 //====================================================================
// Applies mult_gm4 to `w` into the work field m_v2, then converts m_v2 into
// `v` with reverse_spinor — the same two steps as the preceding routine, in
// the opposite order and direction.
// NOTE(review): the signature line (listing line 603) and listing line 607,
// which presumably declares `index_lex`, are missing from this extract, so
// the routine's name and argument types are not visible here.
602 template<typename AFIELD>
604 {
605  mult_gm4(m_v2, w);
606 
608  reverse_spinor(index_lex, v, m_v2);
609 }
610 
611 
612 //====================================================================
// Dispatches on the direction `mu` (0..3) to the matching routine mult_xp,
// mult_yp, mult_zp or mult_tp, always with third argument 0. Any other `mu`
// is reported as an error and terminates the program.
// NOTE(review): the signature line (listing line 614) is missing from this
// extract; the error message names the routine mult_up.
613 template<typename AFIELD>
615 {
616  real_t *vp = v.ptr(0);
  // The const qualifier of w is cast away to obtain a raw pointer.
617  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
618 
619  if (mu == 0) {
620  mult_xp(vp, wp, 0);
621  } else if (mu == 1) {
622  mult_yp(vp, wp, 0);
623  } else if (mu == 2) {
624  mult_zp(vp, wp, 0);
625  } else if (mu == 3) {
626  mult_tp(vp, wp, 0);
627  } else {
628  vout.crucial(m_vl, "%s: mult_up for %d direction is undefined.",
629  class_name.c_str(), mu);
630  exit(EXIT_FAILURE);
631  }
632 }
633 
634 
635 //====================================================================
// Dispatches on the direction `mu` (0..3) to the matching routine mult_xm,
// mult_ym, mult_zm or mult_tm, always with third argument 0. Any other `mu`
// is reported as an error and terminates the program.
// NOTE(review): the signature line (listing line 637) is missing from this
// extract; the error message names the routine mult_dn.
636 template<typename AFIELD>
638 {
639  real_t *vp = v.ptr(0);
  // The const qualifier of w is cast away to obtain a raw pointer.
640  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
641 
642  if (mu == 0) {
643  mult_xm(vp, wp, 0);
644  } else if (mu == 1) {
645  mult_ym(vp, wp, 0);
646  } else if (mu == 2) {
647  mult_zm(vp, wp, 0);
648  } else if (mu == 3) {
649  mult_tm(vp, wp, 0);
650  } else {
651  vout.crucial(m_vl, "%s: mult_dn for %d direction is undefined.",
652  class_name.c_str(), mu);
653  exit(EXIT_FAILURE);
654  }
655 }
656 
657 
658 //====================================================================
659 template<typename AFIELD>
660 void AFopr_Clover_dd<AFIELD>::set_mode(std::string mode)
661 {
662 #pragma omp barrier
663 
664  int ith = ThreadManager::get_thread_id();
665  if (ith == 0) m_mode = mode;
666 
667 #pragma omp barrier
668 }
669 
670 
671 //====================================================================
// Returns the stored mode string m_mode.
// NOTE(review): the signature line (listing line 673) is missing from this
// extract, so the routine's name and return type are not visible here.
672 template<typename AFIELD>
674 {
675  return m_mode;
676 }
677 
678 
679 //====================================================================
// Applies the operator selected by m_mode to `w`, writing to `v`:
// "D" -> D, "DdagD" -> DdagD, "Ddag" -> Ddag, "H" -> H. An unknown mode is
// reported and terminates the program.
// NOTE(review): the signature line (listing line 681) is missing from this
// extract; the name is inferred from the mirrored routine that follows.
680 template<typename AFIELD>
682 {
683  if (m_mode == "D") {
684  return D(v, w);
685  } else if (m_mode == "DdagD") {
686  return DdagD(v, w);
687  } else if (m_mode == "Ddag") {
688  return Ddag(v, w);
689  } else if (m_mode == "H") {
690  return H(v, w);
691  } else {
  // NOTE(review): the raw printf/fflush calls bypass vout and look like
  // leftover debugging output; the sibling dispatcher does not have them.
692  printf("mode=%s\n", m_mode.c_str());
693  fflush(stdout);
694  vout.crucial(m_vl, "%s: mode undefined.\n", class_name.c_str());
695  fflush(stdout);
696  exit(EXIT_FAILURE);
697  }
698 }
699 
700 
701 //====================================================================
// Applies the adjoint of the operator selected by m_mode: "D" -> Ddag,
// "Ddag" -> D, while "DdagD" and "H" map to themselves. An unknown mode is
// reported and aborts the program.
// NOTE(review): the signature line (listing line 703) is missing from this
// extract, so the routine's name is not visible here.
702 template<typename AFIELD>
704 {
705  if (m_mode == "D") {
706  return Ddag(v, w);
707  } else if (m_mode == "DdagD") {
708  return DdagD(v, w);
709  } else if (m_mode == "Ddag") {
710  return D(v, w);
711  } else if (m_mode == "H") {
712  return H(v, w);
713  } else {
714  vout.crucial(m_vl, "%s: mode undefined.\n", class_name.c_str());
  // NOTE(review): abort() does not return, so the exit() after it is
  // unreachable; the sibling dispatcher uses exit(EXIT_FAILURE) only.
715  abort();
716  exit(EXIT_FAILURE);
717  }
718 }
719 
720 
721 //====================================================================
// Thin wrapper that forwards to mult_D(v, w); the alternative implementation
// mult_D_alt is kept as a commented-out option.
// NOTE(review): the signature line (listing line 723) is missing from this
// extract; presumably this is the routine the dispatchers call as D(v, w).
722 template<typename AFIELD>
724 {
725  mult_D(v, w);
726  //mult_D_alt(v, w);
727 }
728 
729 
730 //====================================================================
// Computes v = gm5 * D * gm5 * D * w using the work field m_v2 as the
// intermediate: D, mult_gm5, D, mult_gm5 in that order.
// NOTE(review): the signature line (listing line 732) is missing from this
// extract; presumably this is the routine the dispatchers call as DdagD(v, w).
731 template<typename AFIELD>
733 {
734  D(m_v2, w);
735  mult_gm5(v, m_v2);
736  D(m_v2, v);
737  mult_gm5(v, m_v2);
738 }
739 
740 
741 //====================================================================
// Computes v = gm5 * D * gm5 * w: mult_gm5 into v, D into the work field
// m_v2, then mult_gm5 back into v.
// NOTE(review): the signature line (listing line 743) is missing from this
// extract; presumably this is the routine the dispatchers call as Ddag(v, w).
742 template<typename AFIELD>
744 {
745  mult_gm5(v, w);
746  D(m_v2, v);
747  mult_gm5(v, m_v2);
748 }
749 
750 
751 //====================================================================
// Applies mult_gm5 to `w` into `v`, then combines v with w through aypx using
// coefficient +1 when ch == 1 and -1 for any other ch.
// NOTE(review): the first signature line (listing line 753) is missing from
// this extract, so the routine's name is not visible here; aypx(a, v, w)
// presumably computes v = a * v + w — confirm.
752 template<typename AFIELD>
754  const int ch)
755 {
756  real_t *vp = v.ptr(0);
757  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
758 
759  mult_gm5(vp, wp);
760  if (ch == 1) {
761  aypx(real_t(1.0), vp, wp);
762  } else {
763  aypx(real_t(-1.0), vp, wp);
764  }
765 }
766 
767 
768 //====================================================================
// Field-level wrapper: obtains raw pointers from `v` and `w`, synchronises
// the threads with a barrier, and calls the pointer-based mult_gm5.
// NOTE(review): the signature line (listing line 770) is missing from this
// extract; callers in this file invoke it as mult_gm5(v, w) on fields.
769 template<typename AFIELD>
771 {
772  real_t *vp = v.ptr(0);
773  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
774 
775 #pragma omp barrier
776 
777  mult_gm5(vp, wp);
778 }
779 
780 
781 //====================================================================
// Pointer-based gamma_5 multiplication in the Dirac representation. With
// USE_QXS_ACLE the work is delegated to BridgeQXS::mult_wilson_gm5_dirac;
// otherwise each (colour, spin) complex component of w is negated and stored
// into v at spin index (id + 2) % ND. Ends with a barrier.
// NOTE(review): the signature line (listing line 783) is missing from this
// extract; `v` and `w` are indexed as raw arrays here.
782 template<typename AFIELD>
784 { // Dirac representation.
785 #ifdef USE_QXS_ACLE
786  BridgeQXS::mult_wilson_gm5_dirac(v, w, m_Nsize);
787 #else
788  int ith, nth, is, ns;
789  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
790  Vsimd_t wt[2];
791 
792  for (int site = is; site < ns; ++site) {
793  for (int ic = 0; ic < NC; ++ic) {
794  for (int id = 0; id < ND; ++id) {
795  int idx1 = 2 * (id + ND * ic) + NVCD * site;
796  load_vec(wt, &w[VLEN * idx1], 2);
797  // sign flip is needed for unitary equivalent gamma5
798  scal_vec(wt, (real_t)(-1.0), 2);
  // Destination spin index: upper and lower pairs are exchanged.
799  int id2 = (id + 2) % ND;
800  int idx2 = 2 * (id2 + ND * ic) + NVCD * site;
801  save_vec(&v[VLEN * idx2], wt, 2);
802  }
803  }
804  }
805 #endif
806 
807 #pragma omp barrier
808 }
809 
810 
811 //====================================================================
// Scales the array `vp` in place by the factor `a`, site by site over this
// thread's share of the m_Nstv vector sites. No barrier is issued here.
// NOTE(review): the signature line (listing line 813) is missing from this
// extract; the body matches the calls scal_local(vp, a) made elsewhere in
// this file.
812 template<typename AFIELD>
814 {
815  int ith, nth, is, ns;
816  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
817 
818  for (int site = is; site < ns; ++site) {
819  Vsimd_t vt[NVCD];
820  load_vec(vt, &vp[VLEN * NVCD * site], NVCD);
821  scal_vec(vt, a, NVCD);
822  save_vec(&vp[VLEN * NVCD * site], vt, NVCD);
823  }
824 }
825 
826 
827 //====================================================================
// Copies `w` to `v` with spin components id < ND2 unchanged and spin
// components ND2 <= id < ND negated, for every colour and every vector site
// assigned to this thread. Ends with a barrier.
// NOTE(review): the signature line (listing line 829) is missing from this
// extract; presumably this is the routine called as mult_gm4(v, w) elsewhere
// in this file.
828 template<typename AFIELD>
830 {
831  real_t *vp = v.ptr(0);
832  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
833 
834  int ith, nth, is, ns;
835  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
836 
837  Vsimd_t wt[2];
838 
839  for (int site = is; site < ns; ++site) {
840  for (int ic = 0; ic < NC; ++ic) {
  // First half of the spin components: plain copy.
841  for (int id = 0; id < ND2; ++id) {
842  int idx1 = 2 * (id + ND * ic) + NVCD * site;
843  load_vec(wt, &wp[VLEN * idx1], 2);
844  save_vec(&vp[VLEN * idx1], wt, 2);
845  }
846 
  // Second half of the spin components: copy with sign flip.
847  for (int id = ND2; id < ND; ++id) {
848  int idx1 = 2 * (id + ND * ic) + NVCD * site;
849  load_vec(wt, &wp[VLEN * idx1], 2);
850  scal_vec(wt, real_t(-1.0), 2);
851  save_vec(&vp[VLEN * idx1], wt, 2);
852  }
853  }
854  }
855 
856 #pragma omp barrier
857 }
858 
859 
860 //====================================================================
// Adds the clover-term contribution to `v2`: for every vector site the
// current v2 values are loaded, products of the clover matrix m_T with the
// input `v1` are accumulated with axpy_vec, and the result is stored back.
// NOTE(review): the signature line (listing line 862) is missing from this
// extract; presumably this is the routine called as mult_csw(vp, wp).
// NOTE(review): the thread range comes from set_threadtask, whereas the
// sibling routines use set_threadtask_mult — confirm this is intended.
861 template<typename AFIELD>
863 { // Dirac representation is assumed.
864  real_t *u = m_T.ptr(0);
865 
866  int ith, nth, is, ns;
867  set_threadtask(ith, nth, is, ns, m_Nstv);
868 
869 #pragma omp barrier
870 
871  for (int site = is; site < ns; ++site) {
872  Vsimd_t v2v[NVCD];
873  load_vec(v2v, &v2[VLEN * NVCD * site], NVCD);
874 
875  Vsimd_t v1v[NVCD];
876  load_vec(v1v, &v1[VLEN * NVCD * site], NVCD);
877 
878  Vsimd_t ut[NDF], wt1[2], wt2[2];
879 
880  for (int jd = 0; jd < ND2; ++jd) {
881  for (int id = 0; id < ND; ++id) {
  // One NDF-sized block of m_T per (site, id, jd).
882  int ig = VLEN * NDF * (site + m_Nstv * (id + ND * jd));
883  load_vec(ut, &u[ig], NDF);
884  for (int ic = 0; ic < NC; ++ic) {
885  int ic2 = 2 * ic;
886  int id2 = (id + ND2) % ND;
  // The same block is applied to spin id and to its partner id2; the
  // results go to output spin jd and jd + ND2 respectively.
887  mult_ctv(wt1, &ut[ic2], &v1v[2 * id], NC);
888  mult_ctv(wt2, &ut[ic2], &v1v[2 * id2], NC);
889  int icd1 = 2 * (jd + ND * ic);
890  int icd2 = 2 * (jd + ND2 + ND * ic);
891  axpy_vec(&v2v[icd1], real_t(1.0), wt1, 2);
892  axpy_vec(&v2v[icd2], real_t(1.0), wt2, 2);
893  }
894  }
895  }
896 
897  save_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
898  }
899 
900 #pragma omp barrier
901 }
902 
903 
904 //====================================================================
// Full operator application with communication, in this order:
//   1. thread 0 starts the receive set; all threads fill the send buffers
//      via BridgeQXS::mult_wilson_1_dirac; thread 0 starts the send set;
//   2. v is set to zero and the bulk kernel is applied;
//   3. thread 0 waits for the receives, the boundary kernel consumes the
//      receive buffers, and thread 0 waits for the sends.
// Steps 1 and 3 are skipped when do_comm_any == 0.
// NOTE(review): the signature line (listing line 906) and the first lines of
// two kernel calls (listing lines 942 and 963) are missing from this extract;
// presumably this is the routine called as mult_D(v, w).
905 template<typename AFIELD>
907 {
908  real_t *v2 = v.ptr(0);
909  real_t *v1 = const_cast<AFIELD *>(&w)->ptr(0);
910  real_t *up = m_U.ptr(0);
911  real_t *ct = m_T.ptr(0);
912 
913  int ith = ThreadManager::get_thread_id();
914 
915 #pragma omp barrier
916 
917  if (do_comm_any > 0) {
918  if (ith == 0) chset_recv.start();
919 
  // Data for the +mu kernels is packed into the "down" send channels and
  // data for the -mu kernels into the "up" send channels.
920  real_t *buf1_xp = (real_t *)chsend_dn[0].ptr();
921  real_t *buf1_xm = (real_t *)chsend_up[0].ptr();
922  real_t *buf1_yp = (real_t *)chsend_dn[1].ptr();
923  real_t *buf1_ym = (real_t *)chsend_up[1].ptr();
924  real_t *buf1_zp = (real_t *)chsend_dn[2].ptr();
925  real_t *buf1_zm = (real_t *)chsend_up[2].ptr();
926  real_t *buf1_tp = (real_t *)chsend_dn[3].ptr();
927  real_t *buf1_tm = (real_t *)chsend_up[3].ptr();
928 
929  BridgeQXS::mult_wilson_1_dirac(buf1_xp, buf1_xm, buf1_yp, buf1_ym,
930  buf1_zp, buf1_zm, buf1_tp, buf1_tm,
931  up, v1, &m_boundary[0], m_Nsize, do_comm);
932 
933 #pragma omp barrier
934 
935  if (ith == 0) chset_send.start();
936  }
937 
938  v.set(real_t(0.0));
939 #pragma omp barrier
940 
  // Bulk part; the call head of the CHIRAL_ROTATION branch is on the missing
  // listing line 942.
941 #ifdef CHIRAL_ROTATION
943  m_CKs, &m_boundary[0], m_Nsize, do_comm);
944 #else
945  BridgeQXS::mult_clover_bulk_dirac(v2, up, ct, v1,
946  m_CKs, &m_boundary[0], m_Nsize, do_comm);
947 #endif
948 
949  if (do_comm_any > 0) {
950  if (ith == 0) chset_recv.wait();
951 
952 #pragma omp barrier
953 
954  real_t *buf2_xp = (real_t *)chrecv_up[0].ptr();
955  real_t *buf2_xm = (real_t *)chrecv_dn[0].ptr();
956  real_t *buf2_yp = (real_t *)chrecv_up[1].ptr();
957  real_t *buf2_ym = (real_t *)chrecv_dn[1].ptr();
958  real_t *buf2_zp = (real_t *)chrecv_up[2].ptr();
959  real_t *buf2_zm = (real_t *)chrecv_dn[2].ptr();
960  real_t *buf2_tp = (real_t *)chrecv_up[3].ptr();
961  real_t *buf2_tm = (real_t *)chrecv_dn[3].ptr();
962 
  // Boundary part; the call head is on the missing listing line 963.
964  buf2_xp, buf2_xm, buf2_yp, buf2_ym,
965  buf2_zp, buf2_zm, buf2_tp, buf2_tm,
966  m_CKs, &m_boundary[0], m_Nsize, do_comm);
967 
968  if (ith == 0) chset_send.wait();
969  }
970 
971 #pragma omp barrier
972 }
973 
974 
975 //====================================================================
// Applies the domain-decomposed kernel to `w`, writing to `v`, using the
// block-restricted gauge field m_Ublock and the clover matrix m_T. The kernel
// receives jeo = (ieo + m_Ieo) % 2 as its last argument.
// NOTE(review): the first signature line (listing line 977) and the call head
// of the CHIRAL_ROTATION branch (listing line 991) are missing from this
// extract, so the routine's name is not visible here.
976 template<typename AFIELD>
978  const int ieo)
979 {
980  real_t *v2 = v.ptr(0);
981  real_t *v1 = const_cast<AFIELD *>(&w)->ptr(0);
982  real_t *up = m_Ublock.ptr(0);
983  real_t *ct = m_T.ptr(0);
984 
  // The requested parity is shifted by this process's offset m_Ieo.
985  int jeo = (ieo + m_Ieo) % 2;
986 
987 #pragma omp barrier
988 
989 
990 #ifdef CHIRAL_ROTATION
992  m_CKs, &m_boundary[0],
993  m_Nsize, m_block_sizev, jeo);
994 #else
995  BridgeQXS::mult_clover_dd_dirac(v2, up, ct, v1,
996  m_CKs, &m_boundary[0],
997  m_Nsize, m_block_sizev, jeo);
998 #endif
999 
1000 
1001 
1002 #pragma omp barrier
1003 }
1004 
1005 
1006 //====================================================================
// Applies the domain-decomposed kernel to `w`, writing to `v`, using the
// block-restricted gauge field m_Ublock and the clover matrix m_T, with -1
// passed as the kernel's last argument (where the ieo-based routine passes
// its computed parity).
// NOTE(review): the signature line (listing line 1008) and the call head of
// the CHIRAL_ROTATION branch (listing line 1018) are missing from this
// extract, so the routine's name is not visible here.
1007 template<typename AFIELD>
1009 {
1010  real_t *v2 = v.ptr(0);
1011  real_t *v1 = const_cast<AFIELD *>(&w)->ptr(0);
1012  real_t *up = m_Ublock.ptr(0);
1013  real_t *ct = m_T.ptr(0);
1014 
1015 #pragma omp barrier
1016 
1017 #ifdef CHIRAL_ROTATION
1019  v2, up, ct, v1,
1020  m_CKs, &m_boundary[0],
1021  m_Nsize, m_block_sizev, -1);
1022 #else
1023  BridgeQXS::mult_clover_dd_dirac(v2, up, ct, v1,
1024  m_CKs, &m_boundary[0],
1025  m_Nsize, m_block_sizev, -1);
1026 #endif
1027 
1028  // vector variables at the boundary of the domain are not treated here...
1029  // BridgeQXS::mult_clover_bulk_dirac(v2, up, ct, v1,
1030  // m_CKs, &m_boundary[0], m_Nsize, do_comm);
1031 
1032 #pragma omp barrier
1033 }
1034 
1035 
1036 //====================================================================
// Alternative operator application assembled from the per-direction
// routines: clears v, calls the eight hopping routines (+/- x, y, z, t) with
// third argument 0, calls the clover term, and finally combines the result
// with w through aypx with coefficient -m_CKs.
// NOTE(review): the signature line (listing line 1038) is missing from this
// extract; presumably this is the mult_D_alt referenced in a commented-out
// call earlier in the file. aypx(a, v, w) presumably computes v = a * v + w.
1037 template<typename AFIELD>
1039 {
1040  real_t *vp = v.ptr(0);
1041  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
1042 
1043  clear(vp);
1044 
1045  mult_xp(vp, wp, 0);
1046  mult_xm(vp, wp, 0);
1047  mult_yp(vp, wp, 0);
1048  mult_ym(vp, wp, 0);
1049  mult_zp(vp, wp, 0);
1050  mult_zm(vp, wp, 0);
1051  mult_tp(vp, wp, 0);
1052  mult_tm(vp, wp, 0);
1053 
1054  mult_csw(vp, wp);
1055 
1056  aypx(-m_CKs, vp, wp);
1057 
1058 #pragma omp barrier
1059 }
1060 
1061 
1062 //====================================================================
// For the forward direction `mu`: clears v, calls the +mu hopping routine
// with third argument 1, negates v, calls the same routine again with third
// argument 0, and scales the result by -m_CKs.
// In mult_xp (the only hopping routine whose body is visible here) a nonzero
// third argument selects the block-restricted gauge field m_Ublock and the
// result is added into v, so for mu == 0 the sequence yields
// -m_CKs * (full hop - block-restricted hop). The y/z/t routines are
// presumably analogous — confirm.
// A `mu` outside 0..3 leaves v cleared (then scaled).
// NOTE(review): the first signature line (listing line 1064) is missing from
// this extract, so the routine's name is not visible here.
1063 template<typename AFIELD>
1065  const int mu)
1066 {
1067  real_t *vp = v.ptr(0);
1068  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
1069 
1070 #pragma omp barrier
1071 
1072  clear(vp);
1073 
1074  if (mu == 0) {
1075  mult_xp(vp, wp, 1);
1076  scal_local(vp, real_t(-1.0));
1077  mult_xp(vp, wp, 0);
1078  } else if (mu == 1) {
1079  mult_yp(vp, wp, 1);
1080  scal_local(vp, real_t(-1.0));
1081  mult_yp(vp, wp, 0);
1082  } else if (mu == 2) {
1083  mult_zp(vp, wp, 1);
1084  scal_local(vp, real_t(-1.0));
1085  mult_zp(vp, wp, 0);
1086  } else if (mu == 3) {
1087  mult_tp(vp, wp, 1);
1088  scal_local(vp, real_t(-1.0));
1089  mult_tp(vp, wp, 0);
1090  }
1091 
1092  scal_local(vp, -m_CKs);
1093 
1094 #pragma omp barrier
1095 }
1096 
1097 
1098 //====================================================================
// Backward-direction counterpart of the preceding routine: clears v, calls
// the -mu hopping routine with third argument 1, negates v, calls the same
// routine with third argument 0, and scales the result by -m_CKs.
// The bodies of mult_xm/ym/zm/tm are not visible in full here; presumably
// the third argument selects the block-restricted gauge field as it does in
// mult_xp — confirm.
// A `mu` outside 0..3 leaves v cleared (then scaled).
// NOTE(review): the first signature line (listing line 1100) is missing from
// this extract, so the routine's name is not visible here.
1099 template<typename AFIELD>
1101  const int mu)
1102 {
1103  real_t *vp = v.ptr(0);
1104  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
1105 
1106 #pragma omp barrier
1107 
1108  clear(vp);
1109 
1110  if (mu == 0) {
1111  mult_xm(vp, wp, 1);
1112  scal_local(vp, real_t(-1.0));
1113  mult_xm(vp, wp, 0);
1114  } else if (mu == 1) {
1115  mult_ym(vp, wp, 1);
1116  scal_local(vp, real_t(-1.0));
1117  mult_ym(vp, wp, 0);
1118  } else if (mu == 2) {
1119  mult_zm(vp, wp, 1);
1120  scal_local(vp, real_t(-1.0));
1121  mult_zm(vp, wp, 0);
1122  } else if (mu == 3) {
1123  mult_tm(vp, wp, 1);
1124  scal_local(vp, real_t(-1.0));
1125  mult_tm(vp, wp, 0);
1126  }
1127 
1128  scal_local(vp, -m_CKs);
1129 
1130 #pragma omp barrier
1131 }
1132 
1133 
1134 //====================================================================
1135 
1136 /*
1137 template<typename AFIELD>
1138 void AFopr_Clover_dd<AFIELD>::mult_block_hop(AFIELD &v, const AFIELD &w,
1139  int mu)
1140 {
1141  real_t *vp = v.ptr(0);
1142  real_t *wp = const_cast<AFIELD*>(&w)->ptr(0);
1143 
1144 #pragma omp barrier
1145 
1146  clear(vp);
1147 
1148  if(mu == 0){
1149  mult_xp(vp, wp, 1);
1150  scal_local(vp, real_t(-1.0));
1151  mult_xp(vp, wp, 0);
1152  }else if(mu == 1){
1153  mult_yp(vp, wp, 1);
1154  scal_local(vp, real_t(-1.0));
1155  mult_yp(vp, wp, 0);
1156  }else if(mu == 2){
1157  mult_zp(vp, wp, 1);
1158  scal_local(vp, real_t(-1.0));
1159  mult_zp(vp, wp, 0);
1160  }else if(mu == 3){
1161  mult_tp(vp, wp, 1);
1162  scal_local(vp, real_t(-1.0));
1163  mult_tp(vp, wp, 0);
1164  }
1165 
1166  scal_local(vp, -m_CKs);
1167 
1168 #pragma omp barrier
1169 
1170 }
1171 */
1172 
1173 //====================================================================
// Computes v = gm5 * D * w: D into the work field m_v2, then mult_gm5 into v.
// NOTE(review): the signature line (listing line 1175) is missing from this
// extract; presumably this is the routine the dispatchers call as H(v, w).
1174 template<typename AFIELD>
1176 {
1177  D(m_v2, w);
1178  mult_gm5(v, m_v2);
1179 }
1180 
1181 
1182 //====================================================================
// Site-wise update of the array `v` from `v`, `w` and the scalar `a` via
// aypx_vec, over this thread's share of the m_Nstv vector sites; the result
// is stored back into `v`. No barrier is issued here.
// NOTE(review): the signature line (listing line 1184) is missing from this
// extract; the body matches the calls aypx(a, vp, wp) made elsewhere in this
// file. aypx_vec presumably computes v = a * v + w — confirm.
1183 template<typename AFIELD>
1185 {
1186  int ith, nth, is, ns;
1187  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1188 
1189  Vsimd_t vt[NVCD], wt[NVCD];
1190 
1191  for (int site = is; site < ns; ++site) {
1192  load_vec(vt, &v[VLEN * NVCD * site], NVCD);
1193  load_vec(wt, &w[VLEN * NVCD * site], NVCD);
1194  aypx_vec(a, vt, wt, NVCD);
1195  save_vec(&v[VLEN * NVCD * site], vt, NVCD);
1196  }
1197 }
1198 
1199 
1200 //====================================================================
// Overwrites this thread's share of the array `v` with the contents of a
// vector block prepared once by clear_vec. No barrier is issued here.
// NOTE(review): the signature line (listing line 1202) is missing from this
// extract; the body matches the calls clear(vp) made elsewhere in this file.
1200 template<typename AFIELD>
1203 {
1204  int ith, nth, is, ns;
1205  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1206 
1207  Vsimd_t vt[NVCD];
1208  clear_vec(vt, NVCD);
1209 
1210  for (int site = is; site < ns; ++site) {
1211  save_vec(&v[VLEN * NVCD * site], vt, NVCD);
1212  }
1213 }
1214 
1215 
1216 //====================================================================
// Hopping term in the +x direction (idir = 0), added into `v2`.
// When do_comm[0] > 0 the sites with ix == 0 are first packed into the
// "down" send buffer and exchanged (master thread only); afterwards every
// site computes its contribution, using the received buffer for the sites at
// ix == m_Nxv - 1. A nonzero `isap` selects the block-restricted gauge field
// m_Ublock instead of m_U.
// NOTE(review): the signature line (listing line 1218) is missing from this
// extract; the body matches the calls mult_xp(vp, wp, isap) made elsewhere in
// this file.
1216 template<typename AFIELD>
1219 {
1220  int idir = 0;
1221 
1222  int ith, nth, is, ns;
1223  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1224 
  // NOTE(review): this v2v is never used; the site loop below declares its
  // own v2v that shadows it.
1225  Vsimd_t v2v[NVCD];
1226 
1227  real_t *buf1 = (real_t *)chsend_dn[0].ptr();
1228  real_t *buf2 = (real_t *)chrecv_up[0].ptr();
1229 
1230  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1231  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1232 
1233 #pragma omp barrier
1234 
1235  if (do_comm[0] > 0) {
  // Pack the ix == 0 face into the send buffer.
1236  for (int site = is; site < ns; ++site) {
1237  int ix = site % m_Nxv;
1238  int iyzt = site / m_Nxv;
1239  if (ix == 0) {
1240  int ibf = VLENY * NVC * ND2 * iyzt;
1241  mult_wilson_xp1(&buf1[ibf], &v1[VLEN * NVCD * site]);
1242  }
1243  }
1244 
1245 #pragma omp barrier
1246 
  // The exchange is started and completed by the master thread while the
  // others wait at the following barrier.
1247 #pragma omp master
1248  {
1249  chrecv_up[0].start();
1250  chsend_dn[0].start();
1251  chrecv_up[0].wait();
1252  chsend_dn[0].wait();
1253  }
1254 #pragma omp barrier
1255  } // if(do_comm[0] == 1)
1256 
1257  for (int site = is; site < ns; ++site) {
1258  int ix = site % m_Nxv;
1259  int iyzt = site / m_Nxv;
1260 
1261  Vsimd_t v2v[NVCD];
1262  clear_vec(v2v, NVCD);
1263 
1264  real_t zL[VLEN * NVCD];
1265 
  // Interior sites, or all sites when no communication is done in x: the
  // neighbour is local, wrapping around to ix = 0 at the upper edge.
1266  if ((ix < m_Nxv - 1) || (do_comm[0] == 0)) {
1267  int nei = ix + 1 + m_Nxv * iyzt;
1268  if (ix == m_Nxv - 1) nei = 0 + m_Nxv * iyzt;
1269  shift_vec2_xbw(zL, &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei], NVCD);
1270  mult_wilson_xpb(v2v, &u[VLEN * NDF * site], zL);
1271  } else {
  // Upper edge with communication: the local part is shifted without a
  // neighbour and the remaining part comes from the receive buffer.
1272  int ibf = VLENY * NVC * ND2 * iyzt;
1273  shift_vec0_xbw(zL, &v1[VLEN * NVCD * site], NVCD);
1274  mult_wilson_xpb(v2v, &u[VLEN * NDF * site], zL);
1275  mult_wilson_xp2(v2v, &u[VLEN * NDF * site], &buf2[ibf]);
1276  }
1277 
  // Accumulate into the output rather than overwrite it.
1278  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1279  }
1280 
1281 #pragma omp barrier
1282 }
1283 
1284 
1285 //====================================================================
1286 template<typename AFIELD>
1288 {
1289  int idir = 0;
1290 
1291  int ith, nth, is, ns;
1292  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1293 
1294  Vsimd_t v2v[NVCD];
1295 
1296  real_t *buf1 = (real_t *)chsend_up[0].ptr();
1297  real_t *buf2 = (real_t *)chrecv_dn[0].ptr();
1298 
1299  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1300  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1301 
1302 #pragma omp barrier
1303 
1304  if (do_comm[0] > 0) {
1305  for (int site = is; site < ns; ++site) {
1306  int ix = site % m_Nxv;
1307  int iyzt = site / m_Nxv;
1308  if (ix == m_Nxv - 1) {
1309  int ibf = VLENY * NVC * ND2 * iyzt;
1310  mult_wilson_xm1(&buf1[ibf], &u[VLEN * NDF * site],
1311  &v1[VLEN * NVCD * site]);
1312  }
1313  }
1314 
1315 #pragma omp barrier
1316 #pragma omp master
1317  {
1318  chrecv_dn[0].start();
1319  chsend_up[0].start();
1320  chrecv_dn[0].wait();
1321  chsend_up[0].wait();
1322  }
1323 #pragma omp barrier
1324  } // end of if(do_comm[0] > 0)
1325 
1326  for (int site = is; site < ns; ++site) {
1327  int ix = site % m_Nxv;
1328  int iyzt = site / m_Nxv;
1329 
1330  real_t zL[VLEN * NVCD];
1331  real_t uL[VLEN * NDF];
1332 
1333  clear_vec(v2v, NVCD);
1334 
1335  if ((ix > 0) || (do_comm[0] == 0)) {
1336  int nei = ix - 1 + m_Nxv * iyzt;
1337  if (ix == 0) nei = m_Nxv - 1 + m_Nxv * iyzt;
1338  shift_vec2_xfw(zL, &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei], NVCD);
1339  shift_vec2_xfw(uL, &u[VLEN * NDF * site], &u[VLEN * NDF * nei], NDF);
1340  mult_wilson_xmb(v2v, uL, zL);
1341  } else {
1342  int ibf = VLENY * NVC * ND2 * iyzt;
1343  shift_vec0_xfw(zL, &v1[VLEN * NVCD * site], NVCD);
1344  shift_vec0_xfw(uL, &u[VLEN * NDF * site], NDF);
1345  mult_wilson_xmb(v2v, uL, zL);
1346  mult_wilson_xm2(v2v, &buf2[ibf]);
1347  }
1348 
1349  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1350  }
1351 
1352 #pragma omp barrier
1353 }
1354 
1355 
1356 //====================================================================
1357 template<typename AFIELD>
1359 {
1360  int idir = 1;
1361  int Nxy = m_Nxv * m_Nyv;
1362 
1363  int ith, nth, is, ns;
1364  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1365 
1366  real_t *buf1 = (real_t *)chsend_dn[1].ptr();
1367  real_t *buf2 = (real_t *)chrecv_up[1].ptr();
1368 
1369  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1370  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1371 
1372 #pragma omp barrier
1373 
1374  if (do_comm[1] > 0) {
1375  for (int site = is; site < ns; ++site) {
1376  int ix = site % m_Nxv;
1377  int iy = (site / m_Nxv) % m_Nyv;
1378  int izt = site / Nxy;
1379  if (iy == 0) {
1380  int ibf = VLENX * NVC * ND2 * (ix + m_Nxv * izt);
1381  mult_wilson_yp1(&buf1[ibf], &v1[VLEN * NVCD * site]);
1382  }
1383  }
1384 
1385 #pragma omp barrier
1386 
1387 #pragma omp master
1388  {
1389  chrecv_up[1].start();
1390  chsend_dn[1].start();
1391  chrecv_up[1].wait();
1392  chsend_dn[1].wait();
1393  }
1394 
1395 #pragma omp barrier
1396  } // end of if(do_comm[1] > 0)
1397 
1398  for (int site = is; site < ns; ++site) {
1399  int ix = site % m_Nxv;
1400  int iy = (site / m_Nxv) % m_Nyv;
1401  int izt = site / Nxy;
1402 
1403  Vsimd_t v2v[NVCD];
1404  clear_vec(v2v, NVCD);
1405 
1406  real_t zL[VLEN * NVCD];
1407 
1408  if ((iy < m_Nyv - 1) || (do_comm[1] == 0)) {
1409  int iy2 = (iy + 1) % m_Nyv;
1410  int nei = ix + m_Nxv * (iy2 + m_Nyv * izt);
1411  shift_vec2_ybw(zL, &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei], NVCD);
1412  mult_wilson_ypb(v2v, &u[VLEN * NDF * site], zL);
1413  } else {
1414  int ibf = VLENX * NVC * ND2 * (ix + m_Nxv * izt);
1415  shift_vec0_ybw(zL, &v1[VLEN * NVCD * site], NVCD);
1416  mult_wilson_ypb(v2v, &u[VLEN * NDF * site], zL);
1417  mult_wilson_yp2(v2v, &u[VLEN * NDF * site], &buf2[ibf]);
1418  }
1419 
1420  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1421  }
1422 
1423 #pragma omp barrier
1424 }
1425 
1426 
1427 //====================================================================
1428 template<typename AFIELD>
1430 {
1431  int idir = 1;
1432  int Nxy = m_Nxv * m_Nyv;
1433 
1434  int ith, nth, is, ns;
1435  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1436 
1437  real_t *buf1 = (real_t *)chsend_up[1].ptr();
1438  real_t *buf2 = (real_t *)chrecv_dn[1].ptr();
1439 
1440  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1441  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1442 
1443 #pragma omp barrier
1444 
1445  if (do_comm[1] > 0) {
1446  for (int site = is; site < ns; ++site) {
1447  int ix = site % m_Nxv;
1448  int iy = (site / m_Nxv) % m_Nyv;
1449  int izt = site / Nxy;
1450  if (iy == m_Nyv - 1) {
1451  int ibf = VLENX * NVC * ND2 * (ix + m_Nxv * izt);
1452  mult_wilson_ym1(&buf1[ibf], &u[VLEN * NDF * site],
1453  &v1[VLEN * NVCD * site]);
1454  }
1455  }
1456 
1457 #pragma omp barrier
1458 
1459 #pragma omp master
1460  {
1461  chrecv_dn[1].start();
1462  chsend_up[1].start();
1463  chrecv_dn[1].wait();
1464  chsend_up[1].wait();
1465  }
1466 
1467 #pragma omp barrier
1468  }
1469 
1470  for (int site = is; site < ns; ++site) {
1471  int ix = site % m_Nxv;
1472  int iy = (site / m_Nxv) % m_Nyv;
1473  int izt = site / Nxy;
1474 
1475  Vsimd_t v2v[NVCD];
1476  clear_vec(v2v, NVCD);
1477 
1478  real_t zL[VLEN * NVCD];
1479  real_t uL[VLEN * NDF];
1480 
1481  if ((iy != 0) || (do_comm[idir] == 0)) {
1482  int iy2 = (iy - 1 + m_Nyv) % m_Nyv;
1483  int nei = ix + m_Nxv * (iy2 + m_Nyv * izt);
1484  shift_vec2_yfw(zL, &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei], NVCD);
1485  shift_vec2_yfw(uL, &u[VLEN * NDF * site], &u[VLEN * NDF * nei], NDF);
1486  mult_wilson_ymb(v2v, uL, zL);
1487  } else {
1488  int ibf = VLENX * NVC * ND2 * (ix + m_Nxv * izt);
1489  shift_vec0_yfw(zL, &v1[VLEN * NVCD * site], NVCD);
1490  shift_vec0_yfw(uL, &u[VLEN * NDF * site], NDF);
1491  mult_wilson_ymb(v2v, uL, zL);
1492  mult_wilson_ym2(v2v, &buf2[ibf]);
1493  }
1494 
1495  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1496  }
1497 
1498 #pragma omp barrier
1499 }
1500 
1501 
1502 //====================================================================
1503 template<typename AFIELD>
1505 {
1506  int idir = 2;
1507  int Nxy = m_Nxv * m_Nyv;
1508 
1509  int ith, nth, is, ns;
1510  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1511 
1512  real_t *buf1 = (real_t *)chsend_dn[2].ptr();
1513  real_t *buf2 = (real_t *)chrecv_up[2].ptr();
1514 
1515  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1516  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1517 
1518 #pragma omp barrier
1519 
1520  if (do_comm[2] > 0) {
1521  for (int site = is; site < ns; ++site) {
1522  int ixy = site % Nxy;
1523  int iz = (site / Nxy) % m_Nz;
1524  int it = site / (Nxy * m_Nz);
1525  if (iz == 0) {
1526  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
1527  mult_wilson_zp1(&buf1[ibf], &v1[VLEN * NVCD * site]);
1528  }
1529  }
1530 
1531 #pragma omp barrier
1532 
1533 #pragma omp master
1534  {
1535  chrecv_up[2].start();
1536  chsend_dn[2].start();
1537  chrecv_up[2].wait();
1538  chsend_dn[2].wait();
1539  }
1540 
1541 #pragma omp barrier
1542  }
1543 
1544  for (int site = is; site < ns; ++site) {
1545  int ixy = site % Nxy;
1546  int iz = (site / Nxy) % m_Nz;
1547  int it = site / (Nxy * m_Nz);
1548 
1549  Vsimd_t v2v[NVCD];
1550  clear_vec(v2v, NVCD);
1551 
1552  if ((iz != m_Nz - 1) || (do_comm[2] == 0)) {
1553  int iz2 = (iz + 1) % m_Nz;
1554  int nei = ixy + Nxy * (iz2 + m_Nz * it);
1555  mult_wilson_zpb(v2v, &u[VLEN * NDF * site], &v1[VLEN * NVCD * nei]);
1556  } else {
1557  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
1558  mult_wilson_zp2(v2v, &u[VLEN * NDF * site], &buf2[ibf]);
1559  }
1560 
1561  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1562  }
1563 
1564 #pragma omp barrier
1565 }
1566 
1567 
1568 //====================================================================
1569 template<typename AFIELD>
1571 {
1572  int idir = 2;
1573  int Nxy = m_Nxv * m_Nyv;
1574 
1575  int ith, nth, is, ns;
1576  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1577 
1578  real_t *buf1 = (real_t *)chsend_up[2].ptr();
1579  real_t *buf2 = (real_t *)chrecv_dn[2].ptr();
1580 
1581  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1582  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1583 
1584 #pragma omp barrier
1585 
1586  if (do_comm[2] > 0) {
1587  for (int site = is; site < ns; ++site) {
1588  int ixy = site % Nxy;
1589  int iz = (site / Nxy) % m_Nz;
1590  int it = site / (Nxy * m_Nz);
1591  if (iz == m_Nz - 1) {
1592  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
1593  mult_wilson_zm1(&buf1[ibf], &u[VLEN * NDF * site],
1594  &v1[VLEN * NVCD * site]);
1595  }
1596  }
1597 
1598 #pragma omp barrier
1599 
1600 #pragma omp master
1601  {
1602  chrecv_dn[2].start();
1603  chsend_up[2].start();
1604  chrecv_dn[2].wait();
1605  chsend_up[2].wait();
1606  }
1607 
1608 #pragma omp barrier
1609  }
1610 
1611  for (int site = is; site < ns; ++site) {
1612  int ixy = site % Nxy;
1613  int iz = (site / Nxy) % m_Nz;
1614  int it = site / (Nxy * m_Nz);
1615 
1616  Vsimd_t v2v[NVCD];
1617  clear_vec(v2v, NVCD);
1618 
1619  if ((iz > 0) || (do_comm[2] == 0)) {
1620  int iz2 = (iz - 1 + m_Nz) % m_Nz;
1621  int nei = ixy + Nxy * (iz2 + m_Nz * it);
1622  mult_wilson_zmb(v2v, &u[VLEN * NDF * nei], &v1[VLEN * NVCD * nei]);
1623  } else {
1624  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
1625  mult_wilson_zm2(v2v, &buf2[ibf]);
1626  }
1627 
1628  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1629  }
1630 
1631 #pragma omp barrier
1632 }
1633 
1634 
1635 //====================================================================
1636 template<typename AFIELD>
1638 {
1639  int idir = 3;
1640  int Nxyz = m_Nxv * m_Nyv * m_Nz;
1641 
1642  int ith, nth, is, ns;
1643  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1644 
1645  real_t *buf1 = (real_t *)chsend_dn[3].ptr();
1646  real_t *buf2 = (real_t *)chrecv_up[3].ptr();
1647 
1648  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1649  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1650 
1651 #pragma omp barrier
1652 
1653  if (do_comm[3] > 0) {
1654  for (int site = is; site < ns; ++site) {
1655  int ixyz = site % Nxyz;
1656  int it = site / Nxyz;
1657  if (it == 0) {
1658  mult_wilson_tp1_dirac(&buf1[VLEN * NVC * ND2 * ixyz],
1659  &v1[VLEN * NVCD * site]);
1660  }
1661  }
1662 
1663 #pragma omp barrier
1664 
1665 #pragma omp master
1666  {
1667  chrecv_up[3].start();
1668  chsend_dn[3].start();
1669  chrecv_up[3].wait();
1670  chsend_dn[3].wait();
1671  }
1672 
1673 #pragma omp barrier
1674  }
1675 
1676  for (int site = is; site < ns; ++site) {
1677  int ixyz = site % Nxyz;
1678  int it = site / Nxyz;
1679 
1680  Vsimd_t v2v[NVCD];
1681  clear_vec(v2v, NVCD);
1682 
1683  if ((it < m_Nt - 1) || (do_comm[3] == 0)) {
1684  int it2 = (it + 1) % m_Nt;
1685  int nei = ixyz + Nxyz * it2;
1686  mult_wilson_tpb_dirac(v2v, &u[VLEN * NDF * site],
1687  &v1[VLEN * NVCD * nei]);
1688  } else {
1689  mult_wilson_tp2_dirac(v2v, &u[VLEN * NDF * site],
1690  &buf2[VLEN * NVC * ND2 * ixyz]);
1691  }
1692 
1693  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1694  }
1695 
1696 #pragma omp barrier
1697 }
1698 
1699 
1700 //====================================================================
1701 template<typename AFIELD>
1703 {
1704  int idir = 3;
1705  int Nxyz = m_Nxv * m_Nyv * m_Nz;
1706 
1707  int ith, nth, is, ns;
1708  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1709 
1710  real_t *buf1 = (real_t *)chsend_up[3].ptr();
1711  real_t *buf2 = (real_t *)chrecv_dn[3].ptr();
1712 
1713  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1714  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1715 
1716 #pragma omp barrier
1717 
1718  if (do_comm[3] > 0) {
1719  for (int site = is; site < ns; ++site) {
1720  int ixyz = site % Nxyz;
1721  int it = site / Nxyz;
1722  if (it == m_Nt - 1) {
1723  mult_wilson_tm1_dirac(&buf1[VLEN * NVC * ND2 * ixyz],
1724  &u[VLEN * NDF * site], &v1[VLEN * NVCD * site]);
1725  }
1726  }
1727 
1728 #pragma omp barrier
1729 
1730 #pragma omp master
1731  {
1732  chrecv_dn[3].start();
1733  chsend_up[3].start();
1734  chrecv_dn[3].wait();
1735  chsend_up[3].wait();
1736  }
1737 #pragma omp barrier
1738  }
1739 
1740  for (int site = is; site < ns; ++site) {
1741  int ixyz = site % Nxyz;
1742  int it = site / Nxyz;
1743 
1744  Vsimd_t v2v[NVCD];
1745  clear_vec(v2v, NVCD);
1746 
1747  if ((it > 0) || (do_comm[3] == 0)) {
1748  int it2 = (it - 1 + m_Nt) % m_Nt;
1749  int nei = ixyz + Nxyz * it2;
1750  mult_wilson_tmb_dirac(v2v, &u[VLEN * NDF * nei],
1751  &v1[VLEN * NVCD * nei]);
1752  } else {
1753  mult_wilson_tm2_dirac(v2v, &buf2[VLEN * NVC * ND2 * ixyz]);
1754  }
1755 
1756  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1757  }
1758 
1759 #pragma omp barrier
1760 }
1761 
1762 
1763 //====================================================================
1764 template<typename AFIELD>
1765 double AFopr_Clover_dd<AFIELD>::flop_count(const std::string mode)
1766 {
1767  // The following counting explicitly depends on the implementation.
1768  // It will be recalculated when the code is modified.
1769  // The present counting is based on rev.1107. [24 Aug 2014 H.Matsufuru]
1770 
1771  int Lvol = CommonParameters::Lvol();
1772  double flop_wilson, flop_clover, flop_site, flop;
1773 
1774  if (m_repr == "Dirac") {
1775  flop_wilson = static_cast<double>(
1776  m_Nc * m_Nd * (4 // aypx
1777  + 6 * (4 * m_Nc + 2) // spatial hopping
1778  + 2 * (4 * m_Nc + 1))); // temporal hopping
1779 
1780  flop_clover = static_cast<double>(
1781  m_Nc * m_Nd * (2 // Dirac -> chiral
1782  + 2 * (2 * (m_Nc * m_Nd - 1) + 1) // clover term mult
1783  + 2 // chiral -> Dirac
1784  + 2)); // addition to vector
1785  } else if (m_repr == "Chiral") {
1786  flop_wilson = static_cast<double>(
1787  m_Nc * m_Nd * (4 + 8 * (4 * m_Nc + 2)));
1788 
1789  flop_clover = static_cast<double>(
1790  m_Nc * m_Nd * (2 * (2 * (m_Nc * m_Nd - 1) + 1) // clover term mult
1791  + 2)); // addition to vector
1792  } else {
1793  vout.crucial(m_vl, "%s: input repr is undefined.\n");
1794  abort();
1795  }
1796 
1797  flop_site = flop_wilson + flop_clover;
1798 
1799  flop = flop_site * static_cast<double>(Lvol);
1800  if ((mode == "DdagD") || (mode == "DDdag")) flop *= 2.0;
1801 
1802  return flop;
1803 }
1804 
1805 
1806 //====================================================================
1807 template<typename AFIELD>
1809 {
1810  // The following implentastion was copied from SIMD implementation
1811  // by Kanamori-san. To be confirmed. [08 May 2021 H.Matsufuru]
1812 
1813  int NPE = CommonParameters::NPE();
1814  int Nc = CommonParameters::Nc();
1815  int Nd = CommonParameters::Nd();
1816 
1817  int NBx = m_Nx / m_block_size[0];
1818  int NBy = m_Ny / m_block_size[1];
1819  int NBz = m_Nz / m_block_size[2];
1820  int NBt = m_Nt / m_block_size[3];
1821  size_t nvol2 = NBx * NBy * NBz * NBt / 2;
1822 
1823  int block_x = m_block_size[0];
1824  int block_y = m_block_size[1];
1825  int block_z = m_block_size[2];
1826  int block_t = m_block_size[3];
1827 
1828  // int clover_site = 4 * Nc * Nc * Nd * Nd;
1829  int hop_x_site = Nc * Nd * (4 * Nc + 2);
1830  int hop_y_site = Nc * Nd * (4 * Nc + 2);
1831  int hop_z_site = Nc * Nd * (4 * Nc + 2);
1832  int hop_t_site = Nc * Nd * (4 * Nc + 1);
1833  int accum_site = 4 * Nc * Nd;
1834 
1835  double flop_sap_mult;
1836 
1837  if (m_repr == "Dirac") {
1838  double flop_x = 2.0 * static_cast<double>(hop_x_site
1839  * (block_x - 1) * block_y * block_z * block_t);
1840  double flop_y = 2.0 * static_cast<double>(hop_y_site
1841  * block_x * (block_y - 1) * block_z * block_t);
1842  double flop_z = 2.0 * static_cast<double>(hop_z_site
1843  * block_x * block_y * (block_z - 1) * block_t);
1844  double flop_t = 2.0 * static_cast<double>(hop_t_site
1845  * block_x * block_y * block_z * (block_t - 1));
1846 
1847  // colver term mult assumes rotation to chiral repr.
1848  double flop_clover = static_cast<double>(
1849  m_Nc * m_Nd * (2 // Dirac -> chiral
1850  + 2 * (2 * (m_Nc * m_Nd - 1) + 1) // clover term mult
1851  + 2 // chiral -> Dirac
1852  + 2)); // addition to vector
1853 
1854  double flop_aypx = static_cast<double>(accum_site);
1855 
1856  flop_sap_mult = flop_x + flop_y + flop_z + flop_t
1857  + (flop_clover + flop_aypx)
1858  * static_cast<double>(block_x * block_y * block_z * block_t);
1859  } else {
1860  vout.crucial(m_vl, "%s: input repr is undefined.\n");
1861  abort();
1862  }
1863 
1864  return flop_sap_mult * static_cast<double>(nvol2)
1865  * static_cast<double>(NPE);
1866 }
1867 
1868 
1869 //============================================================END=====
AFopr_Clover_dd::get_parameters
void get_parameters(Parameters &params) const
get parameters via a Parameter object
Definition: afopr_Clover_dd-tmpl.h:291
AFopr_Clover_dd::clear
void clear(real_t *)
Definition: afopr_Clover_dd-tmpl.h:1202
AFopr_Clover_dd::set_block_config
void set_block_config(AFIELD &)
impose the block condition on the link variables.
Definition: afopr_Clover_dd-tmpl.h:370
CommonParameters::Ny
static int Ny()
Definition: commonParameters.h:106
AFopr_Clover_dd::scal_local
void scal_local(real_t *, real_t)
Definition: afopr_Clover_dd-tmpl.h:813
CommonParameters::Nz
static int Nz()
Definition: commonParameters.h:107
AFopr_Clover_dd::mult_D
void mult_D(AFIELD &, const AFIELD &)
standard D mult.
Definition: afopr_Clover_dd-tmpl.h:906
BridgeQXS::mult_wilson_gm5_dirac
void mult_wilson_gm5_dirac(double *v2, double *v1, int *Nsize)
Definition: mult_Wilson_qxs-inc.h:411
AFopr_Clover_dd::tidyup
void tidyup()
final tidy-up.
Definition: afopr_Clover_dd-tmpl.h:168
CommonParameters::Lvol
static long_t Lvol()
Definition: commonParameters.h:95
AFopr_Clover_dd::set_config
void set_config(Field *u)
setting gauge configuration (common interface).
Definition: afopr_Clover_dd-tmpl.h:305
AFopr_Clover_dd::convert
void convert(AFIELD &v, const Field &w)
convert of spinor field.
Definition: afopr_Clover_dd-tmpl.h:592
Parameters::set_string
void set_string(const string &key, const string &value)
Definition: parameters.cpp:39
AFopr_Clover_dd::set_config_impl
void set_config_impl(Field *u)
setting gauge configuration (implementation).
Definition: afopr_Clover_dd-tmpl.h:337
AFopr_Clover_dd::mult_ddn
void mult_ddn(AFIELD &, const AFIELD &, const int mu)
Downward hopping part of mult.
Definition: afopr_Clover_dd-tmpl.h:1100
BridgeQXS::mult_wilson_2_dirac
void mult_wilson_2_dirac(double *v2, double *up, double *v1, double *buf_xp, double *buf_xm, double *buf_yp, double *buf_ym, double *buf_zp, double *buf_zm, double *buf_tp, double *buf_tm, double kappa, int *bc, int *Nsize, int *do_comm)
Definition: mult_Wilson_qxs-inc.h:296
BridgeQXS::mult_wilson_1_dirac
void mult_wilson_1_dirac(double *buf_xp, double *buf_xm, double *buf_yp, double *buf_ym, double *buf_zp, double *buf_zm, double *buf_tp, double *buf_tm, double *up, double *v1, int *bc, int *Nsize, int *do_comm)
Definition: mult_Wilson_qxs-inc.h:153
AFopr_Clover_dd::Ddag
void Ddag(AFIELD &, const AFIELD &)
Definition: afopr_Clover_dd-tmpl.h:743
ThreadManager::get_num_threads
static int get_num_threads()
returns available number of threads.
Definition: threadManager.cpp:246
CommonParameters::Ndim
static int Ndim()
Definition: commonParameters.h:117
Field::set
void set(const int jin, const int site, const int jex, double v)
Definition: field.h:175
AFopr_Clover_dd::mult_tm
void mult_tm(real_t *, real_t *, int)
Definition: afopr_Clover_dd-tmpl.h:1702
Parameters
Class for parameters.
Definition: parameters.h:46
AIndex_lex
Definition: aindex_lex_base.h:17
Parameters::set_double
void set_double(const string &key, const double value)
Definition: parameters.cpp:33
AFopr_Clover_dd::set_config_omp
void set_config_omp(Field *u)
setting gauge configuration (setting omp parallel).
Definition: afopr_Clover_dd-tmpl.h:324
Bridge::BridgeIO::decrease_indent
void decrease_indent()
Definition: bridgeIO.h:86
AFopr_Clover_dd::mult_ym
void mult_ym(real_t *, real_t *, int)
Definition: afopr_Clover_dd-tmpl.h:1429
NVCD
#define NVCD
Definition: define_params_SU3.h:20
Bridge::BridgeIO::increase_indent
void increase_indent()
Definition: bridgeIO.h:85
VLEN
#define VLEN
Definition: bridgeQXS_Clover_coarse_double.cpp:12
Bridge::BridgeIO::detailed
void detailed(const char *format,...)
Definition: bridgeIO.cpp:219
AFopr_Clover_dd::mult_dd
void mult_dd(AFIELD &, const AFIELD &)
Mult only inside domain.
Definition: afopr_Clover_dd-tmpl.h:1008
AFopr_Clover_dd::mult_zp
void mult_zp(real_t *, real_t *, int)
Definition: afopr_Clover_dd-tmpl.h:1504
AFopr_Clover_dd::init
void init(const Parameters &params)
initial setup.
Definition: afopr_Clover_dd-tmpl.h:17
NDF
#define NDF
Definition: field_F_imp_SU2-inc.h:4
Fopr_CloverTerm
Org::Fopr_CloverTerm Fopr_CloverTerm
Clover term operator.
Definition: fopr_CloverTerm.h:58
CommonParameters::Nvol
static int Nvol()
Definition: commonParameters.h:109
AFopr_Clover_dd::flop_count
double flop_count()
returns floating operation counts.
Definition: afopr_Clover_dd.h:162
convert_gauge
void convert_gauge(INDEX &index, FIELD &v, const Field &w)
Definition: afield-inc.h:224
aypx
void aypx(const double a, Field &y, const Field &x)
aypx(y, a, x): y := a * y + x
Definition: field.cpp:509
Vsimd_t
Definition: vsimd_double-inc.h:13
AFopr_Clover_dd::flop_count_sap
double flop_count_sap()
returns floating operation counts of mult_sap.
Definition: afopr_Clover_dd-tmpl.h:1808
AFopr_Clover_dd::mult_D_alt
void mult_D_alt(AFIELD &, const AFIELD &)
D mult using mult_xp, etc.
Definition: afopr_Clover_dd-tmpl.h:1038
AFopr_Clover_dd::mult_dup
void mult_dup(AFIELD &, const AFIELD &, const int mu)
Upward hopping part of mult.
Definition: afopr_Clover_dd-tmpl.h:1064
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
AFopr_Clover_dd::setup_channels
void setup_channels()
setup channels for communication.
Definition: afopr_Clover_dd-tmpl.h:134
AFopr_Clover_dd::aypx
void aypx(real_t, real_t *, real_t *)
Definition: afopr_Clover_dd-tmpl.h:1184
AFopr_Clover_dd::D
void D(AFIELD &, const AFIELD &)
Definition: afopr_Clover_dd-tmpl.h:723
AFopr_Clover_dd::mult_sap
void mult_sap(AFIELD &, const AFIELD &, const int ieo)
SAP operator.
Definition: afopr_Clover_dd-tmpl.h:977
copy
void copy(Field &y, const Field &x)
copy(y, x): y = x
Definition: field.cpp:212
AFopr_Clover_dd::mult_csw
void mult_csw(real_t *, real_t *)
set_csw now assumes Dirac repr.
Definition: afopr_Clover_dd-tmpl.h:862
AFopr_Clover_dd::mult_gm5
void mult_gm5(AFIELD &, const AFIELD &)
multiplies gamma_5 matrix.
Definition: afopr_Clover_dd-tmpl.h:770
AFopr_Clover_dd::reverse
void reverse(Field &v, const AFIELD &w)
reverse of spinor field.
Definition: afopr_Clover_dd-tmpl.h:603
BridgeQXS::mult_clover_dd_dirac_chrot
void mult_clover_dd_dirac_chrot(double *v2, double *up, double *ct, double *v1, double kappa, int *bc, int *Nsize, int *block_size, int ieo)
Definition: mult_Clover_dd_qxs-inc.h:169
AFopr_Clover_dd::set_parameters
void set_parameters(const Parameters &params)
setting parameters by a Parameter object.
Definition: afopr_Clover_dd-tmpl.h:178
CommonParameters::Nx
static int Nx()
Definition: commonParameters.h:105
AFopr_Clover_dd::mult_xp
void mult_xp(real_t *, real_t *, int)
Definition: afopr_Clover_dd-tmpl.h:1218
AFopr_Clover_dd::mult_zm
void mult_zm(real_t *, real_t *, int)
Definition: afopr_Clover_dd-tmpl.h:1570
CommonParameters::Nc
static int Nc()
Definition: commonParameters.h:115
NC
#define NC
Definition: field_F_imp_SU2-inc.h:2
AFopr_Clover_dd::mult
void mult(AFIELD &, const AFIELD &)
multiplies fermion operator to a given field.
Definition: afopr_Clover_dd-tmpl.h:681
QXS_Gauge::set_boundary
void set_boundary(AField< REALTYPE, QXS > &ulex, const std::vector< int > &boundary)
Definition: afield_Gauge-inc.h:24
AFopr_Clover_dd::mult_up
void mult_up(int mu, AFIELD &, const AFIELD &)
upward nearest neighbor hopping term.
Definition: afopr_Clover_dd-tmpl.h:614
reverse_spinor
void reverse_spinor(INDEX &index, Field &v, FIELD &w)
Definition: afield-inc.h:380
CommonParameters::Nt
static int Nt()
Definition: commonParameters.h:108
Parameters::fetch_int_vector
int fetch_int_vector(const string &key, vector< int > &value) const
Definition: parameters.cpp:429
Communicator::npe
static int npe(const int dir)
logical grid extent
Definition: communicator.cpp:112
BridgeQXS::mult_clover_bulk_dirac
void mult_clover_bulk_dirac(double *v2, double *up, double *ct, double *v1, double kappa, int *bc, int *Nsize, int *do_comm)
Definition: mult_Clover_qxs-inc.h:16
AFopr_Clover_dd::set_csw_chrot
void set_csw_chrot()
set_csw with rotation to chiral repr.
Definition: afopr_Clover_dd-tmpl.h:513
AFopr_Clover_dd::mult_yp
void mult_yp(real_t *, real_t *, int)
Definition: afopr_Clover_dd-tmpl.h:1358
AIndex_eo_qxs::idx
int idx(const int in, const int Nin, const int ist, const int Nx2, const int Ny, const int leo, const int Nvol2, const int ex)
Definition: aindex_eo.h:27
AFopr_Clover_dd::DdagD
void DdagD(AFIELD &, const AFIELD &)
Definition: afopr_Clover_dd-tmpl.h:732
AFopr_Clover_dd::H
void H(AFIELD &, const AFIELD &)
Definition: afopr_Clover_dd-tmpl.h:1175
threadManager.h
ND
#define ND
Definition: field_F_imp_SU2-inc.h:5
AFopr_Clover_dd::get_mode
std::string get_mode() const
returns mult mode.
Definition: afopr_Clover_dd-tmpl.h:673
AFopr_Clover_dd::mult_tp
void mult_tp(real_t *, real_t *, int)
Definition: afopr_Clover_dd-tmpl.h:1637
CommonParameters::NPE
static int NPE()
Definition: commonParameters.h:101
AFopr_Clover_dd::set_mode
void set_mode(std::string mode)
setting mult mode.
Definition: afopr_Clover_dd-tmpl.h:660
Parameters::set_int_vector
void set_int_vector(const string &key, const vector< int > &value)
Definition: parameters.cpp:45
BridgeQXS::mult_clover_bulk_dirac_chrot
void mult_clover_bulk_dirac_chrot(double *v2, double *up, double *ct, double *v1, double kappa, int *bc, int *Nsize, int *do_comm)
Definition: mult_Clover_qxs-inc.h:152
VLENY
#define VLENY
Definition: bridgeQXS_Clover_coarse_double.cpp:14
Field::ptr
const double * ptr(const int jin, const int site, const int jex) const
Definition: field.h:153
CommonParameters::Nd
static int Nd()
Definition: commonParameters.h:116
NVC
#define NVC
Definition: fopr_Wilson_impl_SU2-inc.h:15
CommonParameters::Vlevel
static Bridge::VerboseLevel Vlevel()
Definition: commonParameters.h:122
AFopr_Clover_dd::mult_dn
void mult_dn(int mu, AFIELD &, const AFIELD &)
downward nearest neighbor hopping term.
Definition: afopr_Clover_dd-tmpl.h:637
AFopr_Clover_dd::real_t
AFIELD::real_t real_t
Definition: afopr_Clover_dd.h:48
AFopr_Clover_dd
Definition: afopr_Clover_dd.h:45
Bridge::BridgeIO::set_verbose_level
static VerboseLevel set_verbose_level(const std::string &str)
Definition: bridgeIO.cpp:133
AFopr_Clover_dd::set_csw
void set_csw()
set_csw now assumes Dirac repr.
Definition: afopr_Clover_dd-tmpl.h:424
AFopr_Clover_dd::project_chiral
void project_chiral(AFIELD &, const AFIELD &, int ch)
Definition: afopr_Clover_dd-tmpl.h:753
VLENX
#define VLENX
Definition: bridgeQXS_Clover_coarse_double.cpp:13
scal
void scal(Field &x, const double a)
scal(x, a): x = a * x
Definition: field.cpp:261
Communicator::ipe
static int ipe(const int dir)
logical coordinate of current proc.
Definition: communicator.cpp:105
AFopr_Clover_dd::mult_dag
void mult_dag(AFIELD &, const AFIELD &)
hermitian conjugate of mult.
Definition: afopr_Clover_dd-tmpl.h:703
AFopr_Clover_dd::mult_xm
void mult_xm(real_t *, real_t *, int)
Definition: afopr_Clover_dd-tmpl.h:1287
Parameters::fetch_string
int fetch_string(const string &key, string &value) const
Definition: parameters.cpp:378
Parameters::fetch_double
int fetch_double(const string &key, double &value) const
Definition: parameters.cpp:327
Parameters::get_string
string get_string(const string &key) const
Definition: parameters.cpp:221
Bridge::BridgeIO::crucial
void crucial(const char *format,...)
Definition: bridgeIO.cpp:180
BridgeQXS::mult_clover_dd_dirac
void mult_clover_dd_dirac(double *v2, double *up, double *ct, double *v1, double kappa, int *bc, int *Nsize, int *block_size, int ieo)
Definition: mult_Clover_dd_qxs-inc.h:15
Field
Container of Field-type object.
Definition: field.h:46
ThreadManager::get_thread_id
static int get_thread_id()
returns thread id.
Definition: threadManager.cpp:253
ND2
#define ND2
Definition: define_params_SU3.h:18
Bridge::BridgeIO::general
void general(const char *format,...)
Definition: bridgeIO.cpp:200
ThreadManager::assert_single_thread
static void assert_single_thread(const std::string &class_name)
assert currently running on single thread.
Definition: threadManager.cpp:372
Bridge::vout
BridgeIO vout
Definition: bridgeIO.cpp:512
Bridge::BridgeIO::get_verbose_level
static std::string get_verbose_level(const VerboseLevel vl)
Definition: bridgeIO.cpp:154
convert_spinor
void convert_spinor(INDEX &index, FIELD &v, const Field &w)
Definition: afield-inc.h:187
AFopr_Clover_dd::mult_gm4
void mult_gm4(AFIELD &, const AFIELD &)
Definition: afopr_Clover_dd-tmpl.h:829