Bridge++  Ver. 2.0.2
afopr_Clover-tmpl.h
Go to the documentation of this file.
1 
11 
12 template<typename AFIELD>
13 const std::string AFopr_Clover<AFIELD>::class_name = "AFopr_Clover";
14 
15 //====================================================================
16 template<typename AFIELD>
18 {
20 
21  // switch of coomunication
22  int req_comm = 1; // set 1 if communication forced any time
23  //int req_comm = 0; // set 0 if communication called in necessary
24 
25  std::string vlevel;
26  if (!params.fetch_string("verbose_level", vlevel)) {
27  m_vl = vout.set_verbose_level(vlevel);
28  } else {
29  m_vl = CommonParameters::Vlevel();
30  }
31 
32  vout.general(m_vl, "%s: construction\n", class_name.c_str());
33 
34  m_repr = "Dirac"; // now only the Dirac repr is available.
35 
36  std::string repr;
37  if (!params.fetch_string("gamma_matrix_type", repr)) {
38  if (repr != "Dirac") {
39  vout.crucial(" Error at %s: unsupported gamma-matrix type: %s\n",
40  class_name.c_str(), repr.c_str());
41  exit(EXIT_FAILURE);
42  }
43  }
44 
45  m_Nc = CommonParameters::Nc();
46  if (m_Nc != 3) {
47  vout.crucial("%s: only applicable to Nc = 3\n",
48  class_name.c_str());
49  exit(EXIT_FAILURE);
50  }
51 
52  m_Nd = CommonParameters::Nd();
53  m_Nvc = 2 * m_Nc;
54  m_Ndf = 2 * m_Nc * m_Nc;
55 
56  m_Nx = CommonParameters::Nx();
57  m_Ny = CommonParameters::Ny();
58  m_Nz = CommonParameters::Nz();
59  m_Nt = CommonParameters::Nt();
60  m_Nst = CommonParameters::Nvol();
61  m_Ndim = CommonParameters::Ndim();
62 
63  m_Nxv = m_Nx / VLENX;
64  m_Nyv = m_Ny / VLENY;
65  m_Nstv = m_Nst / VLEN;
66 
67  if (VLENX * m_Nxv != m_Nx) {
68  vout.crucial(m_vl, "%s: Nx must be multiple of VLENX.\n",
69  class_name.c_str());
70  exit(EXIT_FAILURE);
71  }
72  if (VLENY * m_Nyv != m_Ny) {
73  vout.crucial(m_vl, "%s: Ny must be multiple of VLENY.\n",
74  class_name.c_str());
75  exit(EXIT_FAILURE);
76  }
77 
78  vout.general(m_vl, " VLENX = %2d Nxv = %d\n", VLENX, m_Nxv);
79  vout.general(m_vl, " VLENY = %2d Nyv = %d\n", VLENY, m_Nyv);
80  vout.general(m_vl, " VLEN = %2d Nstv = %d\n", VLEN, m_Nstv);
81 
82  m_Nsize[0] = m_Nxv;
83  m_Nsize[1] = m_Nyv;
84  m_Nsize[2] = m_Nz;
85  m_Nsize[3] = m_Nt;
86 
87  do_comm_any = 0;
88  for (int mu = 0; mu < m_Ndim; ++mu) {
89  do_comm[mu] = 1;
90  if ((req_comm == 0) && (Communicator::npe(mu) == 1)) do_comm[mu] = 0;
91  do_comm_any += do_comm[mu];
92  vout.general(" do_comm[%d] = %d\n", mu, do_comm[mu]);
93  }
94 
95  m_bdsize.resize(m_Ndim);
96  int Nd2 = m_Nd / 2;
97  m_bdsize[0] = m_Nvc * Nd2 * m_Ny * m_Nz * m_Nt;
98  m_bdsize[1] = m_Nvc * Nd2 * m_Nx * m_Nz * m_Nt;
99  m_bdsize[2] = m_Nvc * Nd2 * m_Nx * m_Ny * m_Nt;
100  m_bdsize[3] = m_Nvc * Nd2 * m_Nx * m_Ny * m_Nz;
101 
102  setup_channels();
103 
105 
106  Parameters params_ct = params;
107 
108 #ifdef CHIRAL_ROTATION
109  params_ct.set_string("gamma_matrix_type", "Chiral");
110  m_fopr_csw = new Fopr_CloverTerm(params_ct);
111 #else
112  m_fopr_csw = new Fopr_CloverTerm(params_ct);
113 #endif
114 
115  set_parameters(params);
116 
118 
119  // gauge configuration.
120  m_U.reset(NDF, m_Nst, m_Ndim);
121 
122  // working vectors.
123  int NinF = 2 * m_Nc * m_Nd;
124  m_v2.reset(NinF, m_Nst, 1);
125 
126  int Ndm2 = m_Nd * m_Nd / 2;
127  // m_T.reset(NDF, m_Nst, Ndm2);
128  m_T.reset(NDF * Ndm2, m_Nst, 1);
129 
130  vout.general(m_vl, "%s: construction finished.\n",
131  class_name.c_str());
132 }
133 
134 
135 //====================================================================
136 template<typename AFIELD>
138 {
139  chsend_up.resize(m_Ndim);
140  chrecv_up.resize(m_Ndim);
141  chsend_dn.resize(m_Ndim);
142  chrecv_dn.resize(m_Ndim);
143 
144  for (int mu = 0; mu < m_Ndim; ++mu) {
145  size_t Nvsize = m_bdsize[mu] * sizeof(real_t);
146 
147  chsend_dn[mu].send_init(Nvsize, mu, -1);
148  chsend_up[mu].send_init(Nvsize, mu, 1);
149 #ifdef USE_MPI
150  chrecv_up[mu].recv_init(Nvsize, mu, 1);
151  chrecv_dn[mu].recv_init(Nvsize, mu, -1);
152 #else
153  void *buf_up = (void *)chsend_dn[mu].ptr();
154  chrecv_up[mu].recv_init(Nvsize, mu, 1, buf_up);
155  void *buf_dn = (void *)chsend_up[mu].ptr();
156  chrecv_dn[mu].recv_init(Nvsize, mu, -1, buf_dn);
157 #endif
158 
159  if (do_comm[mu] == 1) {
160  chset_send.append(chsend_up[mu]);
161  chset_send.append(chsend_dn[mu]);
162  chset_recv.append(chrecv_up[mu]);
163  chset_recv.append(chrecv_dn[mu]);
164  }
165  }
166 }
167 
168 
169 //====================================================================
170 template<typename AFIELD>
172 {
174 
175  delete m_fopr_csw;
176 }
177 
178 
179 //====================================================================
180 template<typename AFIELD>
182 {
183  double kappa, csw;
184  std::vector<int> bc;
185 
186  int err = 0;
187  err += params.fetch_double("hopping_parameter", kappa);
188  err += params.fetch_double("clover_coefficient", csw);
189  err += params.fetch_int_vector("boundary_condition", bc);
190  if (err) {
191  vout.crucial(m_vl, "Error at %s: input parameter not found.\n",
192  class_name.c_str());
193  exit(EXIT_FAILURE);
194  }
195 
196  set_parameters(real_t(kappa), real_t(csw), bc);
197 
198  Parameters params_csw = params;
199 #ifdef CHIRAL_ROTATION
200  params_csw.set_string("gamma_matrix_type", "Chiral");
201 #endif
202  m_fopr_csw->set_parameters(params_csw);
203 }
204 
205 
206 //====================================================================
207 template<typename AFIELD>
209  const real_t csw,
210  const std::vector<int> bc)
211 {
212  assert(bc.size() == m_Ndim);
213 
214 #pragma omp barrier
215 
216  int ith = ThreadManager::get_thread_id();
217 
218  if (ith == 0) {
219  m_CKs = CKs;
220  m_csw = csw;
221  m_boundary.resize(m_Ndim);
222  for (int mu = 0; mu < m_Ndim; ++mu) {
223  m_boundary[mu] = bc[mu];
224  }
225  }
226 
227  vout.general(m_vl, "%s: set parameters\n", class_name.c_str());
228  vout.general(m_vl, " gamma-matrix type = %s\n", m_repr.c_str());
229  vout.general(m_vl, " kappa = %8.4f\n", m_CKs);
230  vout.general(m_vl, " cSW = %12.8f\n", m_csw);
231  for (int mu = 0; mu < m_Ndim; ++mu) {
232  vout.general(m_vl, " boundary[%d] = %2d\n", mu, m_boundary[mu]);
233  }
234 
235 #pragma omp barrier
236 }
237 
238 
239 //====================================================================
240 template<typename AFIELD>
242 {
243  params.set_double("hopping_parameter", double(m_CKs));
244  params.set_double("clover_coefficient", double(m_csw));
245  params.set_int_vector("boundary_condition", m_boundary);
246  params.set_string("gamma_matrix_type", m_repr);
247 
248  params.set_string("verbose_level", vout.get_verbose_level(m_vl));
249 }
250 
251 
252 //====================================================================
253 template<typename AFIELD>
255 {
256  int nth = ThreadManager::get_num_threads();
257 
258  vout.detailed(m_vl, "%s: set_config is called: num_threads = %d\n",
259  class_name.c_str(), nth);
260 
261  if (nth > 1) {
262  set_config_impl(u);
263  } else {
264  set_config_omp(u);
265  }
266 
267  vout.detailed(m_vl, "%s: set_config finished\n", class_name.c_str());
268 }
269 
270 
271 //====================================================================
272 template<typename AFIELD>
274 {
275  vout.detailed(m_vl, " set_config_omp is called.\n");
276 
277 #pragma omp parallel
278  {
279  set_config_impl(u);
280  }
281 }
282 
283 
284 //====================================================================
285 template<typename AFIELD>
287 {
288 #pragma omp barrier
289 
290  int ith = ThreadManager::get_thread_id();
291 
292  if (ith == 0) m_conf = u;
293 
295 
296  convert_gauge(index, m_U, *u);
297 
298  QXS_Gauge::set_boundary(m_U, m_boundary);
299 
300  m_fopr_csw->set_config(u);
301 
302 #ifdef CHIRAL_ROTATION
303  set_csw_chrot();
304 #else
305  set_csw();
306 #endif
307 }
308 
309 
310 //====================================================================
311 template<typename AFIELD>
313 {
314  // The Dirac representation is assumed.
315  // Current implementation makes use of the corelib Fopr_CloverTerm
316  // that requires change of the gamma-matrix convention from
317  // Bridge++ to QXS (BQCD) by multiplying gamma_4 before and after
318  // m_fopr_csw->mult().
319  // For numerical efficiency, proper implementation is necessaty.
320  // [08 Mar 2021 H.Matsufuru]
321 
322 #pragma omp barrier
323 
325 
326  const int Nin = NDF * ND * 2;
327 
328  m_fopr_csw->set_mode("D");
329 
330  int ith, nth, is, ns;
331  set_threadtask_mult(ith, nth, is, ns, m_Nst);
332 
333  for (int id = 0; id < m_Nd / 2; ++id) {
334  for (int ic = 0; ic < m_Nc; ++ic) {
335  m_w1.set(0.0);
336 #pragma omp barrier
337 
338  for (int site = is; site < ns; ++site) {
339  m_w1.set_r(ic, id, site, 0, 1.0);
340  }
341 #pragma omp barrier
342 
343  m_fopr_csw->mult(m_w2, m_w1);
344 #pragma omp barrier
345 
346  for (int site = is; site < ns; ++site) {
347  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
348  real_t vt_r = m_w2.cmp_r(ic2, 0, site, 0);
349  real_t vt_i = m_w2.cmp_i(ic2, 0, site, 0);
350  int in = ic2 + NC * (ic + NC * (id + 0));
351  int idx_r = index.idx(2 * in, Nin, site, 0);
352  int idx_i = index.idx(2 * in + 1, Nin, site, 0);
353  m_T.set(idx_r, vt_r);
354  m_T.set(idx_i, vt_i);
355  }
356 
357  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
358  real_t vt_r = m_w2.cmp_r(ic2, 1, site, 0);
359  real_t vt_i = m_w2.cmp_i(ic2, 1, site, 0);
360  int in = ic2 + NC * (ic + NC * (id + 4));
361  int idx_r = index.idx(2 * in, Nin, site, 0);
362  int idx_i = index.idx(2 * in + 1, Nin, site, 0);
363  m_T.set(idx_r, vt_r);
364  m_T.set(idx_i, vt_i);
365  }
366 
367  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
368  real_t vt_r = -m_w2.cmp_r(ic2, 2, site, 0);
369  real_t vt_i = -m_w2.cmp_i(ic2, 2, site, 0);
370  int in = ic2 + NC * (ic + NC * (id + 2));
371  int idx_r = index.idx(2 * in, Nin, site, 0);
372  int idx_i = index.idx(2 * in + 1, Nin, site, 0);
373  m_T.set(idx_r, vt_r);
374  m_T.set(idx_i, vt_i);
375  }
376 
377  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
378  real_t vt_r = -m_w2.cmp_r(ic2, 3, site, 0);
379  real_t vt_i = -m_w2.cmp_i(ic2, 3, site, 0);
380  int in = ic2 + NC * (ic + NC * (id + 6));
381  int idx_r = index.idx(2 * in, Nin, site, 0);
382  int idx_i = index.idx(2 * in + 1, Nin, site, 0);
383  m_T.set(idx_r, vt_r);
384  m_T.set(idx_i, vt_i);
385  }
386  } // site loop
387 #pragma omp barrier
388  }
389  }
390 #pragma omp barrier
391 
392  real_t kappaR = 1.0 / m_CKs;
393  scal(m_T, kappaR);
394 
395 #pragma omp barrier
396 }
397 
398 
399 //====================================================================
400 template<typename AFIELD>
402 {
403  // This method set the clover term matrix assuming the chiral
404  // representation for gamma-matrix in Bridge++ convention.
405  // The conversion from the Bridge++ to QWS convention and
406  // from Dirac to chiral representations are assumed to be cared
407  // in BridgeQXS functions.
408  // [17 Jul 2021 H.Matsufuru]
409 
411 
412  const int Nin = NDF * ND * 2;
413 
414  m_fopr_csw->set_mode("D");
415 
416  int ith, nth, is, ns;
417  set_threadtask_mult(ith, nth, is, ns, m_Nst);
418 
419  //11 22 33 44 55 66 12 34 56 23 45 13 24 35 46 15 26 14 36 25 16
420  constexpr int idx[72] = {
421  0, -1, 6, 7, 16, 17, 28, 29, 24, 25, 34, 35,
422  -1, -1, 1, -1, 12, 13, 18, 19, 32, 33, 26, 27,
423  -1, -1, -1, -5, 2, -1, 8, 9, 20, 21, 30, 31,
424  -1, -1, -1, -1, -1, -1, 3, -1, 14, 15, 22, 23,
425  -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, 10, 11,
426  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1,
427  };
428 
429  for (int id = 0; id < m_Nd / 2; ++id) {
430  for (int ic = 0; ic < m_Nc; ++ic) {
431  m_w1.set(0.0);
432 #pragma omp barrier
433 
434  for (int site = is; site < ns; ++site) {
435  m_w1.set_r(ic, id, site, 0, 1.0);
436  m_w1.set_r(ic, id + 2, site, 0, 1.0);
437  }
438 #pragma omp barrier
439 
440  m_fopr_csw->mult(m_w2, m_w1);
441 #pragma omp barrier
442 
443  for (int site = is; site < ns; ++site) {
444  for (int id2 = 0; id2 < m_Nd; ++id2) {
445  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
446  real_t vt_r = 0.5 * m_w2.cmp_r(ic2, id2, site, 0);
447  real_t vt_i = 0.5 * m_w2.cmp_i(ic2, id2, site, 0);
448  int i = ic2 + m_Nc * (id2 % 2);
449  int j = ic + m_Nc * id;
450  int ij = m_Nc * 2 * i + j;
451  int in_r = idx[2 * ij];
452  int in_i = idx[2 * ij + 1];
453  if (in_r >= 0) {
454  in_r += 36 * (id2 / 2);
455  int idx_r = index.idx(in_r, Nin, site, 0);
456  m_T.set(idx_r, vt_r);
457  }
458  if (in_i >= 0) {
459  in_i += 36 * (id2 / 2);
460  int idx_i = index.idx(in_i, Nin, site, 0);
461  m_T.set(idx_i, vt_i);
462  }
463  }
464  }
465  } // site loop
466 #pragma omp barrier
467  }
468  }
469 #pragma omp barrier
470 
471  real_t kappaR = 1.0 / m_CKs;
472  scal(m_T, kappaR);
473 
474 #pragma omp barrier
475 }
476 
477 
478 //====================================================================
479 template<typename AFIELD>
481 {
483  convert_spinor(index_lex, m_v2, w);
484 
485  mult_gm4(v, m_v2);
486 }
487 
488 
489 //====================================================================
490 template<typename AFIELD>
492 {
493  mult_gm4(m_v2, w);
494 
496  reverse_spinor(index_lex, v, m_v2);
497 }
498 
499 
500 //====================================================================
501 template<typename AFIELD>
502 void AFopr_Clover<AFIELD>::mult_up(int mu, AFIELD& v, const AFIELD& w)
503 {
504  real_t *vp = v.ptr(0);
505  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
506 
507  if (mu == 0) {
508  mult_xp(vp, wp);
509  } else if (mu == 1) {
510  mult_yp(vp, wp);
511  } else if (mu == 2) {
512  mult_zp(vp, wp);
513  } else if (mu == 3) {
514  mult_tp(vp, wp);
515  } else {
516  vout.crucial(m_vl, "%s: mult_up for %d direction is undefined.",
517  class_name.c_str(), mu);
518  exit(EXIT_FAILURE);
519  }
520 }
521 
522 
523 //====================================================================
524 template<typename AFIELD>
525 void AFopr_Clover<AFIELD>::mult_dn(int mu, AFIELD& v, const AFIELD& w)
526 {
527  real_t *vp = v.ptr(0);
528  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
529 
530  if (mu == 0) {
531  mult_xm(vp, wp);
532  } else if (mu == 1) {
533  mult_ym(vp, wp);
534  } else if (mu == 2) {
535  mult_zm(vp, wp);
536  } else if (mu == 3) {
537  mult_tm(vp, wp);
538  } else {
539  vout.crucial(m_vl, "%s: mult_dn for %d direction is undefined.",
540  class_name.c_str(), mu);
541  exit(EXIT_FAILURE);
542  }
543 }
544 
545 
546 //====================================================================
547 template<typename AFIELD>
548 void AFopr_Clover<AFIELD>::set_mode(std::string mode)
549 {
550 #pragma omp barrier
551 
552  int ith = ThreadManager::get_thread_id();
553  if (ith == 0) m_mode = mode;
554 
555 #pragma omp barrier
556 }
557 
558 
559 //====================================================================
560 template<typename AFIELD>
562 {
563  return m_mode;
564 }
565 
566 
567 //====================================================================
568 template<typename AFIELD>
570 {
571  if (m_mode == "D") {
572  return D(v, w);
573  } else if (m_mode == "DdagD") {
574  return DdagD(v, w);
575  } else if (m_mode == "Ddag") {
576  return Ddag(v, w);
577  } else if (m_mode == "H") {
578  return H(v, w);
579  } else {
580  vout.crucial(m_vl, "%s: mode undefined.\n", class_name.c_str());
581  exit(EXIT_FAILURE);
582  }
583 }
584 
585 
586 //====================================================================
587 template<typename AFIELD>
589 {
590  if (m_mode == "D") {
591  return Ddag(v, w);
592  } else if (m_mode == "DdagD") {
593  return DdagD(v, w);
594  } else if (m_mode == "Ddag") {
595  return D(v, w);
596  } else if (m_mode == "H") {
597  return H(v, w);
598  } else {
599  vout.crucial(m_vl, "%s: mode undefined.\n", class_name.c_str());
600  exit(EXIT_FAILURE);
601  }
602 }
603 
604 
605 //====================================================================
606 template<typename AFIELD>
608 {
609  mult_D(v, w);
610  //mult_D_alt(v, w);
611 }
612 
613 
614 //====================================================================
615 template<typename AFIELD>
617 {
618  D(m_v2, w);
619  mult_gm5(v, m_v2);
620  D(m_v2, v);
621  mult_gm5(v, m_v2);
622 }
623 
624 
625 //====================================================================
626 template<typename AFIELD>
628 {
629  mult_gm5(v, w);
630  D(m_v2, v);
631  mult_gm5(v, m_v2);
632 }
633 
634 
635 //====================================================================
636 template<typename AFIELD>
638 {
639  real_t *vp = v.ptr(0);
640  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
641 
642 #pragma omp barrier
643 
644  BridgeQXS::mult_wilson_gm5_dirac(vp, wp, m_Nsize);
645 
646 #pragma omp barrier
647 
648  // mult_gm5(vp, wp);
649 }
650 
651 
652 //====================================================================
653 
654 /*
655 template<typename AFIELD>
656 void AFopr_Clover<AFIELD>::mult_gm5(real_t *v, real_t *w)
657 { // Dirac representation.
658 
659 #pragma omp barrier
660 
661  BridgeQXS::mult_wilson_gm5_dirac(v, w, m_Nsize);
662 
663 #pragma omp barrier
664 
665 }
666 */
667 
668 //====================================================================
669 template<typename AFIELD>
671 {
672  real_t *vp = v.ptr(0);
673  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
674 
675  int ith, nth, is, ns;
676  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
677 
678  Vsimd_t wt[2];
679 
680  for (int site = is; site < ns; ++site) {
681  for (int ic = 0; ic < NC; ++ic) {
682  for (int id = 0; id < ND2; ++id) {
683  int idx1 = 2 * (id + ND * ic) + NVCD * site;
684  load_vec(wt, &wp[VLEN * idx1], 2);
685  save_vec(&vp[VLEN * idx1], wt, 2);
686  }
687 
688  for (int id = ND2; id < ND; ++id) {
689  int idx1 = 2 * (id + ND * ic) + NVCD * site;
690  load_vec(wt, &wp[VLEN * idx1], 2);
691  scal_vec(wt, real_t(-1.0), 2);
692  save_vec(&vp[VLEN * idx1], wt, 2);
693  }
694  }
695  }
696 
697 #pragma omp barrier
698 }
699 
700 
701 //====================================================================
702 template<typename AFIELD>
704 { // Dirac representation is assumed.
705  real_t *u = m_T.ptr(0);
706 
707  int ith, nth, is, ns;
708  set_threadtask(ith, nth, is, ns, m_Nstv);
709 
710 #pragma omp barrier
711 
712  for (int site = is; site < ns; ++site) {
713  Vsimd_t v2v[NVCD];
714  // clear_vec(v2v, NVCD);
715  load_vec(v2v, &v2[VLEN * NVCD * site], NVCD);
716 
717  Vsimd_t v1v[NVCD];
718  load_vec(v1v, &v1[VLEN * NVCD * site], NVCD);
719 
720  Vsimd_t ut[NDF], wt1[2], wt2[2];
721 
722  for (int jd = 0; jd < ND2; ++jd) {
723  for (int id = 0; id < ND; ++id) {
724  int ig = VLEN * NDF * (site + m_Nstv * (id + ND * jd));
725  load_vec(ut, &u[ig], NDF);
726  for (int ic = 0; ic < NC; ++ic) {
727  int ic2 = 2 * ic;
728  int id2 = (id + ND2) % ND;
729  mult_ctv(wt1, &ut[ic2], &v1v[2 * id], NC);
730  mult_ctv(wt2, &ut[ic2], &v1v[2 * id2], NC);
731  int icd1 = 2 * (jd + ND * ic);
732  int icd2 = 2 * (jd + ND2 + ND * ic);
733  axpy_vec(&v2v[icd1], real_t(1.0), wt1, 2);
734  axpy_vec(&v2v[icd2], real_t(1.0), wt2, 2);
735  }
736  }
737  }
738 
739  save_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
740  }
741 
742 #pragma omp barrier
743 }
744 
745 
746 //====================================================================
747 template<typename AFIELD>
749 {
750  real_t *v2 = v.ptr(0);
751  real_t *v1 = const_cast<AFIELD *>(&w)->ptr(0);
752  real_t *up = m_U.ptr(0);
753  real_t *ct = m_T.ptr(0);
754 
755  int ith = ThreadManager::get_thread_id();
756 
757 #pragma omp barrier
758 
759  if (do_comm_any > 0) {
760  if (ith == 0) chset_recv.start();
761 
762  real_t *buf1_xp = (real_t *)chsend_dn[0].ptr();
763  real_t *buf1_xm = (real_t *)chsend_up[0].ptr();
764  real_t *buf1_yp = (real_t *)chsend_dn[1].ptr();
765  real_t *buf1_ym = (real_t *)chsend_up[1].ptr();
766  real_t *buf1_zp = (real_t *)chsend_dn[2].ptr();
767  real_t *buf1_zm = (real_t *)chsend_up[2].ptr();
768  real_t *buf1_tp = (real_t *)chsend_dn[3].ptr();
769  real_t *buf1_tm = (real_t *)chsend_up[3].ptr();
770 
771  BridgeQXS::mult_wilson_1_dirac(buf1_xp, buf1_xm, buf1_yp, buf1_ym,
772  buf1_zp, buf1_zm, buf1_tp, buf1_tm,
773  up, v1, &m_boundary[0], m_Nsize, do_comm);
774 
775 #pragma omp barrier
776 
777  if (ith == 0) chset_send.start();
778  }
779 
780 #ifdef CHIRAL_ROTATION
782  m_CKs, &m_boundary[0], m_Nsize, do_comm);
783 #else
784  BridgeQXS::mult_clover_bulk_dirac(v2, up, ct, v1,
785  m_CKs, &m_boundary[0], m_Nsize, do_comm);
786 #endif
787 
788  if (do_comm_any > 0) {
789  if (ith == 0) chset_recv.wait();
790 
791 #pragma omp barrier
792 
793  real_t *buf2_xp = (real_t *)chrecv_up[0].ptr();
794  real_t *buf2_xm = (real_t *)chrecv_dn[0].ptr();
795  real_t *buf2_yp = (real_t *)chrecv_up[1].ptr();
796  real_t *buf2_ym = (real_t *)chrecv_dn[1].ptr();
797  real_t *buf2_zp = (real_t *)chrecv_up[2].ptr();
798  real_t *buf2_zm = (real_t *)chrecv_dn[2].ptr();
799  real_t *buf2_tp = (real_t *)chrecv_up[3].ptr();
800  real_t *buf2_tm = (real_t *)chrecv_dn[3].ptr();
801 
803  buf2_xp, buf2_xm, buf2_yp, buf2_ym,
804  buf2_zp, buf2_zm, buf2_tp, buf2_tm,
805  m_CKs, &m_boundary[0], m_Nsize, do_comm);
806 
807  if (ith == 0) chset_send.wait();
808  }
809 
810 #pragma omp barrier
811 }
812 
813 
814 //====================================================================
815 template<typename AFIELD>
817 {
818  real_t *vp = v.ptr(0);
819  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
820 
821  clear(vp);
822 
823  mult_xp(vp, wp);
824  mult_xm(vp, wp);
825  mult_yp(vp, wp);
826  mult_ym(vp, wp);
827  mult_zp(vp, wp);
828  mult_zm(vp, wp);
829  mult_tp(vp, wp);
830  mult_tm(vp, wp);
831 
832  mult_csw(vp, wp);
833 
834  aypx(-m_CKs, vp, wp);
835 
836 #pragma omp barrier
837 }
838 
839 
840 //====================================================================
841 template<typename AFIELD>
843 {
844  D(m_v2, w);
845  mult_gm5(v, m_v2);
846 }
847 
848 
849 //====================================================================
850 template<typename AFIELD>
852 {
853  int ith, nth, is, ns;
854  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
855 
856  Vsimd_t vt[NVCD], wt[NVCD];
857 
858  for (int site = is; site < ns; ++site) {
859  load_vec(vt, &v[VLEN * NVCD * site], NVCD);
860  load_vec(wt, &w[VLEN * NVCD * site], NVCD);
861  aypx_vec(a, vt, wt, NVCD);
862  save_vec(&v[VLEN * NVCD * site], vt, NVCD);
863  }
864 }
865 
866 
867 //====================================================================
868 template<typename AFIELD>
870 {
871  int ith, nth, is, ns;
872  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
873 
874  Vsimd_t vt[NVCD];
875  clear_vec(vt, NVCD);
876 
877  for (int site = is; site < ns; ++site) {
878  save_vec(&v[VLEN * NVCD * site], vt, NVCD);
879  }
880 }
881 
882 
883 //====================================================================
884 template<typename AFIELD>
886 {
887  int idir = 0;
888 
889  int ith, nth, is, ns;
890  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
891 
892  Vsimd_t v2v[NVCD];
893 
894  real_t *buf1 = (real_t *)chsend_dn[0].ptr();
895  real_t *buf2 = (real_t *)chrecv_up[0].ptr();
896 
897  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
898 
899 #pragma omp barrier
900 
901  if (do_comm[0] > 0) {
902  for (int site = is; site < ns; ++site) {
903  int ix = site % m_Nxv;
904  int iyzt = site / m_Nxv;
905  if (ix == 0) {
906  int ibf = VLENY * NVC * ND2 * iyzt;
907  mult_wilson_xp1(&buf1[ibf], &v1[VLEN * NVCD * site]);
908  }
909  }
910 
911 #pragma omp barrier
912 
913 #pragma omp master
914  {
915  chrecv_up[0].start();
916  chsend_dn[0].start();
917  chrecv_up[0].wait();
918  chsend_dn[0].wait();
919  }
920 #pragma omp barrier
921  } // if(do_comm[0] == 1)
922 
923  for (int site = is; site < ns; ++site) {
924  int ix = site % m_Nxv;
925  int iyzt = site / m_Nxv;
926 
927  Vsimd_t v2v[NVCD];
928  clear_vec(v2v, NVCD);
929 
930  real_t zL[VLEN * NVCD];
931 
932  if ((ix < m_Nxv - 1) || (do_comm[0] == 0)) {
933  int nei = ix + 1 + m_Nxv * iyzt;
934  if (ix == m_Nxv - 1) nei = 0 + m_Nxv * iyzt;
935  shift_vec2_xbw(zL, &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei], NVCD);
936  mult_wilson_xpb(v2v, &u[VLEN * NDF * site], zL);
937  } else {
938  int ibf = VLENY * NVC * ND2 * iyzt;
939  shift_vec0_xbw(zL, &v1[VLEN * NVCD * site], NVCD);
940  mult_wilson_xpb(v2v, &u[VLEN * NDF * site], zL);
941  mult_wilson_xp2(v2v, &u[VLEN * NDF * site], &buf2[ibf]);
942  }
943 
944  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
945  }
946 
947 #pragma omp barrier
948 }
949 
950 
951 //====================================================================
952 template<typename AFIELD>
954 {
955  int idir = 0;
956 
957  int ith, nth, is, ns;
958  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
959 
960  Vsimd_t v2v[NVCD];
961 
962  real_t *buf1 = (real_t *)chsend_up[0].ptr();
963  real_t *buf2 = (real_t *)chrecv_dn[0].ptr();
964 
965  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
966 
967 #pragma omp barrier
968 
969  if (do_comm[0] > 0) {
970  for (int site = is; site < ns; ++site) {
971  int ix = site % m_Nxv;
972  int iyzt = site / m_Nxv;
973  if (ix == m_Nxv - 1) {
974  int ibf = VLENY * NVC * ND2 * iyzt;
975  mult_wilson_xm1(&buf1[ibf], &u[VLEN * NDF * site],
976  &v1[VLEN * NVCD * site]);
977  }
978  }
979 
980 #pragma omp barrier
981 #pragma omp master
982  {
983  chrecv_dn[0].start();
984  chsend_up[0].start();
985  chrecv_dn[0].wait();
986  chsend_up[0].wait();
987  }
988 #pragma omp barrier
989  } // end of if(do_comm[0] > 0)
990 
991  for (int site = is; site < ns; ++site) {
992  int ix = site % m_Nxv;
993  int iyzt = site / m_Nxv;
994 
995  real_t zL[VLEN * NVCD];
996  real_t uL[VLEN * NDF];
997 
998  clear_vec(v2v, NVCD);
999 
1000  if ((ix > 0) || (do_comm[0] == 0)) {
1001  int nei = ix - 1 + m_Nxv * iyzt;
1002  if (ix == 0) nei = m_Nxv - 1 + m_Nxv * iyzt;
1003  shift_vec2_xfw(zL, &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei], NVCD);
1004  shift_vec2_xfw(uL, &u[VLEN * NDF * site], &u[VLEN * NDF * nei], NDF);
1005  mult_wilson_xmb(v2v, uL, zL);
1006  } else {
1007  int ibf = VLENY * NVC * ND2 * iyzt;
1008  shift_vec0_xfw(zL, &v1[VLEN * NVCD * site], NVCD);
1009  shift_vec0_xfw(uL, &u[VLEN * NDF * site], NDF);
1010  mult_wilson_xmb(v2v, uL, zL);
1011  mult_wilson_xm2(v2v, &buf2[ibf]);
1012  }
1013 
1014  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1015  }
1016 
1017 #pragma omp barrier
1018 }
1019 
1020 
1021 //====================================================================
1022 template<typename AFIELD>
1024 {
1025  int idir = 1;
1026  int Nxy = m_Nxv * m_Nyv;
1027 
1028  int ith, nth, is, ns;
1029  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1030 
1031  real_t *buf1 = (real_t *)chsend_dn[1].ptr();
1032  real_t *buf2 = (real_t *)chrecv_up[1].ptr();
1033 
1034  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1035 
1036 #pragma omp barrier
1037 
1038  if (do_comm[1] > 0) {
1039  for (int site = is; site < ns; ++site) {
1040  int ix = site % m_Nxv;
1041  int iy = (site / m_Nxv) % m_Nyv;
1042  int izt = site / Nxy;
1043  if (iy == 0) {
1044  int ibf = VLENX * NVC * ND2 * (ix + m_Nxv * izt);
1045  mult_wilson_yp1(&buf1[ibf], &v1[VLEN * NVCD * site]);
1046  }
1047  }
1048 
1049 #pragma omp barrier
1050 
1051 #pragma omp master
1052  {
1053  chrecv_up[1].start();
1054  chsend_dn[1].start();
1055  chrecv_up[1].wait();
1056  chsend_dn[1].wait();
1057  }
1058 
1059 #pragma omp barrier
1060  } // end of if(do_comm[1] > 0)
1061 
1062  for (int site = is; site < ns; ++site) {
1063  int ix = site % m_Nxv;
1064  int iy = (site / m_Nxv) % m_Nyv;
1065  int izt = site / Nxy;
1066 
1067  Vsimd_t v2v[NVCD];
1068  clear_vec(v2v, NVCD);
1069 
1070  real_t zL[VLEN * NVCD];
1071 
1072  if ((iy < m_Nyv - 1) || (do_comm[1] == 0)) {
1073  int iy2 = (iy + 1) % m_Nyv;
1074  int nei = ix + m_Nxv * (iy2 + m_Nyv * izt);
1075  shift_vec2_ybw(zL, &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei], NVCD);
1076  mult_wilson_ypb(v2v, &u[VLEN * NDF * site], zL);
1077  } else {
1078  int ibf = VLENX * NVC * ND2 * (ix + m_Nxv * izt);
1079  shift_vec0_ybw(zL, &v1[VLEN * NVCD * site], NVCD);
1080  mult_wilson_ypb(v2v, &u[VLEN * NDF * site], zL);
1081  mult_wilson_yp2(v2v, &u[VLEN * NDF * site], &buf2[ibf]);
1082  }
1083 
1084  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1085  }
1086 
1087 #pragma omp barrier
1088 }
1089 
1090 
1091 //====================================================================
1092 template<typename AFIELD>
1094 {
1095  int idir = 1;
1096  int Nxy = m_Nxv * m_Nyv;
1097 
1098  int ith, nth, is, ns;
1099  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1100 
1101  real_t *buf1 = (real_t *)chsend_up[1].ptr();
1102  real_t *buf2 = (real_t *)chrecv_dn[1].ptr();
1103 
1104  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1105 
1106 #pragma omp barrier
1107 
1108  if (do_comm[1] > 0) {
1109  for (int site = is; site < ns; ++site) {
1110  int ix = site % m_Nxv;
1111  int iy = (site / m_Nxv) % m_Nyv;
1112  int izt = site / Nxy;
1113  if (iy == m_Nyv - 1) {
1114  int ibf = VLENX * NVC * ND2 * (ix + m_Nxv * izt);
1115  mult_wilson_ym1(&buf1[ibf], &u[VLEN * NDF * site],
1116  &v1[VLEN * NVCD * site]);
1117  }
1118  }
1119 
1120 #pragma omp barrier
1121 
1122 #pragma omp master
1123  {
1124  chrecv_dn[1].start();
1125  chsend_up[1].start();
1126  chrecv_dn[1].wait();
1127  chsend_up[1].wait();
1128  }
1129 
1130 #pragma omp barrier
1131  }
1132 
1133  for (int site = is; site < ns; ++site) {
1134  int ix = site % m_Nxv;
1135  int iy = (site / m_Nxv) % m_Nyv;
1136  int izt = site / Nxy;
1137 
1138  Vsimd_t v2v[NVCD];
1139  clear_vec(v2v, NVCD);
1140 
1141  real_t zL[VLEN * NVCD];
1142  real_t uL[VLEN * NDF];
1143 
1144  if ((iy != 0) || (do_comm[idir] == 0)) {
1145  int iy2 = (iy - 1 + m_Nyv) % m_Nyv;
1146  int nei = ix + m_Nxv * (iy2 + m_Nyv * izt);
1147  shift_vec2_yfw(zL, &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei], NVCD);
1148  shift_vec2_yfw(uL, &u[VLEN * NDF * site], &u[VLEN * NDF * nei], NDF);
1149  mult_wilson_ymb(v2v, uL, zL);
1150  } else {
1151  int ibf = VLENX * NVC * ND2 * (ix + m_Nxv * izt);
1152  shift_vec0_yfw(zL, &v1[VLEN * NVCD * site], NVCD);
1153  shift_vec0_yfw(uL, &u[VLEN * NDF * site], NDF);
1154  mult_wilson_ymb(v2v, uL, zL);
1155  mult_wilson_ym2(v2v, &buf2[ibf]);
1156  }
1157 
1158  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1159  }
1160 
1161 #pragma omp barrier
1162 }
1163 
1164 
1165 //====================================================================
1166 template<typename AFIELD>
1168 {
1169  int idir = 2;
1170  int Nxy = m_Nxv * m_Nyv;
1171 
1172  int ith, nth, is, ns;
1173  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1174 
1175  real_t *buf1 = (real_t *)chsend_dn[2].ptr();
1176  real_t *buf2 = (real_t *)chrecv_up[2].ptr();
1177 
1178  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1179 
1180 #pragma omp barrier
1181 
1182  if (do_comm[2] > 0) {
1183  for (int site = is; site < ns; ++site) {
1184  int ixy = site % Nxy;
1185  int iz = (site / Nxy) % m_Nz;
1186  int it = site / (Nxy * m_Nz);
1187  if (iz == 0) {
1188  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
1189  mult_wilson_zp1(&buf1[ibf], &v1[VLEN * NVCD * site]);
1190  }
1191  }
1192 
1193 #pragma omp barrier
1194 
1195 #pragma omp master
1196  {
1197  chrecv_up[2].start();
1198  chsend_dn[2].start();
1199  chrecv_up[2].wait();
1200  chsend_dn[2].wait();
1201  }
1202 
1203 #pragma omp barrier
1204  }
1205 
1206  for (int site = is; site < ns; ++site) {
1207  int ixy = site % Nxy;
1208  int iz = (site / Nxy) % m_Nz;
1209  int it = site / (Nxy * m_Nz);
1210 
1211  Vsimd_t v2v[NVCD];
1212  clear_vec(v2v, NVCD);
1213 
1214  if ((iz != m_Nz - 1) || (do_comm[2] == 0)) {
1215  int iz2 = (iz + 1) % m_Nz;
1216  int nei = ixy + Nxy * (iz2 + m_Nz * it);
1217  mult_wilson_zpb(v2v, &u[VLEN * NDF * site], &v1[VLEN * NVCD * nei]);
1218  } else {
1219  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
1220  mult_wilson_zp2(v2v, &u[VLEN * NDF * site], &buf2[ibf]);
1221  }
1222 
1223  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1224  }
1225 
1226 #pragma omp barrier
1227 }
1228 
1229 
1230 //====================================================================
1231 template<typename AFIELD>
1233 {
1234  int idir = 2;
1235  int Nxy = m_Nxv * m_Nyv;
1236 
1237  int ith, nth, is, ns;
1238  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1239 
1240  real_t *buf1 = (real_t *)chsend_up[2].ptr();
1241  real_t *buf2 = (real_t *)chrecv_dn[2].ptr();
1242 
1243  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1244 
1245 #pragma omp barrier
1246 
1247  if (do_comm[2] > 0) {
1248  for (int site = is; site < ns; ++site) {
1249  int ixy = site % Nxy;
1250  int iz = (site / Nxy) % m_Nz;
1251  int it = site / (Nxy * m_Nz);
1252  if (iz == m_Nz - 1) {
1253  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
1254  mult_wilson_zm1(&buf1[ibf], &u[VLEN * NDF * site],
1255  &v1[VLEN * NVCD * site]);
1256  }
1257  }
1258 
1259 #pragma omp barrier
1260 
1261 #pragma omp master
1262  {
1263  chrecv_dn[2].start();
1264  chsend_up[2].start();
1265  chrecv_dn[2].wait();
1266  chsend_up[2].wait();
1267  }
1268 
1269 #pragma omp barrier
1270  }
1271 
1272  for (int site = is; site < ns; ++site) {
1273  int ixy = site % Nxy;
1274  int iz = (site / Nxy) % m_Nz;
1275  int it = site / (Nxy * m_Nz);
1276 
1277  Vsimd_t v2v[NVCD];
1278  clear_vec(v2v, NVCD);
1279 
1280  if ((iz > 0) || (do_comm[2] == 0)) {
1281  int iz2 = (iz - 1 + m_Nz) % m_Nz;
1282  int nei = ixy + Nxy * (iz2 + m_Nz * it);
1283  mult_wilson_zmb(v2v, &u[VLEN * NDF * nei], &v1[VLEN * NVCD * nei]);
1284  } else {
1285  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
1286  mult_wilson_zm2(v2v, &buf2[ibf]);
1287  }
1288 
1289  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1290  }
1291 
1292 #pragma omp barrier
1293 }
1294 
1295 
1296 //====================================================================
1297 template<typename AFIELD>
1299 {
1300  int idir = 3;
1301  int Nxyz = m_Nxv * m_Nyv * m_Nz;
1302 
1303  int ith, nth, is, ns;
1304  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1305 
1306  real_t *buf1 = (real_t *)chsend_dn[3].ptr();
1307  real_t *buf2 = (real_t *)chrecv_up[3].ptr();
1308 
1309  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1310 
1311 #pragma omp barrier
1312 
1313  if (do_comm[3] > 0) {
1314  for (int site = is; site < ns; ++site) {
1315  int ixyz = site % Nxyz;
1316  int it = site / Nxyz;
1317  if (it == 0) {
1318  mult_wilson_tp1_dirac(&buf1[VLEN * NVC * ND2 * ixyz],
1319  &v1[VLEN * NVCD * site]);
1320  }
1321  }
1322 
1323 #pragma omp barrier
1324 
1325 #pragma omp master
1326  {
1327  chrecv_up[3].start();
1328  chsend_dn[3].start();
1329  chrecv_up[3].wait();
1330  chsend_dn[3].wait();
1331  }
1332 
1333 #pragma omp barrier
1334  }
1335 
1336  for (int site = is; site < ns; ++site) {
1337  int ixyz = site % Nxyz;
1338  int it = site / Nxyz;
1339 
1340  Vsimd_t v2v[NVCD];
1341  clear_vec(v2v, NVCD);
1342 
1343  if ((it < m_Nt - 1) || (do_comm[3] == 0)) {
1344  int it2 = (it + 1) % m_Nt;
1345  int nei = ixyz + Nxyz * it2;
1346  mult_wilson_tpb_dirac(v2v, &u[VLEN * NDF * site],
1347  &v1[VLEN * NVCD * nei]);
1348  } else {
1349  mult_wilson_tp2_dirac(v2v, &u[VLEN * NDF * site],
1350  &buf2[VLEN * NVC * ND2 * ixyz]);
1351  }
1352 
1353  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1354  }
1355 
1356 #pragma omp barrier
1357 }
1358 
1359 
1360 //====================================================================
1361 template<typename AFIELD>
1363 {
1364  int idir = 3;
1365  int Nxyz = m_Nxv * m_Nyv * m_Nz;
1366 
1367  int ith, nth, is, ns;
1368  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1369 
1370  real_t *buf1 = (real_t *)chsend_up[3].ptr();
1371  real_t *buf2 = (real_t *)chrecv_dn[3].ptr();
1372 
1373  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1374 
1375 #pragma omp barrier
1376 
1377  if (do_comm[3] > 0) {
1378  for (int site = is; site < ns; ++site) {
1379  int ixyz = site % Nxyz;
1380  int it = site / Nxyz;
1381  if (it == m_Nt - 1) {
1382  mult_wilson_tm1_dirac(&buf1[VLEN * NVC * ND2 * ixyz],
1383  &u[VLEN * NDF * site], &v1[VLEN * NVCD * site]);
1384  }
1385  }
1386 
1387 #pragma omp barrier
1388 
1389 #pragma omp master
1390  {
1391  chrecv_dn[3].start();
1392  chsend_up[3].start();
1393  chrecv_dn[3].wait();
1394  chsend_up[3].wait();
1395  }
1396 #pragma omp barrier
1397  }
1398 
1399  for (int site = is; site < ns; ++site) {
1400  int ixyz = site % Nxyz;
1401  int it = site / Nxyz;
1402 
1403  Vsimd_t v2v[NVCD];
1404  clear_vec(v2v, NVCD);
1405 
1406  if ((it > 0) || (do_comm[3] == 0)) {
1407  int it2 = (it - 1 + m_Nt) % m_Nt;
1408  int nei = ixyz + Nxyz * it2;
1409  mult_wilson_tmb_dirac(v2v, &u[VLEN * NDF * nei],
1410  &v1[VLEN * NVCD * nei]);
1411  } else {
1412  mult_wilson_tm2_dirac(v2v, &buf2[VLEN * NVC * ND2 * ixyz]);
1413  }
1414 
1415  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1416  }
1417 
1418 #pragma omp barrier
1419 }
1420 
1421 
1422 //====================================================================
1423 template<typename AFIELD>
1424 double AFopr_Clover<AFIELD>::flop_count(const std::string mode)
1425 {
1426  // The following counting explicitly depends on the implementation.
1427  // It will be recalculated when the code is modified.
1428 
1429  int Lvol = CommonParameters::Lvol();
1430  double flop_wilson, flop_clover, flop_site, flop;
1431 
1432  if (m_repr == "Dirac") {
1433  flop_wilson = static_cast<double>(
1434  m_Nc * m_Nd * (4 // aypx
1435  + 6 * (4 * m_Nc + 2) // spatial hopping
1436  + 2 * (4 * m_Nc + 1))); // temporal hopping
1437 
1438  // clover term mult assumes rotation to chiral repr.
1439  flop_clover = static_cast<double>(
1440  m_Nc * m_Nd * (2 // Dirac -> chiral
1441  + 2 * (2 * (m_Nc * m_Nd - 1) + 1) // clover term mult
1442  + 2 // chiral -> Dirac
1443  + 2)); // addition to vector
1444  } else if (m_repr == "Chiral") {
1445  flop_wilson = static_cast<double>(
1446  m_Nc * m_Nd * (4 + 8 * (4 * m_Nc + 2)));
1447 
1448  flop_clover = static_cast<double>(
1449  m_Nc * m_Nd * (2 * (2 * (m_Nc * m_Nd - 1) + 1) // clover term mult
1450  + 2)); // addition to vector
1451  } else {
1452  vout.crucial(m_vl, "%s: input repr is undefined.\n");
1453  exit(EXIT_FAILURE);
1454  }
1455 
1456  flop_site = flop_wilson + flop_clover;
1457 
1458  flop = flop_site * static_cast<double>(Lvol);
1459  if ((mode == "DdagD") || (mode == "DDdag")) flop *= 2.0;
1460 
1461  return flop;
1462 }
1463 
1464 
1465 //============================================================END=====
CommonParameters::Ny
static int Ny()
Definition: commonParameters.h:106
AFopr_Clover::mult_dag
void mult_dag(AFIELD &, const AFIELD &)
hermitian conjugate of mult.
Definition: afopr_Clover-tmpl.h:588
CommonParameters::Nz
static int Nz()
Definition: commonParameters.h:107
BridgeQXS::mult_wilson_gm5_dirac
void mult_wilson_gm5_dirac(double *v2, double *v1, int *Nsize)
Definition: mult_Wilson_qxs-inc.h:411
CommonParameters::Lvol
static long_t Lvol()
Definition: commonParameters.h:95
Parameters::set_string
void set_string(const string &key, const string &value)
Definition: parameters.cpp:39
BridgeQXS::mult_wilson_2_dirac
void mult_wilson_2_dirac(double *v2, double *up, double *v1, double *buf_xp, double *buf_xm, double *buf_yp, double *buf_ym, double *buf_zp, double *buf_zm, double *buf_tp, double *buf_tm, double kappa, int *bc, int *Nsize, int *do_comm)
Definition: mult_Wilson_qxs-inc.h:296
BridgeQXS::mult_wilson_1_dirac
void mult_wilson_1_dirac(double *buf_xp, double *buf_xm, double *buf_yp, double *buf_ym, double *buf_zp, double *buf_zm, double *buf_tp, double *buf_tm, double *up, double *v1, int *bc, int *Nsize, int *do_comm)
Definition: mult_Wilson_qxs-inc.h:153
AFopr_Clover::flop_count
double flop_count()
returns floating operation counts.
Definition: afopr_Clover.h:135
ThreadManager::get_num_threads
static int get_num_threads()
returns available number of threads.
Definition: threadManager.cpp:246
CommonParameters::Ndim
static int Ndim()
Definition: commonParameters.h:117
Parameters
Class for parameters.
Definition: parameters.h:46
AIndex_lex
Definition: aindex_lex_base.h:17
AFopr_Clover::clear
void clear(real_t *)
Definition: afopr_Clover-tmpl.h:869
Parameters::set_double
void set_double(const string &key, const double value)
Definition: parameters.cpp:33
Bridge::BridgeIO::decrease_indent
void decrease_indent()
Definition: bridgeIO.h:86
NVCD
#define NVCD
Definition: define_params_SU3.h:20
AFopr_Clover::Ddag
void Ddag(AFIELD &, const AFIELD &)
Definition: afopr_Clover-tmpl.h:627
Bridge::BridgeIO::increase_indent
void increase_indent()
Definition: bridgeIO.h:85
VLEN
#define VLEN
Definition: bridgeQXS_Clover_coarse_double.cpp:12
Bridge::BridgeIO::detailed
void detailed(const char *format,...)
Definition: bridgeIO.cpp:219
AFopr_Clover::mult_csw
void mult_csw(real_t *, real_t *)
set_csw now assumes Dirac repr.
Definition: afopr_Clover-tmpl.h:703
AFopr_Clover::aypx
void aypx(real_t, real_t *, real_t *)
Definition: afopr_Clover-tmpl.h:851
NDF
#define NDF
Definition: field_F_imp_SU2-inc.h:4
Fopr_CloverTerm
Org::Fopr_CloverTerm Fopr_CloverTerm
Clover term operator.
Definition: fopr_CloverTerm.h:58
CommonParameters::Nvol
static int Nvol()
Definition: commonParameters.h:109
AFopr_Clover::setup_channels
void setup_channels()
setup channels for communication.
Definition: afopr_Clover-tmpl.h:137
AFopr_Clover::D
void D(AFIELD &, const AFIELD &)
Definition: afopr_Clover-tmpl.h:607
AFopr_Clover::mult_up
void mult_up(int mu, AFIELD &, const AFIELD &)
upward nearest neighbor hopping term.
Definition: afopr_Clover-tmpl.h:502
convert_gauge
void convert_gauge(INDEX &index, FIELD &v, const Field &w)
Definition: afield-inc.h:224
aypx
void aypx(const double a, Field &y, const Field &x)
aypx(y, a, x): y := a * y + x
Definition: field.cpp:509
Vsimd_t
Definition: vsimd_double-inc.h:13
AFopr_Clover::mult_xm
void mult_xm(real_t *, real_t *)
Definition: afopr_Clover-tmpl.h:953
AFopr_Clover::mult_gm4
void mult_gm4(AFIELD &, const AFIELD &)
Definition: afopr_Clover-tmpl.h:670
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
AFopr_Clover::set_config_omp
void set_config_omp(Field *u)
setting gauge configuration (setting omp parallel).
Definition: afopr_Clover-tmpl.h:273
CommonParameters::Nx
static int Nx()
Definition: commonParameters.h:105
AFopr_Clover::real_t
AFIELD::real_t real_t
Definition: afopr_Clover.h:44
AFopr_Clover::get_mode
std::string get_mode() const
returns mult mode.
Definition: afopr_Clover-tmpl.h:561
AFopr_Clover::mult_zp
void mult_zp(real_t *, real_t *)
Definition: afopr_Clover-tmpl.h:1167
AFopr_Clover::mult_gm5
void mult_gm5(AFIELD &, const AFIELD &)
multiplies gamma_5 matrix.
Definition: afopr_Clover-tmpl.h:637
AFopr_Clover::set_mode
void set_mode(std::string mode)
setting mult mode.
Definition: afopr_Clover-tmpl.h:548
AFopr_Clover::mult_D_alt
void mult_D_alt(AFIELD &, const AFIELD &)
D mult using mult_xp, etc.
Definition: afopr_Clover-tmpl.h:816
CommonParameters::Nc
static int Nc()
Definition: commonParameters.h:115
NC
#define NC
Definition: field_F_imp_SU2-inc.h:2
AFopr_Clover::init
void init(const Parameters &params)
initial setup.
Definition: afopr_Clover-tmpl.h:17
QXS_Gauge::set_boundary
void set_boundary(AField< REALTYPE, QXS > &ulex, const std::vector< int > &boundary)
Definition: afield_Gauge-inc.h:24
reverse_spinor
void reverse_spinor(INDEX &index, Field &v, FIELD &w)
Definition: afield-inc.h:380
CommonParameters::Nt
static int Nt()
Definition: commonParameters.h:108
Parameters::fetch_int_vector
int fetch_int_vector(const string &key, vector< int > &value) const
Definition: parameters.cpp:429
Communicator::npe
static int npe(const int dir)
logical grid extent
Definition: communicator.cpp:112
BridgeQXS::mult_clover_bulk_dirac
void mult_clover_bulk_dirac(double *v2, double *up, double *ct, double *v1, double kappa, int *bc, int *Nsize, int *do_comm)
Definition: mult_Clover_qxs-inc.h:16
AIndex_eo_qxs::idx
int idx(const int in, const int Nin, const int ist, const int Nx2, const int Ny, const int leo, const int Nvol2, const int ex)
Definition: aindex_eo.h:27
AFopr_Clover::mult_xp
void mult_xp(real_t *, real_t *)
Definition: afopr_Clover-tmpl.h:885
AFopr_Clover::mult_ym
void mult_ym(real_t *, real_t *)
Definition: afopr_Clover-tmpl.h:1093
threadManager.h
ND
#define ND
Definition: field_F_imp_SU2-inc.h:5
AFopr_Clover::set_parameters
void set_parameters(const Parameters &params)
setting parameters by a Parameter object.
Definition: afopr_Clover-tmpl.h:181
AFopr_Clover::tidyup
void tidyup()
final tidy-up.
Definition: afopr_Clover-tmpl.h:171
Parameters::set_int_vector
void set_int_vector(const string &key, const vector< int > &value)
Definition: parameters.cpp:45
AFopr_Clover::mult_tp
void mult_tp(real_t *, real_t *)
Definition: afopr_Clover-tmpl.h:1298
AFopr_Clover::H
void H(AFIELD &, const AFIELD &)
Definition: afopr_Clover-tmpl.h:842
BridgeQXS::mult_clover_bulk_dirac_chrot
void mult_clover_bulk_dirac_chrot(double *v2, double *up, double *ct, double *v1, double kappa, int *bc, int *Nsize, int *do_comm)
Definition: mult_Clover_qxs-inc.h:152
AFopr_Clover::mult_D
void mult_D(AFIELD &, const AFIELD &)
standard D mult.
Definition: afopr_Clover-tmpl.h:748
AFopr_Clover::set_csw
void set_csw()
set_csw now assumes Dirac repr.
Definition: afopr_Clover-tmpl.h:312
AFopr_Clover::set_config
void set_config(Field *u)
setting gauge configuration (common interface).
Definition: afopr_Clover-tmpl.h:254
AFopr_Clover::mult_zm
void mult_zm(real_t *, real_t *)
Definition: afopr_Clover-tmpl.h:1232
VLENY
#define VLENY
Definition: bridgeQXS_Clover_coarse_double.cpp:14
Field::ptr
const double * ptr(const int jin, const int site, const int jex) const
Definition: field.h:153
CommonParameters::Nd
static int Nd()
Definition: commonParameters.h:116
AFopr_Clover::set_csw_chrot
void set_csw_chrot()
set_csw with rotation to chiral repr.
Definition: afopr_Clover-tmpl.h:401
NVC
#define NVC
Definition: fopr_Wilson_impl_SU2-inc.h:15
CommonParameters::Vlevel
static Bridge::VerboseLevel Vlevel()
Definition: commonParameters.h:122
Bridge::BridgeIO::set_verbose_level
static VerboseLevel set_verbose_level(const std::string &str)
Definition: bridgeIO.cpp:133
VLENX
#define VLENX
Definition: bridgeQXS_Clover_coarse_double.cpp:13
AFopr_Clover::get_parameters
void get_parameters(Parameters &params) const
get parameters via a Parameter object
Definition: afopr_Clover-tmpl.h:241
AFopr_Clover::set_config_impl
void set_config_impl(Field *u)
setting gauge configuration (implementation).
Definition: afopr_Clover-tmpl.h:286
scal
void scal(Field &x, const double a)
scal(x, a): x = a * x
Definition: field.cpp:261
Parameters::fetch_string
int fetch_string(const string &key, string &value) const
Definition: parameters.cpp:378
Parameters::fetch_double
int fetch_double(const string &key, double &value) const
Definition: parameters.cpp:327
AFopr_Clover
Definition: afopr_Clover.h:41
Bridge::BridgeIO::crucial
void crucial(const char *format,...)
Definition: bridgeIO.cpp:180
Field
Container of Field-type object.
Definition: field.h:46
AFopr_Clover::mult_tm
void mult_tm(real_t *, real_t *)
Definition: afopr_Clover-tmpl.h:1362
AFopr_Clover::convert
void convert(AFIELD &v, const Field &w)
convert of spinor field.
Definition: afopr_Clover-tmpl.h:480
ThreadManager::get_thread_id
static int get_thread_id()
returns thread id.
Definition: threadManager.cpp:253
AFopr_Clover::mult_dn
void mult_dn(int mu, AFIELD &, const AFIELD &)
downward nearest neighbor hopping term.
Definition: afopr_Clover-tmpl.h:525
ND2
#define ND2
Definition: define_params_SU3.h:18
AFopr_Clover::reverse
void reverse(Field &v, const AFIELD &w)
reverse of spinor field.
Definition: afopr_Clover-tmpl.h:491
AFopr_Clover::DdagD
void DdagD(AFIELD &, const AFIELD &)
Definition: afopr_Clover-tmpl.h:616
Bridge::BridgeIO::general
void general(const char *format,...)
Definition: bridgeIO.cpp:200
AFopr_Clover::mult_yp
void mult_yp(real_t *, real_t *)
Definition: afopr_Clover-tmpl.h:1023
AFopr_Clover::mult
void mult(AFIELD &, const AFIELD &)
multiplies fermion operator to a given field.
Definition: afopr_Clover-tmpl.h:569
ThreadManager::assert_single_thread
static void assert_single_thread(const std::string &class_name)
assert currently running on single thread.
Definition: threadManager.cpp:372
Bridge::vout
BridgeIO vout
Definition: bridgeIO.cpp:512
Bridge::BridgeIO::get_verbose_level
static std::string get_verbose_level(const VerboseLevel vl)
Definition: bridgeIO.cpp:154
convert_spinor
void convert_spinor(INDEX &index, FIELD &v, const Field &w)
Definition: afield-inc.h:187