Bridge++  Ver. 2.0.2
afopr_Clover_QWS_dd-tmpl.h
Go to the documentation of this file.
1 
11 
12 #ifdef USE_QWSLIB
14 #endif
15 
16 template<typename AFIELD>
17 const std::string AFopr_Clover_QWS_dd<AFIELD>::class_name = "AFopr_Clover_QWS_dd";
18 
19 
20 //====================================================================
21 template<typename AFIELD>
23 {
25 
26  m_repr = "Dirac"; // now only the Dirac repr is available.
27 
28  //int req_comm = 1; // set 1 if communication forced any time
29  int req_comm = 0; // set 0 if communication called in necessary
30 
31  m_Nc = CommonParameters::Nc();
32  m_Nd = CommonParameters::Nd();
33  m_Nvc = 2 * m_Nc;
34  m_Ndf = 2 * m_Nc * m_Nc;
35 
36  m_Nx = CommonParameters::Nx();
37  m_Ny = CommonParameters::Ny();
38  m_Nz = CommonParameters::Nz();
39  m_Nt = CommonParameters::Nt();
40  m_Nst = CommonParameters::Nvol();
41  m_Ndim = CommonParameters::Ndim();
42 
43  m_Nxv = m_Nx / VLENX;
44  m_Nyv = m_Ny / VLENY;
45  m_Nstv = m_Nst / VLEN;
46 
47  m_Nsize[0] = m_Nxv;
48  m_Nsize[1] = m_Nyv;
49  m_Nsize[2] = m_Nz;
50  m_Nsize[3] = m_Nt;
51 
52  do_comm_any = 0;
53  for (int mu = 0; mu < m_Ndim; ++mu) {
54  do_comm[mu] = 1;
55  if ((req_comm == 0) && (Communicator::npe(mu) == 1)) do_comm[mu] = 0;
56  do_comm_any += do_comm[mu];
57  vout.general(" do_comm[%d] = %d\n", mu, do_comm[mu]);
58  }
59 
60  m_bdsize.resize(m_Ndim);
61  int Nd2 = m_Nd / 2;
62  m_bdsize[0] = m_Nvc * Nd2 * m_Ny * m_Nz * m_Nt;
63  m_bdsize[1] = m_Nvc * Nd2 * m_Nx * m_Nz * m_Nt;
64  m_bdsize[2] = m_Nvc * Nd2 * m_Nx * m_Ny * m_Nt;
65  m_bdsize[3] = m_Nvc * Nd2 * m_Nx * m_Ny * m_Nz;
66 
67  setup_channels();
68 
69  m_fopr_csw = new Fopr_CloverTerm(m_repr);
70  m_fopr_csw_chiral = new Fopr_CloverTerm("Chiral");
71 
72  // gauge configuration.
73  m_U.reset(NDF, m_Nst, m_Ndim);
74 
75  // gauge configuration with block condition.
76  m_Ublock.reset(NDF, m_Nst, m_Ndim);
77 
78  // working vectors.
79  int NinF = 2 * m_Nc * m_Nd;
80  m_v1.reset(NinF, m_Nst, 1);
81  m_v2.reset(NinF, m_Nst, 1);
82 
83  int Ndm2 = m_Nd * m_Nd / 2;
84  m_T.reset(NDF, m_Nst, Ndm2);
85 
86  m_T_qws.reset(72, m_Nst, 1); // 72 = 2 * (Nc*Nd/2)^2
87 }
88 
89 
90 //====================================================================
91 template<typename AFIELD>
93 {
94  chsend_up.resize(m_Ndim);
95  chrecv_up.resize(m_Ndim);
96  chsend_dn.resize(m_Ndim);
97  chrecv_dn.resize(m_Ndim);
98 
99  for (int mu = 0; mu < m_Ndim; ++mu) {
100  size_t Nvsize = m_bdsize[mu] * sizeof(real_t);
101 
102  chsend_dn[mu].send_init(Nvsize, mu, -1);
103  chsend_up[mu].send_init(Nvsize, mu, 1);
104 #ifdef USE_MPI
105  chrecv_up[mu].recv_init(Nvsize, mu, 1);
106  chrecv_dn[mu].recv_init(Nvsize, mu, -1);
107 #else
108  void *buf_up = (void *)chsend_dn[mu].ptr();
109  chrecv_up[mu].recv_init(Nvsize, mu, 1, buf_up);
110  void *buf_dn = (void *)chsend_up[mu].ptr();
111  chrecv_dn[mu].recv_init(Nvsize, mu, -1, buf_dn);
112 #endif
113 
114  if (do_comm[mu] == 1) {
115  chset_send.append(chsend_up[mu]);
116  chset_send.append(chsend_dn[mu]);
117  chset_recv.append(chrecv_up[mu]);
118  chset_recv.append(chrecv_dn[mu]);
119  }
120  }
121 }
122 
123 
124 //====================================================================
125 template<typename AFIELD>
127 {
129 #ifdef USE_QWSLIB
130  // QWS_lib::tidyup_qws();
131 #endif
132  delete m_fopr_csw;
133  delete m_fopr_csw_chiral;
134 }
135 
136 
137 //====================================================================
138 template<typename AFIELD>
140 {
141  const string str_vlevel = params.get_string("verbose_level");
142  m_vl = vout.set_verbose_level(str_vlevel);
143 
144  //- fetch and check input parameters
145  double kappa;
146  std::vector<int> bc;
147  std::vector<int> block_size;
148 
149  int err = 0;
150  err += params.fetch_double("hopping_parameter", kappa);
151  err += params.fetch_int_vector("boundary_condition", bc);
152  err += params.fetch_int_vector("block_size", block_size);
153  if (err) {
154  vout.crucial(m_vl, "Error at %s: input parameter not found.\n",
155  class_name.c_str());
156  exit(EXIT_FAILURE);
157  }
158 
159  set_parameters(real_t(kappa), bc, block_size);
160 
161  Parameters params_csw_chiral = params;
162  params_csw_chiral.set_string("gamma_matrix_type", "Chiral");
163  m_fopr_csw_chiral->set_parameters(params_csw_chiral);
164 
165 #ifdef CHIRAL_ROTATION
166  // nothing to do for the other rep.
167 #else
168  m_fopr_csw->set_parameters(params);
169 #endif
170 }
171 
172 
173 //====================================================================
174 template<typename AFIELD>
176  const real_t CKs,
177  const std::vector<int> bc,
178  const std::vector<int> block_size)
179 {
180  // ThreadManager::assert_single_thread(class_name);
181  int ith = ThreadManager::get_thread_id();
182 #pragma omp barrier
183 
184  if (ith == 0) {
185  m_CKs = CKs;
186 
187  assert(bc.size() == m_Ndim);
188  m_boundary.resize(m_Ndim);
189  m_block_size.resize(m_Ndim);
190  for (int mu = 0; mu < m_Ndim; ++mu) {
191  m_boundary[mu] = bc[mu];
192  m_block_size[mu] = block_size[mu];
193  }
194  }
195 #pragma omp barrier
196 
197  //- print input parameters
198  vout.general(m_vl, "Parameters of %s:\n", class_name.c_str());
199  vout.general(m_vl, " CKs = %8.4f\n", m_CKs);
200  for (int mu = 0; mu < m_Ndim; ++mu) {
201  vout.general(m_vl, " boundary[%d] = %2d\n", mu, m_boundary[mu]);
202  }
203  for (int mu = 0; mu < m_Ndim; ++mu) {
204  vout.general(m_vl, " block_size[%d] = %2d\n", mu, m_block_size[mu]);
205  }
206 
207  int rem = m_Nx % m_block_size[0]
208  + m_Ny % m_block_size[1]
209  + m_Nz % m_block_size[2]
210  + m_Nt % m_block_size[3];
211  if (rem != 0) {
212  vout.crucial(m_vl, "%s: block_size is irelevant.\n",
213  class_name.c_str());
214  exit(EXIT_FAILURE);
215  }
216 
217  if (ith == 0) {
218  // note that m_block_sizev is simple array
219  m_block_sizev[0] = m_block_size[0] / VLENX;
220  m_block_sizev[1] = m_block_size[1] / VLENY;
221  m_block_sizev[2] = m_block_size[2];
222  m_block_sizev[3] = m_block_size[3];
223  if ((m_block_sizev[0] * VLENX != m_block_size[0]) ||
224  (m_block_sizev[1] * VLENY != m_block_size[1])) {
225  vout.crucial(m_vl, "%s: bad blocksize in XY: must be divided by VLENX=%d, VLENY=%d but are %d, %d\n", class_name.c_str(), VLENX, VLENY, m_block_size[0], m_block_size[1]);
226  exit(EXIT_FAILURE);
227  }
228 
229  int NBx = m_Nx / m_block_size[0];
230  int NBy = m_Ny / m_block_size[1];
231  int NBz = m_Nz / m_block_size[2];
232  int NBt = m_Nt / m_block_size[3];
233  int ipex = Communicator::ipe(0);
234  int ipey = Communicator::ipe(1);
235  int ipez = Communicator::ipe(2);
236  int ipet = Communicator::ipe(3);
237  m_Ieo = (NBx * ipex + NBy * ipey + NBz * ipez + NBt * ipet) % 2;
238  }
239 
240 #ifdef USE_QWSLIB
241  QWS_lib::init_qws(&m_boundary[0]);
242 #endif
243 
244 #pragma omp barrier
245 }
246 
247 
248 //====================================================================
249 template<typename AFIELD>
251 {
253 
254  m_conf = u;
255 
256  m_fopr_csw->set_config(u);
257  m_fopr_csw_chiral->set_config(u);
258 
259 
260 #pragma omp parallel
261  {
263  convert_gauge(index, m_U, *u);
264 
265 #ifdef USE_QWSLIB
266 #pragma omp barrier
267 #pragma omp master
268  {
269  // the configuration with fermion boundary condition must
270  // be set only for spatial component: temporal boundary
271  // condition is incorporated in the library.
272  int num_reconst = 0;
273  std::string str_reconst;
274 
275  if (sizeof(real_t) == 4) {
276  num_reconst = SU3_RECONSTRUCT_S;
277  str_reconst = "SU3_RECONSTRUCT_S";
278  } else if (sizeof(real_t) == 8) {
279  num_reconst = SU3_RECONSTRUCT_D;
280  str_reconst = "SU3_RECONSTRUCT_D";
281  }
282  for (int mu = 0; mu < m_Ndim - 1; ++mu) {
283  if (m_boundary[mu] != 1) {
284  if (num_reconst == 18) {
285  set_boundary_config(m_U, mu);
286  } else {
287  vout.crucial("%s: QWS is not available for\n", class_name.c_str());
288  vout.crucial(" boundary[%d] = %d\n", mu, m_boundary[mu]);
289  vout.crucial(" %s = %d\n", str_reconst.c_str(), num_reconst);
290  exit(EXIT_FAILURE);
291  }
292  }
293  }
294  } // omp master
295 #pragma omp barrier
296 
297  double kappa = (double)m_CKs;
298  if (sizeof(real_t) == 8) {
299  qws_loadg_dd_bridge_((double *)m_U.ptr(0), &kappa);
300  } else if (sizeof(real_t) == 4) {
301  qws_loadgs_dd_bridge_((float *)m_U.ptr(0), &kappa);
302  }
303 #pragma omp barrier
304 #pragma omp master
305  {
306  // now temporal boundary condition is set for Bridge++ code.
307  int mu = m_Ndim - 1;
308  if (m_boundary[mu] != 1) {
309  set_boundary_config(m_U, mu);
310  }
311  }
312 #else // USE_QWSLIB
313 #pragma omp barrier
314  set_boundary();
315 #endif
316 
317 #pragma omp barrier
318  copy(m_Ublock, m_U);
319 #pragma omp barrier
320  set_block_config(m_Ublock);
321  } // omp parallel
322 
323 #ifdef CHIRAL_ROTATION
324  set_csw_chrot(); // omp parallelized inside set_csw().
325 #else
326  set_csw(); // omp parallelized inside set_csw().
327 #endif
328 }
329 
330 
331 //====================================================================
332 template<typename AFIELD>
334  const int mu)
335 {
336  int ipe[4], Nsize[4], Lsize[4], j[4];
338 
339  for (int i = 0; i < m_Ndim; ++i) {
340  ipe[i] = Communicator::ipe(i);
341  Nsize[i] = CommonParameters::Nsize(i);
342  Lsize[i] = CommonParameters::Lsize(i);
343  }
344 
345  for (int t = 0; t < m_Nt; ++t) {
346  j[3] = ipe[3] * Nsize[3] + t;
347  for (int z = 0; z < m_Nz; ++z) {
348  j[2] = ipe[2] * Nsize[2] + z;
349  for (int y = 0; y < m_Ny; ++y) {
350  j[1] = ipe[1] * Nsize[1] + y;
351  for (int x = 0; x < m_Nx; ++x) {
352  j[0] = ipe[0] * Nsize[0] + x;
353  int site = x + m_Nx * (y + m_Ny * (z + m_Nz * t));
354 
355  if (j[mu] == Lsize[mu] - 1) {
356  double bc = (double)(m_boundary[mu]);
357  for (int in = 0; in < m_Ndf; ++in) {
358  int i = index_lex.idx_G(in, site, mu);
359  double uv = bc * U.cmp(i);
360  U.set(i, uv);
361  }
362  }
363  }
364  }
365  }
366  }
367 }
368 
369 
370 //====================================================================
371 template<typename AFIELD>
373 {
375 
376  int mu = 0;
377  int ipex = Communicator::ipe(mu);
378  int npex = Communicator::npe(mu);
379 
380  if ((m_boundary[mu] != 1) && (ipex == npex - 1)) {
381  real_t bc = real_t(m_boundary[mu]);
382  int Nyzt = m_Ny * m_Nz * m_Nt;
383 
384  int ith, nth, is, ns;
385  set_threadtask_mult(ith, nth, is, ns, Nyzt);
386 
387  for (int iyzt = is; iyzt < ns; ++iyzt) {
388  int site = m_Nx - 1 + m_Nx * iyzt;
389  for (int in = 0; in < NDF; ++in) {
390  int i = index.idx_G(in, site, mu);
391  real_t uv = m_U.cmp(i);
392  uv = uv * bc;
393  m_U.set(i, uv);
394  }
395  }
396  }
397 
398  mu = 1;
399  int ipey = Communicator::ipe(mu);
400  int npey = Communicator::npe(mu);
401 
402  if ((m_boundary[mu] != 1) && (ipey == npey - 1)) {
403  real_t bc = real_t(m_boundary[mu]);
404  int Nxzt = m_Nx * m_Nz * m_Nt;
405 
406  int ith, nth, is, ns;
407  set_threadtask_mult(ith, nth, is, ns, Nxzt);
408 
409  for (int ixzt = is; ixzt < ns; ++ixzt) {
410  int ix = ixzt % m_Nx;
411  int izt = ixzt / m_Nx;
412  int site = ix + m_Nx * (m_Ny - 1 + m_Ny * izt);
413  for (int in = 0; in < NDF; ++in) {
414  int i = index.idx_G(in, site, mu);
415  real_t uv = m_U.cmp(i);
416  uv = uv * bc;
417  m_U.set(i, uv);
418  }
419  }
420  }
421 
422  mu = 2;
423  int ipez = Communicator::ipe(mu);
424  int npez = Communicator::npe(mu);
425 
426  if ((m_boundary[mu] != 1) && (ipez == npez - 1)) {
427  real_t bc = real_t(m_boundary[mu]);
428  int Nxy = m_Nx * m_Ny;
429  int Nxyt = Nxy * m_Nt;
430 
431  int ith, nth, is, ns;
432  set_threadtask_mult(ith, nth, is, ns, Nxyt);
433 
434  for (int ixyt = is; ixyt < ns; ++ixyt) {
435  int ixy = ixyt % Nxy;
436  int it = ixyt / Nxy;
437  int site = ixy + Nxy * (m_Nz - 1 + m_Nz * it);
438  for (int in = 0; in < NDF; ++in) {
439  int i = index.idx_G(in, site, mu);
440  real_t uv = m_U.cmp(i);
441  uv = uv * bc;
442  m_U.set(i, uv);
443  }
444  }
445  }
446 
447  mu = 3;
448  int ipet = Communicator::ipe(mu);
449  int npet = Communicator::npe(mu);
450 
451  if ((m_boundary[mu] != 1) && (ipet == npet - 1)) {
452  real_t bc = real_t(m_boundary[mu]);
453  int Nxyz = m_Nx * m_Ny * m_Nz;
454 
455  int ith, nth, is, ns;
456  set_threadtask_mult(ith, nth, is, ns, Nxyz);
457 
458  for (int ixyz = is; ixyz < ns; ++ixyz) {
459  int site = ixyz + Nxyz * (m_Nt - 1);
460  for (int in = 0; in < NDF; ++in) {
461  int i = index.idx_G(in, site, mu);
462  real_t uv = m_U.cmp(i);
463  uv = uv * bc;
464  m_U.set(i, uv);
465  }
466  }
467  }
468 }
469 
470 
471 //====================================================================
472 template<typename AFIELD>
474 {
476 
477  int ith, nth, is, ns;
478  set_threadtask_mult(ith, nth, is, ns, m_Nst);
479 
480  for (int site = is; site < ns; ++site) {
481  int ix = site % m_Nx;
482  int iy = (site / m_Nx) % m_Ny;
483  int iz = (site / (m_Nx * m_Ny)) % m_Nz;
484  int it = site / (m_Nx * m_Ny * m_Nz);
485 
486  int mu = 0;
487  if ((ix + 1) % m_block_size[mu] == 0) {
488  for (int in = 0; in < NDF; ++in) {
489  int i = index.idx_G(in, site, mu);
490  U.set(i, real_t(0.0));
491  }
492  }
493 
494  mu = 1;
495  if ((iy + 1) % m_block_size[mu] == 0) {
496  for (int in = 0; in < NDF; ++in) {
497  int i = index.idx_G(in, site, mu);
498  U.set(i, real_t(0.0));
499  }
500  }
501 
502  mu = 2;
503  if ((iz + 1) % m_block_size[mu] == 0) {
504  for (int in = 0; in < NDF; ++in) {
505  int i = index.idx_G(in, site, mu);
506  U.set(i, real_t(0.0));
507  }
508  }
509 
510  mu = 3;
511  if ((it + 1) % m_block_size[mu] == 0) {
512  for (int in = 0; in < NDF; ++in) {
513  int i = index.idx_G(in, site, mu);
514  U.set(i, real_t(0.0));
515  }
516  }
517  }
518 }
519 
520 
521 //====================================================================
522 template<typename AFIELD>
524 {
525  // The Dirac representation is assumed.
526  // Current implementation makes use of the corelib Fopr_CloverTerm
527  // that requires change of the gamma-matrix convention from
528  // Bridge++ to QWS (BQCD) by multiplying gamma_4 before and after
529  // m_fopr_csw->mult().
530  // For numerical efficiency, proper implementation is necessaty.
531  // [08 Mar 2021 H.Matsufuru]
532 
533  Field_F v(m_Nst, 1), w(m_Nst, 1);
534 
536 
537  m_fopr_csw->set_mode("D");
538 
539  int ith, nth, is, ns;
540  set_threadtask_mult(ith, nth, is, ns, m_Nst);
541 
542 #pragma omp parallel
543  {
544  for (int id = 0; id < m_Nd / 2; ++id) {
545  for (int ic = 0; ic < m_Nc; ++ic) {
546  w.set(0.0);
547  for (int site = is; site < ns; ++site) {
548  w.set_r(ic, id, site, 0, 1.0);
549  }
550 #pragma omp barrier
551 
552  m_fopr_csw->mult(v, w);
553 #pragma omp barrier
554 
555  for (int site = is; site < ns; ++site) {
556  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
557  real_t vt_r = v.cmp_r(ic2, 0, site, 0);
558  real_t vt_i = v.cmp_i(ic2, 0, site, 0);
559  int idx_r = index.idx_Gr(ic, ic2, site, id + 0);
560  int idx_i = index.idx_Gi(ic, ic2, site, id + 0);
561  m_T.set(idx_r, vt_r);
562  m_T.set(idx_i, vt_i);
563  }
564 
565  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
566  real_t vt_r = v.cmp_r(ic2, 1, site, 0);
567  real_t vt_i = v.cmp_i(ic2, 1, site, 0);
568  int idx_r = index.idx_Gr(ic, ic2, site, id + 4);
569  int idx_i = index.idx_Gi(ic, ic2, site, id + 4);
570  m_T.set(idx_r, vt_r);
571  m_T.set(idx_i, vt_i);
572  }
573 
574  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
575  real_t vt_r = -v.cmp_r(ic2, 2, site, 0);
576  real_t vt_i = -v.cmp_i(ic2, 2, site, 0);
577  int idx_r = index.idx_Gr(ic, ic2, site, id + 2);
578  int idx_i = index.idx_Gi(ic, ic2, site, id + 2);
579  m_T.set(idx_r, vt_r);
580  m_T.set(idx_i, vt_i);
581  }
582 
583  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
584  real_t vt_r = -v.cmp_r(ic2, 3, site, 0);
585  real_t vt_i = -v.cmp_i(ic2, 3, site, 0);
586  int idx_r = index.idx_Gr(ic, ic2, site, id + 6);
587  int idx_i = index.idx_Gi(ic, ic2, site, id + 6);
588  m_T.set(idx_r, vt_r);
589  m_T.set(idx_i, vt_i);
590  }
591  } // site loop
592  }
593  }
594 #pragma omp barrier
595 
596  real_t kappaR = 1.0 / m_CKs;
597  scal(m_T, kappaR);
598  } // omp parallel
599 
600  set_csw_qws();
601 }
602 
603 
604 //====================================================================
605 template<typename AFIELD>
607 {
608  // This method set the clover term matrix assuming the chiral
609  // representation for gamma-matrix in Bridge++ convention.
610  // The conversion from the Bridge++ to QWS convention and
611  // from Dirac to chiral representations are assumed to be cared
612  // in BridgeQXS functions.
613  // [17 Jul 2021 H.Matsufuru]
614 
615  Field_F v(m_Nst, 1), w(m_Nst, 1);
616 
618 
619  const int Nin = NDF * ND * 2;
620 
621  m_fopr_csw_chiral->set_mode("D");
622 
623  int ith, nth, is, ns;
624  set_threadtask_mult(ith, nth, is, ns, m_Nst);
625 
626  //11 22 33 44 55 66 12 34 56 23 45 13 24 35 46 15 26 14 36 25 16
627  constexpr int idx[72] = {
628  0, -1, 6, 7, 16, 17, 28, 29, 24, 25, 34, 35,
629  -1, -1, 1, -1, 12, 13, 18, 19, 32, 33, 26, 27,
630  -1, -1, -1, -5, 2, -1, 8, 9, 20, 21, 30, 31,
631  -1, -1, -1, -1, -1, -1, 3, -1, 14, 15, 22, 23,
632  -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, 10, 11,
633  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1,
634  };
635  vout.general("hoge: chiral rep for Clover term\n");
636 #pragma omp parallel
637  {
638  for (int id = 0; id < m_Nd / 2; ++id) {
639  for (int ic = 0; ic < m_Nc; ++ic) {
640  w.set(0.0);
641 #pragma omp barrier
642 
643  for (int site = is; site < ns; ++site) {
644  w.set_r(ic, id, site, 0, 1.0);
645  w.set_r(ic, id + 2, site, 0, 1.0);
646  }
647 #pragma omp barrier
648 
649  m_fopr_csw->mult(v, w);
650 #pragma omp barrier
651 
652  for (int site = is; site < ns; ++site) {
653  for (int id2 = 0; id2 < m_Nd; ++id2) {
654  for (int ic2 = 0; ic2 < m_Nc; ++ic2) {
655  real_t vt_r = 0.5 * v.cmp_r(ic2, id2, site, 0);
656  real_t vt_i = 0.5 * v.cmp_i(ic2, id2, site, 0);
657  // int in = ic2 + NC*(ic + NC*(id + 2*id2));
658  // int idx_r = index.idx(2*in, Nin, site, 0);
659  // int idx_i = index.idx(2*in+1, Nin, site, 0);
660  // m_T.set(idx_r, vt_r);
661  // m_T.set(idx_i, vt_i);
662  int i = ic2 + m_Nc * (id2 % 2);
663  int j = ic + m_Nc * id;
664  int ij = m_Nc * 2 * i + j;
665  int in_r = idx[2 * ij];
666  int in_i = idx[2 * ij + 1];
667  if (in_r >= 0) {
668  in_r += 36 * (id2 / 2);
669  int idx_r = index.idx(in_r, Nin, site, 0);
670  m_T.set(idx_r, vt_r);
671  }
672  if (in_i >= 0) {
673  in_i += 36 * (id2 / 2);
674  int idx_i = index.idx(in_i, Nin, site, 0);
675  m_T.set(idx_i, vt_i);
676  }
677  }
678  }
679  } // site loop
680 #pragma omp barrier
681  }
682  }
683 #pragma omp barrier
684 
685  real_t kappaR = 1.0 / m_CKs;
686  scal(m_T, kappaR);
687  } // omp parallel
688 }
689 
690 
691 //====================================================================
692 template<typename AFIELD>
694 {
695  //
696  // T = 1 + clov
697  // T(Dirac,QWS) = gamma4 T(Dirac,Brdige) gamma4
698  // T(Chiral,QWS) = U T(Dirac,QWS) Udag
699  // = U gamma4 T(Dirac,Bridge) gamma4 Udag
700  // in qws,
701  // T(Dirac,QWS) psi(Dirac,QWS)
702  // = Udag U T(Dirac,QWS) Udag U psi(Dirac,QWS)
703  // = Udag T(Chiral,QWS) U psi(Dirac, QWS)
704  // = Vdag T(Chiral,QWS)/2 V psi(Dirac,QWS)
705  // with
706  // V = sqrt(2) U
707  // = [ 1 0 1 0 ]
708  // [ 0 1 0 1 ]
709  // [ 1 0 -1 0 ]
710  // [ 0 1 0 -1 ]
711  // and T(chiral,QWS)/2 is stored in the memory
712  //
713  // For Bridge++, the conversion matrix is
714  // U'
715  // = sqrt(2)^{-1} [ 1 0 1 0 ]
716  // [ 0 1 0 1 ]
717  // [-1 0 1 0 ]
718  // [ 0 -1 0 1 ]
719  // N.B. since
720  // U gamma_mu(Dirac,Bridge) Udag = - gamma_mu(Chiral,Bridge)
721  // and thus U T(Dirac,Bridge) Udag = T(Chiral,Bridge),
722  // one can use U instead for the purpose here
723  //
724  // Using U',
725  // T(Chiral,Bridge) = U' T(Dirac,Bridge) U'dag
726  // and
727  // T(Chiral,QWS) = U T(Dirac,QWS) Udag
728  // = U gamma4 T(Dirac,Bridge) gamma4 Udag
729  // = U gamma4 U'dag T(Chiral,Bridge) U' gamma4 Udag
730  // and the trasnformation matrix in the chiral rep. is
731  // U gamma4 U'dag = [ 0 0 -1 0 ]
732  // [ 0 0 0 -1 ]
733  // [ 1 0 0 0 ]
734  // [ 0 1 0 0 ]
735  // in 2x2 block notation,
736  // U gamma4 U'dag [ T1 0 ] U' gamma4 Udag
737  // [ 0 T2 ]
738  // = [ T2 0 ]
739  // [ 0 T1 ]
740  // so the upper and lower blocks are interchanged between Bridge and QWS clover term
741  //
742  // N.B.
743  // U gamma4 Udag = [ 0 0 1 0 ]
744  // [ 0 0 0 1 ]
745  // [ 1 0 0 0 ]
746  // [ 0 1 0 0 ]
747  // which also gives
748  // U gamma4 Udag [ T1 0 ] Ugamma4 Udag
749  // [ 0 T2 ]
750  // = [ T2 0 ]
751  // [ 0 T1 ]
752  //
753  // [ 20. Jul.2021 I.Kanamori]
754 
755 #ifdef USE_QWSLIB
756  Field_F v(m_Nst, 1), w(m_Nst, 1);
757  Field T(2 * 6 * 6, m_Nst, 1);
758  Field Tinv(2 * 6 * 6, m_Nst, 1);
759 
762 
763  m_fopr_csw_chiral->set_mode("D");
764 
765  int ith, nth, is, ns;
766  set_threadtask_mult(ith, nth, is, ns, m_Nst);
767 
768 #pragma omp parallel
769  {
770  int idx[36]
771  = { 0, 6, 8, 10, 12, 14,
772  -1, 1, 16, 18, 20, 22,
773  -1, -1, 2, 24, 26, 28,
774  -1, -1, -1, 3, 30, 32,
775  -1, -1, -1, -1, 4, 34,
776  -1, -1, -1, -1, -1, 5 };
777  constexpr double factor = 0.5;
778 
779  T.set(0.0);
780  for (int ud = 0; ud < 2; ++ud) {
781  for (int id = 0; id < m_Nd / 2; ++id) {
782  for (int ic = 0; ic < m_Nc; ++ic) {
783  w.set(0.0);
784  for (int site = is; site < ns; ++site) {
785  w.set_r(ic, 2 * ud + id, site, 0, 1.0);
786  }
787 #pragma omp barrier
788  m_fopr_csw_chiral->mult(v, w);
789 #pragma omp barrier
790  int j = 3 * id + ic;
791  for (int site = is; site < ns; ++site) {
792  int ud2 = 1 - ud;
793  int offset = ud2 * 36;
794  // diagonal: real
795  double vt_r = v.cmp_r(ic, 2 * ud + id, site, 0);
796  T.set(offset + j, site, 0, factor * (1.0 - vt_r));
797  for (int i = 0; i < 6; ++i) {
798  int ij = idx[6 * i + j];
799  if (ij > 5) {
800  int id2 = i / 3 + 2 * ud;
801  int ic2 = i % 3;
802  double vt_r = v.cmp_r(ic2, id2, site, 0);
803  double vt_i = v.cmp_i(ic2, id2, site, 0);
804  T.set(offset + ij, site, 0, -factor * vt_r);
805  T.set(offset + ij + 1, site, 0, -factor * vt_i);
806  }
807  } // i
808  } // site
809  } // ic
810  } // id
811  } // ud
812 #pragma omp barrier
813  solve_csw_qws_inv(Tinv, T);
814  ::convert(index, m_T_qws, Tinv);
815 #pragma omp barrier
816  if (sizeof(real_t) == 8) {
817  qws_loadc_dd_bridge_((double *)m_T_qws.ptr(0));
818  } else if (sizeof(real_t) == 4) {
819  qws_loadcs_dd_bridge_((float *)m_T_qws.ptr(0));
820  }
821  m_T_qws.set(0.0);
822  ::convert(index_qws, m_T_qws, T);
823  } // omp parallel
824 #endif // USE_QWSLIB
825 }
826 
827 
828 //====================================================================
829 template<class AFIELD>
831 {
832  // inversion with LU factorization + overall rescaling
833  // in Driac rep., in order to absorb 1/sqrt{2} in the uniary trf,
834  // the rescaling factor must be set to 1/4. [default]
835  // in Chiral rep., the factor must be set to 1
836 
837 #ifdef USE_QWSLIB
838  constexpr double factor = 0.25;
839 
840  typedef dcomplex complex_t;
841 
842  constexpr int NS = 4; // 4 sprinor
843  constexpr int N = 6; // matrix size to invert [ 6x6 in each block]
844 
845 
846  // size of Feild
847  assert(NCOL == CommonParameters::Nc());
848  assert(NS == CommonParameters::Nd());
849  assert(N == NS * NCOL / 2);
850  assert(T.check_size(2 * 36, T.nvol(), 1));
851  assert(Tinv.check_size(2 * 36, T.nvol(), 1));
852  const size_t vol = T.nvol();
853 
854  const double *pT = T.ptr(0);
855  double *pTinv = Tinv.ptr(0);
856 
857  int ith, nth, is, ns;
858  set_threadtask(ith, nth, is, ns, T.nvol());
859 
860  complex_t LU[N * N];
861 
862  // working area to store the inversion
863  complex_t workT[N * N];
864 
865 
866  for (int s = is; s < ns; ++s) {
867  size_t offset_clov = 2 * s * N * N;
868 
869  const double *clov = pT + offset_clov;
870  double *clov_inv = pTinv + offset_clov;
871 
872  for (int block = 0; block < 2; block++) {
873  // fill U part
874  // diagonal
875  for (int i = 0; i < N; i++) {
876  LU[N * i + i] = clov[i];
877  }
878  // offdiag
879  int ii = N;
880  for (int i = 0; i < N; i++) {
881  for (int j = i + 1; j < N; j++) {
882  LU[N * i + j] = complex_t(clov[ii], clov[ii + 1]);
883  ii += 2;
884  }
885  }
886 
887  // LU decomposition
888  for (int i = 0; i < N; i++) {
889  double diag_re = LU[N * i + i].real(); // (re im) w/ im \simeq 0 --> re
890  double diag_inv = 1.0 / diag_re;
891  LU[N * i + i] = diag_inv; // preparaton for inv.
892  for (int k = i + 1; k < N; k++) {
893  complex_t c = LU[N * i + k] * diag_inv;
894  LU[N * k + i] = c; // L w/o c.c.
895  for (int j = i + 1; j < N; j++) { // U
896  LU[N * k + j] -= conj(c) * LU[N * i + j];
897  }
898  }
899  } // i
900 
901  for (int i = 0; i < N; i++) { // for i-th vector
902  complex_t *x = &workT[N * i];
903  for (int j = 0; j < N; j++) {
904  x[j] = 0.0;
905  }
906  x[i] = factor;
907 
908  // solving L
909  for (int j = 1; j < N; j++) {
910  complex_t x_j = x[j];
911  for (int k = i; k < j; k++) {
912  x_j -= conj(LU[N * j + k]) * x[k];
913  }
914  x[j] = x_j;
915  }
916  // solving U
917  for (int j = N - 1; j >= i; j--) {
918  complex_t x_j = x[j];
919  for (int k = j + 1; k < N; k++) {
920  x_j -= LU[N * j + k] * x[k];
921  }
922  x_j *= LU[N * j + j].real();
923  x[j] = x_j;
924  }
925  }
926 
927  // pack to the result
928  ii = 0;
929  for (int i = 0; i < N; i++) {
930  clov_inv[ii] = workT[N * i + i].real();
931  ii++;
932  }
933  for (int i = 0; i < N; i++) {
934  for (int j = i + 1; j < N; j++) {
935  complex_t val = conj(workT[i * N + j]);
936  clov_inv[ii] = val.real();
937  ii++;
938  clov_inv[ii] = val.imag();
939  ii++;
940  }
941  }
942  clov += N * N;
943  clov_inv += N * N;
944  } // block
945  } //s
946 #endif // USE_QWSLIB
947 }
948 
949 
950 //====================================================================
951 template<typename AFIELD>
953 {
954 #ifdef USE_QWSLIB
955  if (sizeof(real_t) == 4) {
956  qws_loadfs_dd_bridge_((scs_t *)v.ptr(0), (const float *)w.ptr(0));
957  } else if (sizeof(real_t) == 8) {
958  qws_loadfd_dd_bridge_((scd_t *)v.ptr(0), (const double *)w.ptr(0));
959  } else {
960  vout.crucial(" %s: cannot happen, unknown sizeof(real_t)=%ul in convert()\n", class_name.c_str(), sizeof(real_t));
961  exit(EXIT_FAILURE);
962  }
963 #else
964  copy(v, w);
965 #endif
966 }
967 
968 
969 //====================================================================
970 template<typename AFIELD>
972 {
974  convert_spinor(index_lex, v, w);
975  mult_gm4(m_v2, v);
976  convert(v, m_v2);
977 }
978 
979 
980 //====================================================================
981 template<typename AFIELD>
983 {
984 #ifdef USE_QWSLIB
985  if (sizeof(real_t) == 4) {
986  qws_storefs_dd_bridge_((float *)v.ptr(0), (const scs_t *)w.ptr(0));
987  } else if (sizeof(real_t) == 8) {
988  qws_storefd_dd_bridge_((double *)v.ptr(0), (const scd_t *)w.ptr(0));
989  } else {
990  vout.crucial(" %s: cannot happen, unknown sizeof(real_t)=%ul in reverse()\n", class_name.c_str(), sizeof(real_t));
991  exit(EXIT_FAILURE);
992  }
993 #else
994  copy(v, w);
995 #endif // USE_QWSLIB
996 }
997 
998 
999 //====================================================================
1000 template<typename AFIELD>
1002 {
1004 
1005  reverse(m_v1, w);
1006  mult_gm4(m_v2, m_v1);
1007  reverse_spinor(index_lex, v, m_v2);
1008 }
1009 
1010 
1011 //====================================================================
1012 template<typename AFIELD>
1014 {
1015  real_t *vp = v.ptr(0);
1016  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
1017 
1018  if (mu == 0) {
1019  mult_xp(vp, wp, 0);
1020  } else if (mu == 1) {
1021  mult_yp(vp, wp, 0);
1022  } else if (mu == 2) {
1023  mult_zp(vp, wp, 0);
1024  } else if (mu == 3) {
1025  mult_tp(vp, wp, 0);
1026  } else {
1027  vout.crucial(m_vl, "%s: mult_up for %d direction is undefined.",
1028  class_name.c_str(), mu);
1029  exit(EXIT_FAILURE);
1030  }
1031 }
1032 
1033 
1034 //====================================================================
1035 template<typename AFIELD>
1037 {
1038  real_t *vp = v.ptr(0);
1039  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
1040 
1041  if (mu == 0) {
1042  mult_xm(vp, wp, 0);
1043  } else if (mu == 1) {
1044  mult_ym(vp, wp, 0);
1045  } else if (mu == 2) {
1046  mult_zm(vp, wp, 0);
1047  } else if (mu == 3) {
1048  mult_tm(vp, wp, 0);
1049  } else {
1050  vout.crucial(m_vl, "%s: mult_dn for %d direction is undefined.",
1051  class_name.c_str(), mu);
1052  exit(EXIT_FAILURE);
1053  }
1054 }
1055 
1056 
1057 //====================================================================
1058 template<typename AFIELD>
1060 {
1061  return m_mode;
1062 }
1063 
1064 
1065 //====================================================================
1066 template<typename AFIELD>
1068 {
1069 #pragma omp barrier
1070 
1071  int ith = ThreadManager::get_thread_id();
1072  if (ith == 0) m_mode = mode;
1073 
1074 #pragma omp barrier
1075 }
1076 
1077 
1078 //====================================================================
1079 template<typename AFIELD>
1081 {
1082  if (m_mode == "D") {
1083  return D(v, w);
1084  } else if (m_mode == "DdagD") {
1085  return DdagD(v, w);
1086  } else if (m_mode == "Ddag") {
1087  return Ddag(v, w);
1088  } else if (m_mode == "H") {
1089  return H(v, w);
1090  } else {
1091  vout.crucial(m_vl, "%s: mode undefined.\n", class_name.c_str());
1092  exit(EXIT_FAILURE);
1093  }
1094 }
1095 
1096 
1097 //====================================================================
1098 template<typename AFIELD>
1100 {
1101  if (m_mode == "D") {
1102  return Ddag(v, w);
1103  } else if (m_mode == "DdagD") {
1104  return DdagD(v, w);
1105  } else if (m_mode == "Ddag") {
1106  return D(v, w);
1107  } else if (m_mode == "H") {
1108  return H(v, w);
1109  } else {
1110  vout.crucial(m_vl, "%s: mode undefined.\n", class_name.c_str());
1111  exit(EXIT_FAILURE);
1112  }
1113 }
1114 
1115 
1116 //====================================================================
1117 template<typename AFIELD>
1119 {
1120 #ifdef USE_QWSLIB
1121  mult_D_qws(v, w);
1122 #else
1123  mult_D(v, w);
1124  //mult_D_alt(v, w);
1125 #endif
1126 }
1127 
1128 
1129 //====================================================================
1130 template<typename AFIELD>
1132 {
1133  D(m_v2, w);
1134  mult_gm5(v, m_v2);
1135  D(m_v2, v);
1136  mult_gm5(v, m_v2);
1137 }
1138 
1139 
1140 //====================================================================
1141 template<typename AFIELD>
1143 {
1144  mult_gm5(v, w);
1145  D(m_v2, v);
1146  mult_gm5(v, m_v2);
1147 }
1148 
1149 
1150 //====================================================================
1151 template<typename AFIELD>
1153  const int ch)
1154 {
1155  real_t *vp = v.ptr(0);
1156  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
1157 
1158  mult_gm5(vp, wp);
1159  if (ch == 1) {
1160  aypx(real_t(1.0), vp, wp);
1161  } else {
1162  aypx(real_t(-1.0), vp, wp);
1163  }
1164 }
1165 
1166 
1167 //====================================================================
1168 template<typename AFIELD>
1170 {
1171  real_t *vp = v.ptr(0);
1172  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
1173 
1174 #pragma omp barrier
1175 
1176  mult_gm5(vp, wp);
1177 }
1178 
1179 
1180 //====================================================================
1181 template<typename AFIELD>
1183 { // Dirac representation.
1184 #ifdef USE_QXS_ACLE
1185  BridgeQXS::mult_wilson_gm5_dirac(v, w, m_Nsize);
1186 #else
1187  int ith, nth, is, ns;
1188  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1189  Vsimd_t wt[2];
1190 
1191  for (int site = is; site < ns; ++site) {
1192  for (int ic = 0; ic < NC; ++ic) {
1193  for (int id = 0; id < ND; ++id) {
1194  int idx1 = 2 * (id + ND * ic) + NVCD * site;
1195  load_vec(wt, &w[VLEN * idx1], 2);
1196  scal_vec(wt, (real_t)(-1.0), 2);
1197  int id2 = (id + 2) % ND;
1198  int idx2 = 2 * (id2 + ND * ic) + NVCD * site;
1199  save_vec(&v[VLEN * idx2], wt, 2);
1200  }
1201  }
1202  }
1203 #endif
1204 
1205 #pragma omp barrier
1206 }
1207 
1208 
1209 //====================================================================
1210 template<typename AFIELD>
1212 {
1213  int ith, nth, is, ns;
1214  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1215 
1216  for (int site = is; site < ns; ++site) {
1217  Vsimd_t vt[NVCD];
1218  load_vec(vt, &vp[VLEN * NVCD * site], NVCD);
1219  scal_vec(vt, a, NVCD);
1220  save_vec(&vp[VLEN * NVCD * site], vt, NVCD);
1221  }
1222 }
1223 
1224 
1225 //====================================================================
1226 template<typename AFIELD>
1228 {
1229  real_t *vp = v.ptr(0);
1230  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
1231 
1232  int ith, nth, is, ns;
1233  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1234 
1235  Vsimd_t wt[2];
1236 
1237  for (int site = is; site < ns; ++site) {
1238  for (int ic = 0; ic < NC; ++ic) {
1239  for (int id = 0; id < ND2; ++id) {
1240  int idx1 = 2 * (id + ND * ic) + NVCD * site;
1241  load_vec(wt, &wp[VLEN * idx1], 2);
1242  save_vec(&vp[VLEN * idx1], wt, 2);
1243  }
1244 
1245  for (int id = ND2; id < ND; ++id) {
1246  int idx1 = 2 * (id + ND * ic) + NVCD * site;
1247  load_vec(wt, &wp[VLEN * idx1], 2);
1248  scal_vec(wt, real_t(-1.0), 2);
1249  save_vec(&vp[VLEN * idx1], wt, 2);
1250  }
1251  }
1252  }
1253 
1254 #pragma omp barrier
1255 }
1256 
1257 
1258 //====================================================================
1259 template<typename AFIELD>
1261 { // Dirac representation is assumed.
1262  real_t *u = m_T.ptr(0);
1263 
1264  int ith, nth, is, ns;
1265  set_threadtask(ith, nth, is, ns, m_Nstv);
1266 
1267 #pragma omp barrier
1268 
1269  for (int site = is; site < ns; ++site) {
1270  Vsimd_t v2v[NVCD];
1271  load_vec(v2v, &v2[VLEN * NVCD * site], NVCD);
1272 
1273  Vsimd_t v1v[NVCD];
1274  load_vec(v1v, &v1[VLEN * NVCD * site], NVCD);
1275 
1276  Vsimd_t ut[NDF], wt1[2], wt2[2];
1277 
1278  for (int jd = 0; jd < ND2; ++jd) {
1279  for (int id = 0; id < ND; ++id) {
1280  int ig = VLEN * NDF * (site + m_Nstv * (id + ND * jd));
1281  load_vec(ut, &u[ig], NDF);
1282  for (int ic = 0; ic < NC; ++ic) {
1283  int ic2 = 2 * ic;
1284  int id2 = (id + ND2) % ND;
1285  mult_ctv(wt1, &ut[ic2], &v1v[2 * id], NC);
1286  mult_ctv(wt2, &ut[ic2], &v1v[2 * id2], NC);
1287  int icd1 = 2 * (jd + ND * ic);
1288  int icd2 = 2 * (jd + ND2 + ND * ic);
1289  axpy_vec(&v2v[icd1], real_t(1.0), wt1, 2);
1290  axpy_vec(&v2v[icd2], real_t(1.0), wt2, 2);
1291  }
1292  }
1293  }
1294 
1295  save_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1296  }
1297 
1298 #pragma omp barrier
1299 }
1300 
1301 
1302 //====================================================================
1303 template<typename AFIELD>
1305 { // Dirac representation is assumed.
1306 #ifdef USE_QWSLIB
1307  // (1+csw)^{-1}
1308  if (sizeof(real_t) == 4) {
1309  int deo = 0;
1310  clv_s_dd_(&deo, (scs_t *)v.ptr(0));
1311  deo = 1;
1312  clv_s_dd_(&deo, (scs_t *)v.ptr(0));
1313  } else if (sizeof(real_t) == 8) {
1314  int deo = 0;
1315  clv_d_dd_(&deo, (scd_t *)v.ptr(0));
1316  deo = 1;
1317  clv_d_dd_(&deo, (scd_t *)v.ptr(0));
1318  }
1319 #endif
1320 }
1321 
1322 
1323 //====================================================================
1324 template<typename AFIELD>
1326 { // Dirac representation is assumed.
1327 #ifdef USE_QWSLIB
1328  // (1+csw)
1329  if (sizeof(real_t) == 4) {
1330  int deo = 0;
1331  clv_matvec_s_dd_(&deo, (scs_t *)v.ptr(0), (clvs_t *)m_T_qws.ptr(0), (scs_t *)v.ptr(0));
1332  deo = 1;
1333  clv_matvec_s_dd_(&deo, (scs_t *)v.ptr(0), (clvs_t *)m_T_qws.ptr(0), (scs_t *)v.ptr(0));
1334  } else if (sizeof(real_t) == 8) {
1335  int deo = 0;
1336  clv_matvec_d_dd_(&deo, (scd_t *)v.ptr(0), (clvd_t *)m_T_qws.ptr(0), (scd_t *)v.ptr(0));
1337  deo = 1;
1338  clv_matvec_d_dd_(&deo, (scd_t *)v.ptr(0), (clvd_t *)m_T_qws.ptr(0), (scd_t *)v.ptr(0));
1339  }
1340 #endif
1341 }
1342 
1343 
1344 //====================================================================
1345 template<typename AFIELD>
1347 {
1348  real_t *v2 = v.ptr(0);
1349  real_t *v1 = const_cast<AFIELD *>(&w)->ptr(0);
1350  real_t *up = m_U.ptr(0);
1351  real_t *ct = m_T.ptr(0);
1352 
1353 #pragma omp barrier
1354 
1355  if (do_comm_any > 0) {
1356  real_t *buf1_xp = (real_t *)chsend_dn[0].ptr();
1357  real_t *buf1_xm = (real_t *)chsend_up[0].ptr();
1358  real_t *buf1_yp = (real_t *)chsend_dn[1].ptr();
1359  real_t *buf1_ym = (real_t *)chsend_up[1].ptr();
1360  real_t *buf1_zp = (real_t *)chsend_dn[2].ptr();
1361  real_t *buf1_zm = (real_t *)chsend_up[2].ptr();
1362  real_t *buf1_tp = (real_t *)chsend_dn[3].ptr();
1363  real_t *buf1_tm = (real_t *)chsend_up[3].ptr();
1364 
1365  BridgeQXS::mult_wilson_1_dirac(buf1_xp, buf1_xm, buf1_yp, buf1_ym,
1366  buf1_zp, buf1_zm, buf1_tp, buf1_tm,
1367  up, v1, &m_boundary[0], m_Nsize, do_comm);
1368 
1369 #pragma omp barrier
1370 
1371 #pragma omp master
1372  {
1373  chset_recv.start();
1374  chset_send.start();
1375  }
1376  }
1377 
1378  BridgeQXS::mult_clover_bulk_dirac(v2, up, ct, v1,
1379  m_CKs, &m_boundary[0], m_Nsize, do_comm);
1380 
1381  if (do_comm_any > 0) {
1382 #pragma omp master
1383  {
1384  chset_recv.wait();
1385  chset_send.wait();
1386  }
1387 
1388 #pragma omp barrier
1389 
1390  real_t *buf2_xp = (real_t *)chrecv_up[0].ptr();
1391  real_t *buf2_xm = (real_t *)chrecv_dn[0].ptr();
1392  real_t *buf2_yp = (real_t *)chrecv_up[1].ptr();
1393  real_t *buf2_ym = (real_t *)chrecv_dn[1].ptr();
1394  real_t *buf2_zp = (real_t *)chrecv_up[2].ptr();
1395  real_t *buf2_zm = (real_t *)chrecv_dn[2].ptr();
1396  real_t *buf2_tp = (real_t *)chrecv_up[3].ptr();
1397  real_t *buf2_tm = (real_t *)chrecv_dn[3].ptr();
1398 
1399  BridgeQXS::mult_wilson_2_dirac(v2, up, v1,
1400  buf2_xp, buf2_xm, buf2_yp, buf2_ym,
1401  buf2_zp, buf2_zm, buf2_tp, buf2_tm,
1402  m_CKs, &m_boundary[0], m_Nsize, do_comm);
1403  }
1404 
1405 #pragma omp barrier
1406 }
1407 
1408 
1409 //====================================================================
1410 template<typename AFIELD>
1412  const int ieo)
1413 {
1414  real_t *v2 = v.ptr(0);
1415  real_t *v1 = const_cast<AFIELD *>(&w)->ptr(0);
1416  real_t *up = m_Ublock.ptr(0);
1417  real_t *ct = m_T.ptr(0);
1418 
1419  int jeo = (ieo + m_Ieo) % 2;
1420 
1421 #pragma omp barrier
1422 
1423  BridgeQXS::mult_clover_dd_dirac(v2, up, ct, v1,
1424  m_CKs, &m_boundary[0],
1425  m_Nsize, m_block_sizev, jeo);
1426 
1427 #pragma omp barrier
1428 }
1429 
1430 
1431 //====================================================================
1432 template<typename AFIELD>
1434 {
1435  //#ifdef USE_QWSLIB
1436 #if 0
1437 #else
1438  real_t *v2 = v.ptr(0);
1439  real_t *v1 = const_cast<AFIELD *>(&w)->ptr(0);
1440  real_t *up = m_Ublock.ptr(0);
1441  real_t *ct = m_T.ptr(0);
1442 
1443 #pragma omp barrier
1444 
1445  BridgeQXS::mult_clover_dd_dirac(v2, up, ct, v1,
1446  m_CKs, &m_boundary[0],
1447  m_Nsize, m_block_sizev, -1);
1448 
1449  // vector varialbes at the boundary of the domain are not treated here...
1450  // BridgeQXS::mult_clover_bulk_dirac(v2, up, ct, v1,
1451  // m_CKs, &m_boundary[0], m_Nsize, do_comm);
1452 
1453 #pragma omp barrier
1454 #endif
1455 }
1456 
1457 
1458 //====================================================================
1459 template<typename AFIELD>
1461 {
1462 #ifdef USE_QWSLIB
1463  if (sizeof(real_t) == 4) {
1464  // vout.general("hoge: calling ddd_s_noprl_\n");
1465  ddd_s_noprl_((scs_t *)v.ptr(0), (scs_t *)const_cast<AFIELD *>(&w)->ptr(0));
1466  } else if (sizeof(real_t) == 8) {
1467  // vout.general("hoge: calling ddd_d_\n");
1468  int nth = ThreadManager::get_num_threads();
1469  if (nth > 1) {
1470  vout.crucial(m_vl, "%s: mult_D_qws cannot be called in thread parallel region\n", class_name.c_str());
1471  abort();
1472  }
1473  ddd_d_((scd_t *)v.ptr(0), (scd_t *)const_cast<AFIELD *>(&w)->ptr(0));
1474  }
1475 #pragma omp barrier
1476  mult_clv(v);
1477 #pragma omp barrier
1478  vout.detailed("mult_D_qws, done.\n");
1479 #ifdef DEBUG
1480  {
1481  double w2 = w.norm2();
1482  double v2 = v.norm2();
1483  vout.general(" mult_D_qws: (in) w2=%e\n", w2);
1484  vout.general(" mult_D_qws: (out) v2=%e\n", v2);
1485  }
1486 #endif
1487 #endif
1488 }
1489 
1490 
1491 //====================================================================
1492 template<typename AFIELD>
1494 {
1495  real_t *vp = v.ptr(0);
1496  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
1497 
1498  clear(vp);
1499 
1500  mult_xp(vp, wp, 0);
1501  mult_xm(vp, wp, 0);
1502  mult_yp(vp, wp, 0);
1503  mult_ym(vp, wp, 0);
1504  mult_zp(vp, wp, 0);
1505  mult_zm(vp, wp, 0);
1506  mult_tp(vp, wp, 0);
1507  mult_tm(vp, wp, 0);
1508 
1509  mult_csw(vp, wp);
1510 
1511  aypx(-m_CKs, vp, wp);
1512 
1513 #pragma omp barrier
1514 }
1515 
1516 
1517 //====================================================================
1518 template<typename AFIELD>
1520  const int mu)
1521 {
1522  real_t *vp = v.ptr(0);
1523  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
1524 
1525 #pragma omp barrier
1526 
1527  clear(vp);
1528 
1529  if (mu == 0) {
1530  mult_xp(vp, wp, 1);
1531  scal_local(vp, real_t(-1.0));
1532  mult_xp(vp, wp, 0);
1533  } else if (mu == 1) {
1534  mult_yp(vp, wp, 1);
1535  scal_local(vp, real_t(-1.0));
1536  mult_yp(vp, wp, 0);
1537  } else if (mu == 2) {
1538  mult_zp(vp, wp, 1);
1539  scal_local(vp, real_t(-1.0));
1540  mult_zp(vp, wp, 0);
1541  } else if (mu == 3) {
1542  mult_tp(vp, wp, 1);
1543  scal_local(vp, real_t(-1.0));
1544  mult_tp(vp, wp, 0);
1545  }
1546 
1547  scal_local(vp, -m_CKs);
1548 
1549 #pragma omp barrier
1550 }
1551 
1552 
1553 //====================================================================
1554 template<typename AFIELD>
1556  const int mu)
1557 {
1558  real_t *vp = v.ptr(0);
1559  real_t *wp = const_cast<AFIELD *>(&w)->ptr(0);
1560 
1561 #pragma omp barrier
1562 
1563  clear(vp);
1564 
1565  if (mu == 0) {
1566  mult_xm(vp, wp, 1);
1567  scal_local(vp, real_t(-1.0));
1568  mult_xm(vp, wp, 0);
1569  } else if (mu == 1) {
1570  mult_ym(vp, wp, 1);
1571  scal_local(vp, real_t(-1.0));
1572  mult_ym(vp, wp, 0);
1573  } else if (mu == 2) {
1574  mult_zm(vp, wp, 1);
1575  scal_local(vp, real_t(-1.0));
1576  mult_zm(vp, wp, 0);
1577  } else if (mu == 3) {
1578  mult_tm(vp, wp, 1);
1579  scal_local(vp, real_t(-1.0));
1580  mult_tm(vp, wp, 0);
1581  }
1582 
1583  scal_local(vp, -m_CKs);
1584 
1585 #pragma omp barrier
1586 }
1587 
1588 
1589 //====================================================================
1590 
1591 /*
1592 template<typename AFIELD>
1593 void AFopr_Clover_QWS_dd<AFIELD>::mult_block_hop(AFIELD &v, const AFIELD &w,
1594  int mu)
1595 {
1596  real_t *vp = v.ptr(0);
1597  real_t *wp = const_cast<AFIELD*>(&w)->ptr(0);
1598 
1599 #pragma omp barrier
1600 
1601  clear(vp);
1602 
1603  if(mu == 0){
1604  mult_xp(vp, wp, 1);
1605  scal_local(vp, real_t(-1.0));
1606  mult_xp(vp, wp, 0);
1607  }else if(mu == 1){
1608  mult_yp(vp, wp, 1);
1609  scal_local(vp, real_t(-1.0));
1610  mult_yp(vp, wp, 0);
1611  }else if(mu == 2){
1612  mult_zp(vp, wp, 1);
1613  scal_local(vp, real_t(-1.0));
1614  mult_zp(vp, wp, 0);
1615  }else if(mu == 3){
1616  mult_tp(vp, wp, 1);
1617  scal_local(vp, real_t(-1.0));
1618  mult_tp(vp, wp, 0);
1619  }
1620 
1621  scal_local(vp, -m_CKs);
1622 
1623 #pragma omp barrier
1624 
1625 }
1626 */
1627 
1628 //====================================================================
1629 template<typename AFIELD>
1631 {
1632  D(m_v2, w);
1633  mult_gm5(v, m_v2);
1634 }
1635 
1636 
1637 //====================================================================
1638 template<typename AFIELD>
1640 {
1641  int ith, nth, is, ns;
1642  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1643 
1644  Vsimd_t vt[NVCD], wt[NVCD];
1645 
1646  for (int site = is; site < ns; ++site) {
1647  load_vec(vt, &v[VLEN * NVCD * site], NVCD);
1648  load_vec(wt, &w[VLEN * NVCD * site], NVCD);
1649  aypx_vec(a, vt, wt, NVCD);
1650  save_vec(&v[VLEN * NVCD * site], vt, NVCD);
1651  }
1652 }
1653 
1654 
1655 //====================================================================
1656 template<typename AFIELD>
1658 {
1659  int ith, nth, is, ns;
1660  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1661 
1662  Vsimd_t vt[NVCD];
1663  clear_vec(vt, NVCD);
1664 
1665  for (int site = is; site < ns; ++site) {
1666  save_vec(&v[VLEN * NVCD * site], vt, NVCD);
1667  }
1668 }
1669 
1670 
1671 //====================================================================
1672 template<typename AFIELD>
1674 {
1675  int idir = 0;
1676 
1677  int ith, nth, is, ns;
1678  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1679 
1680  Vsimd_t v2v[NVCD];
1681 
1682  real_t *buf1 = (real_t *)chsend_dn[0].ptr();
1683  real_t *buf2 = (real_t *)chrecv_up[0].ptr();
1684 
1685  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1686  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1687 
1688 #pragma omp barrier
1689 
1690  if (do_comm[0] > 0) {
1691  for (int site = is; site < ns; ++site) {
1692  int ix = site % m_Nxv;
1693  int iyzt = site / m_Nxv;
1694  if (ix == 0) {
1695  int ibf = VLENY * NVC * ND2 * iyzt;
1696  mult_wilson_xp1(&buf1[ibf], &v1[VLEN * NVCD * site]);
1697  }
1698  }
1699 
1700 #pragma omp barrier
1701 
1702 #pragma omp master
1703  {
1704  chrecv_up[0].start();
1705  chsend_dn[0].start();
1706  chrecv_up[0].wait();
1707  chsend_dn[0].wait();
1708  }
1709 #pragma omp barrier
1710  } // if(do_comm[0] == 1)
1711 
1712  for (int site = is; site < ns; ++site) {
1713  int ix = site % m_Nxv;
1714  int iyzt = site / m_Nxv;
1715 
1716  Vsimd_t v2v[NVCD];
1717  clear_vec(v2v, NVCD);
1718 
1719  real_t zL[VLEN * NVCD];
1720 
1721  if ((ix < m_Nxv - 1) || (do_comm[0] == 0)) {
1722  int nei = ix + 1 + m_Nxv * iyzt;
1723  if (ix == m_Nxv - 1) nei = 0 + m_Nxv * iyzt;
1724  shift_vec2_xbw(zL, &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei], NVCD);
1725  mult_wilson_xpb(v2v, &u[VLEN * NDF * site], zL);
1726  } else {
1727  int ibf = VLENY * NVC * ND2 * iyzt;
1728  shift_vec0_xbw(zL, &v1[VLEN * NVCD * site], NVCD);
1729  mult_wilson_xpb(v2v, &u[VLEN * NDF * site], zL);
1730  mult_wilson_xp2(v2v, &u[VLEN * NDF * site], &buf2[ibf]);
1731  }
1732 
1733  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1734  }
1735 
1736 #pragma omp barrier
1737 }
1738 
1739 
1740 //====================================================================
1741 template<typename AFIELD>
1743 {
1744  int idir = 0;
1745 
1746  int ith, nth, is, ns;
1747  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1748 
1749  Vsimd_t v2v[NVCD];
1750 
1751  real_t *buf1 = (real_t *)chsend_up[0].ptr();
1752  real_t *buf2 = (real_t *)chrecv_dn[0].ptr();
1753 
1754  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1755  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1756 
1757 #pragma omp barrier
1758 
1759  if (do_comm[0] > 0) {
1760  for (int site = is; site < ns; ++site) {
1761  int ix = site % m_Nxv;
1762  int iyzt = site / m_Nxv;
1763  if (ix == m_Nxv - 1) {
1764  int ibf = VLENY * NVC * ND2 * iyzt;
1765  mult_wilson_xm1(&buf1[ibf], &u[VLEN * NDF * site],
1766  &v1[VLEN * NVCD * site]);
1767  }
1768  }
1769 
1770 #pragma omp barrier
1771 #pragma omp master
1772  {
1773  chrecv_dn[0].start();
1774  chsend_up[0].start();
1775  chrecv_dn[0].wait();
1776  chsend_up[0].wait();
1777  }
1778 #pragma omp barrier
1779  } // end of if(do_comm[0] > 0)
1780 
1781  for (int site = is; site < ns; ++site) {
1782  int ix = site % m_Nxv;
1783  int iyzt = site / m_Nxv;
1784 
1785  real_t zL[VLEN * NVCD];
1786  real_t uL[VLEN * NDF];
1787 
1788  clear_vec(v2v, NVCD);
1789 
1790  if ((ix > 0) || (do_comm[0] == 0)) {
1791  int nei = ix - 1 + m_Nxv * iyzt;
1792  if (ix == 0) nei = m_Nxv - 1 + m_Nxv * iyzt;
1793  shift_vec2_xfw(zL, &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei], NVCD);
1794  shift_vec2_xfw(uL, &u[VLEN * NDF * site], &u[VLEN * NDF * nei], NDF);
1795  mult_wilson_xmb(v2v, uL, zL);
1796  } else {
1797  int ibf = VLENY * NVC * ND2 * iyzt;
1798  shift_vec0_xfw(zL, &v1[VLEN * NVCD * site], NVCD);
1799  shift_vec0_xfw(uL, &u[VLEN * NDF * site], NDF);
1800  mult_wilson_xmb(v2v, uL, zL);
1801  mult_wilson_xm2(v2v, &buf2[ibf]);
1802  }
1803 
1804  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1805  }
1806 
1807 #pragma omp barrier
1808 }
1809 
1810 
1811 //====================================================================
1812 template<typename AFIELD>
1814 {
1815  int idir = 1;
1816  int Nxy = m_Nxv * m_Nyv;
1817 
1818  int ith, nth, is, ns;
1819  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1820 
1821  real_t *buf1 = (real_t *)chsend_dn[1].ptr();
1822  real_t *buf2 = (real_t *)chrecv_up[1].ptr();
1823 
1824  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1825  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1826 
1827 #pragma omp barrier
1828 
1829  if (do_comm[1] > 0) {
1830  for (int site = is; site < ns; ++site) {
1831  int ix = site % m_Nxv;
1832  int iy = (site / m_Nxv) % m_Nyv;
1833  int izt = site / Nxy;
1834  if (iy == 0) {
1835  int ibf = VLENX * NVC * ND2 * (ix + m_Nxv * izt);
1836  mult_wilson_yp1(&buf1[ibf], &v1[VLEN * NVCD * site]);
1837  }
1838  }
1839 
1840 #pragma omp barrier
1841 
1842 #pragma omp master
1843  {
1844  chrecv_up[1].start();
1845  chsend_dn[1].start();
1846  chrecv_up[1].wait();
1847  chsend_dn[1].wait();
1848  }
1849 
1850 #pragma omp barrier
1851  } // end of if(do_comm[1] > 0)
1852 
1853  for (int site = is; site < ns; ++site) {
1854  int ix = site % m_Nxv;
1855  int iy = (site / m_Nxv) % m_Nyv;
1856  int izt = site / Nxy;
1857 
1858  Vsimd_t v2v[NVCD];
1859  clear_vec(v2v, NVCD);
1860 
1861  real_t zL[VLEN * NVCD];
1862 
1863  if ((iy < m_Nyv - 1) || (do_comm[1] == 0)) {
1864  int iy2 = (iy + 1) % m_Nyv;
1865  int nei = ix + m_Nxv * (iy2 + m_Nyv * izt);
1866  shift_vec2_ybw(zL, &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei], NVCD);
1867  mult_wilson_ypb(v2v, &u[VLEN * NDF * site], zL);
1868  } else {
1869  int ibf = VLENX * NVC * ND2 * (ix + m_Nxv * izt);
1870  shift_vec0_ybw(zL, &v1[VLEN * NVCD * site], NVCD);
1871  mult_wilson_ypb(v2v, &u[VLEN * NDF * site], zL);
1872  mult_wilson_yp2(v2v, &u[VLEN * NDF * site], &buf2[ibf]);
1873  }
1874 
1875  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1876  }
1877 
1878 #pragma omp barrier
1879 }
1880 
1881 
1882 //====================================================================
1883 template<typename AFIELD>
1885 {
1886  int idir = 1;
1887  int Nxy = m_Nxv * m_Nyv;
1888 
1889  int ith, nth, is, ns;
1890  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1891 
1892  real_t *buf1 = (real_t *)chsend_up[1].ptr();
1893  real_t *buf2 = (real_t *)chrecv_dn[1].ptr();
1894 
1895  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1896  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1897 
1898 #pragma omp barrier
1899 
1900  if (do_comm[1] > 0) {
1901  for (int site = is; site < ns; ++site) {
1902  int ix = site % m_Nxv;
1903  int iy = (site / m_Nxv) % m_Nyv;
1904  int izt = site / Nxy;
1905  if (iy == m_Nyv - 1) {
1906  int ibf = VLENX * NVC * ND2 * (ix + m_Nxv * izt);
1907  mult_wilson_ym1(&buf1[ibf], &u[VLEN * NDF * site],
1908  &v1[VLEN * NVCD * site]);
1909  }
1910  }
1911 
1912 #pragma omp barrier
1913 
1914 #pragma omp master
1915  {
1916  chrecv_dn[1].start();
1917  chsend_up[1].start();
1918  chrecv_dn[1].wait();
1919  chsend_up[1].wait();
1920  }
1921 
1922 #pragma omp barrier
1923  }
1924 
1925  for (int site = is; site < ns; ++site) {
1926  int ix = site % m_Nxv;
1927  int iy = (site / m_Nxv) % m_Nyv;
1928  int izt = site / Nxy;
1929 
1930  Vsimd_t v2v[NVCD];
1931  clear_vec(v2v, NVCD);
1932 
1933  real_t zL[VLEN * NVCD];
1934  real_t uL[VLEN * NDF];
1935 
1936  if ((iy != 0) || (do_comm[idir] == 0)) {
1937  int iy2 = (iy - 1 + m_Nyv) % m_Nyv;
1938  int nei = ix + m_Nxv * (iy2 + m_Nyv * izt);
1939  shift_vec2_yfw(zL, &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei], NVCD);
1940  shift_vec2_yfw(uL, &u[VLEN * NDF * site], &u[VLEN * NDF * nei], NDF);
1941  mult_wilson_ymb(v2v, uL, zL);
1942  } else {
1943  int ibf = VLENX * NVC * ND2 * (ix + m_Nxv * izt);
1944  shift_vec0_yfw(zL, &v1[VLEN * NVCD * site], NVCD);
1945  shift_vec0_yfw(uL, &u[VLEN * NDF * site], NDF);
1946  mult_wilson_ymb(v2v, uL, zL);
1947  mult_wilson_ym2(v2v, &buf2[ibf]);
1948  }
1949 
1950  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
1951  }
1952 
1953 #pragma omp barrier
1954 }
1955 
1956 
1957 //====================================================================
1958 template<typename AFIELD>
1960 {
1961  int idir = 2;
1962  int Nxy = m_Nxv * m_Nyv;
1963 
1964  int ith, nth, is, ns;
1965  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
1966 
1967  real_t *buf1 = (real_t *)chsend_dn[2].ptr();
1968  real_t *buf2 = (real_t *)chrecv_up[2].ptr();
1969 
1970  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
1971  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
1972 
1973 #pragma omp barrier
1974 
1975  if (do_comm[2] > 0) {
1976  for (int site = is; site < ns; ++site) {
1977  int ixy = site % Nxy;
1978  int iz = (site / Nxy) % m_Nz;
1979  int it = site / (Nxy * m_Nz);
1980  if (iz == 0) {
1981  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
1982  mult_wilson_zp1(&buf1[ibf], &v1[VLEN * NVCD * site]);
1983  }
1984  }
1985 
1986 #pragma omp barrier
1987 
1988 #pragma omp master
1989  {
1990  chrecv_up[2].start();
1991  chsend_dn[2].start();
1992  chrecv_up[2].wait();
1993  chsend_dn[2].wait();
1994  }
1995 
1996 #pragma omp barrier
1997  }
1998 
1999  for (int site = is; site < ns; ++site) {
2000  int ixy = site % Nxy;
2001  int iz = (site / Nxy) % m_Nz;
2002  int it = site / (Nxy * m_Nz);
2003 
2004  Vsimd_t v2v[NVCD];
2005  clear_vec(v2v, NVCD);
2006 
2007  if ((iz != m_Nz - 1) || (do_comm[2] == 0)) {
2008  int iz2 = (iz + 1) % m_Nz;
2009  int nei = ixy + Nxy * (iz2 + m_Nz * it);
2010  mult_wilson_zpb(v2v, &u[VLEN * NDF * site], &v1[VLEN * NVCD * nei]);
2011  } else {
2012  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
2013  mult_wilson_zp2(v2v, &u[VLEN * NDF * site], &buf2[ibf]);
2014  }
2015 
2016  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
2017  }
2018 
2019 #pragma omp barrier
2020 }
2021 
2022 
2023 //====================================================================
2024 template<typename AFIELD>
2026 {
2027  int idir = 2;
2028  int Nxy = m_Nxv * m_Nyv;
2029 
2030  int ith, nth, is, ns;
2031  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
2032 
2033  real_t *buf1 = (real_t *)chsend_up[2].ptr();
2034  real_t *buf2 = (real_t *)chrecv_dn[2].ptr();
2035 
2036  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
2037  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
2038 
2039 #pragma omp barrier
2040 
2041  if (do_comm[2] > 0) {
2042  for (int site = is; site < ns; ++site) {
2043  int ixy = site % Nxy;
2044  int iz = (site / Nxy) % m_Nz;
2045  int it = site / (Nxy * m_Nz);
2046  if (iz == m_Nz - 1) {
2047  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
2048  mult_wilson_zm1(&buf1[ibf], &u[VLEN * NDF * site],
2049  &v1[VLEN * NVCD * site]);
2050  }
2051  }
2052 
2053 #pragma omp barrier
2054 
2055 #pragma omp master
2056  {
2057  chrecv_dn[2].start();
2058  chsend_up[2].start();
2059  chrecv_dn[2].wait();
2060  chsend_up[2].wait();
2061  }
2062 
2063 #pragma omp barrier
2064  }
2065 
2066  for (int site = is; site < ns; ++site) {
2067  int ixy = site % Nxy;
2068  int iz = (site / Nxy) % m_Nz;
2069  int it = site / (Nxy * m_Nz);
2070 
2071  Vsimd_t v2v[NVCD];
2072  clear_vec(v2v, NVCD);
2073 
2074  if ((iz > 0) || (do_comm[2] == 0)) {
2075  int iz2 = (iz - 1 + m_Nz) % m_Nz;
2076  int nei = ixy + Nxy * (iz2 + m_Nz * it);
2077  mult_wilson_zmb(v2v, &u[VLEN * NDF * nei], &v1[VLEN * NVCD * nei]);
2078  } else {
2079  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
2080  mult_wilson_zm2(v2v, &buf2[ibf]);
2081  }
2082 
2083  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
2084  }
2085 
2086 #pragma omp barrier
2087 }
2088 
2089 
2090 //====================================================================
2091 template<typename AFIELD>
2093 {
2094  int idir = 3;
2095  int Nxyz = m_Nxv * m_Nyv * m_Nz;
2096 
2097  int ith, nth, is, ns;
2098  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
2099 
2100  real_t *buf1 = (real_t *)chsend_dn[3].ptr();
2101  real_t *buf2 = (real_t *)chrecv_up[3].ptr();
2102 
2103  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
2104  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
2105 
2106 #pragma omp barrier
2107 
2108  if (do_comm[3] > 0) {
2109  for (int site = is; site < ns; ++site) {
2110  int ixyz = site % Nxyz;
2111  int it = site / Nxyz;
2112  if (it == 0) {
2113  mult_wilson_tp1_dirac(&buf1[VLEN * NVC * ND2 * ixyz],
2114  &v1[VLEN * NVCD * site]);
2115  }
2116  }
2117 
2118 #pragma omp barrier
2119 
2120 #pragma omp master
2121  {
2122  chrecv_up[3].start();
2123  chsend_dn[3].start();
2124  chrecv_up[3].wait();
2125  chsend_dn[3].wait();
2126  }
2127 
2128 #pragma omp barrier
2129  }
2130 
2131  for (int site = is; site < ns; ++site) {
2132  int ixyz = site % Nxyz;
2133  int it = site / Nxyz;
2134 
2135  Vsimd_t v2v[NVCD];
2136  clear_vec(v2v, NVCD);
2137 
2138  if ((it < m_Nt - 1) || (do_comm[3] == 0)) {
2139  int it2 = (it + 1) % m_Nt;
2140  int nei = ixyz + Nxyz * it2;
2141  mult_wilson_tpb_dirac(v2v, &u[VLEN * NDF * site],
2142  &v1[VLEN * NVCD * nei]);
2143  } else {
2144  mult_wilson_tp2_dirac(v2v, &u[VLEN * NDF * site],
2145  &buf2[VLEN * NVC * ND2 * ixyz]);
2146  }
2147 
2148  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
2149  }
2150 
2151 #pragma omp barrier
2152 }
2153 
2154 
2155 //====================================================================
2156 template<typename AFIELD>
2158 {
2159  int idir = 3;
2160  int Nxyz = m_Nxv * m_Nyv * m_Nz;
2161 
2162  int ith, nth, is, ns;
2163  set_threadtask_mult(ith, nth, is, ns, m_Nstv);
2164 
2165  real_t *buf1 = (real_t *)chsend_up[3].ptr();
2166  real_t *buf2 = (real_t *)chrecv_dn[3].ptr();
2167 
2168  real_t *u = m_U.ptr(m_Ndf * m_Nst * idir);
2169  if (isap != 0) u = m_Ublock.ptr(m_Ndf * m_Nst * idir);
2170 
2171 #pragma omp barrier
2172 
2173  if (do_comm[3] > 0) {
2174  for (int site = is; site < ns; ++site) {
2175  int ixyz = site % Nxyz;
2176  int it = site / Nxyz;
2177  if (it == m_Nt - 1) {
2178  mult_wilson_tm1_dirac(&buf1[VLEN * NVC * ND2 * ixyz],
2179  &u[VLEN * NDF * site], &v1[VLEN * NVCD * site]);
2180  }
2181  }
2182 
2183 #pragma omp barrier
2184 
2185 #pragma omp master
2186  {
2187  chrecv_dn[3].start();
2188  chsend_up[3].start();
2189  chrecv_dn[3].wait();
2190  chsend_up[3].wait();
2191  }
2192 #pragma omp barrier
2193  }
2194 
2195  for (int site = is; site < ns; ++site) {
2196  int ixyz = site % Nxyz;
2197  int it = site / Nxyz;
2198 
2199  Vsimd_t v2v[NVCD];
2200  clear_vec(v2v, NVCD);
2201 
2202  if ((it > 0) || (do_comm[3] == 0)) {
2203  int it2 = (it - 1 + m_Nt) % m_Nt;
2204  int nei = ixyz + Nxyz * it2;
2205  mult_wilson_tmb_dirac(v2v, &u[VLEN * NDF * nei],
2206  &v1[VLEN * NVCD * nei]);
2207  } else {
2208  mult_wilson_tm2_dirac(v2v, &buf2[VLEN * NVC * ND2 * ixyz]);
2209  }
2210 
2211  add_vec(&v2[VLEN * NVCD * site], v2v, NVCD);
2212  }
2213 
2214 #pragma omp barrier
2215 }
2216 
2217 
2218 //====================================================================
2219 template<typename AFIELD>
2220 double AFopr_Clover_QWS_dd<AFIELD>::flop_count(const std::string mode)
2221 {
2222  // The following counting explicitly depends on the implementation.
2223  // It will be recalculated when the code is modified.
2224  // The present counting is based on rev.1107. [24 Aug 2014 H.Matsufuru]
2225 
2226  int Lvol = CommonParameters::Lvol();
2227  double flop_site, flop;
2228 
2229  if (m_repr == "Dirac") {
2230  flop_site = static_cast<double>(
2231  m_Nc * m_Nd * (4 + 6 * (4 * m_Nc + 2) + 2 * (4 * m_Nc + 1)));
2232  } else if (m_repr == "Chiral") {
2233  flop_site = static_cast<double>(
2234  m_Nc * m_Nd * (4 + 8 * (4 * m_Nc + 2)));
2235  } else {
2236  vout.crucial(m_vl, "%s: input repr is undefined.\n");
2237  abort();
2238  }
2239 
2240  flop = flop_site * static_cast<double>(Lvol);
2241  if ((mode == "DdagD") || (mode == "DDdag")) flop *= 2.0;
2242 
2243  return flop;
2244 }
2245 
2246 
2247 //====================================================================
2248 template<typename AFIELD>
2250 {
2251  // The following implentastion was copied from SIMD implementation
2252  // by Kanamori-san. To be confirmed. [08 May 2021 H.Matsufuru]
2253 
2254  int NPE = CommonParameters::NPE();
2255  int Nc = CommonParameters::Nc();
2256  int Nd = CommonParameters::Nd();
2257 
2258  int NBx = m_Nx / m_block_size[0];
2259  int NBy = m_Ny / m_block_size[1];
2260  int NBz = m_Nz / m_block_size[2];
2261  int NBt = m_Nt / m_block_size[3];
2262  size_t nvol2 = NBx * NBy * NBz * NBt / 2;
2263 
2264  int block_x = m_block_size[0];
2265  int block_y = m_block_size[1];
2266  int block_z = m_block_size[2];
2267  int block_t = m_block_size[3];
2268 
2269  int clover_site = 4 * Nc * Nc * Nd * Nd;
2270  int hop_x_site = Nc * Nd * (4 * Nc + 2);
2271  int hop_y_site = Nc * Nd * (4 * Nc + 2);
2272  int hop_z_site = Nc * Nd * (4 * Nc + 2);
2273  int hop_t_site = Nc * Nd * (4 * Nc + 1);
2274  int accum_site = 4 * Nc * Nd;
2275 
2276  // Dirac representation is assumed.
2277  double flop_x = 2.0 * static_cast<double>(hop_x_site
2278  * (block_x - 1) * block_y * block_z * block_t);
2279  double flop_y = 2.0 * static_cast<double>(hop_y_site
2280  * block_x * (block_y - 1) * block_z * block_t);
2281  double flop_z = 2.0 * static_cast<double>(hop_z_site
2282  * block_x * block_y * (block_z - 1) * block_t);
2283  double flop_t = 2.0 * static_cast<double>(hop_t_site
2284  * block_x * block_y * block_z * (block_t - 1));
2285  double flop_sap_mult = flop_x + flop_y + flop_z + flop_t
2286  + static_cast<double>(clover_site + accum_site);
2287 
2288  return flop_sap_mult * static_cast<double>(nvol2)
2289  * static_cast<double>(NPE);
2290 }
2291 
2292 
2293 //============================================================END=====
AFopr_Clover_QWS_dd::H
void H(AFIELD &, const AFIELD &)
Definition: afopr_Clover_QWS_dd-tmpl.h:1630
AFopr_Clover_QWS_dd::scal_local
void scal_local(real_t *, real_t)
Definition: afopr_Clover_QWS_dd-tmpl.h:1211
CommonParameters::Ny
static int Ny()
Definition: commonParameters.h:106
CommonParameters::Nz
static int Nz()
Definition: commonParameters.h:107
BridgeQXS::mult_wilson_gm5_dirac
void mult_wilson_gm5_dirac(double *v2, double *v1, int *Nsize)
Definition: mult_Wilson_qxs-inc.h:411
AFopr_Clover_QWS_dd::mult_xm
void mult_xm(real_t *, real_t *, int)
Definition: afopr_Clover_QWS_dd-tmpl.h:1742
CommonParameters::Lvol
static long_t Lvol()
Definition: commonParameters.h:95
AFopr_Clover_QWS_dd::mult_up
void mult_up(int mu, AFIELD &, const AFIELD &)
upward nearest neighbor hopping term.
Definition: afopr_Clover_QWS_dd-tmpl.h:1013
Parameters::set_string
void set_string(const string &key, const string &value)
Definition: parameters.cpp:39
BridgeQXS::mult_wilson_2_dirac
void mult_wilson_2_dirac(double *v2, double *up, double *v1, double *buf_xp, double *buf_xm, double *buf_yp, double *buf_ym, double *buf_zp, double *buf_zm, double *buf_tp, double *buf_tm, double kappa, int *bc, int *Nsize, int *do_comm)
Definition: mult_Wilson_qxs-inc.h:296
BridgeQXS::mult_wilson_1_dirac
void mult_wilson_1_dirac(double *buf_xp, double *buf_xm, double *buf_yp, double *buf_ym, double *buf_zp, double *buf_zm, double *buf_tp, double *buf_tm, double *up, double *v1, int *bc, int *Nsize, int *do_comm)
Definition: mult_Wilson_qxs-inc.h:153
AFopr_Clover_QWS_dd::set_csw
void set_csw()
set_csw now assumes Dirac repr.
Definition: afopr_Clover_QWS_dd-tmpl.h:523
AFopr_Clover_QWS_dd::mult_yp
void mult_yp(real_t *, real_t *, int)
Definition: afopr_Clover_QWS_dd-tmpl.h:1813
ThreadManager::get_num_threads
static int get_num_threads()
returns available number of threads.
Definition: threadManager.cpp:246
CommonParameters::Ndim
static int Ndim()
Definition: commonParameters.h:117
AFopr_Clover_QWS_dd::convert
void convert(AFIELD &v, const Field &w)
convert of spinor field (to QWS specific).
Definition: afopr_Clover_QWS_dd-tmpl.h:952
Field::set
void set(const int jin, const int site, const int jex, double v)
Definition: field.h:175
Parameters
Class for parameters.
Definition: parameters.h:46
AIndex_lex
Definition: aindex_lex_base.h:17
AFopr_Clover_QWS_dd::mult_clv
void mult_clv(AFIELD &v)
Definition: afopr_Clover_QWS_dd-tmpl.h:1325
convert
void convert(INDEX &index, AFIELD &v, const Field &w)
Definition: afield-inc.h:129
NVCD
#define NVCD
Definition: define_params_SU3.h:20
AFopr_Clover_QWS_dd::Ddag
void Ddag(AFIELD &, const AFIELD &)
Definition: afopr_Clover_QWS_dd-tmpl.h:1142
AFopr_Clover_QWS_dd::flop_count
double flop_count()
returns floating operation counts.
Definition: afopr_Clover_QWS_dd.h:164
AFopr_Clover_QWS_dd::setup_channels
void setup_channels()
setup channels for communication.
Definition: afopr_Clover_QWS_dd-tmpl.h:92
VLEN
#define VLEN
Definition: bridgeQXS_Clover_coarse_double.cpp:12
Bridge::BridgeIO::detailed
void detailed(const char *format,...)
Definition: bridgeIO.cpp:219
Field::check_size
bool check_size(const int nin, const int nvol, const int nex) const
checking size parameters. [23 May 2016 H.Matsufuru]
Definition: field.h:135
NDF
#define NDF
Definition: field_F_imp_SU2-inc.h:4
Fopr_CloverTerm
Org::Fopr_CloverTerm Fopr_CloverTerm
Clover term operator.
Definition: fopr_CloverTerm.h:58
CommonParameters::Nvol
static int Nvol()
Definition: commonParameters.h:109
convert_gauge
void convert_gauge(INDEX &index, FIELD &v, const Field &w)
Definition: afield-inc.h:224
aypx
void aypx(const double a, Field &y, const Field &x)
aypx(y, a, x): y := a * y + x
Definition: field.cpp:509
Vsimd_t
Definition: vsimd_double-inc.h:13
AFopr_Clover_QWS_dd::mult_gm5
void mult_gm5(AFIELD &, const AFIELD &)
multiplies gamma_5 matrix.
Definition: afopr_Clover_QWS_dd-tmpl.h:1169
AFopr_Clover_QWS_dd::tidyup
void tidyup()
final tidy-up.
Definition: afopr_Clover_QWS_dd-tmpl.h:126
AFopr_Clover_QWS_dd::mult_clv_inv
void mult_clv_inv(AFIELD &v)
Definition: afopr_Clover_QWS_dd-tmpl.h:1304
afopr_Clover_QWS_dd.h
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
AFopr_Clover_QWS_dd::mult_zp
void mult_zp(real_t *, real_t *, int)
Definition: afopr_Clover_QWS_dd-tmpl.h:1959
AFopr_Clover_QWS_dd::mult_tm
void mult_tm(real_t *, real_t *, int)
Definition: afopr_Clover_QWS_dd-tmpl.h:2157
AFopr_Clover_QWS_dd::init
void init()
initial setup.
Definition: afopr_Clover_QWS_dd-tmpl.h:22
copy
void copy(Field &y, const Field &x)
copy(y, x): y = x
Definition: field.cpp:212
AFopr_Clover_QWS_dd::set_config
void set_config(Field *u)
setting gauge configuration.
Definition: afopr_Clover_QWS_dd-tmpl.h:250
AFopr_Clover_QWS_dd::D
void D(AFIELD &, const AFIELD &)
Definition: afopr_Clover_QWS_dd-tmpl.h:1118
AFopr_Clover_QWS_dd::mult_ddn
void mult_ddn(AFIELD &, const AFIELD &, const int mu)
Downward hopping part of mult.
Definition: afopr_Clover_QWS_dd-tmpl.h:1555
Field::norm2
double norm2() const
Definition: field.cpp:113
CommonParameters::Nx
static int Nx()
Definition: commonParameters.h:105
qws_lib.h
AFopr_Clover_QWS_dd::set_csw_qws
void set_csw_qws()
set_csw to use qws implemenation
Definition: afopr_Clover_QWS_dd-tmpl.h:693
CommonParameters::Nsize
static int Nsize(const int dir)
Definition: commonParameters.cpp:146
CommonParameters::Nc
static int Nc()
Definition: commonParameters.h:115
AFopr_Clover_QWS_dd< Field >::real_t
Field ::real_t real_t
Definition: afopr_Clover_QWS_dd.h:43
NC
#define NC
Definition: field_F_imp_SU2-inc.h:2
AFopr_Clover_QWS_dd::clear
void clear(real_t *)
Definition: afopr_Clover_QWS_dd-tmpl.h:1657
AFopr_Clover_QWS_dd::DdagD
void DdagD(AFIELD &, const AFIELD &)
Definition: afopr_Clover_QWS_dd-tmpl.h:1131
QXS_Gauge::set_boundary
void set_boundary(AField< REALTYPE, QXS > &ulex, const std::vector< int > &boundary)
Definition: afield_Gauge-inc.h:24
reverse_spinor
void reverse_spinor(INDEX &index, Field &v, FIELD &w)
Definition: afield-inc.h:380
CommonParameters::Nt
static int Nt()
Definition: commonParameters.h:108
Parameters::fetch_int_vector
int fetch_int_vector(const string &key, vector< int > &value) const
Definition: parameters.cpp:429
AFopr_Clover_QWS_dd::mult_dup
void mult_dup(AFIELD &, const AFIELD &, const int mu)
Upward hopping part of mult.
Definition: afopr_Clover_QWS_dd-tmpl.h:1519
Communicator::npe
static int npe(const int dir)
logical grid extent
Definition: communicator.cpp:112
BridgeQXS::mult_clover_bulk_dirac
void mult_clover_bulk_dirac(double *v2, double *up, double *ct, double *v1, double kappa, int *bc, int *Nsize, int *do_comm)
Definition: mult_Clover_qxs-inc.h:16
AFopr_Clover_QWS_dd::mult_D_qws
void mult_D_qws(AFIELD &, const AFIELD &)
D mult using QWS library.
Definition: afopr_Clover_QWS_dd-tmpl.h:1460
AFopr_Clover_QWS_dd::set_block_config
void set_block_config(AFIELD &)
inpose the block condition to link variable.
Definition: afopr_Clover_QWS_dd-tmpl.h:473
NCOL
#define NCOL
Definition: field_G_imp_SU2-inc.h:2
AIndex_eo_qxs::idx
int idx(const int in, const int Nin, const int ist, const int Nx2, const int Ny, const int leo, const int Nvol2, const int ex)
Definition: aindex_eo.h:27
ND
#define ND
Definition: field_F_imp_SU2-inc.h:5
Field::nvol
int nvol() const
Definition: field.h:127
AFopr_Clover_QWS_dd::set_mode
void set_mode(std::string mode)
setting mult mode.
Definition: afopr_Clover_QWS_dd-tmpl.h:1067
CommonParameters::NPE
static int NPE()
Definition: commonParameters.h:101
AFopr_Clover_QWS_dd::project_chiral
void project_chiral(AFIELD &, const AFIELD &, int ch)
Definition: afopr_Clover_QWS_dd-tmpl.h:1152
AFopr_Clover_QWS_dd::mult_ym
void mult_ym(real_t *, real_t *, int)
Definition: afopr_Clover_QWS_dd-tmpl.h:1884
AFopr_Clover_QWS_dd::mult_dn
void mult_dn(int mu, AFIELD &, const AFIELD &)
downward nearest neighbor hopping term.
Definition: afopr_Clover_QWS_dd-tmpl.h:1036
Field::cmp
double cmp(const int jin, const int site, const int jex) const
Definition: field.h:143
VLENY
#define VLENY
Definition: bridgeQXS_Clover_coarse_double.cpp:14
Field::ptr
const double * ptr(const int jin, const int site, const int jex) const
Definition: field.h:153
AFopr_Clover_QWS_dd::set_parameters
void set_parameters(const Parameters &params)
setting parameters by a Parameter object.
Definition: afopr_Clover_QWS_dd-tmpl.h:139
AFopr_Clover_QWS_dd::mult_dd
void mult_dd(AFIELD &, const AFIELD &)
Mult only inside domain.
Definition: afopr_Clover_QWS_dd-tmpl.h:1433
AFopr_Clover_QWS_dd::mult
void mult(AFIELD &, const AFIELD &)
multiplies fermion operator to a given field.
Definition: afopr_Clover_QWS_dd-tmpl.h:1080
CommonParameters::Nd
static int Nd()
Definition: commonParameters.h:116
NVC
#define NVC
Definition: fopr_Wilson_impl_SU2-inc.h:15
CommonParameters::Lsize
static int Lsize(const int dir)
Definition: commonParameters.cpp:120
reverse
void reverse(INDEX &index, Field &v, const AFIELD &w)
Definition: afield-inc.h:159
Bridge::BridgeIO::set_verbose_level
static VerboseLevel set_verbose_level(const std::string &str)
Definition: bridgeIO.cpp:133
AFopr_Clover_QWS_dd::set_boundary
void set_boundary()
inpose the boundary condition to link variable.
Definition: afopr_Clover_QWS_dd-tmpl.h:372
AFopr_Clover_QWS_dd
Definition: afopr_Clover_QWS_dd.h:40
VLENX
#define VLENX
Definition: bridgeQXS_Clover_coarse_double.cpp:13
AFopr_Clover_QWS_dd::set_boundary_config
void set_boundary_config(AFIELD &, const int)
partially inpose the boundary condition to link variable.
Definition: afopr_Clover_QWS_dd-tmpl.h:333
AFopr_Clover_QWS_dd::mult_dag
void mult_dag(AFIELD &, const AFIELD &)
hermitian conjugate of mult.
Definition: afopr_Clover_QWS_dd-tmpl.h:1099
AFopr_Clover_QWS_dd::mult_D_alt
void mult_D_alt(AFIELD &, const AFIELD &)
D mult using mult_xp, etc.
Definition: afopr_Clover_QWS_dd-tmpl.h:1493
scal
void scal(Field &x, const double a)
scal(x, a): x = a * x
Definition: field.cpp:261
Communicator::ipe
static int ipe(const int dir)
logical coordinate of current proc.
Definition: communicator.cpp:105
Field_F
Wilson-type fermion field.
Definition: field_F.h:37
Parameters::fetch_double
int fetch_double(const string &key, double &value) const
Definition: parameters.cpp:327
AFopr_Clover_QWS_dd::aypx
void aypx(real_t, real_t *, real_t *)
Definition: afopr_Clover_QWS_dd-tmpl.h:1639
complex_t
ComplexTraits< double >::complex_t complex_t
Definition: afopr_Clover_coarse_double.cpp:23
Parameters::get_string
string get_string(const string &key) const
Definition: parameters.cpp:221
Field_F::set_r
void set_r(const int cc, const int s, const int site, const int e, const double re)
Definition: field_F.h:106
Bridge::BridgeIO::crucial
void crucial(const char *format,...)
Definition: bridgeIO.cpp:180
AFopr_Clover_QWS_dd::mult_csw
void mult_csw(real_t *, real_t *)
set_csw now assumes Dirac repr.
Definition: afopr_Clover_QWS_dd-tmpl.h:1260
BridgeQXS::mult_clover_dd_dirac
void mult_clover_dd_dirac(double *v2, double *up, double *ct, double *v1, double kappa, int *bc, int *Nsize, int *block_size, int ieo)
Definition: mult_Clover_dd_qxs-inc.h:15
AFopr_Clover_QWS_dd::mult_tp
void mult_tp(real_t *, real_t *, int)
Definition: afopr_Clover_QWS_dd-tmpl.h:2092
AFopr_Clover_QWS_dd::mult_zm
void mult_zm(real_t *, real_t *, int)
Definition: afopr_Clover_QWS_dd-tmpl.h:2025
Field
Container of Field-type object.
Definition: field.h:46
AIndex_lex_QWS_dd
Lexical site index.
Definition: aindex_lex_QWS_dd.h:82
ThreadManager::get_thread_id
static int get_thread_id()
returns thread id.
Definition: threadManager.cpp:253
AFopr_Clover_QWS_dd::mult_sap
void mult_sap(AFIELD &, const AFIELD &, const int ieo)
SAP operator.
Definition: afopr_Clover_QWS_dd-tmpl.h:1411
AFopr_Clover_QWS_dd::solve_csw_qws_inv
void solve_csw_qws_inv(Field &, const Field &)
Definition: afopr_Clover_QWS_dd-tmpl.h:830
AFopr_Clover_QWS_dd::set_csw_chrot
void set_csw_chrot()
set_csw now assumes Dirac repr. with internally using Chiral repr.
Definition: afopr_Clover_QWS_dd-tmpl.h:606
ND2
#define ND2
Definition: define_params_SU3.h:18
AFopr_Clover_QWS_dd::flop_count_sap
double flop_count_sap()
returns floating operation counts of mult_sap.
Definition: afopr_Clover_QWS_dd-tmpl.h:2249
AFopr_Clover_QWS_dd::mult_gm4
void mult_gm4(AFIELD &, const AFIELD &)
Definition: afopr_Clover_QWS_dd-tmpl.h:1227
Bridge::BridgeIO::general
void general(const char *format,...)
Definition: bridgeIO.cpp:200
AFopr_Clover_QWS_dd::reverse
void reverse(Field &v, const AFIELD &w)
reverse of spinor field (from QWS specific).
Definition: afopr_Clover_QWS_dd-tmpl.h:982
AFopr_Clover_QWS_dd::get_mode
std::string get_mode() const
returns mult mode.
Definition: afopr_Clover_QWS_dd-tmpl.h:1059
ThreadManager::assert_single_thread
static void assert_single_thread(const std::string &class_name)
assert currently running on single thread.
Definition: threadManager.cpp:372
AFopr_Clover_QWS_dd::mult_xp
void mult_xp(real_t *, real_t *, int)
Definition: afopr_Clover_QWS_dd-tmpl.h:1673
Bridge::vout
BridgeIO vout
Definition: bridgeIO.cpp:512
AFopr_Clover_QWS_dd::mult_D
void mult_D(AFIELD &, const AFIELD &)
standard D mult.
Definition: afopr_Clover_QWS_dd-tmpl.h:1346
convert_spinor
void convert_spinor(INDEX &index, FIELD &v, const Field &w)
Definition: afield-inc.h:187