Bridge++  Ver. 2.0.2
fopr_Wilson_impl.cpp
Go to the documentation of this file.
1 
14 #include "fopr_Wilson_impl.h"
15 
16 #include "Fopr/fopr_thread-inc.h"
17 
18 #if defined USE_GROUP_SU3
20 #elif defined USE_GROUP_SU2
22 #elif defined USE_GROUP_SU_N
24 #endif
25 
27 
28 namespace Imp {
29 #ifdef USE_FACTORY_AUTOREGISTER
30  namespace {
31  bool init = Fopr_Wilson::register_factory();
32  }
33 #endif
34 
35  const std::string Fopr_Wilson::class_name = "Imp::Fopr_Wilson";
36 
37 //====================================================================
38  void Fopr_Wilson::init(const Parameters& params)
39  {
41 
43 
44  vout.general(m_vl, "%s: construction\n", class_name.c_str());
46 
47  setup();
48 
49  std::string repr;
50  if (!params.fetch_string("gamma_matrix_type", repr)) {
51  m_repr = repr;
52  } else {
53  m_repr = "Dirac"; // default gamma-matrix type
54  vout.general(m_vl, "gamma_matrix_type is not given: defalt = %s\n",
55  m_repr.c_str());
56  }
57  if ((m_repr != "Dirac") && (m_repr != "Chiral")) {
58  vout.crucial("Error at %s: unsupported gamma-matrix type: %s\n",
59  class_name.c_str(), m_repr.c_str());
60  exit(EXIT_FAILURE);
61  }
62 
63  set_parameters(params);
64 
66  vout.general(m_vl, "%s: construction finished.\n",
67  class_name.c_str());
68  }
69 
70 
71 //====================================================================
72  void Fopr_Wilson::init(const std::string repr)
73  {
75 
77 
78  vout.general(m_vl, "%s: construction (obsolete)\n",
79  class_name.c_str());
81 
82  setup();
83 
84  m_repr = repr;
85  vout.general(m_vl, "gamma-matrix type is set to %s\n",
86  m_repr.c_str());
87 
89  vout.general(m_vl, "%s: construction finished.\n",
90  class_name.c_str());
91  }
92 
93 
94 //====================================================================
96  {
97  // check_Nc();
98  std::string imple_gauge = imple_Nc();
99  vout.general(m_vl, "Gauge group implementation: %s\n",
100  imple_gauge.c_str());
101 
104  m_Nvc = 2 * m_Nc;
105  m_Ndf = 2 * m_Nc * m_Nc;
106 
111 
114  m_boundary.resize(m_Ndim);
116 
117  m_U = 0;
118 
119  const int Nvx = m_Nvc * 2 * m_Ny * m_Nz * m_Nt;
120  vcp1_xp = new double[Nvx];
121  vcp2_xp = new double[Nvx];
122  vcp1_xm = new double[Nvx];
123  vcp2_xm = new double[Nvx];
124 
125  const int Nvy = m_Nvc * 2 * m_Nx * m_Nz * m_Nt;
126  vcp1_yp = new double[Nvy];
127  vcp2_yp = new double[Nvy];
128  vcp1_ym = new double[Nvy];
129  vcp2_ym = new double[Nvy];
130 
131  const int Nvz = m_Nvc * 2 * m_Nx * m_Ny * m_Nt;
132  vcp1_zp = new double[Nvz];
133  vcp2_zp = new double[Nvz];
134  vcp1_zm = new double[Nvz];
135  vcp2_zm = new double[Nvz];
136 
137  const int Nvt = m_Nvc * 2 * m_Nx * m_Ny * m_Nz;
138  vcp1_tp = new double[Nvt];
139  vcp2_tp = new double[Nvt];
140  vcp1_tm = new double[Nvt];
141  vcp2_tm = new double[Nvt];
142 
143  m_w1.reset(m_Nvc * m_Nd, m_Nvol, 1);
144  m_w2.reset(m_Nvc * m_Nd, m_Nvol, 1);
145  }
146 
147 
148 //====================================================================
150  {
151  delete[] vcp1_xp;
152  delete[] vcp2_xp;
153  delete[] vcp1_xm;
154  delete[] vcp2_xm;
155 
156  delete[] vcp1_yp;
157  delete[] vcp2_yp;
158  delete[] vcp1_ym;
159  delete[] vcp2_ym;
160 
161  delete[] vcp1_zp;
162  delete[] vcp2_zp;
163  delete[] vcp1_zm;
164  delete[] vcp2_zm;
165 
166  delete[] vcp1_tp;
167  delete[] vcp2_tp;
168  delete[] vcp1_tm;
169  delete[] vcp2_tm;
170  }
171 
172 
173 //====================================================================
175  {
176 #pragma omp barrier
177  int ith = ThreadManager::get_thread_id();
178  std::string vlevel;
179  if (!params.fetch_string("verbose_level", vlevel)) {
180  if (ith == 0) m_vl = vout.set_verbose_level(vlevel);
181  }
182 #pragma omp barrier
183 
184  //- fetch and check input parameters
185  double kappa;
186  std::vector<int> bc;
187 
188  int err = 0;
189  err += params.fetch_double("hopping_parameter", kappa);
190  err += params.fetch_int_vector("boundary_condition", bc);
191 
192  if (err) {
193  vout.crucial("Error at %s: input parameter not found.\n",
194  class_name.c_str());
195  exit(EXIT_FAILURE);
196  }
197 
198  set_parameters(kappa, bc);
199  }
200 
201 
202 //====================================================================
203  void Fopr_Wilson::set_parameters(const double kappa,
204  const std::vector<int> bc)
205  {
206  assert(bc.size() == m_Ndim);
207 
208 #pragma omp barrier
209 
210  int ith = ThreadManager::get_thread_id();
211  if (ith == 0) {
212  m_kappa = kappa;
213  m_boundary = bc;
214 
215  for (int idir = 0; idir < m_Ndim; ++idir) {
216  m_boundary_each_node[idir] = 1.0;
217  if (Communicator::ipe(idir) == 0) {
218  m_boundary_each_node[idir] = m_boundary[idir];
219  }
220  }
221  }
222 #pragma omp barrier
223 
224  vout.general(m_vl, "%s: input parameters\n", class_name.c_str());
225  vout.general(m_vl, " gamma-matrix type = %s\n", m_repr.c_str());
226  vout.general(m_vl, " kappa = %12.8f\n", m_kappa);
227  for (int mu = 0; mu < m_Ndim; ++mu) {
228  vout.general(m_vl, " boundary[%d] = %2d\n", mu, m_boundary[mu]);
229  }
230  }
231 
232 
233 //====================================================================
235  {
236  params.set_double("hopping_parameter", m_kappa);
237  params.set_int_vector("boundary_condition", m_boundary);
238  params.set_string("gamma_matrix_type", m_repr);
239 
240  params.set_string("verbose_level", vout.get_verbose_level(m_vl));
241  }
242 
243 
244 //====================================================================
246  {
247 #pragma omp barrier
248  m_U = (Field_G *)U;
249 #pragma omp barrier
250  }
251 
252 
253 //====================================================================
254  void Fopr_Wilson::set_mode(const std::string mode)
255  {
256 #pragma omp barrier
257  int ith = ThreadManager::get_thread_id();
258  if (ith == 0) m_mode = mode;
259 #pragma omp barrier
260  }
261 
262 
263 //====================================================================
264  void Fopr_Wilson::mult(Field& v, const Field& w)
265  {
266  if (m_mode == "D") {
267  D(v, w);
268  } else if (m_mode == "Ddag") {
269  Ddag(v, w);
270  } else if (m_mode == "DdagD") {
271  DdagD(v, w);
272  } else if (m_mode == "DDdag") {
273  DDdag(v, w);
274  } else if (m_mode == "H") {
275  H(v, w);
276  } else {
277  vout.crucial(m_vl, "Error at %s: undefined mode: %s\n",
278  class_name.c_str(), m_mode.c_str());
279  exit(EXIT_FAILURE);
280  }
281  }
282 
283 
284 //====================================================================
285  void Fopr_Wilson::mult_dag(Field& v, const Field& w)
286  {
287  if (m_mode == "D") {
288  Ddag(v, w);
289  } else if (m_mode == "Ddag") {
290  D(v, w);
291  } else if (m_mode == "DdagD") {
292  DdagD(v, w);
293  } else if (m_mode == "DDdag") {
294  DDdag(v, w);
295  } else if (m_mode == "H") {
296  H(v, w);
297  } else {
298  vout.crucial(m_vl, "Error at %s: undefined mode: %s\n",
299  class_name.c_str(), m_mode.c_str());
300  exit(EXIT_FAILURE);
301  }
302  }
303 
304 
305 //====================================================================
306  void Fopr_Wilson::mult(Field& v, const Field& w,
307  const std::string mode)
308  {
309  if (mode == "D") {
310  D(v, w);
311  } else if (mode == "Ddag") {
312  Ddag(v, w);
313  } else if (mode == "DdagD") {
314  DdagD(v, w);
315  } else if (mode == "DDdag") {
316  DDdag(v, w);
317  } else if (mode == "H") {
318  H(v, w);
319  } else {
320  vout.crucial(m_vl, "Error at %s: undefined mode: %s\n",
321  class_name.c_str(), mode.c_str());
322  exit(EXIT_FAILURE);
323  }
324  }
325 
326 
327 //====================================================================
328  void Fopr_Wilson::mult_dag(Field& v, const Field& w,
329  const std::string mode)
330  {
331  if (mode == "D") {
332  Ddag(v, w);
333  } else if (mode == "Ddag") {
334  D(v, w);
335  } else if (mode == "DdagD") {
336  DdagD(v, w);
337  } else if (mode == "DDdag") {
338  DDdag(v, w);
339  } else if (mode == "H") {
340  H(v, w);
341  } else {
342  vout.crucial(m_vl, "Error at %s: undefined mode: %s\n",
343  class_name.c_str(), mode.c_str());
344  exit(EXIT_FAILURE);
345  }
346  }
347 
348 
349 //====================================================================
350  void Fopr_Wilson::mult_up(const int mu, Field& v, const Field& w)
351  {
352  if (mu == 0) {
353  mult_xp(v, w);
354  } else if (mu == 1) {
355  mult_yp(v, w);
356  } else if (mu == 2) {
357  mult_zp(v, w);
358  } else if (mu == 3) {
359  if (m_repr == "Dirac") {
360  mult_tp_dirac(v, w);
361  } else {
362  mult_tp_chiral(v, w);
363  }
364  } else {
365  vout.crucial(m_vl, "Error at %s::mult_up: illegal mu=%d\n",
366  class_name.c_str(), mu);
367  exit(EXIT_FAILURE);
368  }
369  }
370 
371 
372 //====================================================================
373  void Fopr_Wilson::mult_dn(const int mu, Field& v, const Field& w)
374  {
375  if (mu == 0) {
376  mult_xm(v, w);
377  } else if (mu == 1) {
378  mult_ym(v, w);
379  } else if (mu == 2) {
380  mult_zm(v, w);
381  } else if (mu == 3) {
382  if (m_repr == "Dirac") {
383  mult_tm_dirac(v, w);
384  } else {
385  mult_tm_chiral(v, w);
386  }
387  } else {
388  vout.crucial(m_vl, "Error at %s::mult_dn: illegal mu=%d\n",
389  class_name.c_str(), mu);
390  exit(EXIT_FAILURE);
391  }
392  }
393 
394 
395 //====================================================================
396  void Fopr_Wilson::mult_gm5(Field& v, const Field& w)
397  {
398  if (m_repr == "Dirac") {
399  mult_gm5_dirac(v, w);
400  } else if (m_repr == "Chiral") {
401  mult_gm5_chiral(v, w);
402  }
403  }
404 
405 
406 //====================================================================
407  void Fopr_Wilson::D(Field& v, const Field& w)
408  {
409  if (m_repr == "Dirac") {
410  D_ex_dirac(v, 0, w, 0);
411  //D_ex_dirac_alt(v, 0, w, 0);
412  } else if (m_repr == "Chiral") {
413  D_ex_chiral(v, 0, w, 0);
414  //D_ex_chiral_alt(v, 0, w, 0);
415  }
416  }
417 
418 
419 //====================================================================
420  void Fopr_Wilson::D_ex(Field& v, const int ex1,
421  const Field& w, const int ex2)
422  {
423  if (m_repr == "Dirac") {
424  D_ex_dirac(v, ex1, w, ex2);
425  } else if (m_repr == "Chiral") {
426  D_ex_chiral(v, ex1, w, ex2);
427  }
428  }
429 
430 
431 //====================================================================
432  void Fopr_Wilson::Ddag(Field& v, const Field& w)
433  {
434  mult_gm5(v, w);
435  D(m_w1, v);
436  mult_gm5(v, m_w1);
437  }
438 
439 
440 //====================================================================
441  void Fopr_Wilson::DdagD(Field& v, const Field& w)
442  {
443  D(m_w1, w);
444  mult_gm5(v, m_w1);
445  D(m_w1, v);
446  mult_gm5(v, m_w1);
447  }
448 
449 
450 //====================================================================
451  void Fopr_Wilson::DDdag(Field& v, const Field& w)
452  {
453  mult_gm5(m_w1, w);
454  D(v, m_w1);
455  mult_gm5(m_w1, v);
456  D(v, m_w1);
457  }
458 
459 
460 //====================================================================
461  void Fopr_Wilson::H(Field& v, const Field& w)
462  {
463  D(m_w1, w);
464  mult_gm5(v, m_w1);
465  }
466 
467 
468 //====================================================================
469  void Fopr_Wilson::D_ex_dirac(Field& v, const int ex1,
470  const Field& w, const int ex2)
471  {
472  const int Ninvol = m_Nvc * m_Nd * m_Nvol;
473  double *vp = v.ptr(Ninvol * ex1);
474  const double *wp = w.ptr(Ninvol * ex2);
475  const double *up = m_U->ptr(0);
476 
477  int ith, nth, is, ns;
478  set_threadtask(ith, nth, is, ns, m_Nvol);
479 
480  int Nvc2 = m_Nvc * 2;
481  int Nvcd = m_Nvc * m_Nd;
482 
483  int Nxy = m_Nx * m_Ny;
484  int Nxyz = m_Nx * m_Ny * m_Nz;
485 
486 #pragma omp barrier
487 
488  for (int site = is; site < ns; ++site) {
489  int ix = site % m_Nx;
490  int iyzt = site / m_Nx;
491  int iy = iyzt % m_Ny;
492  int izt = iyzt / m_Ny;
493  int iz = izt % m_Nz;
494  int it = izt / m_Nz;
495 
496  int ixy = ix + m_Nx * iy;
497  int ixyz = ixy + Nxy * iz;
498  int ixyt = ixy + Nxy * it;
499  int ixzt = ix + m_Nx * izt;
500 
501  int in = Nvcd * site;
502 
503  if (ix == 0) {
504  int ix1 = Nvc2 * iyzt;
505  int ix2 = ix1 + m_Nvc;
506  double bc2 = m_boundary_each_node[0];
507  double vt1[NVC], vt2[NVC];
508  set_sp2_xp(vt1, vt2, &wp[in], m_Nc);
509  for (int ivc = 0; ivc < NVC; ++ivc) {
510  vcp1_xp[ivc + ix1] = bc2 * vt1[ivc];
511  vcp1_xp[ivc + ix2] = bc2 * vt2[ivc];
512  }
513  }
514 
515  if (ix == m_Nx - 1) {
516  int ig = m_Ndf * (site + 0 * m_Nvol);
517  int ix1 = Nvc2 * iyzt;
518  int ix2 = ix1 + NVC;
519  double vt1[NVC], vt2[NVC];
520  set_sp2_xm(vt1, vt2, &wp[in], m_Nc);
521  for (int ic = 0; ic < m_Nc; ++ic) {
522  int ic2 = 2 * ic;
523  int icr = 2 * ic;
524  int ici = 2 * ic + 1;
525  vcp1_xm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
526  vcp1_xm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
527  vcp1_xm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
528  vcp1_xm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
529  }
530  }
531 
532  if (iy == 0) {
533  int ix1 = Nvc2 * ixzt;
534  int ix2 = ix1 + NVC;
535  double bc2 = m_boundary_each_node[1];
536  double vt1[NVC], vt2[NVC];
537  set_sp2_yp(vt1, vt2, &wp[in], m_Nc);
538  for (int ivc = 0; ivc < NVC; ++ivc) {
539  vcp1_yp[ivc + ix1] = bc2 * vt1[ivc];
540  vcp1_yp[ivc + ix2] = bc2 * vt2[ivc];
541  }
542  }
543 
544  if (iy == m_Ny - 1) {
545  int ig = m_Ndf * (site + 1 * m_Nvol);
546  int ix1 = Nvc2 * ixzt;
547  int ix2 = ix1 + NVC;
548  double vt1[NVC], vt2[NVC];
549  set_sp2_ym(vt1, vt2, &wp[in], m_Nc);
550  for (int ic = 0; ic < m_Nc; ++ic) {
551  int ic2 = 2 * ic;
552  int icr = 2 * ic;
553  int ici = 2 * ic + 1;
554  vcp1_ym[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
555  vcp1_ym[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
556  vcp1_ym[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
557  vcp1_ym[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
558  }
559  }
560 
561  if (iz == 0) {
562  int ix1 = Nvc2 * ixyt;
563  int ix2 = ix1 + NVC;
564  double bc2 = m_boundary_each_node[2];
565  double vt1[NVC], vt2[NVC];
566  set_sp2_zp(vt1, vt2, &wp[in], m_Nc);
567  for (int ivc = 0; ivc < NVC; ++ivc) {
568  vcp1_zp[ivc + ix1] = bc2 * vt1[ivc];
569  vcp1_zp[ivc + ix2] = bc2 * vt2[ivc];
570  }
571  }
572 
573  if (iz == m_Nz - 1) {
574  int ig = m_Ndf * (site + 2 * m_Nvol);
575  int ix1 = Nvc2 * ixyt;
576  int ix2 = ix1 + NVC;
577  double vt1[NVC], vt2[NVC];
578  set_sp2_zm(vt1, vt2, &wp[in], m_Nc);
579  for (int ic = 0; ic < m_Nc; ++ic) {
580  int ic2 = 2 * ic;
581  int icr = 2 * ic;
582  int ici = 2 * ic + 1;
583  vcp1_zm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
584  vcp1_zm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
585  vcp1_zm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
586  vcp1_zm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
587  }
588  }
589 
590  if (it == 0) {
591  int ix1 = Nvc2 * ixyz;
592  int ix2 = ix1 + NVC;
593  double bc2 = m_boundary_each_node[3];
594  double vt1[NVC], vt2[NVC];
595  set_sp2_tp_dirac(vt1, vt2, &wp[in], m_Nc);
596  for (int ivc = 0; ivc < NVC; ++ivc) {
597  vcp1_tp[ivc + ix1] = bc2 * vt1[ivc];
598  vcp1_tp[ivc + ix2] = bc2 * vt2[ivc];
599  }
600  }
601 
602  if (it == m_Nt - 1) {
603  int ig = m_Ndf * (site + 3 * m_Nvol);
604  int ix1 = Nvc2 * ixyz;
605  int ix2 = ix1 + NVC;
606  double vt1[NVC], vt2[NVC];
607  set_sp2_tm_dirac(vt1, vt2, &wp[in], m_Nc);
608  for (int ic = 0; ic < m_Nc; ++ic) {
609  int ic2 = 2 * ic;
610  int icr = 2 * ic;
611  int ici = 2 * ic + 1;
612  vcp1_tm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
613  vcp1_tm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
614  vcp1_tm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
615  vcp1_tm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
616  }
617  }
618  }
619 
620 #pragma omp barrier
621 
622 #pragma omp master
623  {
624  int Nvx = m_Nvc * 2 * m_Ny * m_Nz * m_Nt;
625  Communicator::exchange(Nvx, vcp2_xp, vcp1_xp, 0, 1, 1);
626  Communicator::exchange(Nvx, vcp2_xm, vcp1_xm, 0, -1, 2);
627 
628  int Nvy = m_Nvc * 2 * m_Nx * m_Nz * m_Nt;
629  Communicator::exchange(Nvy, vcp2_yp, vcp1_yp, 1, 1, 3);
630  Communicator::exchange(Nvy, vcp2_ym, vcp1_ym, 1, -1, 4);
631 
632  int Nvz = m_Nvc * 2 * m_Nx * m_Ny * m_Nt;
633  Communicator::exchange(Nvz, vcp2_zp, vcp1_zp, 2, 1, 5);
634  Communicator::exchange(Nvz, vcp2_zm, vcp1_zm, 2, -1, 6);
635 
636  int Nvt = m_Nvc * 2 * m_Nx * m_Ny * m_Nz;
637  Communicator::exchange(Nvt, vcp2_tp, vcp1_tp, 3, 1, 7);
638  Communicator::exchange(Nvt, vcp2_tm, vcp1_tm, 3, -1, 8);
639  }
640 #pragma omp barrier
641 
642  for (int site = is; site < ns; ++site) {
643  int ix = site % m_Nx;
644  int iyzt = site / m_Nx;
645  int iy = iyzt % m_Ny;
646  int izt = iyzt / m_Ny;
647  int iz = izt % m_Nz;
648  int it = izt / m_Nz;
649 
650  int ixy = site % Nxy;
651  int ixyz = ixy + Nxy * iz;
652  int ixyt = ixy + Nxy * it;
653  int ixzt = ix + m_Nx * izt;
654 
655  int iv = Nvcd * site;
656 
657  for (int ivcd = 0; ivcd < Nvcd; ++ivcd) {
658  vp[ivcd + iv] = 0.0;
659  }
660 
661  if (ix < m_Nx - 1) {
662  int nei = ix + 1 + m_Nx * iyzt;
663  int in = Nvcd * nei;
664  int ig = m_Ndf * (site + 0 * m_Nvol);
665  double vt1[NVC], vt2[NVC];
666  set_sp2_xp(vt1, vt2, &wp[in], m_Nc);
667  for (int ic = 0; ic < m_Nc; ++ic) {
668  int ic2 = ic * NVC;
669  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
670  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
671  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
672  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
673  set_sp4_xp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
674  }
675  } else {
676  int ix1 = Nvc2 * iyzt;
677  int ix2 = ix1 + NVC;
678  int ig = m_Ndf * (site + 0 * m_Nvol);
679  for (int ic = 0; ic < m_Nc; ++ic) {
680  int ic2 = ic * NVC;
681  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_xp[ix1], m_Nc);
682  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_xp[ix1], m_Nc);
683  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_xp[ix2], m_Nc);
684  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_xp[ix2], m_Nc);
685  set_sp4_xp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
686  }
687  }
688 
689  if (ix > 0) {
690  int nei = ix - 1 + m_Nx * iyzt;
691  int ig = m_Ndf * (nei + 0 * m_Nvol);
692  int in = Nvcd * nei;
693  double vt1[NVC], vt2[NVC];
694  set_sp2_xm(vt1, vt2, &wp[in], m_Nc);
695  for (int ic = 0; ic < m_Nc; ++ic) {
696  int ic2 = 2 * ic;
697  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
698  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
699  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
700  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
701  set_sp4_xm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
702  }
703  } else {
704  int ix1 = Nvc2 * iyzt;
705  int ix2 = ix1 + NVC;
706  double bc2 = m_boundary_each_node[0];
707  for (int ic = 0; ic < m_Nc; ++ic) {
708  double wt1r = bc2 * vcp2_xm[2 * ic + ix1];
709  double wt1i = bc2 * vcp2_xm[2 * ic + 1 + ix1];
710  double wt2r = bc2 * vcp2_xm[2 * ic + ix2];
711  double wt2i = bc2 * vcp2_xm[2 * ic + 1 + ix2];
712  set_sp4_xm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
713  }
714  }
715 
716  if (iy < m_Ny - 1) {
717  int nei = ix + m_Nx * (iy + 1 + m_Ny * izt);
718  int ig = m_Ndf * (site + 1 * m_Nvol);
719  int in = Nvcd * nei;
720  double vt1[NVC], vt2[NVC];
721  set_sp2_yp(vt1, vt2, &wp[in], m_Nc);
722  for (int ic = 0; ic < m_Nc; ++ic) {
723  int ic2 = ic * NVC;
724  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
725  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
726  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
727  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
728  set_sp4_yp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
729  }
730  } else {
731  int ig = m_Ndf * (site + 1 * m_Nvol);
732  int ix1 = Nvc2 * ixzt;
733  int ix2 = ix1 + NVC;
734  for (int ic = 0; ic < m_Nc; ++ic) {
735  int ic2 = ic * NVC;
736  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_yp[ix1], m_Nc);
737  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_yp[ix1], m_Nc);
738  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_yp[ix2], m_Nc);
739  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_yp[ix2], m_Nc);
740  set_sp4_yp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
741  }
742  }
743 
744  if (iy > 0) {
745  int nei = ix + m_Nx * (iy - 1 + m_Ny * izt);
746  int ig = m_Ndf * (nei + 1 * m_Nvol);
747  int in = Nvcd * nei;
748  double vt1[NVC], vt2[NVC];
749  set_sp2_ym(vt1, vt2, &wp[in], m_Nc);
750  for (int ic = 0; ic < m_Nc; ++ic) {
751  int ic2 = 2 * ic;
752  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
753  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
754  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
755  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
756  set_sp4_ym(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
757  }
758  } else {
759  int ix1 = Nvc2 * ixzt;
760  int ix2 = ix1 + NVC;
761  double bc2 = m_boundary_each_node[1];
762  for (int ic = 0; ic < m_Nc; ++ic) {
763  double wt1r = bc2 * vcp2_ym[2 * ic + ix1];
764  double wt1i = bc2 * vcp2_ym[2 * ic + 1 + ix1];
765  double wt2r = bc2 * vcp2_ym[2 * ic + ix2];
766  double wt2i = bc2 * vcp2_ym[2 * ic + 1 + ix2];
767  set_sp4_ym(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
768  }
769  }
770 
771  if (iz < m_Nz - 1) {
772  int nei = ixy + Nxy * (iz + 1 + m_Nz * it);
773  int ig = m_Ndf * (site + 2 * m_Nvol);
774  int in = Nvcd * nei;
775  double vt1[NVC], vt2[NVC];
776  set_sp2_zp(vt1, vt2, &wp[in], m_Nc);
777  for (int ic = 0; ic < m_Nc; ++ic) {
778  int ic2 = ic * NVC;
779  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
780  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
781  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
782  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
783  set_sp4_zp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
784  }
785  } else {
786  int ig = m_Ndf * (site + 2 * m_Nvol);
787  int ix1 = Nvc2 * ixyt;
788  int ix2 = ix1 + NVC;
789  for (int ic = 0; ic < m_Nc; ++ic) {
790  int ic2 = ic * NVC;
791  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_zp[ix1], m_Nc);
792  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_zp[ix1], m_Nc);
793  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_zp[ix2], m_Nc);
794  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_zp[ix2], m_Nc);
795  set_sp4_zp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
796  }
797  }
798 
799  if (iz > 0) {
800  int nei = ixy + Nxy * (iz - 1 + m_Nz * it);
801  int ig = m_Ndf * (nei + 2 * m_Nvol);
802  int in = Nvcd * nei;
803  double vt1[NVC], vt2[NVC];
804  set_sp2_zm(vt1, vt2, &wp[in], m_Nc);
805  for (int ic = 0; ic < m_Nc; ++ic) {
806  int ic2 = 2 * ic;
807  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
808  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
809  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
810  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
811  set_sp4_zm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
812  }
813  } else {
814  int ix1 = Nvc2 * ixyt;
815  int ix2 = ix1 + NVC;
816  double bc2 = m_boundary_each_node[2];
817  for (int ic = 0; ic < m_Nc; ++ic) {
818  double wt1r = bc2 * vcp2_zm[2 * ic + ix1];
819  double wt1i = bc2 * vcp2_zm[2 * ic + 1 + ix1];
820  double wt2r = bc2 * vcp2_zm[2 * ic + ix2];
821  double wt2i = bc2 * vcp2_zm[2 * ic + 1 + ix2];
822  set_sp4_zm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
823  }
824  }
825 
826  if (it < m_Nt - 1) {
827  int nei = ixyz + Nxyz * (it + 1);
828  int ig = m_Ndf * (site + 3 * m_Nvol);
829  int in = Nvcd * nei;
830  double vt1[NVC], vt2[NVC];
831  set_sp2_tp_dirac(vt1, vt2, &wp[in], m_Nc);
832  for (int ic = 0; ic < m_Nc; ++ic) {
833  int ic2 = ic * NVC;
834  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
835  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
836  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
837  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
838  set_sp4_tp_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
839  }
840  } else {
841  int ig = m_Ndf * (site + 3 * m_Nvol);
842  int ix1 = Nvc2 * ixyz;
843  int ix2 = ix1 + NVC;
844  for (int ic = 0; ic < m_Nc; ++ic) {
845  int ic2 = ic * NVC;
846  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_tp[ix1], m_Nc);
847  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_tp[ix1], m_Nc);
848  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_tp[ix2], m_Nc);
849  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_tp[ix2], m_Nc);
850  set_sp4_tp_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
851  }
852  }
853 
854  if (it > 0) {
855  int nei = ixyz + Nxyz * (it - 1);
856  int ig = m_Ndf * (nei + 3 * m_Nvol);
857  int in = Nvcd * nei;
858  double vt1[NVC], vt2[NVC];
859  set_sp2_tm_dirac(vt1, vt2, &wp[in], m_Nc);
860  for (int ic = 0; ic < m_Nc; ++ic) {
861  int ic2 = 2 * ic;
862  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
863  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
864  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
865  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
866  set_sp4_tm_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
867  }
868  } else {
869  int ix1 = Nvc2 * ixyz;
870  int ix2 = ix1 + NVC;
871  double bc2 = m_boundary_each_node[3];
872  for (int ic = 0; ic < m_Nc; ++ic) {
873  int icr = 2 * ic;
874  int ici = 2 * ic + 1;
875  double wt1r = bc2 * vcp2_tm[icr + ix1];
876  double wt1i = bc2 * vcp2_tm[ici + ix1];
877  double wt2r = bc2 * vcp2_tm[icr + ix2];
878  double wt2i = bc2 * vcp2_tm[ici + ix2];
879  set_sp4_tm_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
880  }
881  }
882 
883  for (int ivcd = 0; ivcd < Nvcd; ++ivcd) {
884  vp[ivcd + iv] = -m_kappa * vp[ivcd + iv] + wp[ivcd + iv];
885  }
886  }
887 
888 #pragma omp barrier
889  }
890 
891 
892 //====================================================================
893  void Fopr_Wilson::D_ex_chiral(Field& v, const int ex1,
894  const Field& w, const int ex2)
895  {
896  const int Ninvol = m_Nvc * m_Nd * m_Nvol;
897  double *vp = v.ptr(Ninvol * ex1);
898  const double *wp = w.ptr(Ninvol * ex2);
899  const double *up = m_U->ptr(0);
900 
901  int ith, nth, is, ns;
902  set_threadtask(ith, nth, is, ns, m_Nvol);
903 
904  int Nvc2 = m_Nvc * 2;
905  int Nvcd = m_Nvc * m_Nd;
906 
907  int Nxy = m_Nx * m_Ny;
908  int Nxyz = m_Nx * m_Ny * m_Nz;
909 
910 #pragma omp barrier
911 
912  for (int site = is; site < ns; ++site) {
913  int ix = site % m_Nx;
914  int iyzt = site / m_Nx;
915  int iy = iyzt % m_Ny;
916  int izt = iyzt / m_Ny;
917  int iz = izt % m_Nz;
918  int it = izt / m_Nz;
919 
920  int ixy = ix + m_Nx * iy;
921  int ixyz = ixy + Nxy * iz;
922  int ixyt = ixy + Nxy * it;
923  int ixzt = ix + m_Nx * izt;
924 
925  int in = Nvcd * site;
926 
927  if (ix == 0) {
928  int ix1 = Nvc2 * iyzt;
929  int ix2 = ix1 + NVC;
930  double bc2 = m_boundary_each_node[0];
931  double vt1[NVC], vt2[NVC];
932  set_sp2_xp(vt1, vt2, &wp[in], m_Nc);
933  for (int ivc = 0; ivc < NVC; ++ivc) {
934  vcp1_xp[ivc + ix1] = bc2 * vt1[ivc];
935  vcp1_xp[ivc + ix2] = bc2 * vt2[ivc];
936  }
937  }
938 
939  if (ix == m_Nx - 1) {
940  int ig = m_Ndf * (site + 0 * m_Nvol);
941  int ix1 = Nvc2 * iyzt;
942  int ix2 = ix1 + NVC;
943  double vt1[NVC], vt2[NVC];
944  set_sp2_xm(vt1, vt2, &wp[in], m_Nc);
945  for (int ic = 0; ic < m_Nc; ++ic) {
946  int ic2 = 2 * ic;
947  int icr = 2 * ic;
948  int ici = 2 * ic + 1;
949  vcp1_xm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
950  vcp1_xm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
951  vcp1_xm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
952  vcp1_xm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
953  }
954  }
955 
956  if (iy == 0) {
957  int ix1 = Nvc2 * ixzt;
958  int ix2 = ix1 + NVC;
959  double bc2 = m_boundary_each_node[1];
960  double vt1[NVC], vt2[NVC];
961  set_sp2_yp(vt1, vt2, &wp[in], m_Nc);
962  for (int ivc = 0; ivc < NVC; ++ivc) {
963  vcp1_yp[ivc + ix1] = bc2 * vt1[ivc];
964  vcp1_yp[ivc + ix2] = bc2 * vt2[ivc];
965  }
966  }
967 
968  if (iy == m_Ny - 1) {
969  int ig = m_Ndf * (site + 1 * m_Nvol);
970  int ix1 = Nvc2 * ixzt;
971  int ix2 = ix1 + NVC;
972  double vt1[NVC], vt2[NVC];
973  set_sp2_ym(vt1, vt2, &wp[in], m_Nc);
974  for (int ic = 0; ic < m_Nc; ++ic) {
975  int ic2 = 2 * ic;
976  int icr = 2 * ic;
977  int ici = 2 * ic + 1;
978  vcp1_ym[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
979  vcp1_ym[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
980  vcp1_ym[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
981  vcp1_ym[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
982  }
983  }
984 
985  if (iz == 0) {
986  int ix1 = Nvc2 * ixyt;
987  int ix2 = ix1 + NVC;
988  double bc2 = m_boundary_each_node[2];
989  double vt1[NVC], vt2[NVC];
990  set_sp2_zp(vt1, vt2, &wp[in], m_Nc);
991  for (int ivc = 0; ivc < NVC; ++ivc) {
992  vcp1_zp[ivc + ix1] = bc2 * vt1[ivc];
993  vcp1_zp[ivc + ix2] = bc2 * vt2[ivc];
994  }
995  }
996 
997  if (iz == m_Nz - 1) {
998  int ig = m_Ndf * (site + 2 * m_Nvol);
999  int ix1 = Nvc2 * ixyt;
1000  int ix2 = ix1 + NVC;
1001  double vt1[NVC], vt2[NVC];
1002  set_sp2_zm(vt1, vt2, &wp[in], m_Nc);
1003  for (int ic = 0; ic < m_Nc; ++ic) {
1004  int ic2 = 2 * ic;
1005  int icr = 2 * ic;
1006  int ici = 2 * ic + 1;
1007  vcp1_zm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
1008  vcp1_zm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
1009  vcp1_zm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
1010  vcp1_zm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
1011  }
1012  }
1013 
1014  if (it == 0) {
1015  int ix1 = Nvc2 * ixyz;
1016  int ix2 = ix1 + NVC;
1017  double bc2 = m_boundary_each_node[3];
1018  double vt1[NVC], vt2[NVC];
1019  set_sp2_tp_chiral(vt1, vt2, &wp[in], m_Nc);
1020  for (int ivc = 0; ivc < NVC; ++ivc) {
1021  vcp1_tp[ivc + ix1] = bc2 * vt1[ivc];
1022  vcp1_tp[ivc + ix2] = bc2 * vt2[ivc];
1023  }
1024  }
1025 
1026  if (it == m_Nt - 1) {
1027  int ig = m_Ndf * (site + 3 * m_Nvol);
1028  int ix1 = Nvc2 * ixyz;
1029  int ix2 = ix1 + NVC;
1030  double vt1[NVC], vt2[NVC];
1031  set_sp2_tm_chiral(vt1, vt2, &wp[in], m_Nc);
1032  for (int ic = 0; ic < m_Nc; ++ic) {
1033  int ic2 = 2 * ic;
1034  int icr = 2 * ic;
1035  int ici = 2 * ic + 1;
1036  vcp1_tm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
1037  vcp1_tm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
1038  vcp1_tm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
1039  vcp1_tm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
1040  }
1041  }
1042  }
1043 
1044 #pragma omp barrier
1045 
1046 #pragma omp master
1047  {
1048  int Nvx = m_Nvc * 2 * m_Ny * m_Nz * m_Nt;
1049  Communicator::exchange(Nvx, vcp2_xp, vcp1_xp, 0, 1, 1);
1050  Communicator::exchange(Nvx, vcp2_xm, vcp1_xm, 0, -1, 2);
1051 
1052  int Nvy = m_Nvc * 2 * m_Nx * m_Nz * m_Nt;
1053  Communicator::exchange(Nvy, vcp2_yp, vcp1_yp, 1, 1, 3);
1054  Communicator::exchange(Nvy, vcp2_ym, vcp1_ym, 1, -1, 4);
1055 
1056  int Nvz = m_Nvc * 2 * m_Nx * m_Ny * m_Nt;
1057  Communicator::exchange(Nvz, vcp2_zp, vcp1_zp, 2, 1, 5);
1058  Communicator::exchange(Nvz, vcp2_zm, vcp1_zm, 2, -1, 6);
1059 
1060  int Nvt = m_Nvc * 2 * m_Nx * m_Ny * m_Nz;
1061  Communicator::exchange(Nvt, vcp2_tp, vcp1_tp, 3, 1, 7);
1062  Communicator::exchange(Nvt, vcp2_tm, vcp1_tm, 3, -1, 8);
1063  }
1064 #pragma omp barrier
1065 
1066  for (int site = is; site < ns; ++site) {
1067  int ix = site % m_Nx;
1068  int iyzt = site / m_Nx;
1069  int iy = iyzt % m_Ny;
1070  int izt = iyzt / m_Ny;
1071  int iz = izt % m_Nz;
1072  int it = izt / m_Nz;
1073 
1074  int ixy = site % Nxy;
1075  int ixyz = ixy + Nxy * iz;
1076  int ixyt = ixy + Nxy * it;
1077  int ixzt = ix + m_Nx * izt;
1078 
1079  int iv = Nvcd * site;
1080 
1081  for (int ivcd = 0; ivcd < Nvcd; ++ivcd) {
1082  vp[ivcd + iv] = 0.0;
1083  }
1084 
1085  if (ix < m_Nx - 1) {
1086  int nei = ix + 1 + m_Nx * iyzt;
1087  int in = Nvcd * nei;
1088  int ig = m_Ndf * (site + 0 * m_Nvol);
1089  double vt1[NVC], vt2[NVC];
1090  set_sp2_xp(vt1, vt2, &wp[in], m_Nc);
1091  for (int ic = 0; ic < m_Nc; ++ic) {
1092  int ic2 = ic * NVC;
1093  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
1094  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
1095  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
1096  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
1097  set_sp4_xp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1098  }
1099  } else {
1100  int ix1 = Nvc2 * iyzt;
1101  int ix2 = ix1 + NVC;
1102  int ig = m_Ndf * (site + 0 * m_Nvol);
1103  for (int ic = 0; ic < m_Nc; ++ic) {
1104  int ic2 = ic * NVC;
1105  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_xp[ix1], m_Nc);
1106  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_xp[ix1], m_Nc);
1107  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_xp[ix2], m_Nc);
1108  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_xp[ix2], m_Nc);
1109  set_sp4_xp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1110  }
1111  }
1112 
1113  if (ix > 0) {
1114  int nei = ix - 1 + m_Nx * iyzt;
1115  int ig = m_Ndf * (nei + 0 * m_Nvol);
1116  int in = Nvcd * nei;
1117  double vt1[NVC], vt2[NVC];
1118  set_sp2_xm(vt1, vt2, &wp[in], m_Nc);
1119  for (int ic = 0; ic < m_Nc; ++ic) {
1120  int ic2 = 2 * ic;
1121  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
1122  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
1123  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
1124  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
1125  set_sp4_xm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1126  }
1127  } else {
1128  int ix1 = Nvc2 * iyzt;
1129  int ix2 = ix1 + NVC;
1130  double bc2 = m_boundary_each_node[0];
1131  for (int ic = 0; ic < m_Nc; ++ic) {
1132  double wt1r = bc2 * vcp2_xm[2 * ic + ix1];
1133  double wt1i = bc2 * vcp2_xm[2 * ic + 1 + ix1];
1134  double wt2r = bc2 * vcp2_xm[2 * ic + ix2];
1135  double wt2i = bc2 * vcp2_xm[2 * ic + 1 + ix2];
1136  set_sp4_xm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1137  }
1138  }
1139 
1140  if (iy < m_Ny - 1) {
1141  int nei = ix + m_Nx * (iy + 1 + m_Ny * izt);
1142  int ig = m_Ndf * (site + 1 * m_Nvol);
1143  int in = Nvcd * nei;
1144  double vt1[NVC], vt2[NVC];
1145  set_sp2_yp(vt1, vt2, &wp[in], m_Nc);
1146  for (int ic = 0; ic < m_Nc; ++ic) {
1147  int ic2 = ic * NVC;
1148  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
1149  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
1150  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
1151  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
1152  set_sp4_yp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1153  }
1154  } else {
1155  int ig = m_Ndf * (site + 1 * m_Nvol);
1156  int ix1 = Nvc2 * ixzt;
1157  int ix2 = ix1 + NVC;
1158  for (int ic = 0; ic < m_Nc; ++ic) {
1159  int ic2 = ic * NVC;
1160  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_yp[ix1], m_Nc);
1161  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_yp[ix1], m_Nc);
1162  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_yp[ix2], m_Nc);
1163  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_yp[ix2], m_Nc);
1164  set_sp4_yp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1165  }
1166  }
1167 
1168  if (iy > 0) {
1169  int nei = ix + m_Nx * (iy - 1 + m_Ny * izt);
1170  int ig = m_Ndf * (nei + 1 * m_Nvol);
1171  int in = Nvcd * nei;
1172  double vt1[NVC], vt2[NVC];
1173  set_sp2_ym(vt1, vt2, &wp[in], m_Nc);
1174  for (int ic = 0; ic < m_Nc; ++ic) {
1175  int ic2 = 2 * ic;
1176  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
1177  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
1178  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
1179  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
1180  set_sp4_ym(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1181  }
1182  } else {
1183  int ix1 = Nvc2 * ixzt;
1184  int ix2 = ix1 + NVC;
1185  double bc2 = m_boundary_each_node[1];
1186  for (int ic = 0; ic < m_Nc; ++ic) {
1187  double wt1r = bc2 * vcp2_ym[2 * ic + ix1];
1188  double wt1i = bc2 * vcp2_ym[2 * ic + 1 + ix1];
1189  double wt2r = bc2 * vcp2_ym[2 * ic + ix2];
1190  double wt2i = bc2 * vcp2_ym[2 * ic + 1 + ix2];
1191  set_sp4_ym(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1192  }
1193  }
1194 
1195  if (iz < m_Nz - 1) {
1196  int nei = ixy + Nxy * (iz + 1 + m_Nz * it);
1197  int ig = m_Ndf * (site + 2 * m_Nvol);
1198  int in = Nvcd * nei;
1199  double vt1[NVC], vt2[NVC];
1200  set_sp2_zp(vt1, vt2, &wp[in], m_Nc);
1201  for (int ic = 0; ic < m_Nc; ++ic) {
1202  int ic2 = ic * NVC;
1203  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
1204  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
1205  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
1206  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
1207  set_sp4_zp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1208  }
1209  } else {
1210  int ig = m_Ndf * (site + 2 * m_Nvol);
1211  int ix1 = Nvc2 * ixyt;
1212  int ix2 = ix1 + NVC;
1213  for (int ic = 0; ic < m_Nc; ++ic) {
1214  int ic2 = ic * NVC;
1215  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_zp[ix1], m_Nc);
1216  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_zp[ix1], m_Nc);
1217  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_zp[ix2], m_Nc);
1218  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_zp[ix2], m_Nc);
1219  set_sp4_zp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1220  }
1221  }
1222 
1223  if (iz > 0) {
1224  int nei = ixy + Nxy * (iz - 1 + m_Nz * it);
1225  int ig = m_Ndf * (nei + 2 * m_Nvol);
1226  int in = Nvcd * nei;
1227  double vt1[NVC], vt2[NVC];
1228  set_sp2_zm(vt1, vt2, &wp[in], m_Nc);
1229  for (int ic = 0; ic < m_Nc; ++ic) {
1230  int ic2 = 2 * ic;
1231  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
1232  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
1233  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
1234  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
1235  set_sp4_zm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1236  }
1237  } else {
1238  int ix1 = Nvc2 * ixyt;
1239  int ix2 = ix1 + NVC;
1240  double bc2 = m_boundary_each_node[2];
1241  for (int ic = 0; ic < m_Nc; ++ic) {
1242  double wt1r = bc2 * vcp2_zm[2 * ic + ix1];
1243  double wt1i = bc2 * vcp2_zm[2 * ic + 1 + ix1];
1244  double wt2r = bc2 * vcp2_zm[2 * ic + ix2];
1245  double wt2i = bc2 * vcp2_zm[2 * ic + 1 + ix2];
1246  set_sp4_zm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1247  }
1248  }
1249 
1250  if (it < m_Nt - 1) {
1251  int nei = ixyz + Nxyz * (it + 1);
1252  int ig = m_Ndf * (site + 3 * m_Nvol);
1253  int in = Nvcd * nei;
1254  double vt1[NVC], vt2[NVC];
1255  set_sp2_tp_chiral(vt1, vt2, &wp[in], m_Nc);
1256  for (int ic = 0; ic < m_Nc; ++ic) {
1257  int ic2 = ic * NVC;
1258  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
1259  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
1260  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
1261  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
1262  set_sp4_tp_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1263  }
1264  } else {
1265  int ig = m_Ndf * (site + 3 * m_Nvol);
1266  int ix1 = Nvc2 * ixyz;
1267  int ix2 = ix1 + NVC;
1268  for (int ic = 0; ic < m_Nc; ++ic) {
1269  int ic2 = ic * NVC;
1270  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_tp[ix1], m_Nc);
1271  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_tp[ix1], m_Nc);
1272  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_tp[ix2], m_Nc);
1273  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_tp[ix2], m_Nc);
1274  set_sp4_tp_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1275  }
1276  }
1277 
1278  if (it > 0) {
1279  int nei = ixyz + Nxyz * (it - 1);
1280  int ig = m_Ndf * (nei + 3 * m_Nvol);
1281  int in = Nvcd * nei;
1282  double vt1[NVC], vt2[NVC];
1283  set_sp2_tm_chiral(vt1, vt2, &wp[in], m_Nc);
1284  for (int ic = 0; ic < m_Nc; ++ic) {
1285  int ic2 = 2 * ic;
1286  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
1287  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
1288  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
1289  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
1290  set_sp4_tm_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1291  }
1292  } else {
1293  int ix1 = Nvc2 * ixyz;
1294  int ix2 = ix1 + NVC;
1295  double bc2 = m_boundary_each_node[3];
1296  for (int ic = 0; ic < m_Nc; ++ic) {
1297  int icr = 2 * ic;
1298  int ici = 2 * ic + 1;
1299  double wt1r = bc2 * vcp2_tm[icr + ix1];
1300  double wt1i = bc2 * vcp2_tm[ici + ix1];
1301  double wt2r = bc2 * vcp2_tm[icr + ix2];
1302  double wt2i = bc2 * vcp2_tm[ici + ix2];
1303  set_sp4_tm_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1304  }
1305  }
1306 
1307  for (int ivcd = 0; ivcd < Nvcd; ++ivcd) {
1308  vp[ivcd + iv] = -m_kappa * vp[ivcd + iv] + wp[ivcd + iv];
1309  }
1310  }
1311 
1312 #pragma omp barrier
1313  }
1314 
1315 
1316 //====================================================================
1317  void Fopr_Wilson::D_ex_dirac_alt(Field& w, const int ex1,
1318  const Field& f, const int ex2)
1319  {
1320  clear(w);
1321  mult_xp(w, f);
1322  mult_xm(w, f);
1323  mult_yp(w, f);
1324  mult_ym(w, f);
1325  mult_zp(w, f);
1326  mult_zm(w, f);
1327  mult_tp_dirac(w, f);
1328  mult_tm_dirac(w, f);
1329  daypx(w, -m_kappa, f); // w = -m_kappa * w + f.
1330  }
1331 
1332 
1333 //====================================================================
1334  void Fopr_Wilson::D_ex_chiral_alt(Field& w, const int ex1,
1335  const Field& f, const int ex2)
1336  {
1337  clear(w);
1338  mult_xp(w, f);
1339  mult_xm(w, f);
1340  mult_yp(w, f);
1341  mult_ym(w, f);
1342  mult_zp(w, f);
1343  mult_zm(w, f);
1344  mult_tp_chiral(w, f);
1345  mult_tm_chiral(w, f);
1346  daypx(w, -m_kappa, f); // w = -m_kappa * w + f.
1347  }
1348 
1349 
1350 //====================================================================
1351  void Fopr_Wilson::mult_gm5p(const int mu, Field& v, const Field& w)
1352  {
1353  clear(m_w2);
1354  mult_up(mu, m_w2, w);
1355  mult_gm5(v, m_w2);
1356  }
1357 
1358 
1359 //====================================================================
1360  void Fopr_Wilson::proj_chiral(Field& w, const int ex1,
1361  const Field& v, const int ex2, const int ipm)
1362  {
1363  double fpm = 0.0;
1364 
1365  if (ipm == 1) {
1366  fpm = 1.0;
1367  } else if (ipm == -1) {
1368  fpm = -1.0;
1369  } else {
1370  vout.crucial(m_vl, "Error at %s: illegal chirality = %d\n", class_name.c_str(), ipm);
1371  exit(EXIT_FAILURE);
1372  }
1373 
1374  copy(m_w1, 0, v, ex2);
1375  mult_gm5(m_w2, m_w1);
1376  axpy(m_w1, 0, fpm, m_w2, 0);
1377  copy(w, ex1, m_w1, 0);
1378 
1379 #pragma omp barrier
1380  }
1381 
1382 
1383 //====================================================================
1385  {
1386  double *wp = w.ptr(0);
1387 
1388  int ith, nth, is, ns;
1389  set_threadtask(ith, nth, is, ns, m_Nvol);
1390 
1391  int Nvcd = m_Nvc * m_Nd;
1392 
1393 #pragma omp barrier
1394 
1395  for (int site = is; site < ns; ++site) {
1396  for (int ivcd = 0; ivcd < Nvcd; ++ivcd) {
1397  wp[ivcd + Nvcd * site] = 0.0;
1398  }
1399  }
1400 
1401 #pragma omp barrier
1402  }
1403 
1404 
1405 //====================================================================
1407  const double fac, const Field& w)
1408  {
1409  double *vp = v.ptr(0);
1410  const double *wp = w.ptr(0);
1411 
1412  int ith, nth, is, ns;
1413  set_threadtask(ith, nth, is, ns, m_Nvol);
1414 
1415  int Nvcd = m_Nvc * m_Nd;
1416 
1417 #pragma omp barrier
1418 
1419  for (int site = is; site < ns; ++site) {
1420  for (int ivcd = 0; ivcd < Nvcd; ++ivcd) {
1421  vp[ivcd + Nvcd * site]
1422  = fac * vp[ivcd + Nvcd * site] + wp[ivcd + Nvcd * site];
1423  }
1424  }
1425 
1426 #pragma omp barrier
1427  }
1428 
1429 
1430 //====================================================================
1432  {
1433  double *vp = v.ptr(0);
1434  const double *wp = w.ptr(0);
1435 
1436  int ith, nth, is, ns;
1437  set_threadtask(ith, nth, is, ns, m_Nvol);
1438 
1439  int Nvcd = m_Nvc * m_Nd;
1440 
1441 #pragma omp barrier
1442 
1443  for (int site = is; site < ns; ++site) {
1444  mult_gamma5_dirac(&vp[Nvcd * site], &wp[Nvcd * site], m_Nc);
1445  }
1446 
1447 #pragma omp barrier
1448  }
1449 
1450 
1451 //====================================================================
1453  {
1454  double *vp = v.ptr(0);
1455  const double *wp = w.ptr(0);
1456 
1457  int ith, nth, is, ns;
1458  set_threadtask(ith, nth, is, ns, m_Nvol);
1459 
1460  int Nvcd = m_Nvc * m_Nd;
1461 
1462 #pragma omp barrier
1463 
1464  for (int site = is; site < ns; ++site) {
1465  mult_gamma5_chiral(&vp[Nvcd * site], &wp[Nvcd * site], m_Nc);
1466  }
1467 
1468 #pragma omp barrier
1469  }
1470 
1471 
1472 //====================================================================
1473  void Fopr_Wilson::mult_xp(Field& v, const Field& w)
1474  {
1475  int idir = 0;
1476 
1477  int Nvc2 = m_Nvc * 2;
1478  int Nvcd = m_Nvc * m_Nd;
1479 
1480  double bc2 = m_boundary_each_node[idir];
1481 
1482  double *vp = v.ptr(0);
1483  const double *wp = w.ptr(0);
1484  const double *up = m_U->ptr(m_Ndf * m_Nvol * idir);
1485 
1486  int ith, nth, is, ns;
1487  set_threadtask(ith, nth, is, ns, m_Nvol);
1488 
1489 #pragma omp barrier
1490 
1491  for (int site = is; site < ns; ++site) {
1492  int ix = site % m_Nx;
1493  int iyzt = site / m_Nx;
1494  if (ix == 0) {
1495  int in = Nvcd * site;
1496  int ix1 = Nvc2 * iyzt;
1497  int ix2 = ix1 + NVC;
1498  double vt1[NVC], vt2[NVC];
1499  set_sp2_xp(vt1, vt2, &wp[in], m_Nc);
1500  for (int ivc = 0; ivc < NVC; ++ivc) {
1501  vcp1_xp[ivc + ix1] = bc2 * vt1[ivc];
1502  vcp1_xp[ivc + ix2] = bc2 * vt2[ivc];
1503  }
1504  }
1505  }
1506 
1507 #pragma omp barrier
1508 
1509 #pragma omp master
1510  {
1511  const int Nv = m_Nvc * 2 * m_Ny * m_Nz * m_Nt;
1512  Communicator::exchange(Nv, vcp2_xp, vcp1_xp, 0, 1, 1);
1513  }
1514 #pragma omp barrier
1515 
1516  for (int site = is; site < ns; ++site) {
1517  int ix = site % m_Nx;
1518  int iyzt = site / m_Nx;
1519  int nei = ix + 1 + m_Nx * iyzt;
1520  int iv = Nvcd * site;
1521  int ig = m_Ndf * site;
1522 
1523  if (ix < m_Nx - 1) {
1524  int in = Nvcd * nei;
1525  double vt1[NVC], vt2[NVC];
1526  set_sp2_xp(vt1, vt2, &wp[in], m_Nc);
1527  for (int ic = 0; ic < m_Nc; ++ic) {
1528  int ic2 = ic * NVC;
1529  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
1530  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
1531  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
1532  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
1533  set_sp4_xp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1534  }
1535  } else {
1536  int ix1 = Nvc2 * iyzt;
1537  int ix2 = ix1 + NVC;
1538  for (int ic = 0; ic < m_Nc; ++ic) {
1539  int ic2 = ic * NVC;
1540  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_xp[ix1], m_Nc);
1541  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_xp[ix1], m_Nc);
1542  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_xp[ix2], m_Nc);
1543  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_xp[ix2], m_Nc);
1544  set_sp4_xp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1545  }
1546  }
1547  }
1548 
1549 #pragma omp barrier
1550  }
1551 
1552 
1553 //====================================================================
1554  void Fopr_Wilson::mult_xm(Field& v, const Field& w)
1555  {
1556  int idir = 0;
1557 
1558  int Nvc2 = m_Nvc * 2;
1559  int Nvcd = m_Nvc * m_Nd;
1560 
1561  double bc2 = m_boundary_each_node[idir];
1562 
1563  double *vp = v.ptr(0);
1564  const double *wp = w.ptr(0);
1565  const double *up = m_U->ptr(m_Ndf * m_Nvol * idir);
1566 
1567  int ith, nth, is, ns;
1568  set_threadtask(ith, nth, is, ns, m_Nvol);
1569 
1570 #pragma omp barrier
1571 
1572  for (int site = is; site < ns; ++site) {
1573  int ix = site % m_Nx;
1574  int iyzt = site / m_Nx;
1575  if (ix == m_Nx - 1) {
1576  int in = Nvcd * site;
1577  int ig = m_Ndf * site;
1578  int ix1 = Nvc2 * iyzt;
1579  int ix2 = ix1 + NVC;
1580 
1581  double vt1[NVC], vt2[NVC];
1582  set_sp2_xm(vt1, vt2, &wp[in], m_Nc);
1583  for (int ic = 0; ic < m_Nc; ++ic) {
1584  int ic2 = 2 * ic;
1585  int icr = 2 * ic;
1586  int ici = 2 * ic + 1;
1587  vcp1_xm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
1588  vcp1_xm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
1589  vcp1_xm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
1590  vcp1_xm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
1591  }
1592  }
1593  }
1594 
1595 #pragma omp barrier
1596 
1597 #pragma omp master
1598  {
1599  const int Nv = m_Nvc * 2 * m_Ny * m_Nz * m_Nt;
1600  Communicator::exchange(Nv, vcp2_xm, vcp1_xm, 0, -1, 2);
1601  }
1602 #pragma omp barrier
1603 
1604  for (int site = is; site < ns; ++site) {
1605  int ix = site % m_Nx;
1606  int iyzt = site / m_Nx;
1607  int nei = ix - 1 + m_Nx * iyzt;
1608  int iv = Nvcd * site;
1609 
1610  if (ix > 0) {
1611  int ig = m_Ndf * nei;
1612  int in = Nvcd * nei;
1613  double vt1[NVC], vt2[NVC];
1614  set_sp2_xm(vt1, vt2, &wp[in], m_Nc);
1615  for (int ic = 0; ic < m_Nc; ++ic) {
1616  int ic2 = 2 * ic;
1617  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
1618  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
1619  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
1620  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
1621  set_sp4_xm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1622  }
1623  } else {
1624  int ix1 = Nvc2 * iyzt;
1625  int ix2 = ix1 + NVC;
1626  for (int ic = 0; ic < m_Nc; ++ic) {
1627  double wt1r = bc2 * vcp2_xm[2 * ic + ix1];
1628  double wt1i = bc2 * vcp2_xm[2 * ic + 1 + ix1];
1629  double wt2r = bc2 * vcp2_xm[2 * ic + ix2];
1630  double wt2i = bc2 * vcp2_xm[2 * ic + 1 + ix2];
1631  set_sp4_xm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1632  }
1633  }
1634  }
1635 
1636 #pragma omp barrier
1637  }
1638 
1639 
1640 //====================================================================
1641  void Fopr_Wilson::mult_yp(Field& v, const Field& w)
1642  {
1643  int idir = 1;
1644 
1645  int Nvc2 = m_Nvc * 2;
1646  int Nvcd = m_Nvc * m_Nd;
1647 
1648  double bc2 = m_boundary_each_node[idir];
1649 
1650  double *vp = v.ptr(0);
1651  const double *wp = w.ptr(0);
1652  const double *up = m_U->ptr(m_Ndf * m_Nvol * idir);
1653 
1654  int ith, nth, is, ns;
1655  set_threadtask(ith, nth, is, ns, m_Nvol);
1656 
1657 #pragma omp barrier
1658 
1659  for (int site = is; site < ns; ++site) {
1660  int ix = site % m_Nx;
1661  int iyzt = site / m_Nx;
1662  int iy = iyzt % m_Ny;
1663  int izt = iyzt / m_Ny;
1664  int ixzt = ix + m_Nx * izt;
1665  if (iy == 0) {
1666  int in = Nvcd * site;
1667  int ix1 = Nvc2 * ixzt;
1668  int ix2 = ix1 + NVC;
1669  double vt1[NVC], vt2[NVC];
1670  set_sp2_yp(vt1, vt2, &wp[in], m_Nc);
1671  for (int ivc = 0; ivc < NVC; ++ivc) {
1672  vcp1_yp[ivc + ix1] = bc2 * vt1[ivc];
1673  vcp1_yp[ivc + ix2] = bc2 * vt2[ivc];
1674  }
1675  }
1676  }
1677 
1678 #pragma omp barrier
1679 
1680 #pragma omp master
1681  {
1682  const int Nv = m_Nvc * 2 * m_Nx * m_Nz * m_Nt;
1683  Communicator::exchange(Nv, vcp2_yp, vcp1_yp, 1, 1, 3);
1684  }
1685 #pragma omp barrier
1686 
1687  for (int site = is; site < ns; ++site) {
1688  int ix = site % m_Nx;
1689  int iyzt = site / m_Nx;
1690  int iy = iyzt % m_Ny;
1691  int izt = iyzt / m_Ny;
1692  int ixzt = ix + m_Nx * izt;
1693  int nei = ix + m_Nx * (iy + 1 + m_Ny * izt);
1694  int iv = Nvcd * site;
1695  int ig = m_Ndf * site;
1696 
1697  if (iy < m_Ny - 1) {
1698  int in = Nvcd * nei;
1699  double vt1[NVC], vt2[NVC];
1700  set_sp2_yp(vt1, vt2, &wp[in], m_Nc);
1701  for (int ic = 0; ic < m_Nc; ++ic) {
1702  int ic2 = ic * NVC;
1703  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
1704  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
1705  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
1706  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
1707  set_sp4_yp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1708  }
1709  } else {
1710  int ix1 = Nvc2 * ixzt;
1711  int ix2 = ix1 + NVC;
1712  for (int ic = 0; ic < m_Nc; ++ic) {
1713  int ic2 = ic * NVC;
1714  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_yp[ix1], m_Nc);
1715  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_yp[ix1], m_Nc);
1716  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_yp[ix2], m_Nc);
1717  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_yp[ix2], m_Nc);
1718  set_sp4_yp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1719  }
1720  }
1721  }
1722 
1723 #pragma omp barrier
1724  }
1725 
1726 
1727 //====================================================================
1728  void Fopr_Wilson::mult_ym(Field& v, const Field& w)
1729  {
1730  int idir = 1;
1731 
1732  int Nvc2 = m_Nvc * 2;
1733  int Nvcd = m_Nvc * m_Nd;
1734 
1735  double bc2 = m_boundary_each_node[idir];
1736 
1737  double *vp = v.ptr(0);
1738  const double *wp = w.ptr(0);
1739  const double *up = m_U->ptr(m_Ndf * m_Nvol * idir);
1740 
1741  int ith, nth, is, ns;
1742  set_threadtask(ith, nth, is, ns, m_Nvol);
1743 
1744 #pragma omp barrier
1745 
1746  for (int site = is; site < ns; ++site) {
1747  int ix = site % m_Nx;
1748  int iyzt = site / m_Nx;
1749  int iy = iyzt % m_Ny;
1750  int izt = iyzt / m_Ny;
1751  int ixzt = ix + m_Nx * izt;
1752  if (iy == m_Ny - 1) {
1753  int in = Nvcd * site;
1754  int ig = m_Ndf * site;
1755  int ix1 = Nvc2 * ixzt;
1756  int ix2 = ix1 + NVC;
1757 
1758  double vt1[NVC], vt2[NVC];
1759  set_sp2_ym(vt1, vt2, &wp[in], m_Nc);
1760  for (int ic = 0; ic < m_Nc; ++ic) {
1761  int ic2 = 2 * ic;
1762  int icr = 2 * ic;
1763  int ici = 2 * ic + 1;
1764  vcp1_ym[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
1765  vcp1_ym[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
1766  vcp1_ym[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
1767  vcp1_ym[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
1768  }
1769  }
1770  }
1771 
1772 #pragma omp barrier
1773 
1774 #pragma omp master
1775  {
1776  const int Nv = m_Nvc * 2 * m_Nx * m_Nz * m_Nt;
1777  Communicator::exchange(Nv, vcp2_ym, vcp1_ym, 1, -1, 4);
1778  }
1779 #pragma omp barrier
1780 
1781  for (int site = is; site < ns; ++site) {
1782  int ix = site % m_Nx;
1783  int iyzt = site / m_Nx;
1784  int iy = iyzt % m_Ny;
1785  int izt = iyzt / m_Ny;
1786  int ixzt = ix + m_Nx * izt;
1787  int nei = ix + m_Nx * (iy - 1 + m_Ny * izt);
1788  int iv = Nvcd * site;
1789 
1790  if (iy > 0) {
1791  int ig = m_Ndf * nei;
1792  int in = Nvcd * nei;
1793  double vt1[NVC], vt2[NVC];
1794  set_sp2_ym(vt1, vt2, &wp[in], m_Nc);
1795  for (int ic = 0; ic < m_Nc; ++ic) {
1796  int ic2 = 2 * ic;
1797  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
1798  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
1799  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
1800  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
1801  set_sp4_ym(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1802  }
1803  } else {
1804  int ix1 = Nvc2 * ixzt;
1805  int ix2 = ix1 + NVC;
1806  for (int ic = 0; ic < m_Nc; ++ic) {
1807  double wt1r = bc2 * vcp2_ym[2 * ic + ix1];
1808  double wt1i = bc2 * vcp2_ym[2 * ic + 1 + ix1];
1809  double wt2r = bc2 * vcp2_ym[2 * ic + ix2];
1810  double wt2i = bc2 * vcp2_ym[2 * ic + 1 + ix2];
1811  set_sp4_ym(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1812  }
1813  }
1814  }
1815 
1816 #pragma omp barrier
1817  }
1818 
1819 
1820 //====================================================================
1821  void Fopr_Wilson::mult_zp(Field& v, const Field& w)
1822  {
1823  int idir = 2;
1824 
1825  int Nvc2 = m_Nvc * 2;
1826  int Nvcd = m_Nvc * m_Nd;
1827 
1828  double bc2 = m_boundary_each_node[idir];
1829 
1830  double *vp = v.ptr(0);
1831  const double *wp = w.ptr(0);
1832  const double *up = m_U->ptr(m_Ndf * m_Nvol * idir);
1833 
1834  int ith, nth, is, ns;
1835  set_threadtask(ith, nth, is, ns, m_Nvol);
1836 
1837  int Nxy = m_Nx * m_Ny;
1838 
1839 #pragma omp barrier
1840 
1841  for (int site = is; site < ns; ++site) {
1842  int ixy = site % Nxy;
1843  int izt = site / Nxy;
1844  int iz = izt % m_Nz;
1845  int it = izt / m_Nz;
1846  int ixyt = ixy + Nxy * it;
1847  if (iz == 0) {
1848  int in = Nvcd * site;
1849  int ix1 = Nvc2 * ixyt;
1850  int ix2 = ix1 + NVC;
1851  double vt1[NVC], vt2[NVC];
1852  set_sp2_zp(vt1, vt2, &wp[in], m_Nc);
1853  for (int ivc = 0; ivc < NVC; ++ivc) {
1854  vcp1_zp[ivc + ix1] = bc2 * vt1[ivc];
1855  vcp1_zp[ivc + ix2] = bc2 * vt2[ivc];
1856  }
1857  }
1858  }
1859 
1860 #pragma omp barrier
1861 
1862 #pragma omp master
1863  {
1864  int Nv = m_Nvc * 2 * m_Nx * m_Ny * m_Nt;
1865  Communicator::exchange(Nv, vcp2_zp, vcp1_zp, 2, 1, 5);
1866  }
1867 #pragma omp barrier
1868 
1869  for (int site = is; site < ns; ++site) {
1870  int ixy = site % Nxy;
1871  int izt = site / Nxy;
1872  int iz = izt % m_Nz;
1873  int it = izt / m_Nz;
1874  int ixyt = ixy + Nxy * it;
1875  int nei = ixy + Nxy * (iz + 1 + m_Nz * it);
1876  int iv = Nvcd * site;
1877  int ig = m_Ndf * site;
1878 
1879  if (iz < m_Nz - 1) {
1880  int in = Nvcd * nei;
1881  double vt1[NVC], vt2[NVC];
1882  set_sp2_zp(vt1, vt2, &wp[in], m_Nc);
1883  for (int ic = 0; ic < m_Nc; ++ic) {
1884  int ic2 = ic * NVC;
1885  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
1886  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
1887  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
1888  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
1889  set_sp4_zp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1890  }
1891  } else {
1892  int ix1 = Nvc2 * ixyt;
1893  int ix2 = ix1 + NVC;
1894  for (int ic = 0; ic < m_Nc; ++ic) {
1895  int ic2 = ic * NVC;
1896  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_zp[ix1], m_Nc);
1897  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_zp[ix1], m_Nc);
1898  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_zp[ix2], m_Nc);
1899  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_zp[ix2], m_Nc);
1900  set_sp4_zp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1901  }
1902  }
1903  }
1904 
1905 #pragma omp barrier
1906  }
1907 
1908 
1909 //====================================================================
1910  void Fopr_Wilson::mult_zm(Field& v, const Field& w)
1911  {
1912  int idir = 2;
1913 
1914  int Nvc2 = m_Nvc * 2;
1915  int Nvcd = m_Nvc * m_Nd;
1916 
1917  double bc2 = m_boundary_each_node[idir];
1918 
1919  double *vp = v.ptr(0);
1920  const double *wp = w.ptr(0);
1921  const double *up = m_U->ptr(m_Ndf * m_Nvol * idir);
1922 
1923  int ith, nth, is, ns;
1924  set_threadtask(ith, nth, is, ns, m_Nvol);
1925 
1926  int Nxy = m_Nx * m_Ny;
1927 
1928 #pragma omp barrier
1929 
1930  for (int site = is; site < ns; ++site) {
1931  int ixy = site % Nxy;
1932  int izt = site / Nxy;
1933  int iz = izt % m_Nz;
1934  int it = izt / m_Nz;
1935  int ixyt = ixy + Nxy * it;
1936  if (iz == m_Nz - 1) {
1937  int in = Nvcd * site;
1938  int ig = m_Ndf * site;
1939  int ix1 = Nvc2 * ixyt;
1940  int ix2 = ix1 + NVC;
1941 
1942  double vt1[NVC], vt2[NVC];
1943  set_sp2_zm(vt1, vt2, &wp[in], m_Nc);
1944  for (int ic = 0; ic < m_Nc; ++ic) {
1945  int ic2 = 2 * ic;
1946  int icr = 2 * ic;
1947  int ici = 2 * ic + 1;
1948  vcp1_zm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
1949  vcp1_zm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
1950  vcp1_zm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
1951  vcp1_zm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
1952  }
1953  }
1954  }
1955 
1956 #pragma omp barrier
1957 
1958 #pragma omp master
1959  {
1960  const int Nv = m_Nvc * 2 * m_Nx * m_Ny * m_Nt;
1961  Communicator::exchange(Nv, vcp2_zm, vcp1_zm, 2, -1, 6);
1962  }
1963 #pragma omp barrier
1964 
1965  for (int site = is; site < ns; ++site) {
1966  int ixy = site % Nxy;
1967  int izt = site / Nxy;
1968  int iz = izt % m_Nz;
1969  int it = izt / m_Nz;
1970  int ixyt = ixy + Nxy * it;
1971  int nei = ixy + Nxy * (iz - 1 + m_Nz * it);
1972  int iv = Nvcd * site;
1973 
1974  if (iz > 0) {
1975  int ig = m_Ndf * nei;
1976  int in = Nvcd * nei;
1977  double vt1[NVC], vt2[NVC];
1978  set_sp2_zm(vt1, vt2, &wp[in], m_Nc);
1979  for (int ic = 0; ic < m_Nc; ++ic) {
1980  int ic2 = 2 * ic;
1981  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
1982  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
1983  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
1984  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
1985  set_sp4_zm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1986  }
1987  } else {
1988  int ix1 = Nvc2 * ixyt;
1989  int ix2 = ix1 + NVC;
1990  for (int ic = 0; ic < m_Nc; ++ic) {
1991  double wt1r = bc2 * vcp2_zm[2 * ic + ix1];
1992  double wt1i = bc2 * vcp2_zm[2 * ic + 1 + ix1];
1993  double wt2r = bc2 * vcp2_zm[2 * ic + ix2];
1994  double wt2i = bc2 * vcp2_zm[2 * ic + 1 + ix2];
1995  set_sp4_zm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
1996  }
1997  }
1998  }
1999 
2000 #pragma omp barrier
2001  }
2002 
2003 
2004 //====================================================================
2006  {
2007  int idir = 3;
2008 
2009  int Nvc2 = m_Nvc * 2;
2010  int Nvcd = m_Nvc * m_Nd;
2011 
2012  double bc2 = m_boundary_each_node[idir];
2013 
2014  double *vp = v.ptr(0);
2015  const double *wp = w.ptr(0);
2016  const double *up = m_U->ptr(m_Ndf * m_Nvol * idir);
2017 
2018  int ith, nth, is, ns;
2019  set_threadtask(ith, nth, is, ns, m_Nvol);
2020 
2021  int Nxyz = m_Nx * m_Ny * m_Nz;
2022 
2023 #pragma omp barrier
2024 
2025  for (int site = is; site < ns; ++site) {
2026  int ixyz = site % Nxyz;
2027  int it = site / Nxyz;
2028  if (it == 0) {
2029  int in = Nvcd * site;
2030  int ix1 = Nvc2 * ixyz;
2031  int ix2 = ix1 + NVC;
2032  double vt1[NVC], vt2[NVC];
2033  set_sp2_tp_dirac(vt1, vt2, &wp[in], m_Nc);
2034  for (int ivc = 0; ivc < NVC; ++ivc) {
2035  vcp1_tp[ivc + ix1] = bc2 * vt1[ivc];
2036  vcp1_tp[ivc + ix2] = bc2 * vt2[ivc];
2037  }
2038  }
2039  }
2040 
2041 #pragma omp barrier
2042 
2043 #pragma omp master
2044  {
2045  int Nv = m_Nvc * 2 * m_Nx * m_Ny * m_Nz;
2046  Communicator::exchange(Nv, vcp2_tp, vcp1_tp, 3, 1, 7);
2047  }
2048 #pragma omp barrier
2049 
2050  for (int site = is; site < ns; ++site) {
2051  int ixyz = site % Nxyz;
2052  int it = site / Nxyz;
2053  int nei = ixyz + Nxyz * (it + 1);
2054  int iv = Nvcd * site;
2055  int ig = m_Ndf * site;
2056 
2057  if (it < m_Nt - 1) {
2058  int in = Nvcd * nei;
2059  double vt1[NVC], vt2[NVC];
2060  set_sp2_tp_dirac(vt1, vt2, &wp[in], m_Nc);
2061  for (int ic = 0; ic < m_Nc; ++ic) {
2062  int ic2 = ic * NVC;
2063  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
2064  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
2065  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
2066  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
2067  set_sp4_tp_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
2068  }
2069  } else {
2070  int ix1 = Nvc2 * ixyz;
2071  int ix2 = ix1 + NVC;
2072  for (int ic = 0; ic < m_Nc; ++ic) {
2073  int ic2 = ic * NVC;
2074  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_tp[ix1], m_Nc);
2075  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_tp[ix1], m_Nc);
2076  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_tp[ix2], m_Nc);
2077  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_tp[ix2], m_Nc);
2078  set_sp4_tp_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
2079  }
2080  }
2081  }
2082 
2083 #pragma omp barrier
2084  }
2085 
2086 
2087 //====================================================================
2089  {
2090  int idir = 3;
2091 
2092  int Nvc2 = m_Nvc * 2;
2093  int Nvcd = m_Nvc * m_Nd;
2094 
2095  double bc2 = m_boundary_each_node[idir];
2096 
2097  double *vp = v.ptr(0);
2098  const double *wp = w.ptr(0);
2099  const double *up = m_U->ptr(m_Ndf * m_Nvol * idir);
2100 
2101  int ith, nth, is, ns;
2102  set_threadtask(ith, nth, is, ns, m_Nvol);
2103 
2104  int Nxyz = m_Nx * m_Ny * m_Nz;
2105 
2106 #pragma omp barrier
2107 
2108  for (int site = is; site < ns; ++site) {
2109  int ixyz = site % Nxyz;
2110  int it = site / Nxyz;
2111  if (it == m_Nt - 1) {
2112  int in = Nvcd * site;
2113  int ig = m_Ndf * site;
2114  int ix1 = Nvc2 * ixyz;
2115  int ix2 = ix1 + NVC;
2116 
2117  double vt1[NVC], vt2[NVC];
2118  set_sp2_tm_dirac(vt1, vt2, &wp[in], m_Nc);
2119  for (int ic = 0; ic < m_Nc; ++ic) {
2120  int ic2 = 2 * ic;
2121  int icr = 2 * ic;
2122  int ici = 2 * ic + 1;
2123  vcp1_tm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
2124  vcp1_tm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
2125  vcp1_tm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
2126  vcp1_tm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
2127  }
2128  }
2129  }
2130 
2131 #pragma omp barrier
2132 
2133 #pragma omp master
2134  {
2135  const int Nv = m_Nvc * 2 * m_Nx * m_Ny * m_Nz;
2136  Communicator::exchange(Nv, vcp2_tm, vcp1_tm, 3, -1, 8);
2137  }
2138 #pragma omp barrier
2139 
2140  for (int site = is; site < ns; ++site) {
2141  int ixyz = site % Nxyz;
2142  int it = site / Nxyz;
2143  int nei = ixyz + Nxyz * (it - 1);
2144  int iv = Nvcd * site;
2145 
2146  if (it > 0) {
2147  int ig = m_Ndf * nei;
2148  int in = Nvcd * nei;
2149  double vt1[NVC], vt2[NVC];
2150  set_sp2_tm_dirac(vt1, vt2, &wp[in], m_Nc);
2151  for (int ic = 0; ic < m_Nc; ++ic) {
2152  int ic2 = 2 * ic;
2153  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
2154  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
2155  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
2156  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
2157  set_sp4_tm_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
2158  }
2159  } else {
2160  int ix1 = Nvc2 * ixyz;
2161  int ix2 = ix1 + NVC;
2162  for (int ic = 0; ic < m_Nc; ++ic) {
2163  int icr = 2 * ic;
2164  int ici = 2 * ic + 1;
2165  double wt1r = bc2 * vcp2_tm[icr + ix1];
2166  double wt1i = bc2 * vcp2_tm[ici + ix1];
2167  double wt2r = bc2 * vcp2_tm[icr + ix2];
2168  double wt2i = bc2 * vcp2_tm[ici + ix2];
2169  set_sp4_tm_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
2170  }
2171  }
2172  }
2173 
2174 #pragma omp barrier
2175  }
2176 
2177 
2178 //====================================================================
2180  {
2181  int idir = 3;
2182 
2183  int Nvc2 = m_Nvc * 2;
2184  int Nvcd = m_Nvc * m_Nd;
2185 
2186  double bc2 = m_boundary_each_node[idir];
2187 
2188  double *vp = v.ptr(0);
2189  const double *wp = w.ptr(0);
2190  const double *up = m_U->ptr(m_Ndf * m_Nvol * idir);
2191 
2192  int ith, nth, is, ns;
2193  set_threadtask(ith, nth, is, ns, m_Nvol);
2194 
2195  int Nxyz = m_Nx * m_Ny * m_Nz;
2196 
2197 #pragma omp barrier
2198 
2199  for (int site = is; site < ns; ++site) {
2200  int ixyz = site % Nxyz;
2201  int it = site / Nxyz;
2202  if (it == 0) {
2203  int in = Nvcd * site;
2204  int ix1 = Nvc2 * ixyz;
2205  int ix2 = ix1 + NVC;
2206  double vt1[NVC], vt2[NVC];
2207  set_sp2_tp_chiral(vt1, vt2, &wp[in], m_Nc);
2208  for (int ivc = 0; ivc < NVC; ++ivc) {
2209  vcp1_tp[ivc + ix1] = bc2 * vt1[ivc];
2210  vcp1_tp[ivc + ix2] = bc2 * vt2[ivc];
2211  }
2212  }
2213  }
2214 
2215 #pragma omp barrier
2216 
2217 #pragma omp master
2218  {
2219  int Nv = m_Nvc * 2 * m_Nx * m_Ny * m_Nz;
2220  Communicator::exchange(Nv, vcp2_tp, vcp1_tp, 3, 1, 7);
2221  }
2222 #pragma omp barrier
2223 
2224  for (int site = is; site < ns; ++site) {
2225  int ixyz = site % Nxyz;
2226  int it = site / Nxyz;
2227  int nei = ixyz + Nxyz * (it + 1);
2228  int iv = Nvcd * site;
2229  int ig = m_Ndf * site;
2230 
2231  if (it < m_Nt - 1) {
2232  int in = Nvcd * nei;
2233  double vt1[NVC], vt2[NVC];
2234  set_sp2_tp_chiral(vt1, vt2, &wp[in], m_Nc);
2235  for (int ic = 0; ic < m_Nc; ++ic) {
2236  int ic2 = ic * NVC;
2237  double wt1r = mult_uv_r(&up[ic2 + ig], vt1, m_Nc);
2238  double wt1i = mult_uv_i(&up[ic2 + ig], vt1, m_Nc);
2239  double wt2r = mult_uv_r(&up[ic2 + ig], vt2, m_Nc);
2240  double wt2i = mult_uv_i(&up[ic2 + ig], vt2, m_Nc);
2241  set_sp4_tp_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
2242  }
2243  } else {
2244  int ix1 = Nvc2 * ixyz;
2245  int ix2 = ix1 + NVC;
2246  for (int ic = 0; ic < m_Nc; ++ic) {
2247  int ic2 = ic * NVC;
2248  double wt1r = mult_uv_r(&up[ic2 + ig], &vcp2_tp[ix1], m_Nc);
2249  double wt1i = mult_uv_i(&up[ic2 + ig], &vcp2_tp[ix1], m_Nc);
2250  double wt2r = mult_uv_r(&up[ic2 + ig], &vcp2_tp[ix2], m_Nc);
2251  double wt2i = mult_uv_i(&up[ic2 + ig], &vcp2_tp[ix2], m_Nc);
2252  set_sp4_tp_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
2253  }
2254  }
2255  }
2256 
2257 #pragma omp barrier
2258  }
2259 
2260 
2261 //====================================================================
2263  {
2264  int idir = 3;
2265 
2266  int Nvc2 = m_Nvc * 2;
2267  int Nvcd = m_Nvc * m_Nd;
2268 
2269  double bc2 = m_boundary_each_node[idir];
2270 
2271  double *vp = v.ptr(0);
2272  const double *wp = w.ptr(0);
2273  const double *up = m_U->ptr(m_Ndf * m_Nvol * idir);
2274 
2275  int ith, nth, is, ns;
2276  set_threadtask(ith, nth, is, ns, m_Nvol);
2277 
2278  int Nxyz = m_Nx * m_Ny * m_Nz;
2279 
2280 #pragma omp barrier
2281 
2282  for (int site = is; site < ns; ++site) {
2283  int ixyz = site % Nxyz;
2284  int it = site / Nxyz;
2285  if (it == m_Nt - 1) {
2286  int in = Nvcd * site;
2287  int ig = m_Ndf * site;
2288  int ix1 = Nvc2 * ixyz;
2289  int ix2 = ix1 + NVC;
2290 
2291  double vt1[NVC], vt2[NVC];
2292  set_sp2_tm_chiral(vt1, vt2, &wp[in], m_Nc);
2293  for (int ic = 0; ic < m_Nc; ++ic) {
2294  int ic2 = 2 * ic;
2295  int icr = 2 * ic;
2296  int ici = 2 * ic + 1;
2297  vcp1_tm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
2298  vcp1_tm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
2299  vcp1_tm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
2300  vcp1_tm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
2301  }
2302  }
2303  }
2304 
2305 #pragma omp barrier
2306 
2307 #pragma omp master
2308  {
2309  const int Nv = m_Nvc * 2 * m_Nx * m_Ny * m_Nz;
2310  Communicator::exchange(Nv, vcp2_tm, vcp1_tm, 3, -1, 8);
2311  }
2312 #pragma omp barrier
2313 
2314  for (int site = is; site < ns; ++site) {
2315  int ixyz = site % Nxyz;
2316  int it = site / Nxyz;
2317  int nei = ixyz + Nxyz * (it - 1);
2318  int iv = Nvcd * site;
2319 
2320  if (it > 0) {
2321  int ig = m_Ndf * nei;
2322  int in = Nvcd * nei;
2323  double vt1[NVC], vt2[NVC];
2324  set_sp2_tm_chiral(vt1, vt2, &wp[in], m_Nc);
2325  for (int ic = 0; ic < m_Nc; ++ic) {
2326  int ic2 = 2 * ic;
2327  double wt1r = mult_udagv_r(&up[ic2 + ig], vt1, m_Nc);
2328  double wt1i = mult_udagv_i(&up[ic2 + ig], vt1, m_Nc);
2329  double wt2r = mult_udagv_r(&up[ic2 + ig], vt2, m_Nc);
2330  double wt2i = mult_udagv_i(&up[ic2 + ig], vt2, m_Nc);
2331  set_sp4_tm_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
2332  }
2333  } else {
2334  int ix1 = Nvc2 * ixyz;
2335  int ix2 = ix1 + NVC;
2336  for (int ic = 0; ic < m_Nc; ++ic) {
2337  double wt1r = bc2 * vcp2_tm[2 * ic + ix1];
2338  double wt1i = bc2 * vcp2_tm[2 * ic + 1 + ix1];
2339  double wt2r = bc2 * vcp2_tm[2 * ic + ix2];
2340  double wt2i = bc2 * vcp2_tm[2 * ic + 1 + ix2];
2341  set_sp4_tm_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i, m_Nc);
2342  }
2343  }
2344  }
2345 
2346 #pragma omp barrier
2347  }
2348 
2349 
2350 //====================================================================
2352  {
2353  // Counting of floating point operations in giga unit.
2354  // The following counting explicitly depends on the implementation.
2355  // It will be recalculated when the code is modified.
2356  // The present counting is based on rev.1107. [24 Aug 2014 H.Matsufuru]
2357 
2358  const int Nvol = CommonParameters::Nvol();
2359  const int NPE = CommonParameters::NPE();
2360 
2361  int flop_site;
2362 
2363  if (m_repr == "Dirac") {
2364  flop_site = m_Nc * m_Nd * (4 + 6 * (4 * m_Nc + 2) + 2 * (4 * m_Nc + 1));
2365  } else if (m_repr == "Chiral") {
2366  flop_site = m_Nc * m_Nd * (4 + 8 * (4 * m_Nc + 2));
2367  } else {
2368  vout.crucial(m_vl, "Error at %s: input repr is undefined.\n",
2369  class_name.c_str());
2370  exit(EXIT_FAILURE);
2371  }
2372 
2373  double flop = flop_site * (Nvol * NPE);
2374 
2375  if ((m_mode == "DdagD") || (m_mode == "DDdag")) flop *= 2;
2376 
2377  double gflop = flop * 1.e-9;
2378  return gflop;
2379  }
2380 
2381 
2382 //====================================================================
2383 }
2384 //============================================================END=====
Imp::Fopr_Wilson::mult_ym
void mult_ym(Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:1728
Imp::Fopr_Wilson::mult_tp_dirac
void mult_tp_dirac(Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:2005
CommonParameters::Ny
static int Ny()
Definition: commonParameters.h:106
Imp::Fopr_Wilson::class_name
static const std::string class_name
Definition: fopr_Wilson_impl.h:46
Imp::Fopr_Wilson::m_w2
Field m_w2
working fields
Definition: fopr_Wilson_impl.h:72
CommonParameters::Nz
static int Nz()
Definition: commonParameters.h:107
Imp::Fopr_Wilson::Ddag
void Ddag(Field &v, const Field &w)
Definition: fopr_Wilson_impl.cpp:432
Imp::Fopr_Wilson::flop_count
double flop_count()
returns the number of floating point operations.
Definition: fopr_Wilson_impl.cpp:2351
Imp::Fopr_Wilson::vcp1_zp
double * vcp1_zp
Definition: fopr_Wilson_impl.h:69
fopr_thread-inc.h
Imp::Fopr_Wilson::vcp1_xm
double * vcp1_xm
Definition: fopr_Wilson_impl.h:67
Imp::Fopr_Wilson::mult_gm5
void mult_gm5(Field &v, const Field &w)
multiplies gamma_5 matrix.
Definition: fopr_Wilson_impl.cpp:396
Imp::Fopr_Wilson::D_ex_dirac
void D_ex_dirac(Field &, const int ex1, const Field &, const int ex2)
Definition: fopr_Wilson_impl.cpp:469
Imp::Fopr_Wilson::mult
void mult(Field &v, const Field &w)
multiplies fermion operator to a given field.
Definition: fopr_Wilson_impl.cpp:264
Parameters::set_string
void set_string(const string &key, const string &value)
Definition: parameters.cpp:39
Imp::Fopr_Wilson::m_Ny
int m_Ny
Definition: fopr_Wilson_impl.h:59
fopr_Wilson_impl_SU_N-inc.h
fopr_Wilson_impl_SU3-inc.h
Imp::Fopr_Wilson::m_mode
std::string m_mode
mult mode
Definition: fopr_Wilson_impl.h:55
CommonParameters::Ndim
static int Ndim()
Definition: commonParameters.h:117
Parameters
Class for parameters.
Definition: parameters.h:46
Imp::Fopr_Wilson::mult_zm
void mult_zm(Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:1910
Imp::Fopr_Wilson::vcp2_tp
double * vcp2_tp
Definition: fopr_Wilson_impl.h:70
Parameters::set_double
void set_double(const string &key, const double value)
Definition: parameters.cpp:33
Imp::Fopr_Wilson::proj_chiral
void proj_chiral(Field &w, const int ex1, const Field &v, const int ex2, const int ipm)
Definition: fopr_Wilson_impl.cpp:1360
Imp::Fopr_Wilson::m_Nd
int m_Nd
Definition: fopr_Wilson_impl.h:58
Bridge::BridgeIO::decrease_indent
void decrease_indent()
Definition: bridgeIO.h:86
Imp::Fopr_Wilson::mult_tm_dirac
void mult_tm_dirac(Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:2088
Imp::Fopr_Wilson::mult_xp
void mult_xp(Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:1473
Imp::Fopr_Wilson::DdagD
void DdagD(Field &v, const Field &w)
Definition: fopr_Wilson_impl.cpp:441
Imp::Fopr_Wilson::DDdag
void DDdag(Field &v, const Field &w)
Definition: fopr_Wilson_impl.cpp:451
Bridge::BridgeIO::increase_indent
void increase_indent()
Definition: bridgeIO.h:85
Imp::Fopr_Wilson::vcp2_xm
double * vcp2_xm
Definition: fopr_Wilson_impl.h:67
Imp::Fopr_Wilson::mult_dn
void mult_dn(const int mu, Field &, const Field &)
downward nearest neighbor hopping term.
Definition: fopr_Wilson_impl.cpp:373
Imp::Fopr_Wilson::mult_xm
void mult_xm(Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:1554
CommonParameters::Nvol
static int Nvol()
Definition: commonParameters.h:109
Imp::Fopr_Wilson::vcp2_tm
double * vcp2_tm
Definition: fopr_Wilson_impl.h:70
axpy
void axpy(Field &y, const double a, const Field &x)
axpy(y, a, x): y := a * x + y
Definition: field.cpp:380
Imp::Fopr_Wilson::m_Ndf
int m_Ndf
Definition: fopr_Wilson_impl.h:58
Imp::Fopr_Wilson::vcp2_zp
double * vcp2_zp
Definition: fopr_Wilson_impl.h:69
Imp::Fopr_Wilson::mult_zp
void mult_zp(Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:1821
Imp::Fopr_Wilson::m_w1
Field m_w1
Definition: fopr_Wilson_impl.h:72
copy
void copy(Field &y, const Field &x)
copy(y, x): y = x
Definition: field.cpp:212
Imp::Fopr_Wilson::m_kappa
double m_kappa
hopping parameter
Definition: fopr_Wilson_impl.h:50
Imp::Fopr_Wilson::mult_gm5p
void mult_gm5p(const int mu, Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:1351
Imp::Fopr_Wilson::m_Nz
int m_Nz
Definition: fopr_Wilson_impl.h:59
Imp::Fopr_Wilson::mult_yp
void mult_yp(Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:1641
CommonParameters::Nx
static int Nx()
Definition: commonParameters.h:105
Imp::Fopr_Wilson::D
void D(Field &v, const Field &w)
Definition: fopr_Wilson_impl.cpp:407
Imp::Fopr_Wilson::setup
void setup()
initial setup main.
Definition: fopr_Wilson_impl.cpp:95
Imp::Fopr_Wilson::m_boundary_each_node
std::vector< double > m_boundary_each_node
b.c. on each node.
Definition: fopr_Wilson_impl.h:64
CommonParameters::Nc
static int Nc()
Definition: commonParameters.h:115
Imp::Fopr_Wilson::mult_up
void mult_up(const int mu, Field &, const Field &)
upward nearest neighbor hopping term.
Definition: fopr_Wilson_impl.cpp:350
Imp::Fopr_Wilson::D_ex
void D_ex(Field &v, const int ex1, const Field &f, const int ex2)
Definition: fopr_Wilson_impl.cpp:420
Imp::Fopr_Wilson::tidyup
void tidyup()
final clean-up.
Definition: fopr_Wilson_impl.cpp:149
Imp::Fopr_Wilson::mult_gm5_chiral
void mult_gm5_chiral(Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:1452
Imp::Fopr_Wilson::mult_gm5_dirac
void mult_gm5_dirac(Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:1431
Imp::Fopr_Wilson::D_ex_dirac_alt
void D_ex_dirac_alt(Field &, const int ex1, const Field &, const int ex2)
Definition: fopr_Wilson_impl.cpp:1317
Imp::Fopr_Wilson::D_ex_chiral_alt
void D_ex_chiral_alt(Field &, const int ex1, const Field &, const int ex2)
Definition: fopr_Wilson_impl.cpp:1334
CommonParameters::Nt
static int Nt()
Definition: commonParameters.h:108
Parameters::fetch_int_vector
int fetch_int_vector(const string &key, vector< int > &value) const
Definition: parameters.cpp:429
Imp::Fopr_Wilson::vcp2_yp
double * vcp2_yp
Definition: fopr_Wilson_impl.h:68
Imp::Fopr_Wilson::vcp2_ym
double * vcp2_ym
Definition: fopr_Wilson_impl.h:68
Imp::Fopr_Wilson::vcp1_xp
double * vcp1_xp
arrays for communication buffer.
Definition: fopr_Wilson_impl.h:67
Imp::Fopr_Wilson::m_U
const Field_G * m_U
gauge configuration.
Definition: fopr_Wilson_impl.h:62
Imp::Fopr_Wilson::vcp2_zm
double * vcp2_zm
Definition: fopr_Wilson_impl.h:69
Imp::Fopr_Wilson::vcp2_xp
double * vcp2_xp
Definition: fopr_Wilson_impl.h:67
Imp::Fopr_Wilson::set_config
void set_config(Field *U)
sets the gauge configuration.
Definition: fopr_Wilson_impl.cpp:245
Imp::Fopr_Wilson::vcp1_zm
double * vcp1_zm
Definition: fopr_Wilson_impl.h:69
Imp::Fopr_Wilson::set_parameters
void set_parameters(const Parameters &params)
sets parameters by a Parameter object: to be implemented in a subclass.
Definition: fopr_Wilson_impl.cpp:174
fopr_Wilson_impl_common-inc.h
Imp::Fopr_Wilson::vcp1_ym
double * vcp1_ym
Definition: fopr_Wilson_impl.h:68
CommonParameters::NPE
static int NPE()
Definition: commonParameters.h:101
Imp::Fopr_Wilson::D_ex_chiral
void D_ex_chiral(Field &, const int ex1, const Field &, const int ex2)
Definition: fopr_Wilson_impl.cpp:893
Field::reset
void reset(const int Nin, const int Nvol, const int Nex, const element_type cmpl=Element_type::COMPLEX)
Definition: field.h:95
Parameters::set_int_vector
void set_int_vector(const string &key, const vector< int > &value)
Definition: parameters.cpp:45
Imp::Fopr_Wilson::get_parameters
void get_parameters(Parameters &params) const
gets parameters by a Parameter object: to be implemented in a subclass.
Definition: fopr_Wilson_impl.cpp:234
fopr_Wilson_impl.h
Imp::Fopr_Wilson::daypx
void daypx(Field &, const double, const Field &)
Definition: fopr_Wilson_impl.cpp:1406
Imp::Fopr_Wilson::m_Nx
int m_Nx
Definition: fopr_Wilson_impl.h:59
Imp::Fopr_Wilson::m_boundary
std::vector< int > m_boundary
boundary condition
Definition: fopr_Wilson_impl.h:51
Field::ptr
const double * ptr(const int jin, const int site, const int jex) const
Definition: field.h:153
CommonParameters::Nd
static int Nd()
Definition: commonParameters.h:116
NVC
#define NVC
Definition: fopr_Wilson_impl_SU2-inc.h:15
Imp::Fopr_Wilson::vcp1_tm
double * vcp1_tm
Definition: fopr_Wilson_impl.h:70
CommonParameters::Vlevel
static Bridge::VerboseLevel Vlevel()
Definition: commonParameters.h:122
Imp::Fopr_Wilson::H
void H(Field &v, const Field &w)
Definition: fopr_Wilson_impl.cpp:461
fopr_Wilson_impl_SU2-inc.h
Bridge::BridgeIO::set_verbose_level
static VerboseLevel set_verbose_level(const std::string &str)
Definition: bridgeIO.cpp:133
Imp::Fopr_Wilson::m_vl
Bridge::VerboseLevel m_vl
verbose level
Definition: fopr_Wilson_impl.h:53
Imp::Fopr_Wilson::m_repr
std::string m_repr
gamma-matrix representation
Definition: fopr_Wilson_impl.h:52
Imp::Fopr_Wilson::set_mode
void set_mode(const std::string mode)
setting the mode of multiplication if necessary. Default implementation here is just to avoid irrelev...
Definition: fopr_Wilson_impl.cpp:254
Imp::Fopr_Wilson::vcp1_tp
double * vcp1_tp
Definition: fopr_Wilson_impl.h:70
Imp::Fopr_Wilson::vcp1_yp
double * vcp1_yp
Definition: fopr_Wilson_impl.h:68
Communicator::ipe
static int ipe(const int dir)
logical coordinate of current proc.
Definition: communicator.cpp:105
Parameters::fetch_string
int fetch_string(const string &key, string &value) const
Definition: parameters.cpp:378
Parameters::fetch_double
int fetch_double(const string &key, double &value) const
Definition: parameters.cpp:327
Imp::Fopr_Wilson::m_Nc
int m_Nc
Definition: fopr_Wilson_impl.h:58
Bridge::BridgeIO::crucial
void crucial(const char *format,...)
Definition: bridgeIO.cpp:180
Imp
Clover term operator.
Definition: fopr_CloverTerm_eo_impl.cpp:32
Imp::Fopr_Wilson::m_Nvc
int m_Nvc
Definition: fopr_Wilson_impl.h:58
Imp::Fopr_Wilson::mult_dag
void mult_dag(Field &v, const Field &w)
hermitian conjugate of mult.
Definition: fopr_Wilson_impl.cpp:285
Field
Container of Field-type object.
Definition: field.h:46
Communicator::exchange
static int exchange(int count, dcomplex *recv_buf, dcomplex *send_buf, int idir, int ipm, int tag)
receive array of dcomplex from upstream specified by idir and ipm, and send array to downstream.
Definition: communicator.cpp:207
Imp::Fopr_Wilson::m_Nvol
int m_Nvol
Definition: fopr_Wilson_impl.h:60
ThreadManager::get_thread_id
static int get_thread_id()
returns thread id.
Definition: threadManager.cpp:253
Field_G
SU(N) gauge field.
Definition: field_G.h:38
Imp::Fopr_Wilson::clear
void clear(Field &)
Definition: fopr_Wilson_impl.cpp:1384
Imp::Fopr_Wilson::mult_tp_chiral
void mult_tp_chiral(Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:2179
Bridge::BridgeIO::general
void general(const char *format,...)
Definition: bridgeIO.cpp:200
Imp::Fopr_Wilson::m_Nt
int m_Nt
Definition: fopr_Wilson_impl.h:59
ThreadManager::assert_single_thread
static void assert_single_thread(const std::string &class_name)
assert currently running on single thread.
Definition: threadManager.cpp:372
Imp::Fopr_Wilson::mult_tm_chiral
void mult_tm_chiral(Field &, const Field &)
Definition: fopr_Wilson_impl.cpp:2262
Bridge::vout
BridgeIO vout
Definition: bridgeIO.cpp:512
Imp::Fopr_Wilson::init
void init()
to be discarded.
Bridge::BridgeIO::get_verbose_level
static std::string get_verbose_level(const VerboseLevel vl)
Definition: bridgeIO.cpp:154
Imp::Fopr_Wilson::m_Ndim
int m_Ndim
Definition: fopr_Wilson_impl.h:60