16 #if defined USE_GROUP_SU3
17 #include "fopr_Wilson_impl_SU3.inc"
18 #elif defined USE_GROUP_SU2
19 #include "fopr_Wilson_impl_SU2.inc"
20 #elif defined USE_GROUP_SU_N
21 #include "fopr_Wilson_impl_SU_N.inc"
73 }
else if (
m_repr ==
"Chiral") {
124 }
else if (m_mode ==
"Ddag") {
127 }
else if (m_mode ==
"DdagD") {
130 }
else if (m_mode ==
"DDdag") {
133 }
else if (m_mode ==
"H") {
146 delete[] vcp1_x_plus;
147 delete[] vcp2_x_plus;
148 delete[] vcp1_x_minus;
149 delete[] vcp2_x_minus;
151 delete[] vcp1_y_plus;
152 delete[] vcp2_y_plus;
153 delete[] vcp1_y_minus;
154 delete[] vcp2_y_minus;
156 delete[] vcp1_z_plus;
157 delete[] vcp2_z_plus;
158 delete[] vcp1_z_minus;
159 delete[] vcp2_z_minus;
161 delete[] vcp1_t_plus;
162 delete[] vcp2_t_plus;
163 delete[] vcp1_t_minus;
164 delete[] vcp2_t_minus;
177 const double kappa_t,
180 const std::vector<int> bc)
182 assert(bc.size() ==
m_Ndim);
190 for (
int mu = 0; mu <
m_Ndim; ++mu) {
200 for (
int mu = 0; mu <
m_Ndim; ++mu) {
201 m_boundary[mu] = bc[mu];
205 for (
int idir = 0; idir <
m_Ndim; ++idir) {
206 m_boundary2[idir] = 1.0;
220 double flop_site, flop;
222 if (m_repr ==
"Dirac") {
223 flop_site =
static_cast<double>(
224 m_Nc * m_Nd * (4 + 6 * (4 * m_Nc + 2) + 2 * (4 * m_Nc + 1)));
225 }
else if (m_repr ==
"Chiral") {
226 flop_site =
static_cast<double>(
227 m_Nc * m_Nd * (4 + 8 * (4 * m_Nc + 2)));
234 flop = flop_site *
static_cast<double>(Lvol);
236 if ((m_mode ==
"DdagD") || (m_mode ==
"DDdag")) flop *= 2.0;
251 for (
int mu = 0; mu < (
m_Ndim - 1); ++mu) {
255 daxpy(w, -m_kappa_s, m_w1);
263 daxpy(w, -m_kappa_t, m_w1);
279 for (
int mu = 0; mu < (
m_Ndim - 1); ++mu) {
283 daxpy(w, -m_kappa_s, m_w1);
291 daxpy(w, -m_kappa_t, m_w1);
304 }
else if (mu == 1) {
306 }
else if (mu == 2) {
308 }
else if (mu == 3) {
309 (this->*m_mult_t_plus)(w, f);
322 }
else if (mu == 1) {
324 }
else if (mu == 2) {
326 }
else if (mu == 3) {
327 (this->*m_mult_t_minus)(w, f);
337 const Field& f,
const int ex2)
339 int Ninvol = m_Nvc * m_Nd * m_Nvol;
340 const double *v1 = f.
ptr(Ninvol * ex2);
341 double *v2 = w.
ptr(Ninvol * ex1);
346 int is = m_Ntask * i_thread / Nthread;
347 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
349 for (
int i = is; i < is + ns; ++i) {
350 mult_x_plus1_thread(i, vcp1_x_plus, v1);
351 mult_x_minus1_thread(i, vcp1_x_minus, v1);
352 mult_y_plus1_thread(i, vcp1_y_plus, v1);
353 mult_y_minus1_thread(i, vcp1_y_minus, v1);
354 mult_z_plus1_thread(i, vcp1_z_plus, v1);
355 mult_z_minus1_thread(i, vcp1_z_minus, v1);
356 mult_t_plus1_dirac_thread(i, vcp1_t_plus, v1);
357 mult_t_minus1_dirac_thread(i, vcp1_t_minus, v1);
364 int Nvx = 2 * m_Nvc * m_Nd * m_Ny * m_Nz * m_Nt;
369 int Nvy = 2 * m_Nvc * m_Nd * m_Nx * m_Nz * m_Nt;
374 int Nvz = 2 * m_Nvc * m_Nd * m_Nx * m_Ny * m_Nt;
380 int Nvt = m_Nvc * m_Nd * m_Nx * m_Ny * m_Nz;
386 for (
int i = is; i < is + ns; ++i) {
388 mult_x_plus_bulk_thread(i, v2, v1);
389 mult_x_minus_bulk_thread(i, v2, v1);
390 mult_y_plus_bulk_thread(i, v2, v1);
391 mult_y_minus_bulk_thread(i, v2, v1);
392 mult_z_plus_bulk_thread(i, v2, v1);
393 mult_z_minus_bulk_thread(i, v2, v1);
394 mult_t_plus_bulk_dirac_thread(i, v2, v1);
395 mult_t_minus_bulk_dirac_thread(i, v2, v1);
398 for (
int i = is; i < is + ns; ++i) {
399 mult_x_plus2_thread(i, v2, vcp2_x_plus);
400 mult_x_minus2_thread(i, v2, vcp2_x_minus);
401 mult_y_plus2_thread(i, v2, vcp2_y_plus);
402 mult_y_minus2_thread(i, v2, vcp2_y_minus);
403 mult_z_plus2_thread(i, v2, vcp2_z_plus);
404 mult_z_minus2_thread(i, v2, vcp2_z_minus);
405 mult_t_plus2_dirac_thread(i, v2, vcp2_t_plus);
406 mult_t_minus2_dirac_thread(i, v2, vcp2_t_minus);
421 const Field& f,
const int ex2)
423 int Ninvol = m_Nvc * m_Nd * m_Nvol;
424 const double *v1 = f.
ptr(Ninvol * ex2);
425 double *v2 = w.
ptr(Ninvol * ex1);
430 int is = m_Ntask * i_thread / Nthread;
431 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
433 for (
int i = is; i < is + ns; ++i) {
434 mult_x_plus1_thread(i, vcp1_x_plus, v1);
435 mult_x_minus1_thread(i, vcp1_x_minus, v1);
436 mult_y_plus1_thread(i, vcp1_y_plus, v1);
437 mult_y_minus1_thread(i, vcp1_y_minus, v1);
438 mult_z_plus1_thread(i, vcp1_z_plus, v1);
439 mult_z_minus1_thread(i, vcp1_z_minus, v1);
440 mult_t_plus1_chiral_thread(i, vcp1_t_plus, v1);
441 mult_t_minus1_chiral_thread(i, vcp1_t_minus, v1);
448 int Nvx = 2 * m_Nvc * m_Nd * m_Ny * m_Nz * m_Nt;
453 int Nvy = 2 * m_Nvc * m_Nd * m_Nx * m_Nz * m_Nt;
458 int Nvz = 2 * m_Nvc * m_Nd * m_Nx * m_Ny * m_Nt;
464 int Nvt = m_Nvc * m_Nd * m_Nx * m_Ny * m_Nz;
470 for (
int i = is; i < is + ns; ++i) {
472 mult_x_plus_bulk_thread(i, v2, v1);
473 mult_x_minus_bulk_thread(i, v2, v1);
474 mult_y_plus_bulk_thread(i, v2, v1);
475 mult_y_minus_bulk_thread(i, v2, v1);
476 mult_z_plus_bulk_thread(i, v2, v1);
477 mult_z_minus_bulk_thread(i, v2, v1);
478 mult_t_plus_bulk_chiral_thread(i, v2, v1);
479 mult_t_minus_bulk_chiral_thread(i, v2, v1);
482 for (
int i = is; i < is + ns; ++i) {
483 mult_x_plus2_thread(i, v2, vcp2_x_plus);
484 mult_x_minus2_thread(i, v2, vcp2_x_minus);
485 mult_y_plus2_thread(i, v2, vcp2_y_plus);
486 mult_y_minus2_thread(i, v2, vcp2_y_minus);
487 mult_z_plus2_thread(i, v2, vcp2_z_plus);
488 mult_z_minus2_thread(i, v2, vcp2_z_minus);
489 mult_t_plus2_chiral_thread(i, v2, vcp2_t_plus);
490 mult_t_minus2_chiral_thread(i, v2, vcp2_t_minus);
527 const Field& v,
const int ex2,
534 }
else if (ipm == -1) {
541 m_w1.setpart_ex(0, v, ex2);
543 m_w1.addpart_ex(0, m_w2, 0, fpm);
552 double fac,
const Field& f)
554 const double *v1 = f.
ptr(0);
555 double *v2 = w.
ptr(0);
560 int is = m_Ntask * i_thread / Nthread;
561 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
563 for (
int i = is; i < is + ns; ++i) {
564 daxpy_thread(i, v2, fac, v1);
572 double fac,
const Field& f)
574 const double *v1 = f.
ptr(0);
575 double *v2 = w.
ptr(0);
580 int is = m_Ntask * i_thread / Nthread;
581 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
583 for (
int i = is; i < is + ns; ++i) {
584 daypx_thread(i, v2, fac, v1);
594 double *v2 = w.
ptr(0);
599 int is = m_Ntask * i_thread / Nthread;
600 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
602 for (
int i = is; i < is + ns; ++i) {
603 scal_thread(i, v2, fac);
612 double *v2 = w.
ptr(0);
617 int is = m_Ntask * i_thread / Nthread;
618 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
620 for (
int i = is; i < is + ns; ++i) {
630 const double *v1 = f.
ptr(0);
631 double *v2 = w.
ptr(0);
636 int is = m_Ntask * i_thread / Nthread;
637 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
639 for (
int i = is; i < is + ns; ++i) {
640 gm5_dirac_thread(i, v2, v1);
649 const double *v1 = f.
ptr(0);
650 double *v2 = w.
ptr(0);
655 int is = m_Ntask * i_thread / Nthread;
656 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
658 for (
int i = is; i < is + ns; ++i) {
659 gm5_chiral_thread(i, v2, v1);
668 const double *v1 = f.
ptr(0);
669 double *v2 = w.
ptr(0);
674 int is = m_Ntask * i_thread / Nthread;
675 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
679 for (
int i = is; i < is + ns; ++i) {
680 mult_x_plus1_thread(i, vcp1_x_plus, v1);
687 int Nv = 2 * m_Nvc * m_Nd * m_Ny * m_Nz * m_Nt;
692 for (
int i = is; i < is + ns; ++i) {
693 mult_x_plus_bulk_thread(i, v2, v1);
696 for (
int i = is; i < is + ns; ++i) {
697 mult_x_plus2_thread(i, v2, vcp2_x_plus);
707 const double *v1 = f.
ptr(0);
708 double *v2 = w.
ptr(0);
713 int is = m_Ntask * i_thread / Nthread;
714 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
718 for (
int i = is; i < is + ns; ++i) {
719 mult_x_minus1_thread(i, vcp1_x_minus, v1);
726 int Nv = 2 * m_Nvc * m_Nd * m_Ny * m_Nz * m_Nt;
731 for (
int i = is; i < is + ns; ++i) {
732 mult_x_minus_bulk_thread(i, v2, v1);
735 for (
int i = is; i < is + ns; ++i) {
736 mult_x_minus2_thread(i, v2, vcp2_x_minus);
746 const double *v1 = f.
ptr(0);
747 double *v2 = w.
ptr(0);
752 int is = m_Ntask * i_thread / Nthread;
753 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
757 for (
int i = is; i < is + ns; ++i) {
758 mult_y_plus1_thread(i, vcp1_y_plus, v1);
765 int Nv = 2 * m_Nvc * m_Nd * m_Nx * m_Nz * m_Nt;
770 for (
int i = is; i < is + ns; ++i) {
771 mult_y_plus_bulk_thread(i, v2, v1);
774 for (
int i = is; i < is + ns; ++i) {
775 mult_y_plus2_thread(i, v2, vcp2_y_plus);
785 const double *v1 = f.
ptr(0);
786 double *v2 = w.
ptr(0);
791 int is = m_Ntask * i_thread / Nthread;
792 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
796 for (
int i = is; i < is + ns; ++i) {
797 mult_y_minus1_thread(i, vcp1_y_minus, v1);
804 int Nv = 2 * m_Nvc * m_Nd * m_Nx * m_Nz * m_Nt;
809 for (
int i = is; i < is + ns; ++i) {
810 mult_y_minus_bulk_thread(i, v2, v1);
813 for (
int i = is; i < is + ns; ++i) {
814 mult_y_minus2_thread(i, v2, vcp2_y_minus);
824 const double *v1 = f.
ptr(0);
825 double *v2 = w.
ptr(0);
830 int is = m_Ntask * i_thread / Nthread;
831 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
835 for (
int i = is; i < is + ns; ++i) {
836 mult_z_plus1_thread(i, vcp1_z_plus, v1);
843 int Nv = 2 * m_Nvc * m_Nd * m_Nx * m_Ny * m_Nt;
848 for (
int i = is; i < is + ns; ++i) {
849 mult_z_plus_bulk_thread(i, v2, v1);
852 for (
int i = is; i < is + ns; ++i) {
853 mult_z_plus2_thread(i, v2, vcp2_z_plus);
863 const double *v1 = f.
ptr(0);
864 double *v2 = w.
ptr(0);
869 int is = m_Ntask * i_thread / Nthread;
870 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
874 for (
int i = is; i < is + ns; ++i) {
875 mult_z_minus1_thread(i, vcp1_z_minus, v1);
882 int Nv = 2 * m_Nvc * m_Nd * m_Nx * m_Ny * m_Nt;
887 for (
int i = is; i < is + ns; ++i) {
888 mult_z_minus_bulk_thread(i, v2, v1);
891 for (
int i = is; i < is + ns; ++i) {
892 mult_z_minus2_thread(i, v2, vcp2_z_minus);
902 const double *v1 = f.
ptr(0);
903 double *v2 = w.
ptr(0);
908 int is = m_Ntask * i_thread / Nthread;
909 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
913 for (
int i = is; i < is + ns; ++i) {
914 mult_t_plus1_dirac_thread(i, vcp1_t_plus, v1);
922 int Nv = m_Nvc * m_Nd * m_Nx * m_Ny * m_Nz;
927 for (
int i = is; i < is + ns; ++i) {
928 mult_t_plus_bulk_dirac_thread(i, v2, v1);
931 for (
int i = is; i < is + ns; ++i) {
932 mult_t_plus2_dirac_thread(i, v2, vcp2_t_plus);
942 const double *v1 = f.
ptr(0);
943 double *v2 = w.
ptr(0);
948 int is = m_Ntask * i_thread / Nthread;
949 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
953 for (
int i = is; i < is + ns; ++i) {
954 mult_t_minus1_dirac_thread(i, vcp1_t_minus, v1);
962 int Nv = m_Nvc * m_Nd * m_Nx * m_Ny * m_Nz;
967 for (
int i = is; i < is + ns; ++i) {
968 mult_t_minus_bulk_dirac_thread(i, v2, v1);
971 for (
int i = is; i < is + ns; ++i) {
972 mult_t_minus2_dirac_thread(i, v2, vcp2_t_minus);
982 const double *v1 = f.
ptr(0);
983 double *v2 = w.
ptr(0);
988 int is = m_Ntask * i_thread / Nthread;
989 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
993 for (
int i = is; i < is + ns; ++i) {
994 mult_t_plus1_chiral_thread(i, vcp1_t_plus, v1);
1002 int Nv = m_Nvc * m_Nd * m_Nx * m_Ny * m_Nz;
1007 for (
int i = is; i < is + ns; ++i) {
1008 mult_t_plus_bulk_chiral_thread(i, v2, v1);
1011 for (
int i = is; i < is + ns; ++i) {
1012 mult_t_plus2_chiral_thread(i, v2, vcp2_t_plus);
1022 const double *v1 = f.
ptr(0);
1023 double *v2 = w.
ptr(0);
1028 int is = m_Ntask * i_thread / Nthread;
1029 int ns = m_Ntask * (i_thread + 1) / Nthread - is;
1033 for (
int i = is; i < is + ns; ++i) {
1034 mult_t_minus1_chiral_thread(i, vcp1_t_minus, v1);
1042 int Nv = m_Nvc * m_Nd * m_Nx * m_Ny * m_Nz;
1047 for (
int i = is; i < is + ns; ++i) {
1048 mult_t_minus_bulk_chiral_thread(i, v2, v1);
1051 for (
int i = is; i < is + ns; ++i) {
1052 mult_t_minus2_chiral_thread(i, v2, vcp2_t_minus);
void DDdag(Field &w, const Field &f)
void scal(Field &, double)
static int get_num_threads()
returns available number of threads.
const double * ptr(const int jin, const int site, const int jex) const
void mult_y_minus(Field &, const Field &)
void mult_up(int mu, Field &, const Field &)
void mult_t_minus_chiral(Field &, const Field &)
void mult_x_minus(Field &, const Field &)
void general(const char *format,...)
GammaMatrix get_GM(GMspecies spec)
void gm5_chiral(Field &, const Field &)
void(Fopr_Wilson_General::Fopr_Wilson_General_impl::* m_mult)(Field &, const Field &)
std::vector< GammaMatrix > m_GM
gamma matrices.
static Bridge::VerboseLevel Vlevel()
Container of Field-type object.
void daypx(Field &, double, const Field &)
void mult_z_minus(Field &, const Field &)
double * vcp1_x_plus
arrays for data transfer.
void set_mode(std::string mode)
Field m_w2
temporary fields.
void Ddag(Field &w, const Field &f)
void mult_t_plus_chiral(Field &, const Field &)
void H(Field &w, const Field &f)
static int ipe(const int dir)
logical coordinate of current proc.
void set_parameters(const double kappa_s, const double kappa_t, const double nu_s, const double r_s, const std::vector< int > bc)
void mult_x_plus(Field &, const Field &)
std::vector< double > m_boundary2
b.c. for each node.
static int get_thread_id()
returns thread id.
void D_ex_dirac(Field &, const int ex1, const Field &, const int ex2)
void(Fopr_Wilson_General::Fopr_Wilson_General_impl::* m_gm5)(Field &, const Field &)
const Field_G * m_U
gauge configuration.
void daxpy(Field &, double, const Field &)
void mult_undef(Field &, const Field &f)
const Field_F mult_gm5p(int mu, const Field_F &w)
void gm5_dirac(Field &, const Field &)
void mult_dn(int mu, Field &w, const Field &v)
void init(std::string repr)
void(Fopr_Wilson_General::Fopr_Wilson_General_impl::* m_mult_t_minus)(Field &, const Field &)
std::string get_mode() const
Bridge::VerboseLevel m_vl
void DdagD(Field &w, const Field &f)
void reset(const int Nin, const int Nvol, const int Nex, const element_type cmpl=COMPLEX)
void mult_up(int mu, Field &w, const Field &v)
adding the hopping to nearest neighbor site in mu-th direction.
static void sync_barrier_all()
barrier among all the threads and nodes.
void mult_y_plus(Field &, const Field &)
static int exchange(int count, double *recv_buf, double *send_buf, int idir, int ipm, int tag)
receive array of double from upstream specified by idir and ipm, and send array to downstream...
std::vector< int > m_boundary
boundary condition.
void mult_z_plus(Field &, const Field &)
void mult_gm5(Field &w, const Field &v)
void mult_t_minus_dirac(Field &, const Field &)
void crucial(const char *format,...)
static const std::string class_name
void D(Field &v, const Field &f)
void(Fopr_Wilson_General::Fopr_Wilson_General_impl::* m_mult_dag)(Field &, const Field &)
void proj_chiral(Field &w, const int ex1, const Field &v, const int ex2, const int ipm)
void D_ex_chiral(Field &, const int ex1, const Field &, const int ex2)
void mult_t_plus_dirac(Field &, const Field &)
void D_dirac(Field &, const Field &)
void(Fopr_Wilson_General::Fopr_Wilson_General_impl::* m_mult_t_plus)(Field &, const Field &)
static const std::string class_name
void mult_dn(int mu, Field &, const Field &)
Bridge::VerboseLevel m_vl
void D_chiral(Field &, const Field &)
void setpart_ex(int ex, const Field &w, int exw)
void(Fopr_Wilson_General::Fopr_Wilson_General_impl::* m_D_ex)(Field &, const int, const Field &, const int)
void(Fopr_Wilson_General::Fopr_Wilson_General_impl::* m_D)(Field &, const Field &)