29 #if defined USE_GROUP_SU3
30 #include "fopr_Wilson_impl_SU3.inc"
31 #elif defined USE_GROUP_SU2
32 #include "fopr_Wilson_impl_SU2.inc"
33 #elif defined USE_GROUP_SU_N
34 #include "fopr_Wilson_impl_SU_N.inc"
43 vout.
general(
m_vl,
"Fopr_Wilson: Construction of Wilson fermion operator(imp).\n");
70 }
else if (
m_repr ==
"Chiral") {
96 }
else if (
m_repr ==
"Chiral") {
139 buf_size[0] =
sizeof(double) *
m_Nvc * 2 *
m_Ny * m_Nz * m_Nt;
140 buf_size[1] =
sizeof(double) *
m_Nvc * 2 *
m_Nx * m_Nz * m_Nt;
141 buf_size[2] =
sizeof(double) *
m_Nvc * 2 *
m_Nx *
m_Ny * m_Nt;
142 buf_size[3] =
sizeof(double) *
m_Nvc * 2 *
m_Nx *
m_Ny * m_Nz;
158 for (
int imu = 0; imu <
m_Ndim; ++imu) {
194 }
else if (m_mode ==
"Ddag") {
197 }
else if (m_mode ==
"DdagD") {
200 }
else if (m_mode ==
"DDdag") {
203 }
else if (m_mode ==
"H") {
207 vout.
crucial(
m_vl,
"Fopr_Wilson: input mode is undefined in Fopr_Wilson.\n");
247 const std::vector<int> bc)
249 assert(bc.size() ==
m_Ndim);
253 for (
int mu = 0; mu <
m_Ndim; ++mu) {
254 m_boundary[mu] = bc[mu];
259 for (
int mu = 0; mu <
m_Ndim; ++mu) {
264 for (
int idir = 0; idir <
m_Ndim; ++idir) {
265 m_boundary2[idir] = 1.0;
279 double flop_site, flop;
281 if (m_repr ==
"Dirac") {
282 flop_site =
static_cast<double>(
283 m_Nc * m_Nd * (4 + 6 * (4 * m_Nc + 2) + 2 * (4 * m_Nc + 1)));
284 }
else if (m_repr ==
"Chiral") {
285 flop_site =
static_cast<double>(
286 m_Nc * m_Nd * (4 + 8 * (4 * m_Nc + 2)));
293 flop = flop_site *
static_cast<double>(Lvol);
294 if ((m_mode ==
"DdagD") || (m_mode ==
"DDdag")) flop *= 2.0;
303 D_ex_dirac(w, 0, f, 0);
310 D_ex_chiral(w, 0, f, 0);
316 const Field& f,
const int ex2)
318 int Ninvol = m_Nvc * m_Nd * m_Nvol;
320 const double *v1 = f.
ptr(Ninvol * ex2);
321 double *v2 = w.
ptr(Ninvol * ex1);
326 int is = m_Ntask * ith / nth;
327 int ns = m_Ntask * (ith + 1) / nth - is;
331 for (
int i = is; i < is + ns; ++i) {
332 mult_xp1_thread(i, vcp1_xp, v1);
333 mult_xm1_thread(i, vcp1_xm, v1);
334 mult_yp1_thread(i, vcp1_yp, v1);
335 mult_ym1_thread(i, vcp1_ym, v1);
336 mult_zp1_thread(i, vcp1_zp, v1);
337 mult_zm1_thread(i, vcp1_zm, v1);
338 mult_tp1_dirac_thread(i, vcp1_tp, v1);
339 mult_tm1_dirac_thread(i, vcp1_tm, v1);
343 for (
int i = is; i < is + ns; ++i) {
345 mult_xpb_thread(i, v2, v1);
346 mult_xmb_thread(i, v2, v1);
347 mult_ypb_thread(i, v2, v1);
348 mult_ymb_thread(i, v2, v1);
349 mult_zpb_thread(i, v2, v1);
350 mult_zmb_thread(i, v2, v1);
351 mult_tpb_dirac_thread(i, v2, v1);
352 mult_tmb_dirac_thread(i, v2, v1);
355 for (
int i = is; i < is + ns; ++i) {
356 mult_xp2_thread(i, v2, vcp2_xp);
357 mult_xm2_thread(i, v2, vcp2_xm);
358 mult_yp2_thread(i, v2, vcp2_yp);
359 mult_ym2_thread(i, v2, vcp2_ym);
360 mult_zp2_thread(i, v2, vcp2_zp);
361 mult_zm2_thread(i, v2, vcp2_zm);
362 mult_tp2_dirac_thread(i, v2, vcp2_tp);
363 mult_tm2_dirac_thread(i, v2, vcp2_tm);
364 daypx_thread(i, v2, -m_kappa, v1);
373 const Field& f,
const int ex2)
375 int Ninvol = m_Nvc * m_Nd * m_Nvol;
377 const double *v1 = f.
ptr(Ninvol * ex2);
378 double *v2 = w.
ptr(Ninvol * ex1);
383 int is = m_Ntask * ith / nth;
384 int ns = m_Ntask * (ith + 1) / nth - is;
388 for (
int i = is; i < is + ns; ++i) {
389 mult_xp1_thread(i, vcp1_xp, v1);
390 mult_xm1_thread(i, vcp1_xm, v1);
391 mult_yp1_thread(i, vcp1_yp, v1);
392 mult_ym1_thread(i, vcp1_ym, v1);
393 mult_zp1_thread(i, vcp1_zp, v1);
394 mult_zm1_thread(i, vcp1_zm, v1);
395 mult_tp1_chiral_thread(i, vcp1_tp, v1);
396 mult_tm1_chiral_thread(i, vcp1_tm, v1);
400 for (
int i = is; i < is + ns; ++i) {
402 mult_xpb_thread(i, v2, v1);
403 mult_xmb_thread(i, v2, v1);
404 mult_ypb_thread(i, v2, v1);
405 mult_ymb_thread(i, v2, v1);
406 mult_zpb_thread(i, v2, v1);
407 mult_zmb_thread(i, v2, v1);
408 mult_tpb_chiral_thread(i, v2, v1);
409 mult_tmb_chiral_thread(i, v2, v1);
412 for (
int i = is; i < is + ns; ++i) {
413 mult_xp2_thread(i, v2, vcp2_xp);
414 mult_xm2_thread(i, v2, vcp2_xm);
415 mult_yp2_thread(i, v2, vcp2_yp);
416 mult_ym2_thread(i, v2, vcp2_ym);
417 mult_zp2_thread(i, v2, vcp2_zp);
418 mult_zm2_thread(i, v2, vcp2_zm);
419 mult_tp2_chiral_thread(i, v2, vcp2_tp);
420 mult_tm2_chiral_thread(i, v2, vcp2_tm);
421 daypx_thread(i, v2, -m_kappa, v1);
447 }
else if (mu == 1) {
449 }
else if (mu == 2) {
451 }
else if (mu == 3) {
452 (this->*m_mult_tp)(w, f);
465 }
else if (mu == 1) {
467 }
else if (mu == 2) {
469 }
else if (mu == 3) {
470 (this->*m_mult_tm)(w, f);
480 Field& w,
const int ex1,
481 const Field& v,
const int ex2,
const int ipm)
487 }
else if (ipm == -1) {
491 "Fopr_Wilson_impl::proj_chiral: illegal chirality = %d.\n", ipm);
495 m_w1.setpart_ex(0, v, ex2);
497 m_w1.addpart_ex(0, m_w2, 0, fpm);
514 double fac,
const Field& f)
516 const double *v1 = f.
ptr(0);
517 double *v2 = w.
ptr(0);
522 int is = m_Ntask * ith / nth;
523 int ns = m_Ntask * (ith + 1) / nth - is;
525 for (
int i = is; i < is + ns; ++i) {
526 daypx_thread(i, v2, fac, v1);
534 scal(v, 2.0 * m_kappa);
541 scal(v, 1.0 / (2.0 * m_kappa));
548 double *v2 = w.
ptr(0);
553 int is = m_Ntask * ith / nth;
554 int ns = m_Ntask * (ith + 1) / nth - is;
556 for (
int i = is; i < is + ns; ++i) {
565 const double *v1 = f.
ptr(0);
566 double *v2 = w.
ptr(0);
571 int is = m_Ntask * ith / nth;
572 int ns = m_Ntask * (ith + 1) / nth - is;
574 for (
int i = is; i < is + ns; ++i) {
575 gm5_dirac_thread(i, v2, v1);
584 const double *v1 = f.
ptr(0);
585 double *v2 = w.
ptr(0);
590 int is = m_Ntask * ith / nth;
591 int ns = m_Ntask * (ith + 1) / nth - is;
593 for (
int i = is; i < is + ns; ++i) {
594 gm5_chiral_thread(i, v2, v1);
603 const double *v1 = f.
ptr(0);
604 double *v2 = w.
ptr(0);
609 int is = m_Ntask * ith / nth;
610 int ns = m_Ntask * (ith + 1) / nth - is;
614 for (
int i = is; i < is + ns; ++i) {
615 mult_xp1_thread(i, vcp1_xp, v1);
619 for (
int i = is; i < is + ns; ++i) {
620 mult_xpb_thread(i, v2, v1);
623 for (
int i = is; i < is + ns; ++i) {
624 mult_xp2_thread(i, v2, vcp2_xp);
634 const double *v1 = f.
ptr(0);
635 double *v2 = w.
ptr(0);
640 int is = m_Ntask * ith / nth;
641 int ns = m_Ntask * (ith + 1) / nth - is;
645 for (
int i = is; i < is + ns; ++i) {
646 mult_xm1_thread(i, vcp1_xm, v1);
650 for (
int i = is; i < is + ns; ++i) {
651 mult_xmb_thread(i, v2, v1);
654 for (
int i = is; i < is + ns; ++i) {
655 mult_xm2_thread(i, v2, vcp2_xm);
665 const double *v1 = f.
ptr(0);
666 double *v2 = w.
ptr(0);
671 int is = m_Ntask * ith / nth;
672 int ns = m_Ntask * (ith + 1) / nth - is;
676 for (
int i = is; i < is + ns; ++i) {
677 mult_yp1_thread(i, vcp1_yp, v1);
681 for (
int i = is; i < is + ns; ++i) {
682 mult_ypb_thread(i, v2, v1);
685 for (
int i = is; i < is + ns; ++i) {
686 mult_yp2_thread(i, v2, vcp2_yp);
696 const double *v1 = f.
ptr(0);
697 double *v2 = w.
ptr(0);
702 int is = m_Ntask * ith / nth;
703 int ns = m_Ntask * (ith + 1) / nth - is;
707 for (
int i = is; i < is + ns; ++i) {
708 mult_ym1_thread(i, vcp1_ym, v1);
712 for (
int i = is; i < is + ns; ++i) {
713 mult_ymb_thread(i, v2, v1);
716 for (
int i = is; i < is + ns; ++i) {
717 mult_ym2_thread(i, v2, vcp2_ym);
727 const double *v1 = f.
ptr(0);
728 double *v2 = w.
ptr(0);
733 int is = m_Ntask * ith / nth;
734 int ns = m_Ntask * (ith + 1) / nth - is;
738 for (
int i = is; i < is + ns; ++i) {
739 mult_zp1_thread(i, vcp1_zp, v1);
743 for (
int i = is; i < is + ns; ++i) {
744 mult_zpb_thread(i, v2, v1);
747 for (
int i = is; i < is + ns; ++i) {
748 mult_zp2_thread(i, v2, vcp2_zp);
758 const double *v1 = f.
ptr(0);
759 double *v2 = w.
ptr(0);
764 int is = m_Ntask * ith / nth;
765 int ns = m_Ntask * (ith + 1) / nth - is;
769 for (
int i = is; i < is + ns; ++i) {
770 mult_zm1_thread(i, vcp1_zm, v1);
774 for (
int i = is; i < is + ns; ++i) {
775 mult_zmb_thread(i, v2, v1);
778 for (
int i = is; i < is + ns; ++i) {
779 mult_zm2_thread(i, v2, vcp2_zm);
789 const double *v1 = f.
ptr(0);
790 double *v2 = w.
ptr(0);
795 int is = m_Ntask * ith / nth;
796 int ns = m_Ntask * (ith + 1) / nth - is;
800 for (
int i = is; i < is + ns; ++i) {
801 mult_tp1_dirac_thread(i, vcp1_tp, v1);
805 for (
int i = is; i < is + ns; ++i) {
806 mult_tpb_dirac_thread(i, v2, v1);
809 for (
int i = is; i < is + ns; ++i) {
810 mult_tp2_dirac_thread(i, v2, vcp2_tp);
820 const double *v1 = f.
ptr(0);
821 double *v2 = w.
ptr(0);
826 int is = m_Ntask * ith / nth;
827 int ns = m_Ntask * (ith + 1) / nth - is;
831 for (
int i = is; i < is + ns; ++i) {
832 mult_tm1_dirac_thread(i, vcp1_tm, v1);
836 for (
int i = is; i < is + ns; ++i) {
837 mult_tmb_dirac_thread(i, v2, v1);
840 for (
int i = is; i < is + ns; ++i) {
841 mult_tm2_dirac_thread(i, v2, vcp2_tm);
851 const double *v1 = f.
ptr(0);
852 double *v2 = w.
ptr(0);
857 int is = m_Ntask * ith / nth;
858 int ns = m_Ntask * (ith + 1) / nth - is;
862 for (
int i = is; i < is + ns; ++i) {
863 mult_tp1_chiral_thread(i, vcp1_tp, v1);
867 for (
int i = is; i < is + ns; ++i) {
868 mult_tpb_chiral_thread(i, v2, v1);
871 for (
int i = is; i < is + ns; ++i) {
872 mult_tp2_chiral_thread(i, v2, vcp2_tp);
882 const double *v1 = f.
ptr(0);
883 double *v2 = w.
ptr(0);
888 int is = m_Ntask * ith / nth;
889 int ns = m_Ntask * (ith + 1) / nth - is;
893 for (
int i = is; i < is + ns; ++i) {
894 mult_tm1_chiral_thread(i, vcp1_tm, v1);
898 for (
int i = is; i < is + ns; ++i) {
899 mult_tmb_chiral_thread(i, v2, v1);
902 for (
int i = is; i < is + ns; ++i) {
903 mult_tm2_chiral_thread(i, v2, vcp2_tm);
void daypx(Field &, double, const Field &)
void scal(Field &x, const double a)
scal(x, a): x = a * x
std::vector< GammaMatrix > m_GM
gamma matrices.
void mult_zp(Field &, const Field &)
void detailed(const char *format,...)
static int get_num_threads()
returns available number of threads.
const double * ptr(const int jin, const int site, const int jex) const
void set_parameters(const double kappa, const std::vector< int > bc)
std::vector< int > m_boundary
boundary condition.
void gm5_chiral(Field &, const Field &)
void proj_chiral(Field &w, const int ex1, const Field &v, const int ex2, const int ipm)
double * vcp1_xp
arrays for data transfer.
void mult_tm_dirac(Field &, const Field &)
void general(const char *format,...)
GammaMatrix get_GM(GMspecies spec)
static Bridge::VerboseLevel Vlevel()
Container of Field-type object.
void(Fopr_Wilson::Fopr_Wilson_impl::* m_D_ex)(Field &, const int, const Field &, const int)
void D(Field &v, const Field &f)
void(Fopr_Wilson::Fopr_Wilson_impl::* m_mult_dag)(Field &, const Field &)
static Channel * recv_init(int count, int idir, int ipm)
std::vector< Channel * > m_bw_recv
void mult_up(int mu, Field &w, const Field &v)
adding the hopping to nearest neighbor site in mu-th direction.
void mult_tp_chiral(Field &, const Field &)
static int ipe(const int dir)
logical coordinate of current proc.
void H(Field &w, const Field &f)
void D_chiral(Field &, const Field &)
Set of Gamma Matrix: chiral representation.
static int get_thread_id()
returns thread id.
void mult_xm(Field &, const Field &)
std::vector< Channel * > m_fw_recv
std::vector< Channel * > m_fw_send
void fprop_normalize(Field &v)
void DdagD(Field &w, const Field &f)
const Field_G * m_U
gauge configuration.
Field m_w2
temporary fields.
std::string get_mode() const
void(Fopr_Wilson::Fopr_Wilson_impl::* m_mult)(Field &, const Field &)
void DDdag(Field &w, const Field &f)
Bridge::VerboseLevel m_vl
void reset(const int Nin, const int Nvol, const int Nex, const element_type cmpl=COMPLEX)
Bridge::VerboseLevel m_vl
static void sync_barrier_all()
barrier among all the threads and nodes.
void mult_ym(Field &, const Field &)
void(Fopr_Wilson::Fopr_Wilson_impl::* m_D)(Field &, const Field &)
Set of Gamma Matrices: basis class.
Set of Gamma Matrix: Dirac representation.
std::vector< Channel * > m_bw_send
static const std::string class_name
void(Fopr_Wilson::Fopr_Wilson_impl::* m_gm5)(Field &, const Field &)
void D_ex_chiral(Field &, const int ex1, const Field &, const int ex2)
void D_ex_dirac(Field &, const int ex1, const Field &, const int ex2)
static const std::string class_name
void paranoiac(const char *format,...)
const Field_F mult_gm5p(int mu, const Field_F &w)
void crucial(const char *format,...)
void set_mode(std::string mode)
void mult_gm5(Field &w, const Field &v)
void fopr_normalize(Field &v)
void(Fopr_Wilson::Fopr_Wilson_impl::* m_mult_tm)(Field &, const Field &)
void mult_tp_dirac(Field &, const Field &)
void mult_up(int mu, Field &, const Field &)
static Channel * send_init(int count, int idir, int ipm)
void D_dirac(Field &, const Field &)
void mult_zm(Field &, const Field &)
static int sync()
synchronize within small world.
void mult_undef(Field &, const Field &f)
std::vector< double > m_boundary2
b.c. for each node.
void mult_yp(Field &, const Field &)
void setpart_ex(int ex, const Field &w, int exw)
void mult_xp(Field &, const Field &)
void mult_dn(int mu, Field &, const Field &)
void Ddag(Field &w, const Field &f)
void init(std::string repr)
void(Fopr_Wilson::Fopr_Wilson_impl::* m_mult_tp)(Field &, const Field &)
void mult_tm_chiral(Field &, const Field &)
void gm5_dirac(Field &, const Field &)