18 #if defined USE_GROUP_SU3
20 #elif defined USE_GROUP_SU2
22 #elif defined USE_GROUP_SU_N
29 #ifdef USE_FACTORY_AUTOREGISTER
31 bool init = Fopr_WilsonGeneral::register_factory();
36 =
"Imp::Fopr_WilsonGeneral";
59 vout.
crucial(
"Error at %s: unsupported gamma-matrix type: %s\n",
99 std::string imple_gauge = imple_Nc();
101 imple_gauge.c_str());
186 double kappa_s, kappa_t;
191 err += params.
fetch_double(
"hopping_parameter_spatial", kappa_s);
192 err += params.
fetch_double(
"hopping_parameter_temporal", kappa_t);
193 err += params.
fetch_double(
"dispersion_parameter_spatial", nu_s);
194 err += params.
fetch_double(
"Wilson_parameter_spatial", r_s);
198 vout.
crucial(
"Error at %s: input parameter not found.\n",
209 const double kappa_t,
212 const std::vector<int> bc)
214 assert(bc.size() ==
m_Ndim);
227 for (
int idir = 0; idir <
m_Ndim; ++idir) {
242 for (
int mu = 0; mu <
m_Ndim; ++mu) {
276 if (ith == 0)
m_mode = mode;
286 }
else if (
m_mode ==
"Ddag") {
288 }
else if (
m_mode ==
"DdagD") {
290 }
else if (
m_mode ==
"DDdag") {
292 }
else if (
m_mode ==
"H") {
307 }
else if (
m_mode ==
"Ddag") {
309 }
else if (
m_mode ==
"DdagD") {
311 }
else if (
m_mode ==
"DDdag") {
313 }
else if (
m_mode ==
"H") {
325 const std::string mode)
329 }
else if (mode ==
"Ddag") {
331 }
else if (mode ==
"DdagD") {
333 }
else if (mode ==
"DDdag") {
335 }
else if (mode ==
"H") {
347 const std::string mode)
351 }
else if (mode ==
"Ddag") {
353 }
else if (mode ==
"DdagD") {
355 }
else if (mode ==
"DDdag") {
357 }
else if (mode ==
"H") {
372 }
else if (mu == 1) {
374 }
else if (mu == 2) {
376 }
else if (mu == 3) {
395 }
else if (mu == 1) {
397 }
else if (mu == 2) {
399 }
else if (mu == 3) {
418 }
else if (
m_repr ==
"Chiral") {
429 }
else if (
m_repr ==
"Chiral") {
474 const Field& f,
const int ex2)
496 const Field& f,
const int ex2)
523 double *wp = w.
ptr(0);
525 int ith, nth, is, ns;
526 set_threadtask(ith, nth, is, ns,
m_Nvol);
528 for (
int site = is; site < ns; ++site) {
529 for (
int ivcd = 0; ivcd < Nvcd; ++ivcd) {
530 wp[ivcd + Nvcd * site] = 0.0;
540 const double fac,
const Field& w)
546 double *vp = v.
ptr(0);
547 const double *wp = w.
ptr(0);
549 int ith, nth, is, ns;
550 set_threadtask(ith, nth, is, ns,
m_Nvol);
552 for (
int site = is; site < ns; ++site) {
553 for (
int ivcd = 0; ivcd < Nvcd; ++ivcd) {
554 vp[ivcd + Nvcd * site] += fac * wp[ivcd + Nvcd * site];
569 double *vp = v.
ptr(0);
570 const double *wp = w.
ptr(0);
572 int ith, nth, is, ns;
573 set_threadtask(ith, nth, is, ns,
m_Nvol);
575 for (
int site = is; site < ns; ++site) {
576 mult_gamma5_dirac(&vp[Nvcd * site], &wp[Nvcd * site],
m_Nc);
590 double *vp = v.
ptr(0);
591 const double *wp = w.
ptr(0);
593 int ith, nth, is, ns;
594 set_threadtask(ith, nth, is, ns,
m_Nvol);
596 for (
int site = is; site < ns; ++site) {
597 mult_gamma5_chiral(&vp[Nvcd * site], &wp[Nvcd * site],
m_Nc);
612 const int id2 =
m_Nvc;
613 const int id3 =
m_Nvc * 2;
614 const int id4 =
m_Nvc * 3;
618 double *vp = v.
ptr(0);
619 const double *wp = w.
ptr(0);
622 int ith, nth, is, ns;
623 set_threadtask(ith, nth, is, ns,
m_Nvol);
627 for (
int site = is; site < ns; ++site) {
628 int ix = site %
m_Nx;
629 int iyzt = site /
m_Nx;
631 int in = Nvcd * site;
632 int ix1 = Nvcd * iyzt;
634 int ix3 = ix1 + 2 *
NVC;
635 int ix4 = ix1 + 3 *
NVC;
637 for (
int ic = 0; ic <
m_Nc; ++ic) {
639 int ici = 2 * ic + 1;
640 vt1[icr] =
m_r_s * wp[icr + id1 + in] -
m_nu_s * wp[ici + id4 + in];
641 vt1[ici] =
m_r_s * wp[ici + id1 + in] +
m_nu_s * wp[icr + id4 + in];
642 vt2[icr] =
m_r_s * wp[icr + id2 + in] -
m_nu_s * wp[ici + id3 + in];
643 vt2[ici] =
m_r_s * wp[ici + id2 + in] +
m_nu_s * wp[icr + id3 + in];
644 vt3[icr] =
m_r_s * wp[icr + id3 + in] +
m_nu_s * wp[ici + id2 + in];
645 vt3[ici] =
m_r_s * wp[ici + id3 + in] -
m_nu_s * wp[icr + id2 + in];
646 vt4[icr] =
m_r_s * wp[icr + id4 + in] +
m_nu_s * wp[ici + id1 + in];
647 vt4[ici] =
m_r_s * wp[ici + id4 + in] -
m_nu_s * wp[icr + id1 + in];
650 for (
int ivc = 0; ivc <
NVC; ++ivc) {
651 vcp1_xp[ivc + ix1] = bc2 * vt1[ivc];
652 vcp1_xp[ivc + ix2] = bc2 * vt2[ivc];
653 vcp1_xp[ivc + ix3] = bc2 * vt3[ivc];
654 vcp1_xp[ivc + ix4] = bc2 * vt4[ivc];
668 for (
int site = is; site < ns; ++site) {
669 int ix = site %
m_Nx;
670 int iyzt = site /
m_Nx;
671 int nei = ix + 1 +
m_Nx * iyzt;
672 int iv = Nvcd * site;
673 int ig =
m_Ndf * site;
678 for (
int ic = 0; ic <
m_Nc; ++ic) {
680 int ici = 2 * ic + 1;
681 vt1[icr] =
m_r_s * wp[icr + id1 + in] -
m_nu_s * wp[ici + id4 + in];
682 vt1[ici] =
m_r_s * wp[ici + id1 + in] +
m_nu_s * wp[icr + id4 + in];
683 vt2[icr] =
m_r_s * wp[icr + id2 + in] -
m_nu_s * wp[ici + id3 + in];
684 vt2[ici] =
m_r_s * wp[ici + id2 + in] +
m_nu_s * wp[icr + id3 + in];
685 vt3[icr] =
m_r_s * wp[icr + id3 + in] +
m_nu_s * wp[ici + id2 + in];
686 vt3[ici] =
m_r_s * wp[ici + id3 + in] -
m_nu_s * wp[icr + id2 + in];
687 vt4[icr] =
m_r_s * wp[icr + id4 + in] +
m_nu_s * wp[ici + id1 + in];
688 vt4[ici] =
m_r_s * wp[ici + id4 + in] -
m_nu_s * wp[icr + id1 + in];
690 for (
int ic = 0; ic <
m_Nc; ++ic) {
692 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
693 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
694 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
695 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
696 double wt3r = mult_uv_r(&up[ic2 + ig], vt3,
m_Nc);
697 double wt3i = mult_uv_i(&up[ic2 + ig], vt3,
m_Nc);
698 double wt4r = mult_uv_r(&up[ic2 + ig], vt4,
m_Nc);
699 double wt4i = mult_uv_i(&up[ic2 + ig], vt4,
m_Nc);
702 int ici = 2 * ic + 1;
703 vp[icr + id1 + iv] += wt1r;
704 vp[ici + id1 + iv] += wt1i;
705 vp[icr + id2 + iv] += wt2r;
706 vp[ici + id2 + iv] += wt2i;
707 vp[icr + id3 + iv] += wt3r;
708 vp[ici + id3 + iv] += wt3i;
709 vp[icr + id4 + iv] += wt4r;
710 vp[ici + id4 + iv] += wt4i;
713 int ix1 = Nvcd * iyzt;
715 int ix3 = ix1 + 2 *
NVC;
716 int ix4 = ix1 + 3 *
NVC;
717 for (
int ic = 0; ic <
m_Nc; ++ic) {
719 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_xp[ix1],
m_Nc);
720 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_xp[ix1],
m_Nc);
721 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_xp[ix2],
m_Nc);
722 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_xp[ix2],
m_Nc);
723 double wt3r = mult_uv_r(&up[ic2 + ig], &
vcp2_xp[ix3],
m_Nc);
724 double wt3i = mult_uv_i(&up[ic2 + ig], &
vcp2_xp[ix3],
m_Nc);
725 double wt4r = mult_uv_r(&up[ic2 + ig], &
vcp2_xp[ix4],
m_Nc);
726 double wt4i = mult_uv_i(&up[ic2 + ig], &
vcp2_xp[ix4],
m_Nc);
729 int ici = 2 * ic + 1;
730 vp[icr + id1 + iv] += wt1r;
731 vp[ici + id1 + iv] += wt1i;
732 vp[icr + id2 + iv] += wt2r;
733 vp[ici + id2 + iv] += wt2i;
734 vp[icr + id3 + iv] += wt3r;
735 vp[ici + id3 + iv] += wt3i;
736 vp[icr + id4 + iv] += wt4r;
737 vp[ici + id4 + iv] += wt4i;
754 const int id2 =
m_Nvc;
755 const int id3 =
m_Nvc * 2;
756 const int id4 =
m_Nvc * 3;
760 double *vp = v.
ptr(0);
761 const double *wp = w.
ptr(0);
764 int ith, nth, is, ns;
765 set_threadtask(ith, nth, is, ns,
m_Nvol);
769 for (
int site = is; site < ns; ++site) {
770 int ix = site %
m_Nx;
771 int iyzt = site /
m_Nx;
772 if (ix ==
m_Nx - 1) {
773 int in = Nvcd * site;
774 int ig =
m_Ndf * site;
775 int ix1 = Nvcd * iyzt;
777 int ix3 = ix1 + 2 *
NVC;
778 int ix4 = ix1 + 3 *
NVC;
781 for (
int ic = 0; ic <
m_Nc; ++ic) {
783 int ici = 2 * ic + 1;
784 vt1[icr] =
m_r_s * wp[icr + id1 + in] +
m_nu_s * wp[ici + id4 + in];
785 vt1[ici] =
m_r_s * wp[ici + id1 + in] -
m_nu_s * wp[icr + id4 + in];
786 vt2[icr] =
m_r_s * wp[icr + id2 + in] +
m_nu_s * wp[ici + id3 + in];
787 vt2[ici] =
m_r_s * wp[ici + id2 + in] -
m_nu_s * wp[icr + id3 + in];
788 vt3[icr] =
m_r_s * wp[icr + id3 + in] -
m_nu_s * wp[ici + id2 + in];
789 vt3[ici] =
m_r_s * wp[ici + id3 + in] +
m_nu_s * wp[icr + id2 + in];
790 vt4[icr] =
m_r_s * wp[icr + id4 + in] -
m_nu_s * wp[ici + id1 + in];
791 vt4[ici] =
m_r_s * wp[ici + id4 + in] +
m_nu_s * wp[icr + id1 + in];
794 for (
int ic = 0; ic <
m_Nc; ++ic) {
797 int ici = 2 * ic + 1;
798 vcp1_xm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
799 vcp1_xm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
800 vcp1_xm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
801 vcp1_xm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
802 vcp1_xm[icr + ix3] = mult_udagv_r(&up[ic2 + ig], vt3,
m_Nc);
803 vcp1_xm[ici + ix3] = mult_udagv_i(&up[ic2 + ig], vt3,
m_Nc);
804 vcp1_xm[icr + ix4] = mult_udagv_r(&up[ic2 + ig], vt4,
m_Nc);
805 vcp1_xm[ici + ix4] = mult_udagv_i(&up[ic2 + ig], vt4,
m_Nc);
819 for (
int site = is; site < ns; ++site) {
820 int ix = site %
m_Nx;
821 int iyzt = site /
m_Nx;
822 int nei = ix - 1 +
m_Nx * iyzt;
823 int iv = Nvcd * site;
826 int ig =
m_Ndf * nei;
830 for (
int ic = 0; ic <
m_Nc; ++ic) {
832 int ici = 2 * ic + 1;
833 vt1[icr] =
m_r_s * wp[icr + id1 + in] +
m_nu_s * wp[ici + id4 + in];
834 vt1[ici] =
m_r_s * wp[ici + id1 + in] -
m_nu_s * wp[icr + id4 + in];
835 vt2[icr] =
m_r_s * wp[icr + id2 + in] +
m_nu_s * wp[ici + id3 + in];
836 vt2[ici] =
m_r_s * wp[ici + id2 + in] -
m_nu_s * wp[icr + id3 + in];
837 vt3[icr] =
m_r_s * wp[icr + id3 + in] -
m_nu_s * wp[ici + id2 + in];
838 vt3[ici] =
m_r_s * wp[ici + id3 + in] +
m_nu_s * wp[icr + id2 + in];
839 vt4[icr] =
m_r_s * wp[icr + id4 + in] -
m_nu_s * wp[ici + id1 + in];
840 vt4[ici] =
m_r_s * wp[ici + id4 + in] +
m_nu_s * wp[icr + id1 + in];
843 for (
int ic = 0; ic <
m_Nc; ++ic) {
845 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
846 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
847 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
848 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
849 double wt3r = mult_udagv_r(&up[ic2 + ig], vt3,
m_Nc);
850 double wt3i = mult_udagv_i(&up[ic2 + ig], vt3,
m_Nc);
851 double wt4r = mult_udagv_r(&up[ic2 + ig], vt4,
m_Nc);
852 double wt4i = mult_udagv_i(&up[ic2 + ig], vt4,
m_Nc);
855 int ici = 2 * ic + 1;
856 vp[icr + id1 + iv] += wt1r;
857 vp[ici + id1 + iv] += wt1i;
858 vp[icr + id2 + iv] += wt2r;
859 vp[ici + id2 + iv] += wt2i;
860 vp[icr + id3 + iv] += wt3r;
861 vp[ici + id3 + iv] += wt3i;
862 vp[icr + id4 + iv] += wt4r;
863 vp[ici + id4 + iv] += wt4i;
866 int ix1 = Nvcd * iyzt;
868 int ix3 = ix1 + 2 *
NVC;
869 int ix4 = ix1 + 3 *
NVC;
870 for (
int ic = 0; ic <
m_Nc; ++ic) {
871 double wt1r = bc2 *
vcp2_xm[2 * ic + ix1];
872 double wt1i = bc2 *
vcp2_xm[2 * ic + 1 + ix1];
873 double wt2r = bc2 *
vcp2_xm[2 * ic + ix2];
874 double wt2i = bc2 *
vcp2_xm[2 * ic + 1 + ix2];
875 double wt3r = bc2 *
vcp2_xm[2 * ic + ix3];
876 double wt3i = bc2 *
vcp2_xm[2 * ic + 1 + ix3];
877 double wt4r = bc2 *
vcp2_xm[2 * ic + ix4];
878 double wt4i = bc2 *
vcp2_xm[2 * ic + 1 + ix4];
881 int ici = 2 * ic + 1;
882 vp[icr + id1 + iv] += wt1r;
883 vp[ici + id1 + iv] += wt1i;
884 vp[icr + id2 + iv] += wt2r;
885 vp[ici + id2 + iv] += wt2i;
886 vp[icr + id3 + iv] += wt3r;
887 vp[ici + id3 + iv] += wt3i;
888 vp[icr + id4 + iv] += wt4r;
889 vp[ici + id4 + iv] += wt4i;
906 const int id2 =
m_Nvc;
907 const int id3 =
m_Nvc * 2;
908 const int id4 =
m_Nvc * 3;
912 double *vp = v.
ptr(0);
913 const double *wp = w.
ptr(0);
916 int ith, nth, is, ns;
917 set_threadtask(ith, nth, is, ns,
m_Nvol);
921 for (
int site = is; site < ns; ++site) {
922 int ix = site %
m_Nx;
923 int iyzt = site /
m_Nx;
924 int iy = iyzt %
m_Ny;
925 int izt = iyzt /
m_Ny;
926 int ixzt = ix +
m_Nx * izt;
928 int in = Nvcd * site;
929 int ix1 = Nvcd * ixzt;
931 int ix3 = ix1 + 2 *
NVC;
932 int ix4 = ix1 + 3 *
NVC;
934 for (
int ic = 0; ic <
m_Nc; ++ic) {
936 int ici = 2 * ic + 1;
937 vt1[icr] =
m_r_s * wp[icr + id1 + in] +
m_nu_s * wp[icr + id4 + in];
938 vt1[ici] =
m_r_s * wp[ici + id1 + in] +
m_nu_s * wp[ici + id4 + in];
939 vt2[icr] =
m_r_s * wp[icr + id2 + in] -
m_nu_s * wp[icr + id3 + in];
940 vt2[ici] =
m_r_s * wp[ici + id2 + in] -
m_nu_s * wp[ici + id3 + in];
941 vt3[icr] =
m_r_s * wp[icr + id3 + in] -
m_nu_s * wp[icr + id2 + in];
942 vt3[ici] =
m_r_s * wp[ici + id3 + in] -
m_nu_s * wp[ici + id2 + in];
943 vt4[icr] =
m_r_s * wp[icr + id4 + in] +
m_nu_s * wp[icr + id1 + in];
944 vt4[ici] =
m_r_s * wp[ici + id4 + in] +
m_nu_s * wp[ici + id1 + in];
947 for (
int ivc = 0; ivc <
NVC; ++ivc) {
948 vcp1_yp[ivc + ix1] = bc2 * vt1[ivc];
949 vcp1_yp[ivc + ix2] = bc2 * vt2[ivc];
950 vcp1_yp[ivc + ix3] = bc2 * vt3[ivc];
951 vcp1_yp[ivc + ix4] = bc2 * vt4[ivc];
965 for (
int site = is; site < ns; ++site) {
966 int ix = site %
m_Nx;
967 int iyzt = site /
m_Nx;
968 int iy = iyzt %
m_Ny;
969 int izt = iyzt /
m_Ny;
970 int ixzt = ix +
m_Nx * izt;
971 int nei = ix +
m_Nx * (iy + 1 +
m_Ny * izt);
972 int iv = Nvcd * site;
973 int ig =
m_Ndf * site;
979 for (
int ic = 0; ic <
m_Nc; ++ic) {
981 int ici = 2 * ic + 1;
982 vt1[icr] =
m_r_s * wp[icr + id1 + in] +
m_nu_s * wp[icr + id4 + in];
983 vt1[ici] =
m_r_s * wp[ici + id1 + in] +
m_nu_s * wp[ici + id4 + in];
984 vt2[icr] =
m_r_s * wp[icr + id2 + in] -
m_nu_s * wp[icr + id3 + in];
985 vt2[ici] =
m_r_s * wp[ici + id2 + in] -
m_nu_s * wp[ici + id3 + in];
986 vt3[icr] =
m_r_s * wp[icr + id3 + in] -
m_nu_s * wp[icr + id2 + in];
987 vt3[ici] =
m_r_s * wp[ici + id3 + in] -
m_nu_s * wp[ici + id2 + in];
988 vt4[icr] =
m_r_s * wp[icr + id4 + in] +
m_nu_s * wp[icr + id1 + in];
989 vt4[ici] =
m_r_s * wp[ici + id4 + in] +
m_nu_s * wp[ici + id1 + in];
991 for (
int ic = 0; ic <
m_Nc; ++ic) {
993 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
994 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
995 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
996 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
997 double wt3r = mult_uv_r(&up[ic2 + ig], vt3,
m_Nc);
998 double wt3i = mult_uv_i(&up[ic2 + ig], vt3,
m_Nc);
999 double wt4r = mult_uv_r(&up[ic2 + ig], vt4,
m_Nc);
1000 double wt4i = mult_uv_i(&up[ic2 + ig], vt4,
m_Nc);
1003 int ici = 2 * ic + 1;
1004 vp[icr + id1 + iv] += wt1r;
1005 vp[ici + id1 + iv] += wt1i;
1006 vp[icr + id2 + iv] += wt2r;
1007 vp[ici + id2 + iv] += wt2i;
1008 vp[icr + id3 + iv] += wt3r;
1009 vp[ici + id3 + iv] += wt3i;
1010 vp[icr + id4 + iv] += wt4r;
1011 vp[ici + id4 + iv] += wt4i;
1014 int ix1 = Nvcd * ixzt;
1015 int ix2 = ix1 +
NVC;
1016 int ix3 = ix1 + 2 *
NVC;
1017 int ix4 = ix1 + 3 *
NVC;
1018 for (
int ic = 0; ic <
m_Nc; ++ic) {
1020 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_yp[ix1],
m_Nc);
1021 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_yp[ix1],
m_Nc);
1022 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_yp[ix2],
m_Nc);
1023 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_yp[ix2],
m_Nc);
1024 double wt3r = mult_uv_r(&up[ic2 + ig], &
vcp2_yp[ix3],
m_Nc);
1025 double wt3i = mult_uv_i(&up[ic2 + ig], &
vcp2_yp[ix3],
m_Nc);
1026 double wt4r = mult_uv_r(&up[ic2 + ig], &
vcp2_yp[ix4],
m_Nc);
1027 double wt4i = mult_uv_i(&up[ic2 + ig], &
vcp2_yp[ix4],
m_Nc);
1030 int ici = 2 * ic + 1;
1031 vp[icr + id1 + iv] += wt1r;
1032 vp[ici + id1 + iv] += wt1i;
1033 vp[icr + id2 + iv] += wt2r;
1034 vp[ici + id2 + iv] += wt2i;
1035 vp[icr + id3 + iv] += wt3r;
1036 vp[ici + id3 + iv] += wt3i;
1037 vp[icr + id4 + iv] += wt4r;
1038 vp[ici + id4 + iv] += wt4i;
1055 const int id2 =
m_Nvc;
1056 const int id3 =
m_Nvc * 2;
1057 const int id4 =
m_Nvc * 3;
1061 double *vp = v.
ptr(0);
1062 const double *wp = w.
ptr(0);
1065 int ith, nth, is, ns;
1066 set_threadtask(ith, nth, is, ns,
m_Nvol);
1070 for (
int site = is; site < ns; ++site) {
1071 int ix = site %
m_Nx;
1072 int iyzt = site /
m_Nx;
1073 int iy = iyzt %
m_Ny;
1074 int izt = iyzt /
m_Ny;
1075 int ixzt = ix +
m_Nx * izt;
1076 if (iy ==
m_Ny - 1) {
1077 int in = Nvcd * site;
1078 int ig =
m_Ndf * site;
1079 int ix1 = Nvcd * ixzt;
1080 int ix2 = ix1 +
NVC;
1081 int ix3 = ix1 + 2 *
NVC;
1082 int ix4 = ix1 + 3 *
NVC;
1085 for (
int ic = 0; ic <
m_Nc; ++ic) {
1087 int ici = 2 * ic + 1;
1088 vt1[icr] =
m_r_s * wp[icr + id1 + in] -
m_nu_s * wp[icr + id4 + in];
1089 vt1[ici] =
m_r_s * wp[ici + id1 + in] -
m_nu_s * wp[ici + id4 + in];
1090 vt2[icr] =
m_r_s * wp[icr + id2 + in] +
m_nu_s * wp[icr + id3 + in];
1091 vt2[ici] =
m_r_s * wp[ici + id2 + in] +
m_nu_s * wp[ici + id3 + in];
1092 vt3[icr] =
m_r_s * wp[icr + id3 + in] +
m_nu_s * wp[icr + id2 + in];
1093 vt3[ici] =
m_r_s * wp[ici + id3 + in] +
m_nu_s * wp[ici + id2 + in];
1094 vt4[icr] =
m_r_s * wp[icr + id4 + in] -
m_nu_s * wp[icr + id1 + in];
1095 vt4[ici] =
m_r_s * wp[ici + id4 + in] -
m_nu_s * wp[ici + id1 + in];
1098 for (
int ic = 0; ic <
m_Nc; ++ic) {
1101 int ici = 2 * ic + 1;
1102 vcp1_ym[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1103 vcp1_ym[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1104 vcp1_ym[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1105 vcp1_ym[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1106 vcp1_ym[icr + ix3] = mult_udagv_r(&up[ic2 + ig], vt3,
m_Nc);
1107 vcp1_ym[ici + ix3] = mult_udagv_i(&up[ic2 + ig], vt3,
m_Nc);
1108 vcp1_ym[icr + ix4] = mult_udagv_r(&up[ic2 + ig], vt4,
m_Nc);
1109 vcp1_ym[ici + ix4] = mult_udagv_i(&up[ic2 + ig], vt4,
m_Nc);
1123 for (
int site = is; site < ns; ++site) {
1124 int ix = site %
m_Nx;
1125 int iyzt = site /
m_Nx;
1126 int iy = iyzt %
m_Ny;
1127 int izt = iyzt /
m_Ny;
1128 int ixzt = ix +
m_Nx * izt;
1129 int nei = ix +
m_Nx * (iy - 1 +
m_Ny * izt);
1130 int iv = Nvcd * site;
1133 int ig =
m_Ndf * nei;
1134 int in = Nvcd * nei;
1136 for (
int ic = 0; ic <
m_Nc; ++ic) {
1138 int ici = 2 * ic + 1;
1139 vt1[icr] =
m_r_s * wp[icr + id1 + in] -
m_nu_s * wp[icr + id4 + in];
1140 vt1[ici] =
m_r_s * wp[ici + id1 + in] -
m_nu_s * wp[ici + id4 + in];
1141 vt2[icr] =
m_r_s * wp[icr + id2 + in] +
m_nu_s * wp[icr + id3 + in];
1142 vt2[ici] =
m_r_s * wp[ici + id2 + in] +
m_nu_s * wp[ici + id3 + in];
1143 vt3[icr] =
m_r_s * wp[icr + id3 + in] +
m_nu_s * wp[icr + id2 + in];
1144 vt3[ici] =
m_r_s * wp[ici + id3 + in] +
m_nu_s * wp[ici + id2 + in];
1145 vt4[icr] =
m_r_s * wp[icr + id4 + in] -
m_nu_s * wp[icr + id1 + in];
1146 vt4[ici] =
m_r_s * wp[ici + id4 + in] -
m_nu_s * wp[ici + id1 + in];
1149 for (
int ic = 0; ic <
m_Nc; ++ic) {
1151 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1152 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1153 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1154 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1155 double wt3r = mult_udagv_r(&up[ic2 + ig], vt3,
m_Nc);
1156 double wt3i = mult_udagv_i(&up[ic2 + ig], vt3,
m_Nc);
1157 double wt4r = mult_udagv_r(&up[ic2 + ig], vt4,
m_Nc);
1158 double wt4i = mult_udagv_i(&up[ic2 + ig], vt4,
m_Nc);
1161 int ici = 2 * ic + 1;
1162 vp[icr + id1 + iv] += wt1r;
1163 vp[ici + id1 + iv] += wt1i;
1164 vp[icr + id2 + iv] += wt2r;
1165 vp[ici + id2 + iv] += wt2i;
1166 vp[icr + id3 + iv] += wt3r;
1167 vp[ici + id3 + iv] += wt3i;
1168 vp[icr + id4 + iv] += wt4r;
1169 vp[ici + id4 + iv] += wt4i;
1172 int ix1 = Nvcd * ixzt;
1173 int ix2 = ix1 +
NVC;
1174 int ix3 = ix1 + 2 *
NVC;
1175 int ix4 = ix1 + 3 *
NVC;
1176 for (
int ic = 0; ic <
m_Nc; ++ic) {
1177 double wt1r = bc2 *
vcp2_ym[2 * ic + ix1];
1178 double wt1i = bc2 *
vcp2_ym[2 * ic + 1 + ix1];
1179 double wt2r = bc2 *
vcp2_ym[2 * ic + ix2];
1180 double wt2i = bc2 *
vcp2_ym[2 * ic + 1 + ix2];
1181 double wt3r = bc2 *
vcp2_ym[2 * ic + ix3];
1182 double wt3i = bc2 *
vcp2_ym[2 * ic + 1 + ix3];
1183 double wt4r = bc2 *
vcp2_ym[2 * ic + ix4];
1184 double wt4i = bc2 *
vcp2_ym[2 * ic + 1 + ix4];
1187 int ici = 2 * ic + 1;
1188 vp[icr + id1 + iv] += wt1r;
1189 vp[ici + id1 + iv] += wt1i;
1190 vp[icr + id2 + iv] += wt2r;
1191 vp[ici + id2 + iv] += wt2i;
1192 vp[icr + id3 + iv] += wt3r;
1193 vp[ici + id3 + iv] += wt3i;
1194 vp[icr + id4 + iv] += wt4r;
1195 vp[ici + id4 + iv] += wt4i;
1212 const int id2 =
m_Nvc;
1213 const int id3 =
m_Nvc * 2;
1214 const int id4 =
m_Nvc * 3;
1218 double *vp = v.
ptr(0);
1219 const double *wp = w.
ptr(0);
1222 int ith, nth, is, ns;
1223 set_threadtask(ith, nth, is, ns,
m_Nvol);
1229 for (
int site = is; site < ns; ++site) {
1230 int ixy = site % Nxy;
1231 int izt = site / Nxy;
1232 int iz = izt %
m_Nz;
1233 int it = izt /
m_Nz;
1234 int ixyt = ixy + Nxy * it;
1236 int in = Nvcd * site;
1237 int ix1 = Nvcd * ixyt;
1238 int ix2 = ix1 +
NVC;
1239 int ix3 = ix1 + 2 *
NVC;
1240 int ix4 = ix1 + 3 *
NVC;
1242 for (
int ic = 0; ic <
m_Nc; ++ic) {
1244 int ici = 2 * ic + 1;
1245 vt1[icr] =
m_r_s * wp[icr + id1 + in] -
m_nu_s * wp[ici + id3 + in];
1246 vt1[ici] =
m_r_s * wp[ici + id1 + in] +
m_nu_s * wp[icr + id3 + in];
1247 vt2[icr] =
m_r_s * wp[icr + id2 + in] +
m_nu_s * wp[ici + id4 + in];
1248 vt2[ici] =
m_r_s * wp[ici + id2 + in] -
m_nu_s * wp[icr + id4 + in];
1249 vt3[icr] =
m_r_s * wp[icr + id3 + in] +
m_nu_s * wp[ici + id1 + in];
1250 vt3[ici] =
m_r_s * wp[ici + id3 + in] -
m_nu_s * wp[icr + id1 + in];
1251 vt4[icr] =
m_r_s * wp[icr + id4 + in] -
m_nu_s * wp[ici + id2 + in];
1252 vt4[ici] =
m_r_s * wp[ici + id4 + in] +
m_nu_s * wp[icr + id2 + in];
1255 for (
int ivc = 0; ivc <
NVC; ++ivc) {
1256 vcp1_zp[ivc + ix1] = bc2 * vt1[ivc];
1257 vcp1_zp[ivc + ix2] = bc2 * vt2[ivc];
1258 vcp1_zp[ivc + ix3] = bc2 * vt3[ivc];
1259 vcp1_zp[ivc + ix4] = bc2 * vt4[ivc];
1273 for (
int site = is; site < ns; ++site) {
1274 int ixy = site % Nxy;
1275 int izt = site / Nxy;
1276 int iz = izt %
m_Nz;
1277 int it = izt /
m_Nz;
1278 int ixyt = ixy + Nxy * it;
1279 int nei = ixy + Nxy * (iz + 1 +
m_Nz * it);
1280 int iv = Nvcd * site;
1281 int ig =
m_Ndf * site;
1283 if (iz <
m_Nz - 1) {
1284 int in = Nvcd * nei;
1286 for (
int ic = 0; ic <
m_Nc; ++ic) {
1288 int ici = 2 * ic + 1;
1289 vt1[icr] =
m_r_s * wp[icr + id1 + in] -
m_nu_s * wp[ici + id3 + in];
1290 vt1[ici] =
m_r_s * wp[ici + id1 + in] +
m_nu_s * wp[icr + id3 + in];
1291 vt2[icr] =
m_r_s * wp[icr + id2 + in] +
m_nu_s * wp[ici + id4 + in];
1292 vt2[ici] =
m_r_s * wp[ici + id2 + in] -
m_nu_s * wp[icr + id4 + in];
1293 vt3[icr] =
m_r_s * wp[icr + id3 + in] +
m_nu_s * wp[ici + id1 + in];
1294 vt3[ici] =
m_r_s * wp[ici + id3 + in] -
m_nu_s * wp[icr + id1 + in];
1295 vt4[icr] =
m_r_s * wp[icr + id4 + in] -
m_nu_s * wp[ici + id2 + in];
1296 vt4[ici] =
m_r_s * wp[ici + id4 + in] +
m_nu_s * wp[icr + id2 + in];
1299 for (
int ic = 0; ic <
m_Nc; ++ic) {
1301 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
1302 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
1303 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
1304 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
1305 double wt3r = mult_uv_r(&up[ic2 + ig], vt3,
m_Nc);
1306 double wt3i = mult_uv_i(&up[ic2 + ig], vt3,
m_Nc);
1307 double wt4r = mult_uv_r(&up[ic2 + ig], vt4,
m_Nc);
1308 double wt4i = mult_uv_i(&up[ic2 + ig], vt4,
m_Nc);
1311 int ici = 2 * ic + 1;
1312 vp[icr + id1 + iv] += wt1r;
1313 vp[ici + id1 + iv] += wt1i;
1314 vp[icr + id2 + iv] += wt2r;
1315 vp[ici + id2 + iv] += wt2i;
1316 vp[icr + id3 + iv] += wt3r;
1317 vp[ici + id3 + iv] += wt3i;
1318 vp[icr + id4 + iv] += wt4r;
1319 vp[ici + id4 + iv] += wt4i;
1322 int ix1 = Nvcd * ixyt;
1323 int ix2 = ix1 +
NVC;
1324 int ix3 = ix1 + 2 *
NVC;
1325 int ix4 = ix1 + 3 *
NVC;
1326 for (
int ic = 0; ic <
m_Nc; ++ic) {
1328 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_zp[ix1],
m_Nc);
1329 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_zp[ix1],
m_Nc);
1330 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_zp[ix2],
m_Nc);
1331 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_zp[ix2],
m_Nc);
1332 double wt3r = mult_uv_r(&up[ic2 + ig], &
vcp2_zp[ix3],
m_Nc);
1333 double wt3i = mult_uv_i(&up[ic2 + ig], &
vcp2_zp[ix3],
m_Nc);
1334 double wt4r = mult_uv_r(&up[ic2 + ig], &
vcp2_zp[ix4],
m_Nc);
1335 double wt4i = mult_uv_i(&up[ic2 + ig], &
vcp2_zp[ix4],
m_Nc);
1338 int ici = 2 * ic + 1;
1339 vp[icr + id1 + iv] += wt1r;
1340 vp[ici + id1 + iv] += wt1i;
1341 vp[icr + id2 + iv] += wt2r;
1342 vp[ici + id2 + iv] += wt2i;
1343 vp[icr + id3 + iv] += wt3r;
1344 vp[ici + id3 + iv] += wt3i;
1345 vp[icr + id4 + iv] += wt4r;
1346 vp[ici + id4 + iv] += wt4i;
1363 const int id2 =
m_Nvc;
1364 const int id3 =
m_Nvc * 2;
1365 const int id4 =
m_Nvc * 3;
1369 double *vp = v.
ptr(0);
1370 const double *wp = w.
ptr(0);
1373 int ith, nth, is, ns;
1374 set_threadtask(ith, nth, is, ns,
m_Nvol);
1380 for (
int site = is; site < ns; ++site) {
1381 int ixy = site % Nxy;
1382 int izt = site / Nxy;
1383 int iz = izt %
m_Nz;
1384 int it = izt /
m_Nz;
1385 int ixyt = ixy + Nxy * it;
1386 if (iz ==
m_Nz - 1) {
1387 int in = Nvcd * site;
1388 int ig =
m_Ndf * site;
1389 int ix1 = Nvcd * ixyt;
1390 int ix2 = ix1 +
NVC;
1391 int ix3 = ix1 + 2 *
NVC;
1392 int ix4 = ix1 + 3 *
NVC;
1394 for (
int ic = 0; ic <
m_Nc; ++ic) {
1396 int ici = 2 * ic + 1;
1397 vt1[icr] =
m_r_s * wp[icr + id1 + in] +
m_nu_s * wp[ici + id3 + in];
1398 vt1[ici] =
m_r_s * wp[ici + id1 + in] -
m_nu_s * wp[icr + id3 + in];
1399 vt2[icr] =
m_r_s * wp[icr + id2 + in] -
m_nu_s * wp[ici + id4 + in];
1400 vt2[ici] =
m_r_s * wp[ici + id2 + in] +
m_nu_s * wp[icr + id4 + in];
1401 vt3[icr] =
m_r_s * wp[icr + id3 + in] -
m_nu_s * wp[ici + id1 + in];
1402 vt3[ici] =
m_r_s * wp[ici + id3 + in] +
m_nu_s * wp[icr + id1 + in];
1403 vt4[icr] =
m_r_s * wp[icr + id4 + in] +
m_nu_s * wp[ici + id2 + in];
1404 vt4[ici] =
m_r_s * wp[ici + id4 + in] -
m_nu_s * wp[icr + id2 + in];
1407 for (
int ic = 0; ic <
m_Nc; ++ic) {
1410 int ici = 2 * ic + 1;
1411 vcp1_zm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1412 vcp1_zm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1413 vcp1_zm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1414 vcp1_zm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1415 vcp1_zm[icr + ix3] = mult_udagv_r(&up[ic2 + ig], vt3,
m_Nc);
1416 vcp1_zm[ici + ix3] = mult_udagv_i(&up[ic2 + ig], vt3,
m_Nc);
1417 vcp1_zm[icr + ix4] = mult_udagv_r(&up[ic2 + ig], vt4,
m_Nc);
1418 vcp1_zm[ici + ix4] = mult_udagv_i(&up[ic2 + ig], vt4,
m_Nc);
1432 for (
int site = is; site < ns; ++site) {
1433 int ixy = site % Nxy;
1434 int izt = site / Nxy;
1435 int iz = izt %
m_Nz;
1436 int it = izt /
m_Nz;
1437 int ixyt = ixy + Nxy * it;
1438 int nei = ixy + Nxy * (iz - 1 +
m_Nz * it);
1439 int iv = Nvcd * site;
1442 int ig =
m_Ndf * nei;
1443 int in = Nvcd * nei;
1445 for (
int ic = 0; ic <
m_Nc; ++ic) {
1447 int ici = 2 * ic + 1;
1448 vt1[icr] =
m_r_s * wp[icr + id1 + in] +
m_nu_s * wp[ici + id3 + in];
1449 vt1[ici] =
m_r_s * wp[ici + id1 + in] -
m_nu_s * wp[icr + id3 + in];
1450 vt2[icr] =
m_r_s * wp[icr + id2 + in] -
m_nu_s * wp[ici + id4 + in];
1451 vt2[ici] =
m_r_s * wp[ici + id2 + in] +
m_nu_s * wp[icr + id4 + in];
1452 vt3[icr] =
m_r_s * wp[icr + id3 + in] -
m_nu_s * wp[ici + id1 + in];
1453 vt3[ici] =
m_r_s * wp[ici + id3 + in] +
m_nu_s * wp[icr + id1 + in];
1454 vt4[icr] =
m_r_s * wp[icr + id4 + in] +
m_nu_s * wp[ici + id2 + in];
1455 vt4[ici] =
m_r_s * wp[ici + id4 + in] -
m_nu_s * wp[icr + id2 + in];
1457 for (
int ic = 0; ic <
m_Nc; ++ic) {
1459 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1460 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1461 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1462 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1463 double wt3r = mult_udagv_r(&up[ic2 + ig], vt3,
m_Nc);
1464 double wt3i = mult_udagv_i(&up[ic2 + ig], vt3,
m_Nc);
1465 double wt4r = mult_udagv_r(&up[ic2 + ig], vt4,
m_Nc);
1466 double wt4i = mult_udagv_i(&up[ic2 + ig], vt4,
m_Nc);
1469 int ici = 2 * ic + 1;
1470 vp[icr + id1 + iv] += wt1r;
1471 vp[ici + id1 + iv] += wt1i;
1472 vp[icr + id2 + iv] += wt2r;
1473 vp[ici + id2 + iv] += wt2i;
1474 vp[icr + id3 + iv] += wt3r;
1475 vp[ici + id3 + iv] += wt3i;
1476 vp[icr + id4 + iv] += wt4r;
1477 vp[ici + id4 + iv] += wt4i;
1480 int ix1 = Nvcd * ixyt;
1481 int ix2 = ix1 +
NVC;
1482 int ix3 = ix1 + 2 *
NVC;
1483 int ix4 = ix1 + 3 *
NVC;
1484 for (
int ic = 0; ic <
m_Nc; ++ic) {
1485 double wt1r = bc2 *
vcp2_zm[2 * ic + ix1];
1486 double wt1i = bc2 *
vcp2_zm[2 * ic + 1 + ix1];
1487 double wt2r = bc2 *
vcp2_zm[2 * ic + ix2];
1488 double wt2i = bc2 *
vcp2_zm[2 * ic + 1 + ix2];
1489 double wt3r = bc2 *
vcp2_zm[2 * ic + ix3];
1490 double wt3i = bc2 *
vcp2_zm[2 * ic + 1 + ix3];
1491 double wt4r = bc2 *
vcp2_zm[2 * ic + ix4];
1492 double wt4i = bc2 *
vcp2_zm[2 * ic + 1 + ix4];
1495 int ici = 2 * ic + 1;
1496 vp[icr + id1 + iv] += wt1r;
1497 vp[ici + id1 + iv] += wt1i;
1498 vp[icr + id2 + iv] += wt2r;
1499 vp[ici + id2 + iv] += wt2i;
1500 vp[icr + id3 + iv] += wt3r;
1501 vp[ici + id3 + iv] += wt3i;
1502 vp[icr + id4 + iv] += wt4r;
1503 vp[ici + id4 + iv] += wt4i;
1517 const int Nvc2 =
m_Nvc * 2;
1521 const int id2 =
m_Nvc;
1522 const int id3 =
m_Nvc * 2;
1523 const int id4 =
m_Nvc * 3;
1527 double *vp = v.
ptr(0);
1528 const double *wp = w.
ptr(0);
1531 int ith, nth, is, ns;
1532 set_threadtask(ith, nth, is, ns,
m_Nvol);
1538 for (
int site = is; site < ns; ++site) {
1539 int ixyz = site % Nxyz;
1540 int it = site / Nxyz;
1542 int in = Nvcd * site;
1543 int ix1 = Nvc2 * ixyz;
1544 int ix2 = ix1 +
NVC;
1545 double vt1[
NVC], vt2[
NVC];
1546 set_sp2_tp_dirac(vt1, vt2, &wp[in],
m_Nc);
1547 for (
int ivc = 0; ivc <
NVC; ++ivc) {
1548 vcp1_tp[ivc + ix1] = bc2 * vt1[ivc];
1549 vcp1_tp[ivc + ix2] = bc2 * vt2[ivc];
1563 for (
int site = is; site < ns; ++site) {
1564 int ixyz = site % Nxyz;
1565 int it = site / Nxyz;
1566 int nei = ixyz + Nxyz * (it + 1);
1567 int iv = Nvcd * site;
1568 int ig =
m_Ndf * site;
1570 if (it <
m_Nt - 1) {
1571 int in = Nvcd * nei;
1572 double vt1[
NVC], vt2[
NVC];
1573 set_sp2_tp_dirac(vt1, vt2, &wp[in],
m_Nc);
1574 for (
int ic = 0; ic <
m_Nc; ++ic) {
1576 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
1577 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
1578 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
1579 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
1580 set_sp4_tp_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1583 int ix1 = Nvc2 * ixyz;
1584 int ix2 = ix1 +
NVC;
1585 for (
int ic = 0; ic <
m_Nc; ++ic) {
1587 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_tp[ix1],
m_Nc);
1588 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_tp[ix1],
m_Nc);
1589 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_tp[ix2],
m_Nc);
1590 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_tp[ix2],
m_Nc);
1591 set_sp4_tp_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1605 const int Nvc2 =
m_Nvc * 2;
1609 const int id2 =
m_Nvc;
1610 const int id3 =
m_Nvc * 2;
1611 const int id4 =
m_Nvc * 3;
1615 double *vp = v.
ptr(0);
1616 const double *wp = w.
ptr(0);
1619 int ith, nth, is, ns;
1620 set_threadtask(ith, nth, is, ns,
m_Nvol);
1626 for (
int site = is; site < ns; ++site) {
1627 int ixyz = site % Nxyz;
1628 int it = site / Nxyz;
1629 if (it ==
m_Nt - 1) {
1630 int in = Nvcd * site;
1631 int ig =
m_Ndf * site;
1632 int ix1 = Nvc2 * ixyz;
1633 int ix2 = ix1 +
NVC;
1635 double vt1[
NVC], vt2[
NVC];
1636 set_sp2_tm_dirac(vt1, vt2, &wp[in],
m_Nc);
1637 for (
int ic = 0; ic <
m_Nc; ++ic) {
1640 int ici = 2 * ic + 1;
1641 vcp1_tm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1642 vcp1_tm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1643 vcp1_tm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1644 vcp1_tm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1658 for (
int site = is; site < ns; ++site) {
1659 int ixyz = site % Nxyz;
1660 int it = site / Nxyz;
1661 int nei = ixyz + Nxyz * (it - 1);
1662 int iv = Nvcd * site;
1665 int ig =
m_Ndf * nei;
1666 int in = Nvcd * nei;
1667 double vt1[
NVC], vt2[
NVC];
1668 set_sp2_tm_dirac(vt1, vt2, &wp[in],
m_Nc);
1669 for (
int ic = 0; ic <
m_Nc; ++ic) {
1671 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1672 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1673 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1674 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1675 set_sp4_tm_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1678 int ix1 = Nvc2 * ixyz;
1679 int ix2 = ix1 +
NVC;
1680 for (
int ic = 0; ic <
m_Nc; ++ic) {
1682 int ici = 2 * ic + 1;
1683 double wt1r = bc2 *
vcp2_tm[icr + ix1];
1684 double wt1i = bc2 *
vcp2_tm[ici + ix1];
1685 double wt2r = bc2 *
vcp2_tm[icr + ix2];
1686 double wt2i = bc2 *
vcp2_tm[ici + ix2];
1687 set_sp4_tm_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1701 const int Nvc2 =
m_Nvc * 2;
1705 const int id2 =
m_Nvc;
1706 const int id3 =
m_Nvc * 2;
1707 const int id4 =
m_Nvc * 3;
1711 double *vp = v.
ptr(0);
1712 const double *wp = w.
ptr(0);
1715 int ith, nth, is, ns;
1716 set_threadtask(ith, nth, is, ns,
m_Nvol);
1722 for (
int site = is; site < ns; ++site) {
1723 int ixyz = site % Nxyz;
1724 int it = site / Nxyz;
1726 int in = Nvcd * site;
1727 int ix1 = Nvc2 * ixyz;
1728 int ix2 = ix1 +
NVC;
1729 double vt1[
NVC], vt2[
NVC];
1730 set_sp2_tp_chiral(vt1, vt2, &wp[in],
m_Nc);
1731 for (
int ivc = 0; ivc <
NVC; ++ivc) {
1732 vcp1_tp[ivc + ix1] = bc2 * vt1[ivc];
1733 vcp1_tp[ivc + ix2] = bc2 * vt2[ivc];
1747 for (
int site = is; site < ns; ++site) {
1748 int ixyz = site % Nxyz;
1749 int it = site / Nxyz;
1750 int nei = ixyz + Nxyz * (it + 1);
1751 int iv = Nvcd * site;
1752 int ig =
m_Ndf * site;
1754 if (it <
m_Nt - 1) {
1755 int in = Nvcd * nei;
1756 double vt1[
NVC], vt2[
NVC];
1757 set_sp2_tp_chiral(vt1, vt2, &wp[in],
m_Nc);
1758 for (
int ic = 0; ic <
m_Nc; ++ic) {
1760 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
1761 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
1762 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
1763 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
1764 set_sp4_tp_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1767 int ix1 = Nvc2 * ixyz;
1768 int ix2 = ix1 +
NVC;
1769 for (
int ic = 0; ic <
m_Nc; ++ic) {
1771 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_tp[ix1],
m_Nc);
1772 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_tp[ix1],
m_Nc);
1773 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_tp[ix2],
m_Nc);
1774 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_tp[ix2],
m_Nc);
1775 set_sp4_tp_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1789 const int Nvc2 =
m_Nvc * 2;
1793 const int id2 =
m_Nvc;
1794 const int id3 =
m_Nvc * 2;
1795 const int id4 =
m_Nvc * 3;
1799 double *vp = v.
ptr(0);
1800 const double *wp = w.
ptr(0);
1803 int ith, nth, is, ns;
1804 set_threadtask(ith, nth, is, ns,
m_Nvol);
1810 for (
int site = is; site < ns; ++site) {
1811 int ixyz = site % Nxyz;
1812 int it = site / Nxyz;
1813 if (it ==
m_Nt - 1) {
1814 int in = Nvcd * site;
1815 int ig =
m_Ndf * site;
1816 int ix1 = Nvc2 * ixyz;
1817 int ix2 = ix1 +
NVC;
1819 double vt1[
NVC], vt2[
NVC];
1820 set_sp2_tm_chiral(vt1, vt2, &wp[in],
m_Nc);
1821 for (
int ic = 0; ic <
m_Nc; ++ic) {
1824 int ici = 2 * ic + 1;
1825 vcp1_tm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1826 vcp1_tm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1827 vcp1_tm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1828 vcp1_tm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1842 for (
int site = is; site < ns; ++site) {
1843 int ixyz = site % Nxyz;
1844 int it = site / Nxyz;
1845 int nei = ixyz + Nxyz * (it - 1);
1846 int iv = Nvcd * site;
1849 int ig =
m_Ndf * nei;
1850 int in = Nvcd * nei;
1851 double vt1[
NVC], vt2[
NVC];
1852 set_sp2_tm_chiral(vt1, vt2, &wp[in],
m_Nc);
1853 for (
int ic = 0; ic <
m_Nc; ++ic) {
1855 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1856 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1857 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1858 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1859 set_sp4_tm_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1862 int ix1 = Nvc2 * ixyz;
1863 int ix2 = ix1 +
NVC;
1864 for (
int ic = 0; ic <
m_Nc; ++ic) {
1865 double wt1r = bc2 *
vcp2_tm[2 * ic + ix1];
1866 double wt1i = bc2 *
vcp2_tm[2 * ic + 1 + ix1];
1867 double wt2r = bc2 *
vcp2_tm[2 * ic + ix2];
1868 double wt2i = bc2 *
vcp2_tm[2 * ic + 1 + ix2];
1869 set_sp4_tm_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1893 }
else if (
m_repr ==
"Chiral") {
1901 double gflop = flop_site * (Nvol * (NPE / 1.0e+9));
1903 if ((
m_mode ==
"DdagD") || (
m_mode ==
"DDdag")) gflop *= 2;