18 #if defined USE_GROUP_SU3
20 #elif defined USE_GROUP_SU2
22 #elif defined USE_GROUP_SU_N
29 #ifdef USE_FACTORY_AUTOREGISTER
31 bool init = Fopr_Wilson::register_factory();
58 vout.
crucial(
"Error at %s: unsupported gamma-matrix type: %s\n",
98 std::string imple_gauge = imple_Nc();
100 imple_gauge.c_str());
193 vout.
crucial(
"Error at %s: input parameter not found.\n",
204 const std::vector<int> bc)
206 assert(bc.size() ==
m_Ndim);
215 for (
int idir = 0; idir <
m_Ndim; ++idir) {
227 for (
int mu = 0; mu <
m_Ndim; ++mu) {
258 if (ith == 0)
m_mode = mode;
268 }
else if (
m_mode ==
"Ddag") {
270 }
else if (
m_mode ==
"DdagD") {
272 }
else if (
m_mode ==
"DDdag") {
274 }
else if (
m_mode ==
"H") {
289 }
else if (
m_mode ==
"Ddag") {
291 }
else if (
m_mode ==
"DdagD") {
293 }
else if (
m_mode ==
"DDdag") {
295 }
else if (
m_mode ==
"H") {
307 const std::string mode)
311 }
else if (mode ==
"Ddag") {
313 }
else if (mode ==
"DdagD") {
315 }
else if (mode ==
"DDdag") {
317 }
else if (mode ==
"H") {
329 const std::string mode)
333 }
else if (mode ==
"Ddag") {
335 }
else if (mode ==
"DdagD") {
337 }
else if (mode ==
"DDdag") {
339 }
else if (mode ==
"H") {
354 }
else if (mu == 1) {
356 }
else if (mu == 2) {
358 }
else if (mu == 3) {
377 }
else if (mu == 1) {
379 }
else if (mu == 2) {
381 }
else if (mu == 3) {
400 }
else if (
m_repr ==
"Chiral") {
412 }
else if (
m_repr ==
"Chiral") {
421 const Field& w,
const int ex2)
425 }
else if (
m_repr ==
"Chiral") {
470 const Field& w,
const int ex2)
473 double *vp = v.
ptr(Ninvol * ex1);
474 const double *wp = w.
ptr(Ninvol * ex2);
475 const double *up =
m_U->
ptr(0);
477 int ith, nth, is, ns;
478 set_threadtask(ith, nth, is, ns,
m_Nvol);
480 int Nvc2 =
m_Nvc * 2;
488 for (
int site = is; site < ns; ++site) {
489 int ix = site %
m_Nx;
490 int iyzt = site /
m_Nx;
491 int iy = iyzt %
m_Ny;
492 int izt = iyzt /
m_Ny;
496 int ixy = ix +
m_Nx * iy;
497 int ixyz = ixy + Nxy * iz;
498 int ixyt = ixy + Nxy * it;
499 int ixzt = ix +
m_Nx * izt;
501 int in = Nvcd * site;
504 int ix1 = Nvc2 * iyzt;
505 int ix2 = ix1 +
m_Nvc;
507 double vt1[
NVC], vt2[
NVC];
508 set_sp2_xp(vt1, vt2, &wp[in],
m_Nc);
509 for (
int ivc = 0; ivc <
NVC; ++ivc) {
510 vcp1_xp[ivc + ix1] = bc2 * vt1[ivc];
511 vcp1_xp[ivc + ix2] = bc2 * vt2[ivc];
515 if (ix ==
m_Nx - 1) {
517 int ix1 = Nvc2 * iyzt;
519 double vt1[
NVC], vt2[
NVC];
520 set_sp2_xm(vt1, vt2, &wp[in],
m_Nc);
521 for (
int ic = 0; ic <
m_Nc; ++ic) {
524 int ici = 2 * ic + 1;
525 vcp1_xm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
526 vcp1_xm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
527 vcp1_xm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
528 vcp1_xm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
533 int ix1 = Nvc2 * ixzt;
536 double vt1[
NVC], vt2[
NVC];
537 set_sp2_yp(vt1, vt2, &wp[in],
m_Nc);
538 for (
int ivc = 0; ivc <
NVC; ++ivc) {
539 vcp1_yp[ivc + ix1] = bc2 * vt1[ivc];
540 vcp1_yp[ivc + ix2] = bc2 * vt2[ivc];
544 if (iy ==
m_Ny - 1) {
546 int ix1 = Nvc2 * ixzt;
548 double vt1[
NVC], vt2[
NVC];
549 set_sp2_ym(vt1, vt2, &wp[in],
m_Nc);
550 for (
int ic = 0; ic <
m_Nc; ++ic) {
553 int ici = 2 * ic + 1;
554 vcp1_ym[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
555 vcp1_ym[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
556 vcp1_ym[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
557 vcp1_ym[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
562 int ix1 = Nvc2 * ixyt;
565 double vt1[
NVC], vt2[
NVC];
566 set_sp2_zp(vt1, vt2, &wp[in],
m_Nc);
567 for (
int ivc = 0; ivc <
NVC; ++ivc) {
568 vcp1_zp[ivc + ix1] = bc2 * vt1[ivc];
569 vcp1_zp[ivc + ix2] = bc2 * vt2[ivc];
573 if (iz ==
m_Nz - 1) {
575 int ix1 = Nvc2 * ixyt;
577 double vt1[
NVC], vt2[
NVC];
578 set_sp2_zm(vt1, vt2, &wp[in],
m_Nc);
579 for (
int ic = 0; ic <
m_Nc; ++ic) {
582 int ici = 2 * ic + 1;
583 vcp1_zm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
584 vcp1_zm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
585 vcp1_zm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
586 vcp1_zm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
591 int ix1 = Nvc2 * ixyz;
594 double vt1[
NVC], vt2[
NVC];
595 set_sp2_tp_dirac(vt1, vt2, &wp[in],
m_Nc);
596 for (
int ivc = 0; ivc <
NVC; ++ivc) {
597 vcp1_tp[ivc + ix1] = bc2 * vt1[ivc];
598 vcp1_tp[ivc + ix2] = bc2 * vt2[ivc];
602 if (it ==
m_Nt - 1) {
604 int ix1 = Nvc2 * ixyz;
606 double vt1[
NVC], vt2[
NVC];
607 set_sp2_tm_dirac(vt1, vt2, &wp[in],
m_Nc);
608 for (
int ic = 0; ic <
m_Nc; ++ic) {
611 int ici = 2 * ic + 1;
612 vcp1_tm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
613 vcp1_tm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
614 vcp1_tm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
615 vcp1_tm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
642 for (
int site = is; site < ns; ++site) {
643 int ix = site %
m_Nx;
644 int iyzt = site /
m_Nx;
645 int iy = iyzt %
m_Ny;
646 int izt = iyzt /
m_Ny;
650 int ixy = site % Nxy;
651 int ixyz = ixy + Nxy * iz;
652 int ixyt = ixy + Nxy * it;
653 int ixzt = ix +
m_Nx * izt;
655 int iv = Nvcd * site;
657 for (
int ivcd = 0; ivcd < Nvcd; ++ivcd) {
662 int nei = ix + 1 +
m_Nx * iyzt;
665 double vt1[
NVC], vt2[
NVC];
666 set_sp2_xp(vt1, vt2, &wp[in],
m_Nc);
667 for (
int ic = 0; ic <
m_Nc; ++ic) {
669 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
670 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
671 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
672 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
673 set_sp4_xp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
676 int ix1 = Nvc2 * iyzt;
679 for (
int ic = 0; ic <
m_Nc; ++ic) {
681 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_xp[ix1],
m_Nc);
682 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_xp[ix1],
m_Nc);
683 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_xp[ix2],
m_Nc);
684 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_xp[ix2],
m_Nc);
685 set_sp4_xp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
690 int nei = ix - 1 +
m_Nx * iyzt;
693 double vt1[
NVC], vt2[
NVC];
694 set_sp2_xm(vt1, vt2, &wp[in],
m_Nc);
695 for (
int ic = 0; ic <
m_Nc; ++ic) {
697 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
698 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
699 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
700 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
701 set_sp4_xm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
704 int ix1 = Nvc2 * iyzt;
707 for (
int ic = 0; ic <
m_Nc; ++ic) {
708 double wt1r = bc2 *
vcp2_xm[2 * ic + ix1];
709 double wt1i = bc2 *
vcp2_xm[2 * ic + 1 + ix1];
710 double wt2r = bc2 *
vcp2_xm[2 * ic + ix2];
711 double wt2i = bc2 *
vcp2_xm[2 * ic + 1 + ix2];
712 set_sp4_xm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
717 int nei = ix +
m_Nx * (iy + 1 +
m_Ny * izt);
720 double vt1[
NVC], vt2[
NVC];
721 set_sp2_yp(vt1, vt2, &wp[in],
m_Nc);
722 for (
int ic = 0; ic <
m_Nc; ++ic) {
724 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
725 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
726 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
727 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
728 set_sp4_yp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
732 int ix1 = Nvc2 * ixzt;
734 for (
int ic = 0; ic <
m_Nc; ++ic) {
736 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_yp[ix1],
m_Nc);
737 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_yp[ix1],
m_Nc);
738 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_yp[ix2],
m_Nc);
739 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_yp[ix2],
m_Nc);
740 set_sp4_yp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
745 int nei = ix +
m_Nx * (iy - 1 +
m_Ny * izt);
748 double vt1[
NVC], vt2[
NVC];
749 set_sp2_ym(vt1, vt2, &wp[in],
m_Nc);
750 for (
int ic = 0; ic <
m_Nc; ++ic) {
752 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
753 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
754 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
755 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
756 set_sp4_ym(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
759 int ix1 = Nvc2 * ixzt;
762 for (
int ic = 0; ic <
m_Nc; ++ic) {
763 double wt1r = bc2 *
vcp2_ym[2 * ic + ix1];
764 double wt1i = bc2 *
vcp2_ym[2 * ic + 1 + ix1];
765 double wt2r = bc2 *
vcp2_ym[2 * ic + ix2];
766 double wt2i = bc2 *
vcp2_ym[2 * ic + 1 + ix2];
767 set_sp4_ym(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
772 int nei = ixy + Nxy * (iz + 1 +
m_Nz * it);
775 double vt1[
NVC], vt2[
NVC];
776 set_sp2_zp(vt1, vt2, &wp[in],
m_Nc);
777 for (
int ic = 0; ic <
m_Nc; ++ic) {
779 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
780 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
781 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
782 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
783 set_sp4_zp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
787 int ix1 = Nvc2 * ixyt;
789 for (
int ic = 0; ic <
m_Nc; ++ic) {
791 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_zp[ix1],
m_Nc);
792 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_zp[ix1],
m_Nc);
793 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_zp[ix2],
m_Nc);
794 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_zp[ix2],
m_Nc);
795 set_sp4_zp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
800 int nei = ixy + Nxy * (iz - 1 +
m_Nz * it);
803 double vt1[
NVC], vt2[
NVC];
804 set_sp2_zm(vt1, vt2, &wp[in],
m_Nc);
805 for (
int ic = 0; ic <
m_Nc; ++ic) {
807 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
808 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
809 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
810 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
811 set_sp4_zm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
814 int ix1 = Nvc2 * ixyt;
817 for (
int ic = 0; ic <
m_Nc; ++ic) {
818 double wt1r = bc2 *
vcp2_zm[2 * ic + ix1];
819 double wt1i = bc2 *
vcp2_zm[2 * ic + 1 + ix1];
820 double wt2r = bc2 *
vcp2_zm[2 * ic + ix2];
821 double wt2i = bc2 *
vcp2_zm[2 * ic + 1 + ix2];
822 set_sp4_zm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
827 int nei = ixyz + Nxyz * (it + 1);
830 double vt1[
NVC], vt2[
NVC];
831 set_sp2_tp_dirac(vt1, vt2, &wp[in],
m_Nc);
832 for (
int ic = 0; ic <
m_Nc; ++ic) {
834 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
835 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
836 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
837 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
838 set_sp4_tp_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
842 int ix1 = Nvc2 * ixyz;
844 for (
int ic = 0; ic <
m_Nc; ++ic) {
846 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_tp[ix1],
m_Nc);
847 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_tp[ix1],
m_Nc);
848 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_tp[ix2],
m_Nc);
849 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_tp[ix2],
m_Nc);
850 set_sp4_tp_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
855 int nei = ixyz + Nxyz * (it - 1);
858 double vt1[
NVC], vt2[
NVC];
859 set_sp2_tm_dirac(vt1, vt2, &wp[in],
m_Nc);
860 for (
int ic = 0; ic <
m_Nc; ++ic) {
862 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
863 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
864 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
865 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
866 set_sp4_tm_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
869 int ix1 = Nvc2 * ixyz;
872 for (
int ic = 0; ic <
m_Nc; ++ic) {
874 int ici = 2 * ic + 1;
875 double wt1r = bc2 *
vcp2_tm[icr + ix1];
876 double wt1i = bc2 *
vcp2_tm[ici + ix1];
877 double wt2r = bc2 *
vcp2_tm[icr + ix2];
878 double wt2i = bc2 *
vcp2_tm[ici + ix2];
879 set_sp4_tm_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
883 for (
int ivcd = 0; ivcd < Nvcd; ++ivcd) {
884 vp[ivcd + iv] = -
m_kappa * vp[ivcd + iv] + wp[ivcd + iv];
894 const Field& w,
const int ex2)
897 double *vp = v.
ptr(Ninvol * ex1);
898 const double *wp = w.
ptr(Ninvol * ex2);
899 const double *up =
m_U->
ptr(0);
901 int ith, nth, is, ns;
902 set_threadtask(ith, nth, is, ns,
m_Nvol);
904 int Nvc2 =
m_Nvc * 2;
912 for (
int site = is; site < ns; ++site) {
913 int ix = site %
m_Nx;
914 int iyzt = site /
m_Nx;
915 int iy = iyzt %
m_Ny;
916 int izt = iyzt /
m_Ny;
920 int ixy = ix +
m_Nx * iy;
921 int ixyz = ixy + Nxy * iz;
922 int ixyt = ixy + Nxy * it;
923 int ixzt = ix +
m_Nx * izt;
925 int in = Nvcd * site;
928 int ix1 = Nvc2 * iyzt;
931 double vt1[
NVC], vt2[
NVC];
932 set_sp2_xp(vt1, vt2, &wp[in],
m_Nc);
933 for (
int ivc = 0; ivc <
NVC; ++ivc) {
934 vcp1_xp[ivc + ix1] = bc2 * vt1[ivc];
935 vcp1_xp[ivc + ix2] = bc2 * vt2[ivc];
939 if (ix ==
m_Nx - 1) {
941 int ix1 = Nvc2 * iyzt;
943 double vt1[
NVC], vt2[
NVC];
944 set_sp2_xm(vt1, vt2, &wp[in],
m_Nc);
945 for (
int ic = 0; ic <
m_Nc; ++ic) {
948 int ici = 2 * ic + 1;
949 vcp1_xm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
950 vcp1_xm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
951 vcp1_xm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
952 vcp1_xm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
957 int ix1 = Nvc2 * ixzt;
960 double vt1[
NVC], vt2[
NVC];
961 set_sp2_yp(vt1, vt2, &wp[in],
m_Nc);
962 for (
int ivc = 0; ivc <
NVC; ++ivc) {
963 vcp1_yp[ivc + ix1] = bc2 * vt1[ivc];
964 vcp1_yp[ivc + ix2] = bc2 * vt2[ivc];
968 if (iy ==
m_Ny - 1) {
970 int ix1 = Nvc2 * ixzt;
972 double vt1[
NVC], vt2[
NVC];
973 set_sp2_ym(vt1, vt2, &wp[in],
m_Nc);
974 for (
int ic = 0; ic <
m_Nc; ++ic) {
977 int ici = 2 * ic + 1;
978 vcp1_ym[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
979 vcp1_ym[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
980 vcp1_ym[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
981 vcp1_ym[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
986 int ix1 = Nvc2 * ixyt;
989 double vt1[
NVC], vt2[
NVC];
990 set_sp2_zp(vt1, vt2, &wp[in],
m_Nc);
991 for (
int ivc = 0; ivc <
NVC; ++ivc) {
992 vcp1_zp[ivc + ix1] = bc2 * vt1[ivc];
993 vcp1_zp[ivc + ix2] = bc2 * vt2[ivc];
997 if (iz ==
m_Nz - 1) {
999 int ix1 = Nvc2 * ixyt;
1000 int ix2 = ix1 +
NVC;
1001 double vt1[
NVC], vt2[
NVC];
1002 set_sp2_zm(vt1, vt2, &wp[in],
m_Nc);
1003 for (
int ic = 0; ic <
m_Nc; ++ic) {
1006 int ici = 2 * ic + 1;
1007 vcp1_zm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1008 vcp1_zm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1009 vcp1_zm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1010 vcp1_zm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1015 int ix1 = Nvc2 * ixyz;
1016 int ix2 = ix1 +
NVC;
1018 double vt1[
NVC], vt2[
NVC];
1019 set_sp2_tp_chiral(vt1, vt2, &wp[in],
m_Nc);
1020 for (
int ivc = 0; ivc <
NVC; ++ivc) {
1021 vcp1_tp[ivc + ix1] = bc2 * vt1[ivc];
1022 vcp1_tp[ivc + ix2] = bc2 * vt2[ivc];
1026 if (it ==
m_Nt - 1) {
1028 int ix1 = Nvc2 * ixyz;
1029 int ix2 = ix1 +
NVC;
1030 double vt1[
NVC], vt2[
NVC];
1031 set_sp2_tm_chiral(vt1, vt2, &wp[in],
m_Nc);
1032 for (
int ic = 0; ic <
m_Nc; ++ic) {
1035 int ici = 2 * ic + 1;
1036 vcp1_tm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1037 vcp1_tm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1038 vcp1_tm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1039 vcp1_tm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1066 for (
int site = is; site < ns; ++site) {
1067 int ix = site %
m_Nx;
1068 int iyzt = site /
m_Nx;
1069 int iy = iyzt %
m_Ny;
1070 int izt = iyzt /
m_Ny;
1071 int iz = izt %
m_Nz;
1072 int it = izt /
m_Nz;
1074 int ixy = site % Nxy;
1075 int ixyz = ixy + Nxy * iz;
1076 int ixyt = ixy + Nxy * it;
1077 int ixzt = ix +
m_Nx * izt;
1079 int iv = Nvcd * site;
1081 for (
int ivcd = 0; ivcd < Nvcd; ++ivcd) {
1082 vp[ivcd + iv] = 0.0;
1085 if (ix <
m_Nx - 1) {
1086 int nei = ix + 1 +
m_Nx * iyzt;
1087 int in = Nvcd * nei;
1089 double vt1[
NVC], vt2[
NVC];
1090 set_sp2_xp(vt1, vt2, &wp[in],
m_Nc);
1091 for (
int ic = 0; ic <
m_Nc; ++ic) {
1093 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
1094 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
1095 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
1096 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
1097 set_sp4_xp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1100 int ix1 = Nvc2 * iyzt;
1101 int ix2 = ix1 +
NVC;
1103 for (
int ic = 0; ic <
m_Nc; ++ic) {
1105 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_xp[ix1],
m_Nc);
1106 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_xp[ix1],
m_Nc);
1107 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_xp[ix2],
m_Nc);
1108 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_xp[ix2],
m_Nc);
1109 set_sp4_xp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1114 int nei = ix - 1 +
m_Nx * iyzt;
1116 int in = Nvcd * nei;
1117 double vt1[
NVC], vt2[
NVC];
1118 set_sp2_xm(vt1, vt2, &wp[in],
m_Nc);
1119 for (
int ic = 0; ic <
m_Nc; ++ic) {
1121 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1122 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1123 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1124 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1125 set_sp4_xm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1128 int ix1 = Nvc2 * iyzt;
1129 int ix2 = ix1 +
NVC;
1131 for (
int ic = 0; ic <
m_Nc; ++ic) {
1132 double wt1r = bc2 *
vcp2_xm[2 * ic + ix1];
1133 double wt1i = bc2 *
vcp2_xm[2 * ic + 1 + ix1];
1134 double wt2r = bc2 *
vcp2_xm[2 * ic + ix2];
1135 double wt2i = bc2 *
vcp2_xm[2 * ic + 1 + ix2];
1136 set_sp4_xm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1140 if (iy <
m_Ny - 1) {
1141 int nei = ix +
m_Nx * (iy + 1 +
m_Ny * izt);
1143 int in = Nvcd * nei;
1144 double vt1[
NVC], vt2[
NVC];
1145 set_sp2_yp(vt1, vt2, &wp[in],
m_Nc);
1146 for (
int ic = 0; ic <
m_Nc; ++ic) {
1148 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
1149 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
1150 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
1151 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
1152 set_sp4_yp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1156 int ix1 = Nvc2 * ixzt;
1157 int ix2 = ix1 +
NVC;
1158 for (
int ic = 0; ic <
m_Nc; ++ic) {
1160 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_yp[ix1],
m_Nc);
1161 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_yp[ix1],
m_Nc);
1162 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_yp[ix2],
m_Nc);
1163 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_yp[ix2],
m_Nc);
1164 set_sp4_yp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1169 int nei = ix +
m_Nx * (iy - 1 +
m_Ny * izt);
1171 int in = Nvcd * nei;
1172 double vt1[
NVC], vt2[
NVC];
1173 set_sp2_ym(vt1, vt2, &wp[in],
m_Nc);
1174 for (
int ic = 0; ic <
m_Nc; ++ic) {
1176 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1177 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1178 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1179 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1180 set_sp4_ym(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1183 int ix1 = Nvc2 * ixzt;
1184 int ix2 = ix1 +
NVC;
1186 for (
int ic = 0; ic <
m_Nc; ++ic) {
1187 double wt1r = bc2 *
vcp2_ym[2 * ic + ix1];
1188 double wt1i = bc2 *
vcp2_ym[2 * ic + 1 + ix1];
1189 double wt2r = bc2 *
vcp2_ym[2 * ic + ix2];
1190 double wt2i = bc2 *
vcp2_ym[2 * ic + 1 + ix2];
1191 set_sp4_ym(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1195 if (iz <
m_Nz - 1) {
1196 int nei = ixy + Nxy * (iz + 1 +
m_Nz * it);
1198 int in = Nvcd * nei;
1199 double vt1[
NVC], vt2[
NVC];
1200 set_sp2_zp(vt1, vt2, &wp[in],
m_Nc);
1201 for (
int ic = 0; ic <
m_Nc; ++ic) {
1203 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
1204 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
1205 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
1206 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
1207 set_sp4_zp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1211 int ix1 = Nvc2 * ixyt;
1212 int ix2 = ix1 +
NVC;
1213 for (
int ic = 0; ic <
m_Nc; ++ic) {
1215 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_zp[ix1],
m_Nc);
1216 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_zp[ix1],
m_Nc);
1217 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_zp[ix2],
m_Nc);
1218 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_zp[ix2],
m_Nc);
1219 set_sp4_zp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1224 int nei = ixy + Nxy * (iz - 1 +
m_Nz * it);
1226 int in = Nvcd * nei;
1227 double vt1[
NVC], vt2[
NVC];
1228 set_sp2_zm(vt1, vt2, &wp[in],
m_Nc);
1229 for (
int ic = 0; ic <
m_Nc; ++ic) {
1231 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1232 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1233 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1234 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1235 set_sp4_zm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1238 int ix1 = Nvc2 * ixyt;
1239 int ix2 = ix1 +
NVC;
1241 for (
int ic = 0; ic <
m_Nc; ++ic) {
1242 double wt1r = bc2 *
vcp2_zm[2 * ic + ix1];
1243 double wt1i = bc2 *
vcp2_zm[2 * ic + 1 + ix1];
1244 double wt2r = bc2 *
vcp2_zm[2 * ic + ix2];
1245 double wt2i = bc2 *
vcp2_zm[2 * ic + 1 + ix2];
1246 set_sp4_zm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1250 if (it <
m_Nt - 1) {
1251 int nei = ixyz + Nxyz * (it + 1);
1253 int in = Nvcd * nei;
1254 double vt1[
NVC], vt2[
NVC];
1255 set_sp2_tp_chiral(vt1, vt2, &wp[in],
m_Nc);
1256 for (
int ic = 0; ic <
m_Nc; ++ic) {
1258 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
1259 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
1260 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
1261 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
1262 set_sp4_tp_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1266 int ix1 = Nvc2 * ixyz;
1267 int ix2 = ix1 +
NVC;
1268 for (
int ic = 0; ic <
m_Nc; ++ic) {
1270 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_tp[ix1],
m_Nc);
1271 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_tp[ix1],
m_Nc);
1272 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_tp[ix2],
m_Nc);
1273 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_tp[ix2],
m_Nc);
1274 set_sp4_tp_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1279 int nei = ixyz + Nxyz * (it - 1);
1281 int in = Nvcd * nei;
1282 double vt1[
NVC], vt2[
NVC];
1283 set_sp2_tm_chiral(vt1, vt2, &wp[in],
m_Nc);
1284 for (
int ic = 0; ic <
m_Nc; ++ic) {
1286 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1287 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1288 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1289 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1290 set_sp4_tm_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1293 int ix1 = Nvc2 * ixyz;
1294 int ix2 = ix1 +
NVC;
1296 for (
int ic = 0; ic <
m_Nc; ++ic) {
1298 int ici = 2 * ic + 1;
1299 double wt1r = bc2 *
vcp2_tm[icr + ix1];
1300 double wt1i = bc2 *
vcp2_tm[ici + ix1];
1301 double wt2r = bc2 *
vcp2_tm[icr + ix2];
1302 double wt2i = bc2 *
vcp2_tm[ici + ix2];
1303 set_sp4_tm_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1307 for (
int ivcd = 0; ivcd < Nvcd; ++ivcd) {
1308 vp[ivcd + iv] = -
m_kappa * vp[ivcd + iv] + wp[ivcd + iv];
1318 const Field& f,
const int ex2)
1335 const Field& f,
const int ex2)
1361 const Field& v,
const int ex2,
const int ipm)
1367 }
else if (ipm == -1) {
1386 double *wp = w.
ptr(0);
1388 int ith, nth, is, ns;
1389 set_threadtask(ith, nth, is, ns,
m_Nvol);
1395 for (
int site = is; site < ns; ++site) {
1396 for (
int ivcd = 0; ivcd < Nvcd; ++ivcd) {
1397 wp[ivcd + Nvcd * site] = 0.0;
1407 const double fac,
const Field& w)
1409 double *vp = v.
ptr(0);
1410 const double *wp = w.
ptr(0);
1412 int ith, nth, is, ns;
1413 set_threadtask(ith, nth, is, ns,
m_Nvol);
1419 for (
int site = is; site < ns; ++site) {
1420 for (
int ivcd = 0; ivcd < Nvcd; ++ivcd) {
1421 vp[ivcd + Nvcd * site]
1422 = fac * vp[ivcd + Nvcd * site] + wp[ivcd + Nvcd * site];
1433 double *vp = v.
ptr(0);
1434 const double *wp = w.
ptr(0);
1436 int ith, nth, is, ns;
1437 set_threadtask(ith, nth, is, ns,
m_Nvol);
1443 for (
int site = is; site < ns; ++site) {
1444 mult_gamma5_dirac(&vp[Nvcd * site], &wp[Nvcd * site],
m_Nc);
1454 double *vp = v.
ptr(0);
1455 const double *wp = w.
ptr(0);
1457 int ith, nth, is, ns;
1458 set_threadtask(ith, nth, is, ns,
m_Nvol);
1464 for (
int site = is; site < ns; ++site) {
1465 mult_gamma5_chiral(&vp[Nvcd * site], &wp[Nvcd * site],
m_Nc);
1477 int Nvc2 =
m_Nvc * 2;
1482 double *vp = v.
ptr(0);
1483 const double *wp = w.
ptr(0);
1486 int ith, nth, is, ns;
1487 set_threadtask(ith, nth, is, ns,
m_Nvol);
1491 for (
int site = is; site < ns; ++site) {
1492 int ix = site %
m_Nx;
1493 int iyzt = site /
m_Nx;
1495 int in = Nvcd * site;
1496 int ix1 = Nvc2 * iyzt;
1497 int ix2 = ix1 +
NVC;
1498 double vt1[
NVC], vt2[
NVC];
1499 set_sp2_xp(vt1, vt2, &wp[in],
m_Nc);
1500 for (
int ivc = 0; ivc <
NVC; ++ivc) {
1501 vcp1_xp[ivc + ix1] = bc2 * vt1[ivc];
1502 vcp1_xp[ivc + ix2] = bc2 * vt2[ivc];
1516 for (
int site = is; site < ns; ++site) {
1517 int ix = site %
m_Nx;
1518 int iyzt = site /
m_Nx;
1519 int nei = ix + 1 +
m_Nx * iyzt;
1520 int iv = Nvcd * site;
1521 int ig =
m_Ndf * site;
1523 if (ix <
m_Nx - 1) {
1524 int in = Nvcd * nei;
1525 double vt1[
NVC], vt2[
NVC];
1526 set_sp2_xp(vt1, vt2, &wp[in],
m_Nc);
1527 for (
int ic = 0; ic <
m_Nc; ++ic) {
1529 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
1530 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
1531 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
1532 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
1533 set_sp4_xp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1536 int ix1 = Nvc2 * iyzt;
1537 int ix2 = ix1 +
NVC;
1538 for (
int ic = 0; ic <
m_Nc; ++ic) {
1540 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_xp[ix1],
m_Nc);
1541 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_xp[ix1],
m_Nc);
1542 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_xp[ix2],
m_Nc);
1543 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_xp[ix2],
m_Nc);
1544 set_sp4_xp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1558 int Nvc2 =
m_Nvc * 2;
1563 double *vp = v.
ptr(0);
1564 const double *wp = w.
ptr(0);
1567 int ith, nth, is, ns;
1568 set_threadtask(ith, nth, is, ns,
m_Nvol);
1572 for (
int site = is; site < ns; ++site) {
1573 int ix = site %
m_Nx;
1574 int iyzt = site /
m_Nx;
1575 if (ix ==
m_Nx - 1) {
1576 int in = Nvcd * site;
1577 int ig =
m_Ndf * site;
1578 int ix1 = Nvc2 * iyzt;
1579 int ix2 = ix1 +
NVC;
1581 double vt1[
NVC], vt2[
NVC];
1582 set_sp2_xm(vt1, vt2, &wp[in],
m_Nc);
1583 for (
int ic = 0; ic <
m_Nc; ++ic) {
1586 int ici = 2 * ic + 1;
1587 vcp1_xm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1588 vcp1_xm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1589 vcp1_xm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1590 vcp1_xm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1604 for (
int site = is; site < ns; ++site) {
1605 int ix = site %
m_Nx;
1606 int iyzt = site /
m_Nx;
1607 int nei = ix - 1 +
m_Nx * iyzt;
1608 int iv = Nvcd * site;
1611 int ig =
m_Ndf * nei;
1612 int in = Nvcd * nei;
1613 double vt1[
NVC], vt2[
NVC];
1614 set_sp2_xm(vt1, vt2, &wp[in],
m_Nc);
1615 for (
int ic = 0; ic <
m_Nc; ++ic) {
1617 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1618 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1619 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1620 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1621 set_sp4_xm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1624 int ix1 = Nvc2 * iyzt;
1625 int ix2 = ix1 +
NVC;
1626 for (
int ic = 0; ic <
m_Nc; ++ic) {
1627 double wt1r = bc2 *
vcp2_xm[2 * ic + ix1];
1628 double wt1i = bc2 *
vcp2_xm[2 * ic + 1 + ix1];
1629 double wt2r = bc2 *
vcp2_xm[2 * ic + ix2];
1630 double wt2i = bc2 *
vcp2_xm[2 * ic + 1 + ix2];
1631 set_sp4_xm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1645 int Nvc2 =
m_Nvc * 2;
1650 double *vp = v.
ptr(0);
1651 const double *wp = w.
ptr(0);
1654 int ith, nth, is, ns;
1655 set_threadtask(ith, nth, is, ns,
m_Nvol);
1659 for (
int site = is; site < ns; ++site) {
1660 int ix = site %
m_Nx;
1661 int iyzt = site /
m_Nx;
1662 int iy = iyzt %
m_Ny;
1663 int izt = iyzt /
m_Ny;
1664 int ixzt = ix +
m_Nx * izt;
1666 int in = Nvcd * site;
1667 int ix1 = Nvc2 * ixzt;
1668 int ix2 = ix1 +
NVC;
1669 double vt1[
NVC], vt2[
NVC];
1670 set_sp2_yp(vt1, vt2, &wp[in],
m_Nc);
1671 for (
int ivc = 0; ivc <
NVC; ++ivc) {
1672 vcp1_yp[ivc + ix1] = bc2 * vt1[ivc];
1673 vcp1_yp[ivc + ix2] = bc2 * vt2[ivc];
1687 for (
int site = is; site < ns; ++site) {
1688 int ix = site %
m_Nx;
1689 int iyzt = site /
m_Nx;
1690 int iy = iyzt %
m_Ny;
1691 int izt = iyzt /
m_Ny;
1692 int ixzt = ix +
m_Nx * izt;
1693 int nei = ix +
m_Nx * (iy + 1 +
m_Ny * izt);
1694 int iv = Nvcd * site;
1695 int ig =
m_Ndf * site;
1697 if (iy <
m_Ny - 1) {
1698 int in = Nvcd * nei;
1699 double vt1[
NVC], vt2[
NVC];
1700 set_sp2_yp(vt1, vt2, &wp[in],
m_Nc);
1701 for (
int ic = 0; ic <
m_Nc; ++ic) {
1703 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
1704 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
1705 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
1706 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
1707 set_sp4_yp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1710 int ix1 = Nvc2 * ixzt;
1711 int ix2 = ix1 +
NVC;
1712 for (
int ic = 0; ic <
m_Nc; ++ic) {
1714 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_yp[ix1],
m_Nc);
1715 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_yp[ix1],
m_Nc);
1716 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_yp[ix2],
m_Nc);
1717 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_yp[ix2],
m_Nc);
1718 set_sp4_yp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1732 int Nvc2 =
m_Nvc * 2;
1737 double *vp = v.
ptr(0);
1738 const double *wp = w.
ptr(0);
1741 int ith, nth, is, ns;
1742 set_threadtask(ith, nth, is, ns,
m_Nvol);
1746 for (
int site = is; site < ns; ++site) {
1747 int ix = site %
m_Nx;
1748 int iyzt = site /
m_Nx;
1749 int iy = iyzt %
m_Ny;
1750 int izt = iyzt /
m_Ny;
1751 int ixzt = ix +
m_Nx * izt;
1752 if (iy ==
m_Ny - 1) {
1753 int in = Nvcd * site;
1754 int ig =
m_Ndf * site;
1755 int ix1 = Nvc2 * ixzt;
1756 int ix2 = ix1 +
NVC;
1758 double vt1[
NVC], vt2[
NVC];
1759 set_sp2_ym(vt1, vt2, &wp[in],
m_Nc);
1760 for (
int ic = 0; ic <
m_Nc; ++ic) {
1763 int ici = 2 * ic + 1;
1764 vcp1_ym[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1765 vcp1_ym[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1766 vcp1_ym[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1767 vcp1_ym[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1781 for (
int site = is; site < ns; ++site) {
1782 int ix = site %
m_Nx;
1783 int iyzt = site /
m_Nx;
1784 int iy = iyzt %
m_Ny;
1785 int izt = iyzt /
m_Ny;
1786 int ixzt = ix +
m_Nx * izt;
1787 int nei = ix +
m_Nx * (iy - 1 +
m_Ny * izt);
1788 int iv = Nvcd * site;
1791 int ig =
m_Ndf * nei;
1792 int in = Nvcd * nei;
1793 double vt1[
NVC], vt2[
NVC];
1794 set_sp2_ym(vt1, vt2, &wp[in],
m_Nc);
1795 for (
int ic = 0; ic <
m_Nc; ++ic) {
1797 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1798 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1799 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1800 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1801 set_sp4_ym(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1804 int ix1 = Nvc2 * ixzt;
1805 int ix2 = ix1 +
NVC;
1806 for (
int ic = 0; ic <
m_Nc; ++ic) {
1807 double wt1r = bc2 *
vcp2_ym[2 * ic + ix1];
1808 double wt1i = bc2 *
vcp2_ym[2 * ic + 1 + ix1];
1809 double wt2r = bc2 *
vcp2_ym[2 * ic + ix2];
1810 double wt2i = bc2 *
vcp2_ym[2 * ic + 1 + ix2];
1811 set_sp4_ym(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1825 int Nvc2 =
m_Nvc * 2;
1830 double *vp = v.
ptr(0);
1831 const double *wp = w.
ptr(0);
1834 int ith, nth, is, ns;
1835 set_threadtask(ith, nth, is, ns,
m_Nvol);
1841 for (
int site = is; site < ns; ++site) {
1842 int ixy = site % Nxy;
1843 int izt = site / Nxy;
1844 int iz = izt %
m_Nz;
1845 int it = izt /
m_Nz;
1846 int ixyt = ixy + Nxy * it;
1848 int in = Nvcd * site;
1849 int ix1 = Nvc2 * ixyt;
1850 int ix2 = ix1 +
NVC;
1851 double vt1[
NVC], vt2[
NVC];
1852 set_sp2_zp(vt1, vt2, &wp[in],
m_Nc);
1853 for (
int ivc = 0; ivc <
NVC; ++ivc) {
1854 vcp1_zp[ivc + ix1] = bc2 * vt1[ivc];
1855 vcp1_zp[ivc + ix2] = bc2 * vt2[ivc];
1869 for (
int site = is; site < ns; ++site) {
1870 int ixy = site % Nxy;
1871 int izt = site / Nxy;
1872 int iz = izt %
m_Nz;
1873 int it = izt /
m_Nz;
1874 int ixyt = ixy + Nxy * it;
1875 int nei = ixy + Nxy * (iz + 1 +
m_Nz * it);
1876 int iv = Nvcd * site;
1877 int ig =
m_Ndf * site;
1879 if (iz <
m_Nz - 1) {
1880 int in = Nvcd * nei;
1881 double vt1[
NVC], vt2[
NVC];
1882 set_sp2_zp(vt1, vt2, &wp[in],
m_Nc);
1883 for (
int ic = 0; ic <
m_Nc; ++ic) {
1885 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
1886 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
1887 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
1888 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
1889 set_sp4_zp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1892 int ix1 = Nvc2 * ixyt;
1893 int ix2 = ix1 +
NVC;
1894 for (
int ic = 0; ic <
m_Nc; ++ic) {
1896 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_zp[ix1],
m_Nc);
1897 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_zp[ix1],
m_Nc);
1898 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_zp[ix2],
m_Nc);
1899 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_zp[ix2],
m_Nc);
1900 set_sp4_zp(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1914 int Nvc2 =
m_Nvc * 2;
1919 double *vp = v.
ptr(0);
1920 const double *wp = w.
ptr(0);
1923 int ith, nth, is, ns;
1924 set_threadtask(ith, nth, is, ns,
m_Nvol);
1930 for (
int site = is; site < ns; ++site) {
1931 int ixy = site % Nxy;
1932 int izt = site / Nxy;
1933 int iz = izt %
m_Nz;
1934 int it = izt /
m_Nz;
1935 int ixyt = ixy + Nxy * it;
1936 if (iz ==
m_Nz - 1) {
1937 int in = Nvcd * site;
1938 int ig =
m_Ndf * site;
1939 int ix1 = Nvc2 * ixyt;
1940 int ix2 = ix1 +
NVC;
1942 double vt1[
NVC], vt2[
NVC];
1943 set_sp2_zm(vt1, vt2, &wp[in],
m_Nc);
1944 for (
int ic = 0; ic <
m_Nc; ++ic) {
1947 int ici = 2 * ic + 1;
1948 vcp1_zm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1949 vcp1_zm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1950 vcp1_zm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1951 vcp1_zm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1965 for (
int site = is; site < ns; ++site) {
1966 int ixy = site % Nxy;
1967 int izt = site / Nxy;
1968 int iz = izt %
m_Nz;
1969 int it = izt /
m_Nz;
1970 int ixyt = ixy + Nxy * it;
1971 int nei = ixy + Nxy * (iz - 1 +
m_Nz * it);
1972 int iv = Nvcd * site;
1975 int ig =
m_Ndf * nei;
1976 int in = Nvcd * nei;
1977 double vt1[
NVC], vt2[
NVC];
1978 set_sp2_zm(vt1, vt2, &wp[in],
m_Nc);
1979 for (
int ic = 0; ic <
m_Nc; ++ic) {
1981 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
1982 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
1983 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
1984 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
1985 set_sp4_zm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
1988 int ix1 = Nvc2 * ixyt;
1989 int ix2 = ix1 +
NVC;
1990 for (
int ic = 0; ic <
m_Nc; ++ic) {
1991 double wt1r = bc2 *
vcp2_zm[2 * ic + ix1];
1992 double wt1i = bc2 *
vcp2_zm[2 * ic + 1 + ix1];
1993 double wt2r = bc2 *
vcp2_zm[2 * ic + ix2];
1994 double wt2i = bc2 *
vcp2_zm[2 * ic + 1 + ix2];
1995 set_sp4_zm(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
2009 int Nvc2 =
m_Nvc * 2;
2014 double *vp = v.
ptr(0);
2015 const double *wp = w.
ptr(0);
2018 int ith, nth, is, ns;
2019 set_threadtask(ith, nth, is, ns,
m_Nvol);
2025 for (
int site = is; site < ns; ++site) {
2026 int ixyz = site % Nxyz;
2027 int it = site / Nxyz;
2029 int in = Nvcd * site;
2030 int ix1 = Nvc2 * ixyz;
2031 int ix2 = ix1 +
NVC;
2032 double vt1[
NVC], vt2[
NVC];
2033 set_sp2_tp_dirac(vt1, vt2, &wp[in],
m_Nc);
2034 for (
int ivc = 0; ivc <
NVC; ++ivc) {
2035 vcp1_tp[ivc + ix1] = bc2 * vt1[ivc];
2036 vcp1_tp[ivc + ix2] = bc2 * vt2[ivc];
2050 for (
int site = is; site < ns; ++site) {
2051 int ixyz = site % Nxyz;
2052 int it = site / Nxyz;
2053 int nei = ixyz + Nxyz * (it + 1);
2054 int iv = Nvcd * site;
2055 int ig =
m_Ndf * site;
2057 if (it <
m_Nt - 1) {
2058 int in = Nvcd * nei;
2059 double vt1[
NVC], vt2[
NVC];
2060 set_sp2_tp_dirac(vt1, vt2, &wp[in],
m_Nc);
2061 for (
int ic = 0; ic <
m_Nc; ++ic) {
2063 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
2064 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
2065 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
2066 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
2067 set_sp4_tp_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
2070 int ix1 = Nvc2 * ixyz;
2071 int ix2 = ix1 +
NVC;
2072 for (
int ic = 0; ic <
m_Nc; ++ic) {
2074 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_tp[ix1],
m_Nc);
2075 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_tp[ix1],
m_Nc);
2076 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_tp[ix2],
m_Nc);
2077 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_tp[ix2],
m_Nc);
2078 set_sp4_tp_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
2092 int Nvc2 =
m_Nvc * 2;
2097 double *vp = v.
ptr(0);
2098 const double *wp = w.
ptr(0);
2101 int ith, nth, is, ns;
2102 set_threadtask(ith, nth, is, ns,
m_Nvol);
2108 for (
int site = is; site < ns; ++site) {
2109 int ixyz = site % Nxyz;
2110 int it = site / Nxyz;
2111 if (it ==
m_Nt - 1) {
2112 int in = Nvcd * site;
2113 int ig =
m_Ndf * site;
2114 int ix1 = Nvc2 * ixyz;
2115 int ix2 = ix1 +
NVC;
2117 double vt1[
NVC], vt2[
NVC];
2118 set_sp2_tm_dirac(vt1, vt2, &wp[in],
m_Nc);
2119 for (
int ic = 0; ic <
m_Nc; ++ic) {
2122 int ici = 2 * ic + 1;
2123 vcp1_tm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
2124 vcp1_tm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
2125 vcp1_tm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
2126 vcp1_tm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
2140 for (
int site = is; site < ns; ++site) {
2141 int ixyz = site % Nxyz;
2142 int it = site / Nxyz;
2143 int nei = ixyz + Nxyz * (it - 1);
2144 int iv = Nvcd * site;
2147 int ig =
m_Ndf * nei;
2148 int in = Nvcd * nei;
2149 double vt1[
NVC], vt2[
NVC];
2150 set_sp2_tm_dirac(vt1, vt2, &wp[in],
m_Nc);
2151 for (
int ic = 0; ic <
m_Nc; ++ic) {
2153 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
2154 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
2155 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
2156 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
2157 set_sp4_tm_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
2160 int ix1 = Nvc2 * ixyz;
2161 int ix2 = ix1 +
NVC;
2162 for (
int ic = 0; ic <
m_Nc; ++ic) {
2164 int ici = 2 * ic + 1;
2165 double wt1r = bc2 *
vcp2_tm[icr + ix1];
2166 double wt1i = bc2 *
vcp2_tm[ici + ix1];
2167 double wt2r = bc2 *
vcp2_tm[icr + ix2];
2168 double wt2i = bc2 *
vcp2_tm[ici + ix2];
2169 set_sp4_tm_dirac(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
2183 int Nvc2 =
m_Nvc * 2;
2188 double *vp = v.
ptr(0);
2189 const double *wp = w.
ptr(0);
2192 int ith, nth, is, ns;
2193 set_threadtask(ith, nth, is, ns,
m_Nvol);
2199 for (
int site = is; site < ns; ++site) {
2200 int ixyz = site % Nxyz;
2201 int it = site / Nxyz;
2203 int in = Nvcd * site;
2204 int ix1 = Nvc2 * ixyz;
2205 int ix2 = ix1 +
NVC;
2206 double vt1[
NVC], vt2[
NVC];
2207 set_sp2_tp_chiral(vt1, vt2, &wp[in],
m_Nc);
2208 for (
int ivc = 0; ivc <
NVC; ++ivc) {
2209 vcp1_tp[ivc + ix1] = bc2 * vt1[ivc];
2210 vcp1_tp[ivc + ix2] = bc2 * vt2[ivc];
2224 for (
int site = is; site < ns; ++site) {
2225 int ixyz = site % Nxyz;
2226 int it = site / Nxyz;
2227 int nei = ixyz + Nxyz * (it + 1);
2228 int iv = Nvcd * site;
2229 int ig =
m_Ndf * site;
2231 if (it <
m_Nt - 1) {
2232 int in = Nvcd * nei;
2233 double vt1[
NVC], vt2[
NVC];
2234 set_sp2_tp_chiral(vt1, vt2, &wp[in],
m_Nc);
2235 for (
int ic = 0; ic <
m_Nc; ++ic) {
2237 double wt1r = mult_uv_r(&up[ic2 + ig], vt1,
m_Nc);
2238 double wt1i = mult_uv_i(&up[ic2 + ig], vt1,
m_Nc);
2239 double wt2r = mult_uv_r(&up[ic2 + ig], vt2,
m_Nc);
2240 double wt2i = mult_uv_i(&up[ic2 + ig], vt2,
m_Nc);
2241 set_sp4_tp_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
2244 int ix1 = Nvc2 * ixyz;
2245 int ix2 = ix1 +
NVC;
2246 for (
int ic = 0; ic <
m_Nc; ++ic) {
2248 double wt1r = mult_uv_r(&up[ic2 + ig], &
vcp2_tp[ix1],
m_Nc);
2249 double wt1i = mult_uv_i(&up[ic2 + ig], &
vcp2_tp[ix1],
m_Nc);
2250 double wt2r = mult_uv_r(&up[ic2 + ig], &
vcp2_tp[ix2],
m_Nc);
2251 double wt2i = mult_uv_i(&up[ic2 + ig], &
vcp2_tp[ix2],
m_Nc);
2252 set_sp4_tp_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
2266 int Nvc2 =
m_Nvc * 2;
2271 double *vp = v.
ptr(0);
2272 const double *wp = w.
ptr(0);
2275 int ith, nth, is, ns;
2276 set_threadtask(ith, nth, is, ns,
m_Nvol);
2282 for (
int site = is; site < ns; ++site) {
2283 int ixyz = site % Nxyz;
2284 int it = site / Nxyz;
2285 if (it ==
m_Nt - 1) {
2286 int in = Nvcd * site;
2287 int ig =
m_Ndf * site;
2288 int ix1 = Nvc2 * ixyz;
2289 int ix2 = ix1 +
NVC;
2291 double vt1[
NVC], vt2[
NVC];
2292 set_sp2_tm_chiral(vt1, vt2, &wp[in],
m_Nc);
2293 for (
int ic = 0; ic <
m_Nc; ++ic) {
2296 int ici = 2 * ic + 1;
2297 vcp1_tm[icr + ix1] = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
2298 vcp1_tm[ici + ix1] = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
2299 vcp1_tm[icr + ix2] = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
2300 vcp1_tm[ici + ix2] = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
2314 for (
int site = is; site < ns; ++site) {
2315 int ixyz = site % Nxyz;
2316 int it = site / Nxyz;
2317 int nei = ixyz + Nxyz * (it - 1);
2318 int iv = Nvcd * site;
2321 int ig =
m_Ndf * nei;
2322 int in = Nvcd * nei;
2323 double vt1[
NVC], vt2[
NVC];
2324 set_sp2_tm_chiral(vt1, vt2, &wp[in],
m_Nc);
2325 for (
int ic = 0; ic <
m_Nc; ++ic) {
2327 double wt1r = mult_udagv_r(&up[ic2 + ig], vt1,
m_Nc);
2328 double wt1i = mult_udagv_i(&up[ic2 + ig], vt1,
m_Nc);
2329 double wt2r = mult_udagv_r(&up[ic2 + ig], vt2,
m_Nc);
2330 double wt2i = mult_udagv_i(&up[ic2 + ig], vt2,
m_Nc);
2331 set_sp4_tm_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
2334 int ix1 = Nvc2 * ixyz;
2335 int ix2 = ix1 +
NVC;
2336 for (
int ic = 0; ic <
m_Nc; ++ic) {
2337 double wt1r = bc2 *
vcp2_tm[2 * ic + ix1];
2338 double wt1i = bc2 *
vcp2_tm[2 * ic + 1 + ix1];
2339 double wt2r = bc2 *
vcp2_tm[2 * ic + ix2];
2340 double wt2i = bc2 *
vcp2_tm[2 * ic + 1 + ix2];
2341 set_sp4_tm_chiral(&vp[2 * ic + iv], wt1r, wt1i, wt2r, wt2i,
m_Nc);
2365 }
else if (
m_repr ==
"Chiral") {
2373 double flop = flop_site * (Nvol * NPE);
2375 if ((
m_mode ==
"DdagD") || (
m_mode ==
"DDdag")) flop *= 2;
2377 double gflop = flop * 1.e-9;