// Class-name string literal for the ShiftAField_eo template.
// NOTE(review): the declarator that this literal initializes is not part of
// this extract; presumably it is the `class_name` member that the log
// messages in this file print via class_name.c_str() -- confirm.
11 template<
typename AFIELD>
13 "ShiftAField_eo<AFIELD>";
// Builds a per-direction boundary-condition vector `bc` with Ndim entries and
// then iterates over the directions.
// NOTE(review): the function signature, the loop body and whatever is done
// with `bc` afterwards are not visible in this extract; presumably the loop
// fills default values and `bc` is handed to the initializer that takes a
// boundary-condition vector -- confirm against the full file.
15 template<
typename AFIELD>
19 std::vector<int> bc(Ndim);
20 for (
int mu = 0; mu < Ndim; ++mu) {
// Initialization fragment that takes a boundary-condition vector `bc`.
// Visible steps: log start, derive lattice/vector sizes, validate and copy
// `bc` into m_boundary, accumulate the per-direction communication flags,
// size the boundary buffers, log completion.
// NOTE(review): the function signature and several statements (e.g. where
// m_Nx, m_Nx2, m_Nvol2, m_Nyv and do_comm[] are assigned) are not visible in
// this extract.
29 template<
typename AFIELD>
37 vout.
general(m_vl,
"%s: being constructed.\n", class_name.c_str());
// Local volume is the product of the four local extents.
46 m_Nvol = m_Nx * m_Ny * m_Nz * m_Nt;
// Sizes counted in vector units: x half-extent in units of VLENX, and
// half-volume in units of VLEN.
52 m_Nx2v = m_Nx2 /
VLENX;
54 m_Nst2v = m_Nvol2 /
VLEN;
57 vout.
general(m_vl,
" Nx2 = %d Nvol2 = %d\n", m_Nx2, m_Nvol2);
58 vout.
general(m_vl,
" Nx2v = %d Nyv = %d\n", m_Nx2v, m_Nyv);
// `bc` must provide exactly one entry per direction; otherwise an error is
// reported (the remainder of the error branch is not visible here).
60 if (bc.size() != m_Ndim) {
61 vout.
crucial(m_vl,
"%s: incorrect size of boundary condition\n",
66 m_boundary.resize(m_Ndim);
// Copy the boundary condition of each direction and report it.
68 for (
int mu = 0; mu < m_Ndim; ++mu) {
69 m_boundary[mu] = bc[mu];
70 vout.
general(m_vl,
" boundary[%d] = %2d\n", mu, m_boundary[mu]);
// do_comm_any sums the per-direction flags do_comm[mu] (the lines that set
// do_comm[mu] are not visible in this extract).
74 for (
int mu = 0; mu < m_Ndim; ++mu) {
77 do_comm_any += do_comm[mu];
// Pass m_vl like every other message in this function so that the output
// honours this object's verbosity level.
78 vout.
general(m_vl,
" do_comm[%d] = %d\n", mu, do_comm[mu]);
// Boundary-buffer element counts per direction: m_Nin times the product of
// the three extents transverse to that direction.
// NOTE(review): the full extents (m_Nx, ...) are used here, while the shift
// routines in this file index the buffers with half-lattice sizes (m_Nx2,
// iyzt / 2) -- the buffers look larger than needed; confirm this is intended.
81 m_Nbdsize.resize(m_Ndim);
82 m_Nbdsize[0] = m_Nin * m_Ny * m_Nz * m_Nt;
83 m_Nbdsize[1] = m_Nin * m_Nx * m_Nz * m_Nt;
84 m_Nbdsize[2] = m_Nin * m_Nx * m_Ny * m_Nt;
85 m_Nbdsize[3] = m_Nin * m_Nx * m_Ny * m_Nz;
89 vout.
general(m_vl,
"%s: construction finished.\n", class_name.c_str());
// Template header of a member definition.
// NOTE(review): the definition that follows this header is not present in
// this extract, so its purpose cannot be stated from here.
94 template<
typename AFIELD>
// Sets up the send/receive channels for every direction (fragment; the
// function signature is not visible in this extract).
// For each direction mu a pair of send channels (up/down) and a pair of
// receive channels (up/down) is initialized with the boundary size in bytes,
// and the channels of communicating directions are collected in the channel
// sets chset_send / chset_recv.
101 template<
typename AFIELD>
104 chsend_up.resize(m_Ndim);
105 chrecv_up.resize(m_Ndim);
106 chsend_dn.resize(m_Ndim);
107 chrecv_dn.resize(m_Ndim);
109 for (
int mu = 0; mu < m_Ndim; ++mu) {
// Buffer size in bytes: element count of the boundary times sizeof(real_t).
110 int Nvsize = m_Nbdsize[mu] *
sizeof(
real_t);
// Last argument is the direction sign: -1 for "down", +1 for "up".
112 chsend_dn[mu].send_init(Nvsize, mu, -1);
113 chsend_up[mu].send_init(Nvsize, mu, 1);
// NOTE(review): recv_init appears twice for each receive channel -- once
// without and once with an explicit buffer. Presumably these are alternatives
// selected by a preprocessor conditional that is not visible in this
// extract -- confirm against the full file.
115 chrecv_up[mu].recv_init(Nvsize, mu, 1);
116 chrecv_dn[mu].recv_init(Nvsize, mu, -1);
// Variant with explicit buffers: the "up" receive channel is given the buffer
// of the "down" send channel and vice versa.
118 void *buf_up = (
void *)chsend_dn[mu].ptr();
119 chrecv_up[mu].recv_init(Nvsize, mu, 1, buf_up);
120 void *buf_dn = (
void *)chsend_up[mu].ptr();
121 chrecv_dn[mu].recv_init(Nvsize, mu, -1, buf_dn);
// Only directions flagged for communication are added to the channel sets.
123 if (do_comm[mu] == 1) {
124 chset_send.append(chsend_up[mu]);
125 chset_send.append(chsend_dn[mu]);
126 chset_recv.append(chrecv_up[mu]);
127 chset_recv.append(chrecv_dn[mu]);
// Dispatches an "up" shift in direction mu for every external index ex in
// [0, Nex) (fragment; the leading part of the signature, the definitions of
// Nex and `index`, and the first `if (mu == 0)` line are not visible).
// Parameters visible here:
//   mu  - direction selector, 0..3; any other value reaches the error message
//   ieo - passed unchanged to the per-direction shift routines
134 template<
typename AFIELD>
136 const int mu,
const int ieo)
144 for (
int ex = 0; ex < Nex; ++ex) {
// Raw pointers to the start of the data for external index ex in the
// destination v and the source w.
145 real_t *vp = v.
ptr(index.idxh(0, m_Nin, 0, ex));
// const_cast is used to call ptr() on w, which is held by const reference.
146 real_t *wp =
const_cast<AFIELD *
>(&w)->ptr(index.idxh(0, m_Nin, 0, ex));
// Per-direction dispatch: x, y, z, t.
150 up_xh_naive(vp, wp, ieo);
151 }
else if (mu == 1) {
152 up_yh_nv(vp, wp, ieo);
153 }
else if (mu == 2) {
154 up_zh_nv(vp, wp, ieo);
155 }
else if (mu == 3) {
156 up_th_nv(vp, wp, ieo);
// Fallback for an unsupported mu (rest of the error branch not visible).
158 vout.
crucial(m_vl,
"Error at %s: wrong parameter\n",
// Dispatches an "up" shift in direction mu for a single pair of external
// indices: destination component ex1 of v, source component ex2 of w
// (fragment; the leading part of the signature and the first `if (mu == 0)`
// line are not visible in this extract).
// Parameters visible here:
//   w   - source field (const reference)
//   ex2 - external index read from w
//   mu  - direction selector, 0..3; any other value reaches the error message
//   ieo - passed unchanged to the per-direction shift routines
167 template<
typename AFIELD>
169 const AFIELD& w,
const int ex2,
170 const int mu,
const int ieo)
// Raw pointers to component ex1 of v and component ex2 of w.
181 real_t *vp = v.
ptr(index.idxh(0, m_Nin, 0, ex1));
// const_cast is used to call ptr() on w, which is held by const reference.
182 real_t *wp =
const_cast<AFIELD *
>(&w)->ptr(index.idxh(0, m_Nin, 0, ex2));
// Per-direction dispatch: x, y, z, t.
186 up_xh_naive(vp, wp, ieo);
187 }
else if (mu == 1) {
188 up_yh_nv(vp, wp, ieo);
189 }
else if (mu == 2) {
190 up_zh_nv(vp, wp, ieo);
191 }
else if (mu == 3) {
192 up_th_nv(vp, wp, ieo);
// Fallback for an unsupported mu (rest of the error branch not visible).
194 vout.
crucial(m_vl,
"Error at %s: wrong parameter\n",
// Dispatches a "down" shift in direction mu for every external index ex in
// [0, Nex) (fragment; the leading part of the signature, the definitions of
// Nex and `index`, and the first `if (mu == 0)` line are not visible).
// Parameters visible here:
//   mu  - direction selector, 0..3; any other value reaches the error message
//   ieo - passed unchanged to the per-direction shift routines
202 template<
typename AFIELD>
204 const int mu,
const int ieo)
212 for (
int ex = 0; ex < Nex; ++ex) {
// Raw pointers to the start of the data for external index ex in the
// destination v and the source w.
213 real_t *vp = v.
ptr(index.idxh(0, m_Nin, 0, ex));
// const_cast is used to call ptr() on w, which is held by const reference.
214 real_t *wp =
const_cast<AFIELD *
>(&w)->ptr(index.idxh(0, m_Nin, 0, ex));
// Per-direction dispatch: x, y, z, t.
218 dn_xh_naive(vp, wp, ieo);
219 }
else if (mu == 1) {
220 dn_yh_nv(vp, wp, ieo);
221 }
else if (mu == 2) {
222 dn_zh_nv(vp, wp, ieo);
223 }
else if (mu == 3) {
224 dn_th_nv(vp, wp, ieo);
// Fallback for an unsupported mu (rest of the error branch not visible).
226 vout.
crucial(m_vl,
"Error at %s: wrong parameter\n",
// Dispatches a "down" shift in direction mu for a single pair of external
// indices: destination component ex1 of v, source component ex2 of w
// (fragment; the leading part of the signature and the first `if (mu == 0)`
// line are not visible in this extract).
// Parameters visible here:
//   w   - source field (const reference)
//   ex2 - external index read from w
//   mu  - direction selector, 0..3; any other value reaches the error message
//   ieo - passed unchanged to the per-direction shift routines
235 template<
typename AFIELD>
237 const AFIELD& w,
const int ex2,
238 const int mu,
const int ieo)
// Raw pointers to component ex1 of v and component ex2 of w.
249 real_t *vp = v.
ptr(index.idxh(0, m_Nin, 0, ex1));
// const_cast is used to call ptr() on w, which is held by const reference.
250 real_t *wp =
const_cast<AFIELD *
>(&w)->ptr(index.idxh(0, m_Nin, 0, ex2));
// Per-direction dispatch: x, y, z, t.
254 dn_xh_naive(vp, wp, ieo);
255 }
else if (mu == 1) {
256 dn_yh_nv(vp, wp, ieo);
257 }
else if (mu == 2) {
258 dn_zh_nv(vp, wp, ieo);
259 }
else if (mu == 3) {
260 dn_th_nv(vp, wp, ieo);
// Fallback for an unsupported mu (rest of the error branch not visible).
262 vout.
crucial(m_vl,
"Error at %s: wrong parameter\n",
// x-direction "up" shift operating on vector-packed sites (fragment; the
// signature, the boundary packing/unpacking and the declarations of xt, nei,
// vp and wp are not visible in this extract).
// NOTE(review): the function name is not visible; presumably this is the
// vectorized counterpart of up_xh_naive -- confirm.
272 template<
typename AFIELD>
// This thread handles the vector-site range [is, ns) out of m_Nst2v.
288 int ith, nth, is, ns;
289 set_threadtask(ith, nth, is, ns, m_Nst2v);
// With communication in x, start the receive from "up" and the send "down".
293 if (do_comm[0] == 1) {
298 chrecv_up[0].start();
299 chsend_dn[0].start();
309 for (
int site = is; site < ns; ++site) {
// Split the vector-site index into its x part and the remaining (y,z,t) part.
310 int ix2 = site % m_Nx2v;
311 int iyzt = site / m_Nx2v;
// jeo combines the requested parity ieo with the value index_alt.leo returns
// for this row, modulo 2.
312 int leo = index_alt.leo(
VLENY * iyzt);
313 int jeo = (ieo + leo) % 2;
// Handled locally unless the site is on the upper x edge of a communicating
// direction.
314 if ((ix2 < m_Nx2v - 1) || (do_comm[0] == 0)) {
// On the upper edge without communication the neighbour wraps to ix2 = 0.
316 if (ix2 == m_Nx2v - 1) nei = 0 + m_Nx2v * iyzt;
317 for (
int in = 0; in < m_Nin; ++in) {
// Offsets of this site (iw1) and of its neighbour (iw2); each (in, site)
// pair occupies VLEN consecutive elements.
318 int iw1 =
VLEN * (in + m_Nin * site);
319 int iw2 =
VLEN * (in + m_Nin * nei);
// Build the shifted vector in xt from the two source vectors, then store it
// at this site's offset in vp.
320 shift_vec2_xbw_eo(&xt, &wp[iw1], &wp[iw2], jeo, 1);
321 save_vec(&vp[iw1], &xt, 1);
// x-direction "up" shift written site by site (fragment; the signature, the
// definitions of bc2, bc3, buf1, buf2 and ex, the communication waits and
// some branch conditions are not visible in this extract).
// NOTE(review): the function name is not visible; presumably this is
// up_xh_naive, which the dispatch code in this file calls -- confirm.
334 template<
typename AFIELD>
// This thread handles the site range [is, ns) out of m_Nvol2.
348 int ith, nth, is, ns;
349 set_threadtask(ith, nth, is, ns, m_Nvol2);
// Pack the boundary: sites at ix2 == 0 whose leo value equals 1 - ieo are
// copied, multiplied by bc2, into buf1. Rows are indexed with iyzt / 2.
353 if (do_comm[0] == 1) {
354 for (
int site = is; site < ns; ++site) {
355 int ix2 = site % m_Nx2;
356 int iyzt = site / m_Nx2;
357 int iyzt2 = iyzt / 2;
358 int leo = index_alt.leo(iyzt);
359 if ((ix2 == 0) && (leo == (1 - ieo))) {
360 for (
int in = 0; in < m_Nin; ++in) {
361 int index = index_alt.idxh(in, m_Nin, site, ex);
362 buf1[in + m_Nin * iyzt2] = bc2 * wp[index];
// Start the receive from "up" and the send "down".
371 chrecv_up[0].start();
372 chsend_dn[0].start();
// Main loop over this thread's sites.
381 for (
int site = is; site < ns; ++site) {
382 int ix2 = site % m_Nx2;
383 int iyzt = site / m_Nx2;
384 int leo = index_alt.leo(iyzt);
// NOTE(review): the condition guarding this loop and the statement using iv
// are not visible in this extract.
386 for (
int in = 0; in < m_Nin; ++in) {
387 int iv = index_alt.idxh(in, m_Nin, site, ex);
// Neighbour available locally: interior site, or no communication in x.
391 if ((ix2 < m_Nx2 - 1) || (do_comm[0] == 0)) {
392 int nei = ix2 + 1 + m_Nx2 * iyzt;
// On the upper edge the neighbour wraps to ix2 = 0.
394 if (ix2 == m_Nx2 - 1) {
395 nei = 0 + m_Nx2 * iyzt;
// Copy from the neighbour site, scaled by bc3 (assigned on lines not visible).
398 for (
int in = 0; in < m_Nin; ++in) {
399 int iv = index_alt.idxh(in, m_Nin, site, ex);
400 int iw = index_alt.idxh(in, m_Nin, nei, ex);
401 vp[iv] = bc3 * wp[iw];
// Otherwise the value is taken from buf2, indexed like the packed buffer.
// Presumably buf2 is the receive buffer -- confirm.
404 for (
int in = 0; in < m_Nin; ++in) {
405 int iv = index_alt.idxh(in, m_Nin, site, ex);
406 int iyzt2 = iyzt / 2;
407 vp[iv] = buf2[in + m_Nin * iyzt2];
// x-direction "down" shift operating on vector-packed sites (fragment; the
// signature, the boundary packing/unpacking and the declarations of xt, nei,
// vp and wp are not visible in this extract).
// NOTE(review): the function name is not visible; presumably this is the
// vectorized counterpart of dn_xh_naive -- confirm.
420 template<
typename AFIELD>
// bc2 takes the x boundary condition (the condition under which this
// assignment happens is not visible in this extract).
426 bc2 =
real_t(m_boundary[0]);
// This thread handles the vector-site range [is, ns) out of m_Nst2v.
436 int ith, nth, is, ns;
437 set_threadtask(ith, nth, is, ns, m_Nst2v);
// With communication in x, start the receive from "down" and the send "up".
441 if (do_comm[0] == 1) {
446 chrecv_dn[0].start();
447 chsend_up[0].start();
457 for (
int site = is; site < ns; ++site) {
// Split the vector-site index into its x part and the remaining (y,z,t) part.
458 int ix2 = site % m_Nx2v;
459 int iyzt = site / m_Nx2v;
// jeo combines the requested parity ieo with the value index_alt.leo returns
// for this row, modulo 2.
460 int leo = index_alt.leo(
VLENY * iyzt);
461 int jeo = (ieo + leo) % 2;
// Handled locally unless the site is on the lower x edge of a communicating
// direction.
462 if ((ix2 > 0) || (do_comm[0] == 0)) {
// On the lower edge without communication the neighbour wraps to the last
// x position.
464 if (ix2 == 0) nei = m_Nx2v - 1 + m_Nx2v * iyzt;
465 for (
int in = 0; in < m_Nin; ++in) {
// Offsets of this site (iw1) and of its neighbour (iw2); each (in, site)
// pair occupies VLEN consecutive elements.
466 int iw1 =
VLEN * (in + m_Nin * site);
467 int iw2 =
VLEN * (in + m_Nin * nei);
// Build the shifted vector in xt from the two source vectors, then store it
// at this site's offset in vp.
468 shift_vec2_xfw_eo(&xt, &wp[iw1], &wp[iw2], jeo, 1);
469 save_vec(&vp[iw1], &xt, 1);
// x-direction "down" shift written site by site (fragment; the signature,
// the definitions of bc3, buf1, buf2 and ex, the communication waits and some
// branch lines are not visible in this extract).
// NOTE(review): the function name is not visible; presumably this is
// dn_xh_naive, which the dispatch code in this file calls -- confirm.
483 template<
typename AFIELD>
// bc2 takes the x boundary condition (the condition under which this
// assignment happens is not visible in this extract).
489 bc2 =
real_t(m_boundary[0]);
// This thread handles the site range [is, ns) out of m_Nvol2.
499 int ith, nth, is, ns;
500 set_threadtask(ith, nth, is, ns, m_Nvol2);
// Pack the boundary: sites at ix2 == m_Nx2 - 1 whose leo value equals ieo are
// copied, multiplied by bc2, into buf1. Rows are indexed with iyzt / 2.
// NOTE(review): bc2 is applied here on the send side and the unpacking below
// uses buf2 unscaled, whereas the y/z/t "down" fragments in this file pack
// unscaled and multiply by bc2 when unpacking. Check that the rank condition
// that sets bc2 matches this send-side placement.
504 if (do_comm[0] == 1) {
505 for (
int site = is; site < ns; ++site) {
506 int ix2 = site % m_Nx2;
507 int iyzt = site / m_Nx2;
508 int iyzt2 = iyzt / 2;
509 int leo = index_alt.leo(iyzt);
510 if ((ix2 == m_Nx2 - 1) && (leo == ieo)) {
511 for (
int in = 0; in < m_Nin; ++in) {
512 int index = index_alt.idxh(in, m_Nin, site, ex);
513 buf1[in + m_Nin * iyzt2] = bc2 * wp[index];
// Start the receive from "down" and the send "up".
522 chrecv_dn[0].start();
523 chsend_up[0].start();
// Main loop over this thread's sites.
532 for (
int site = is; site < ns; ++site) {
533 int ix2 = site % m_Nx2;
534 int iyzt = site / m_Nx2;
535 int leo = index_alt.leo(iyzt);
// Rows whose leo value equals 1 - ieo take this branch (the statement using
// iv is not visible in this extract).
536 if (leo == (1 - ieo)) {
537 for (
int in = 0; in < m_Nin; ++in) {
538 int iv = index_alt.idxh(in, m_Nin, site, ex);
// Neighbour available locally: interior site, or no communication in x.
542 if ((ix2 > 0) || (do_comm[0] == 0)) {
543 int nei = ix2 - 1 + m_Nx2 * iyzt;
// Wrap-around neighbour at the last x position (the guarding condition is
// not visible; presumably ix2 == 0 -- confirm).
546 nei = m_Nx2 - 1 + m_Nx2 * iyzt;
// Copy from the neighbour site, scaled by bc3 (assigned on lines not visible).
549 for (
int in = 0; in < m_Nin; ++in) {
550 int iv = index_alt.idxh(in, m_Nin, site, ex);
551 int iw = index_alt.idxh(in, m_Nin, nei, ex);
552 vp[iv] = bc3 * wp[iw];
// Otherwise the value is taken from buf2, indexed like the packed buffer.
// Presumably buf2 is the receive buffer -- confirm.
555 for (
int in = 0; in < m_Nin; ++in) {
556 int iv = index_alt.idxh(in, m_Nin, site, ex);
557 int iyzt2 = iyzt / 2;
558 vp[iv] = buf2[in + m_Nin * iyzt2];
// y-direction "up" shift written site by site (fragment; the signature, the
// definitions of bc2, bc3, buf1, buf2 and ex, the communication waits and
// some branch conditions are not visible in this extract).
// NOTE(review): the function name is not visible; presumably this is
// up_yh_nv, which the dispatch code in this file calls -- confirm.
569 template<
typename AFIELD>
// This thread handles the site range [is, ns) out of m_Nvol2.
583 int ith, nth, is, ns;
584 set_threadtask(ith, nth, is, ns, m_Nvol2);
// With communication in y: start the receive from "up", pack, then send.
588 if (do_comm[1] == 1) {
591 chrecv_up[1].start();
593 for (
int site = is; site < ns; ++site) {
// Decompose the site index into x, y and the combined (z,t) part.
594 int ix = site % m_Nx2;
595 int iy = (site / m_Nx2) % m_Ny;
596 int izt = site / (m_Nx2 * m_Ny);
// Pack into buf1, scaled by bc2, at the transverse index ixzt.
// NOTE(review): the condition selecting which iy is packed is not visible.
598 for (
int in = 0; in < m_Nin; ++in) {
599 int iw = index_alt.idxh(in, m_Nin, site, ex);
600 int ixzt = ix + m_Nx2 * izt;
601 buf1[in + m_Nin * ixzt] = bc2 * wp[iw];
609 chsend_dn[1].start();
// Bulk part: sites whose y+1 neighbour is available locally.
613 for (
int site = is; site < ns; ++site) {
614 int ix = site % m_Nx2;
615 int iy = (site / m_Nx2) % m_Ny;
616 int izt = site / (m_Nx2 * m_Ny);
617 if ((iy < m_Ny - 1) || (do_comm[1] == 0)) {
// Neighbour at y+1 with periodic wrap inside the local lattice.
618 int iy2 = (iy + 1) % m_Ny;
619 int nei = ix + m_Nx2 * (iy2 + m_Ny * izt);
// On the upper edge the boundary factor bc2 is used.
621 if (iy == m_Ny - 1) bc3 = bc2;
622 for (
int in = 0; in < m_Nin; ++in) {
623 int iv = index_alt.idxh(in, m_Nin, site, ex);
624 int iw = index_alt.idxh(in, m_Nin, nei, ex);
625 vp[iv] = bc3 * wp[iw];
// With communication: fill the upper-edge sites (iy == m_Ny - 1) from buf2.
// Presumably buf2 is the receive buffer -- confirm.
630 if (do_comm[1] == 1) {
638 for (
int site = is; site < ns; ++site) {
639 int ix = site % m_Nx2;
640 int iy = (site / m_Nx2) % m_Ny;
641 int izt = site / (m_Nx2 * m_Ny);
642 int ixzt = ix + m_Nx2 * izt;
643 if (iy == m_Ny - 1) {
644 for (
int in = 0; in < m_Nin; ++in) {
645 int iv = index_alt.idxh(in, m_Nin, site, ex);
646 vp[iv] = buf2[in + m_Nin * ixzt];
// y-direction "down" shift written site by site (fragment; the signature,
// the definitions of bc2, bc3, buf1, buf2 and ex, the communication waits and
// some branch conditions are not visible in this extract).
// NOTE(review): the function name is not visible; presumably this is
// dn_yh_nv, which the dispatch code in this file calls -- confirm.
661 template<
typename AFIELD>
// This thread handles the site range [is, ns) out of m_Nvol2.
675 int ith, nth, is, ns;
676 set_threadtask(ith, nth, is, ns, m_Nvol2);
// With communication in y: start the receive from "down", pack, then send.
680 if (do_comm[1] == 1) {
683 chrecv_dn[1].start();
685 for (
int site = is; site < ns; ++site) {
// Decompose the site index into x, y and the combined (z,t) part.
686 int ix = site % m_Nx2;
687 int iy = (site / m_Nx2) % m_Ny;
688 int izt = site / (m_Nx2 * m_Ny);
// Pack the upper-edge sites (iy == m_Ny - 1) unscaled into buf1.
689 if (iy == m_Ny - 1) {
690 for (
int in = 0; in < m_Nin; ++in) {
691 int iw = index_alt.idxh(in, m_Nin, site, ex);
692 int ixzt = ix + m_Nx2 * izt;
693 buf1[in + m_Nin * ixzt] = wp[iw];
702 chsend_up[1].start();
// Bulk part: sites whose y-1 neighbour is available locally.
706 for (
int site = is; site < ns; ++site) {
707 int ix = site % m_Nx2;
708 int iy = (site / m_Nx2) % m_Ny;
709 int izt = site / (m_Nx2 * m_Ny);
710 if ((iy > 0) || (do_comm[1] == 0)) {
// Neighbour at y-1; adding m_Ny keeps the operand of % non-negative.
711 int iy2 = (iy - 1 + m_Ny) % m_Ny;
712 int nei = ix + m_Nx2 * (iy2 + m_Ny * izt);
// On the lower edge the boundary factor bc2 is used.
714 if (iy == 0) bc3 = bc2;
715 for (
int in = 0; in < m_Nin; ++in) {
716 int iv = index_alt.idxh(in, m_Nin, site, ex);
717 int iw = index_alt.idxh(in, m_Nin, nei, ex);
718 vp[iv] = bc3 * wp[iw];
// With communication: fill from buf2, scaled by bc2 on this (receive) side.
// NOTE(review): the condition selecting which iy is filled is not visible;
// presumably iy == 0 -- confirm.
723 if (do_comm[1] == 1) {
731 for (
int site = is; site < ns; ++site) {
732 int ix = site % m_Nx2;
733 int iy = (site / m_Nx2) % m_Ny;
734 int izt = site / (m_Nx2 * m_Ny);
735 int ixzt = ix + m_Nx2 * izt;
737 for (
int in = 0; in < m_Nin; ++in) {
738 int iv = index_alt.idxh(in, m_Nin, site, ex);
739 vp[iv] = bc2 * buf2[in + m_Nin * ixzt];
// z-direction "up" shift written site by site (fragment; the signature, the
// definitions of bc2, bc3, buf1, buf2 and ex, the communication waits and
// some branch conditions are not visible in this extract).
// NOTE(review): the function name is not visible; presumably this is
// up_zh_nv, which the dispatch code in this file calls -- confirm.
755 template<
typename AFIELD>
// This thread handles the site range [is, ns) out of m_Nvol2.
769 int ith, nth, is, ns;
770 set_threadtask(ith, nth, is, ns, m_Nvol2);
// Number of sites in one (x,y) plane of the half lattice.
772 int Nxy = m_Nx2 * m_Ny;
// With communication in z: start the receive from "up", pack, then send.
776 if (do_comm[2] == 1) {
779 chrecv_up[2].start();
781 for (
int site = is; site < ns; ++site) {
// Decompose the site index into the (x,y) part, z and t.
782 int ixy = site % Nxy;
783 int iz = (site / Nxy) % m_Nz;
784 int it = site / (Nxy * m_Nz);
// Pack into buf1, scaled by bc2, at the transverse index ixyt.
// NOTE(review): the condition selecting which iz is packed is not visible.
786 for (
int in = 0; in < m_Nin; ++in) {
787 int iw = index_alt.idxh(in, m_Nin, site, ex);
788 int ixyt = ixy + Nxy * it;
789 buf1[in + m_Nin * ixyt] = bc2 * wp[iw];
798 chsend_dn[2].start();
// Bulk part: sites whose z+1 neighbour is available locally.
802 for (
int site = is; site < ns; ++site) {
803 int ixy = site % Nxy;
804 int iz = (site / Nxy) % m_Nz;
805 int it = site / (Nxy * m_Nz);
806 if ((iz < m_Nz - 1) || (do_comm[2] == 0)) {
// Neighbour at z+1 with periodic wrap inside the local lattice.
807 int iz2 = (iz + 1) % m_Nz;
808 int nei = ixy + Nxy * (iz2 + m_Nz * it);
// On the upper edge the boundary factor bc2 is used.
810 if (iz == m_Nz - 1) bc3 = bc2;
811 for (
int in = 0; in < m_Nin; ++in) {
812 int iv = index_alt.idxh(in, m_Nin, site, ex);
813 int iw = index_alt.idxh(in, m_Nin, nei, ex);
814 vp[iv] = bc3 * wp[iw];
// With communication: fill the upper-edge sites (iz == m_Nz - 1) from buf2.
// Presumably buf2 is the receive buffer -- confirm.
819 if (do_comm[2] == 1) {
827 for (
int site = is; site < ns; ++site) {
828 int ixy = site % Nxy;
829 int iz = (site / Nxy) % m_Nz;
830 int it = site / (Nxy * m_Nz);
831 if (iz == m_Nz - 1) {
832 for (
int in = 0; in < m_Nin; ++in) {
833 int iv = index_alt.idxh(in, m_Nin, site, ex);
834 int ixyt = ixy + Nxy * it;
835 vp[iv] = buf2[in + m_Nin * ixyt];
// z-direction "down" shift written site by site (fragment; the signature,
// the definitions of bc2, bc3, buf1, buf2 and ex, the communication waits and
// some branch conditions are not visible in this extract).
// NOTE(review): the function name is not visible; presumably this is
// dn_zh_nv, which the dispatch code in this file calls -- confirm.
850 template<
typename AFIELD>
// This thread handles the site range [is, ns) out of m_Nvol2.
866 int ith, nth, is, ns;
867 set_threadtask(ith, nth, is, ns, m_Nvol2);
// Number of sites in one (x,y) plane of the half lattice.
869 int Nxy = m_Nx2 * m_Ny;
// With communication in z: start the receive from "down", pack, then send.
873 if (do_comm[2] == 1) {
876 chrecv_dn[2].start();
879 for (
int site = is; site < ns; ++site) {
// Decompose the site index into the (x,y) part, z and t.
880 int ixy = site % Nxy;
881 int iz = (site / Nxy) % m_Nz;
882 int it = site / (Nxy * m_Nz);
// Pack the upper-edge sites (iz == m_Nz - 1) unscaled into buf1.
883 if (iz == m_Nz - 1) {
884 for (
int in = 0; in < m_Nin; ++in) {
885 int iw = index_alt.idxh(in, m_Nin, site, ex);
886 int ixyt = ixy + Nxy * it;
887 buf1[in + m_Nin * ixyt] = wp[iw];
896 chsend_up[2].start();
// Bulk part: sites whose z-1 neighbour is available locally.
900 for (
int site = is; site < ns; ++site) {
901 int ixy = site % Nxy;
902 int iz = (site / Nxy) % m_Nz;
903 int it = site / (Nxy * m_Nz);
904 if ((iz > 0) || (do_comm[2] == 0)) {
// Neighbour at z-1; adding m_Nz keeps the operand of % non-negative.
905 int iz2 = (iz - 1 + m_Nz) % m_Nz;
906 int nei = ixy + Nxy * (iz2 + m_Nz * it);
// On the lower edge the boundary factor bc2 is used.
908 if (iz == 0) bc3 = bc2;
909 for (
int in = 0; in < m_Nin; ++in) {
910 int iv = index_alt.idxh(in, m_Nin, site, ex);
911 int iw = index_alt.idxh(in, m_Nin, nei, ex);
912 vp[iv] = bc3 * wp[iw];
// With communication: fill from buf2, scaled by bc2 on this (receive) side.
// NOTE(review): the condition selecting which iz is filled is not visible;
// presumably iz == 0 -- confirm.
917 if (do_comm[2] == 1) {
925 for (
int site = is; site < ns; ++site) {
926 int ixy = site % Nxy;
927 int iz = (site / Nxy) % m_Nz;
928 int it = site / (Nxy * m_Nz);
930 for (
int in = 0; in < m_Nin; ++in) {
931 int iv = index_alt.idxh(in, m_Nin, site, ex);
932 int ixyt = ixy + Nxy * it;
933 vp[iv] = bc2 * buf2[in + m_Nin * ixyt];
// t-direction "up" shift written site by site (fragment; the signature, the
// definitions of bc2, bc3, buf1, buf2 and ex, and some branch conditions are
// not visible in this extract).
// NOTE(review): the function name is not visible; presumably this is
// up_th_nv, which the dispatch code in this file calls -- confirm.
949 template<
typename AFIELD>
// This thread handles the site range [is, ns) out of m_Nvol2.
963 int ith, nth, is, ns;
964 set_threadtask(ith, nth, is, ns, m_Nvol2);
// Number of sites in one time slice of the half lattice.
966 int Nxyz = m_Nx2 * m_Ny * m_Nz;
// With communication in t: start the receive from "up", pack, then send.
970 if (do_comm[3] == 1) {
973 chrecv_up[3].start();
975 for (
int site = is; site < ns; ++site) {
// Decompose the site index into the spatial part and t.
976 int ixyz = site % Nxyz;
977 int it = site / Nxyz;
// Pack into buf1, scaled by bc2, at the spatial index ixyz.
// NOTE(review): the condition selecting which it is packed is not visible.
979 for (
int in = 0; in < m_Nin; ++in) {
980 int iw = index_alt.idxh(in, m_Nin, site, ex);
981 buf1[in + m_Nin * ixyz] = bc2 * wp[iw];
990 chsend_dn[3].start();
// Bulk part: sites whose t+1 neighbour is available locally.
995 for (
int site = is; site < ns; ++site) {
996 int ixyz = site % Nxyz;
997 int it = site / Nxyz;
998 if ((it < m_Nt - 1) || (do_comm[3] == 0)) {
// Neighbour at t+1 with periodic wrap inside the local lattice.
999 int it2 = (it + 1) % m_Nt;
1000 int nei = ixyz + Nxyz * it2;
// On the upper edge the boundary factor bc2 is used.
1002 if (it == m_Nt - 1) bc3 = bc2;
1003 for (
int in = 0; in < m_Nin; ++in) {
1004 int iv = index_alt.idxh(in, m_Nin, site, ex);
1005 int iw = index_alt.idxh(in, m_Nin, nei, ex);
1006 vp[iv] = bc3 * wp[iw];
// With communication: wait for the receive, then fill the upper-edge sites
// (it == m_Nt - 1) from buf2. Presumably buf2 is the receive buffer -- confirm.
1011 if (do_comm[3] == 1) {
1014 chrecv_up[3].wait();
1019 for (
int site = is; site < ns; ++site) {
1020 int ixyz = site % Nxyz;
1021 int it = site / Nxyz;
1022 if (it == m_Nt - 1) {
1023 for (
int in = 0; in < m_Nin; ++in) {
1024 int iv = index_alt.idxh(in, m_Nin, site, ex);
1025 vp[iv] = buf2[in + m_Nin * ixyz];
// Wait for the matching send to complete.
1031 chsend_dn[3].wait();
// t-direction "down" shift written site by site (fragment; the signature,
// the definitions of bc2, bc3, buf1, buf2 and ex, and some branch conditions
// are not visible in this extract).
// NOTE(review): the function name is not visible; presumably this is
// dn_th_nv, which the dispatch code in this file calls -- confirm.
1040 template<
typename AFIELD>
// This thread handles the site range [is, ns) out of m_Nvol2.
1054 int ith, nth, is, ns;
1055 set_threadtask(ith, nth, is, ns, m_Nvol2);
// Number of sites in one time slice of the half lattice.
1057 int Nxyz = m_Nx2 * m_Ny * m_Nz;
// With communication in t: start the receive from "down", pack, then send.
1061 if (do_comm[3] == 1) {
1064 chrecv_dn[3].start();
1066 for (
int site = is; site < ns; ++site) {
// Decompose the site index into the spatial part and t.
1067 int ixyz = site % Nxyz;
1068 int it = site / Nxyz;
// Pack the upper-edge sites (it == m_Nt - 1) unscaled into buf1.
1069 if (it == m_Nt - 1) {
1070 for (
int in = 0; in < m_Nin; ++in) {
1071 int iw = index_alt.idxh(in, m_Nin, site, ex);
1072 buf1[in + m_Nin * ixyz] = wp[iw];
1081 chsend_up[3].start();
// Bulk part: sites whose t-1 neighbour is available locally.
1085 for (
int site = is; site < ns; ++site) {
1086 int ixyz = site % Nxyz;
1087 int it = site / Nxyz;
1088 if ((it > 0) || (do_comm[3] == 0)) {
// Neighbour at t-1; adding m_Nt keeps the operand of % non-negative.
1089 int it2 = (it - 1 + m_Nt) % m_Nt;
1090 int nei = ixyz + Nxyz * it2;
// On the lower edge the boundary factor bc2 is used.
1092 if (it == 0) bc3 = bc2;
1093 for (
int in = 0; in < m_Nin; ++in) {
1094 int iv = index_alt.idxh(in, m_Nin, site, ex);
1095 int iw = index_alt.idxh(in, m_Nin, nei, ex);
1096 vp[iv] = bc3 * wp[iw];
// With communication: wait for the receive, then fill from buf2, scaled by
// bc2 on this (receive) side.
// NOTE(review): the condition selecting which it is filled is not visible;
// presumably it == 0 -- confirm.
1101 if (do_comm[3] == 1) {
1104 chrecv_dn[3].wait();
1109 for (
int site = is; site < ns; ++site) {
1110 int ixyz = site % Nxyz;
1111 int it = site / Nxyz;
1113 for (
int in = 0; in < m_Nin; ++in) {
1114 int iv = index_alt.idxh(in, m_Nin, site, ex);
1115 vp[iv] = bc2 * buf2[in + m_Nin * ixyz];
// Wait for the matching send to complete.
1122 chsend_up[3].wait();