12 template<
typename AFIELD>
14 =
"AFopr_Domainwall_5din_eo";
16 template<
typename AFIELD>
32 vout.
general(m_vl,
"%s: construction\n", class_name.c_str());
38 if (repr !=
"Dirac") {
39 vout.
crucial(
" Error at %s: unsupported gamma-matrix type: %s\n",
40 class_name.c_str(), repr.c_str());
66 m_Nx2v = m_Nx2 /
VLENX;
68 m_Nst2v = m_Nst2 /
VLEN;
76 if (m_Nx % (2 *
VLENX) != 0) {
77 vout.
crucial(m_vl,
"%s: Nx must be mulriple of 2*VLENX.\n",
81 if (m_Ny % (
VLENY) != 0) {
82 vout.
crucial(m_vl,
"%s: Ny must be multiple of VLENY.\n",
91 m_Leo.resize(m_Ny * m_Nz * m_Nt);
96 for (
int t = 0; t < m_Nt; ++t) {
97 for (
int z = 0; z < m_Nz; ++z) {
98 for (
int y = 0; y < m_Ny; ++y) {
99 int t2 = ipe3 * m_Nt + t;
100 int z2 = ipe2 * m_Nz + z;
101 int y2 = ipe1 * m_Ny + y;
102 m_Leo[y + m_Ny * (z + m_Nz * t)] = (y2 + z2 + t2) % 2;
113 for (
int mu = 0; mu < m_Ndim; ++mu) {
116 do_comm_any += do_comm[mu];
117 vout.
general(
" do_comm[%d] = %d\n", mu, do_comm[mu]);
123 set_parameters(params);
127 m_Nbdsize.resize(m_Ndim);
128 int Nbdin = (m_Nvcd / 2) * m_Ns;
129 m_Nbdsize[0] = Nbdin * ((m_Ny * m_Nz * m_Nt + 1) / 2);
130 m_Nbdsize[1] = Nbdin * m_Nx2 * m_Nz * m_Nt;
131 m_Nbdsize[2] = Nbdin * m_Nx2 * m_Ny * m_Nt;
132 m_Nbdsize[3] = Nbdin * m_Nx2 * m_Ny * m_Nz;
137 m_Ueo.reset(m_Ndf, m_Nvol, m_Ndim);
138 m_Ulex.reset(m_Ndf, m_Nvol, m_Ndim);
146 template<
typename AFIELD>
154 template<
typename AFIELD>
157 chsend_up.resize(m_Ndim);
158 chrecv_up.resize(m_Ndim);
159 chsend_dn.resize(m_Ndim);
160 chrecv_dn.resize(m_Ndim);
162 for (
int mu = 0; mu < m_Ndim; ++mu) {
163 size_t Nvsize = m_Nbdsize[mu] *
sizeof(
real_t);
165 chsend_dn[mu].send_init(Nvsize, mu, -1);
166 chsend_up[mu].send_init(Nvsize, mu, 1);
168 chrecv_up[mu].recv_init(Nvsize, mu, 1);
169 chrecv_dn[mu].recv_init(Nvsize, mu, -1);
171 void *buf_up = (
void *)chsend_dn[mu].ptr();
172 chrecv_up[mu].recv_init(Nvsize, mu, 1, buf_up);
173 void *buf_dn = (
void *)chsend_up[mu].ptr();
174 chrecv_dn[mu].recv_init(Nvsize, mu, -1, buf_dn);
177 if (do_comm[mu] == 1) {
178 chset_send.append(chsend_up[mu]);
179 chset_send.append(chsend_dn[mu]);
180 chset_recv.append(chrecv_up[mu]);
181 chset_recv.append(chrecv_dn[mu]);
188 template<
typename AFIELD>
198 int err_optional = 0;
199 err_optional += params.
fetch_string(
"gamma_matrix_type", gmset_type);
201 if (gmset_type != m_repr) {
202 vout.
crucial(m_vl,
"%s: unsupported gamma_matrix_type: %s\n",
203 class_name.c_str(), gmset_type.c_str());
211 err += params.
fetch_int(
"extent_of_5th_dimension", Ns);
217 vout.
crucial(m_vl,
"Error at %s: input parameter not found.\n",
222 set_parameters(mq, M0, Ns, bc, b, c);
227 template<
typename AFIELD>
232 const vector<int> bc,
236 assert(bc.size() == m_Ndim);
246 m_NinF = m_Nvcd * m_Ns;
248 if (m_boundary.size() != m_Ndim) m_boundary.resize(m_Ndim);
249 for (
int mu = 0; mu < m_Ndim; ++mu) {
250 m_boundary[mu] = bc[mu];
253 if (m_b.size() != m_Ns) {
257 for (
int is = 0; is < m_Ns; ++is) {
263 vout.
general(m_vl,
"Parameters of %s:\n", class_name.c_str());
267 for (
int mu = 0; mu < m_Ndim; ++mu) {
268 vout.
general(m_vl,
" boundary[%d] = %2d\n", mu, m_boundary[mu]);
270 vout.
general(m_vl,
" coefficients b = %16.10f c = %16.10f\n",
273 set_precond_parameters();
276 if (m_y1.nin() != m_NinF) {
278 m_y1.reset(m_NinF, m_Nst2, 1);
279 m_v1.reset(m_NinF, m_Nst2, 1);
280 m_v2.reset(m_NinF, m_Nst2, 1);
289 template<
typename AFIELD>
297 if (m_dp.size() != m_Ns) {
299 m_dpinv.resize(m_Ns);
301 m_e.resize(m_Ns - 1);
302 m_f.resize(m_Ns - 1);
305 for (
int is = 0; is < m_Ns; ++is) {
306 m_dp[is] = 1.0 + m_b[is] * (4.0 - m_M0);
307 m_dm[is] = 1.0 - m_c[is] * (4.0 - m_M0);
310 m_e[0] = m_mq * m_dm[m_Ns - 1] / m_dp[0];
311 m_f[0] = m_mq * m_dm[0];
312 for (
int is = 1; is < m_Ns - 1; ++is) {
313 m_e[is] = m_e[is - 1] * m_dm[is - 1] / m_dp[is];
314 m_f[is] = m_f[is - 1] * m_dm[is] / m_dp[is - 1];
317 m_g = m_e[m_Ns - 2] * m_dm[m_Ns - 2];
319 for (
int is = 0; is < m_Ns - 1; ++is) {
320 m_dpinv[is] = 1.0 / m_dp[is];
322 m_dpinv[m_Ns - 1] = 1.0 / (m_dp[m_Ns - 1] + m_g);
330 template<
typename AFIELD>
332 const std::vector<double> vec_b,
333 const std::vector<double> vec_c)
337 if ((vec_b.size() != m_Ns) || (vec_c.size() != m_Ns)) {
338 vout.
crucial(m_vl,
"%s: size of coefficient vectors incorrect.\n",
342 vout.
general(m_vl,
"%s: coefficient vectors are set:\n",
348 for (
int is = 0; is < m_Ns; ++is) {
349 m_b[is] =
real_t(vec_b[is]);
350 m_c[is] =
real_t(vec_c[is]);
356 for (
int is = 0; is < m_Ns; ++is) {
357 vout.
general(m_vl,
"b[%2d] = %16.10f c[%2d] = %16.10f\n",
358 is, m_b[is], is, m_c[is]);
361 set_precond_parameters();
366 template<
typename AFIELD>
371 vout.
detailed(m_vl,
"%s: set_config is called: num_threads = %d\n",
372 class_name.c_str(), nth);
380 vout.
detailed(m_vl,
"%s: set_config finished\n", class_name.c_str());
385 template<
typename AFIELD>
398 template<
typename AFIELD>
410 int ith, nth, is, ns;
411 set_threadtask_mult(ith, nth, is, ns, m_Nvol);
413 for (
int ex = 0; ex < m_Ndim; ++ex) {
414 for (
int site = is; site < ns; ++site) {
415 for (
int in = 0; in < m_Ndf; ++in) {
416 int iv1 = index_lex.idx(in, m_Ndf, site, ex);
417 int iv2 = index_eo.idx(in, m_Ndf, site, ex);
418 m_Ueo.set(iv2, m_Ulex.cmp(iv1));
477 template<
typename AFIELD>
483 int ith, nth, site0, site1;
484 set_threadtask_mult(ith, nth, site0, site1, Nvol);
490 for (
int site = site0; site < site1; ++site) {
491 for (
int is = 0; is < m_Ns; ++is) {
492 for (
int ic = 0; ic <
NC; ++ic) {
493 for (
int id = 0;
id <
ND2; ++id) {
494 for (
int iri = 0; iri < 2; ++iri) {
495 int in_org = iri + 2 * (ic +
NC * id);
496 int in_alt = iri + 2 * (
id +
ND * ic) +
NVCD * is;
497 v.
set(index.idx(in_alt, m_NinF, site, 0),
502 for (
int id =
ND2;
id <
ND; ++id) {
503 for (
int iri = 0; iri < 2; ++iri) {
504 int in_org = iri + 2 * (ic +
NC * id);
505 int in_alt = iri + 2 * (
id +
ND * ic) +
NVCD * is;
506 v.
set(index.idx(in_alt, m_NinF, site, 0),
519 template<
typename AFIELD>
525 int ith, nth, site0, site1;
526 set_threadtask_mult(ith, nth, site0, site1, Nvol);
532 for (
int site = site0; site < site1; ++site) {
533 for (
int is = 0; is < m_Ns; ++is) {
534 for (
int ic = 0; ic <
NC; ++ic) {
535 for (
int id = 0;
id <
ND2; ++id) {
536 for (
int iri = 0; iri < 2; ++iri) {
537 int in_org = iri + 2 * (ic +
NC * id);
538 int in_alt = iri + 2 * (
id +
ND * ic) +
NVCD * is;
539 v.
set(in_org, site, is,
540 double( w.
cmp(index.idx(in_alt, m_NinF, site, 0))));
544 for (
int id =
ND2;
id <
ND; ++id) {
545 for (
int iri = 0; iri < 2; ++iri) {
546 int in_org = iri + 2 * (ic +
NC * id);
547 int in_alt = iri + 2 * (
id +
ND * ic) +
NVCD * is;
548 v.
set(in_org, site, is,
549 double( -w.
cmp(index.idx(in_alt, m_NinF, site, 0))));
561 template<
typename AFIELD>
567 if (ith == 0) m_mode = mode;
574 template<
typename AFIELD>
579 }
else if (m_mode ==
"Ddag") {
581 }
else if (m_mode ==
"DdagD") {
584 vout.
crucial(m_vl,
"mode undeifined in %s.\n", class_name.c_str());
591 template<
typename AFIELD>
597 }
else if (m_mode ==
"Ddag") {
599 }
else if (m_mode ==
"DdagD") {
602 vout.
crucial(m_vl,
"mode undeifined in %s.\n", class_name.c_str());
609 template<
typename AFIELD>
619 }
else if (mode ==
"Doe") {
621 }
else if (mode ==
"Dee") {
623 }
else if (mode ==
"Doo") {
625 }
else if (mode ==
"Dee_inv") {
627 }
else if (mode ==
"Doo_inv") {
630 std::cout <<
"mode undeifined in AFopr_Domainwall_eo.\n";
637 template<
typename AFIELD>
647 }
else if (mode ==
"Doe") {
649 }
else if (mode ==
"Dee") {
651 }
else if (mode ==
"Doo") {
653 }
else if (mode ==
"Dee_inv") {
654 Ddag_ee_inv(v, w, 0);
655 }
else if (mode ==
"Doo_inv") {
656 Ddag_ee_inv(v, w, 1);
658 std::cout <<
"mode undeifined in AFopr_Domainwall_eo.\n";
665 template<
typename AFIELD>
694 template<
typename AFIELD>
711 template<
typename AFIELD>
717 Ldag_inv(m_v2, m_v1);
718 Ddag_eo(m_v1, m_v2, 1);
719 Udag_inv(m_v2, m_v1);
720 Ldag_inv(m_v1, m_v2);
721 Ddag_eo(m_v2, m_v1, 0);
730 template<
typename AFIELD>
735 mult_D_eo(v, w, ieo);
740 template<
typename AFIELD>
745 mult_Ddag_eo(v, w, ieo);
750 template<
typename AFIELD>
758 real_t *up = m_Ueo.ptr(0);
767 m_mq, m_M0, m_Ns, &m_boundary[0],
773 if (do_comm_any > 0) {
774 if (ith == 0) chset_recv.start();
786 buf1_xp, buf1_xm, buf1_yp, buf1_ym,
787 buf1_zp, buf1_zm, buf1_tp, buf1_tm,
789 m_mq, m_M0, m_Ns, &m_boundary[0],
790 &m_Leo[0], m_Nsize, do_comm, ieo);
794 if (ith == 0) chset_send.start();
798 m_mq, m_M0, m_Ns, &m_boundary[0],
800 &m_Leo[0], m_Nsize, do_comm, ieo);
802 if (do_comm_any > 0) {
803 if (ith == 0) chset_recv.wait();
817 buf2_xp, buf2_xm, buf2_yp, buf2_ym,
818 buf2_zp, buf2_zm, buf2_tp, buf2_tm,
819 m_mq, m_M0, m_Ns, &m_boundary[0],
820 &m_Leo[0], m_Nsize, do_comm, ieo);
822 if (ith == 0) chset_send.wait();
830 template<
typename AFIELD>
838 real_t *up = m_Ueo.ptr(0);
850 if (do_comm_any > 0) {
851 if (ith == 0) chset_recv.start();
863 buf1_xp, buf1_xm, buf1_yp, buf1_ym,
864 buf1_zp, buf1_zm, buf1_tp, buf1_tm,
866 m_mq, m_M0, m_Ns, &m_boundary[0],
867 &m_Leo[0], m_Nsize, do_comm, ieo);
871 if (ith == 0) chset_send.start();
875 m_mq, m_M0, m_Ns, &m_boundary[0],
877 &m_Leo[0], m_Nsize, do_comm, ieo);
879 if (do_comm_any > 0) {
880 if (ith == 0) chset_recv.wait();
894 buf2_xp, buf2_xm, buf2_yp, buf2_ym,
895 buf2_zp, buf2_zm, buf2_tp, buf2_tm,
896 m_mq, m_M0, m_Ns, &m_boundary[0],
897 &m_Leo[0], m_Nsize, do_comm, ieo);
899 if (ith == 0) chset_send.wait();
905 m_mq, m_M0, m_Ns, &m_boundary[0],
914 template<
typename AFIELD>
926 template<
typename AFIELD>
938 template<
typename AFIELD>
944 int Nin5 = Nin4 * m_Ns;
949 int ith, nth, site0, site1;
950 set_threadtask_mult(ith, nth, site0, site1, m_Nst2v);
954 for (
int site = site0; site < site1; ++site) {
955 real_t *vp2 = &vp[Nin5 * site];
956 real_t *wp2 = &wp[Nin5 * site];
960 for (
int is = 0; is < m_Ns; ++is) {
961 real_t FF1 = m_b[is] * (4.0 - m_M0) + 1.0;
962 real_t FF2 = m_c[is] * (4.0 - m_M0) - 1.0;
964 load_vec(wL, &wp2[Nin4 * is],
NVCD);
965 set_vec(vL, FF1, wL,
NVCD);
967 int is_up = (is + 1) % m_Ns;
969 if (is == m_Ns - 1) Fup *= -m_mq;
970 load_vec(wL, &wp2[Nin4 * is_up],
NVCD);
971 add_aPm5_dirac_vec(vL, Fup, wL,
NC);
973 int is_dn = (is - 1 + m_Ns) % m_Ns;
975 if (is == 0) Fdn *= -m_mq;
976 load_vec(wL, &wp2[Nin4 * is_dn],
NVCD);
977 add_aPp5_dirac_vec(vL, Fdn, wL,
NC);
979 save_vec(&vp2[Nin4 * is], vL,
NVCD);
988 template<
typename AFIELD>
994 int Nin5 = Nin4 * m_Ns;
999 int ith, nth, site0, site1;
1000 set_threadtask_mult(ith, nth, site0, site1, m_Nst2v);
1004 for (
int site = site0; site < site1; ++site) {
1005 real_t *vp2 = &vp[Nin5 * site];
1006 real_t *wp2 = &wp[Nin5 * site];
1010 for (
int is = 0; is < m_Ns; ++is) {
1011 real_t FF1 = m_b[is] * (4.0 - m_M0) + 1.0;
1012 load_vec(vL, &wp2[Nin4 * is],
NVCD);
1013 scal_vec(vL, FF1,
NVCD);
1015 int is_up = (is + 1) % m_Ns;
1016 real_t Fup = 0.5 * (m_c[is_up] * (4.0 - m_M0) - 1.0);
1017 if (is == m_Ns - 1) Fup *= -m_mq;
1018 load_vec(wL, &wp2[Nin4 * is_up],
NVCD);
1019 add_aPp5_dirac_vec(vL, Fup, wL,
NC);
1021 int is_dn = (is - 1 + m_Ns) % m_Ns;
1022 real_t Fdn = 0.5 * (m_c[is_dn] * (4.0 - m_M0) - 1.0);
1023 if (is == 0) Fdn *= -m_mq;
1024 load_vec(wL, &wp2[Nin4 * is_dn],
NVCD);
1025 add_aPm5_dirac_vec(vL, Fdn, wL,
NC);
1027 save_vec(&vp2[Nin4 * is], vL,
NVCD);
1036 template<
typename AFIELD>
1044 vp, wp, m_Ns, m_Nsize,
1045 &m_e[0], &m_dpinv[0], &m_dm[0]);
1050 template<
typename AFIELD>
1058 vp, wp, m_Ns, m_Nsize,
1059 &m_f[0], &m_dpinv[0], &m_dm[0]);
1064 template<
typename AFIELD>
1072 vp, wp, m_Ns, m_Nsize,
1073 &m_f[0], &m_dpinv[0], &m_dm[0]);
1078 template<
typename AFIELD>
1086 vp, wp, m_Ns, m_Nsize,
1087 &m_e[0], &m_dpinv[0], &m_dm[0]);
1092 template<
typename AFIELD>
1098 double vsite =
static_cast<double>(Lvol);
1099 double vNs =
static_cast<double>(m_Ns);
1101 double flop_Wilson_hop;
1106 if (m_repr ==
"Dirac") {
1107 flop_Wilson_hop =
static_cast<double>(
1108 vNs * Nc * Nd * (6 * (4 * Nc + 2) + 2 * (4 * Nc + 1))) * vsite;
1109 flop_pre =
static_cast<double>(vNs * Nc * Nd * 14) * vsite;
1110 flop_LU_inv =
static_cast<double>(Nc * Nd * (2 + (vNs - 1) * 26)) * vsite;
1111 }
else if (m_repr ==
"Chiral") {
1112 flop_Wilson_hop =
static_cast<double>(
1113 vNs * Nc * Nd * (8 * (4 * Nc + 2))) * vsite;
1114 flop_pre =
static_cast<double>(vNs * Nc * Nd * 6) * vsite;
1115 flop_LU_inv =
static_cast<double>(Nc * Nd * (2 + (vNs - 1) * 10)) * vsite;
1117 double flop_axpy =
static_cast<double>(vNs * 2 * Nc * Nd) * vsite;
1123 double flop_Deo = flop_Wilson_hop + flop_pre;
1130 if ((mode ==
"D") || (mode ==
"Ddag")) {
1131 flop = flop_Deo + flop_LU_inv + 0.5 * flop_axpy;
1132 }
else if (mode ==
"DdagD") {
1133 flop = 2 * flop_Deo + 2 * flop_LU_inv + flop_axpy;
1134 }
else if ((mode ==
"Dee_inv") || (mode ==
"Doo_inv")) {
1135 flop = 0.5 * flop_LU_inv;
1136 }
else if ((mode ==
"Dee") || (mode ==
"Doo")) {
1137 flop = 0.5 * flop_pre;
1138 }
else if ((mode ==
"Doe") || (mode ==
"Doe")) {
1139 flop = 0.5 * flop_Deo;
1141 vout.
crucial(m_vl,
"Error at %s: input mode is undefined: %s.\n",
1142 class_name.c_str(), mode.c_str());