10 template<
typename AFIELD>
12 =
"AFopr_Domainwall_5din";
14 template<
typename AFIELD>
29 vout.
general(m_vl,
"%s: construction\n", class_name.c_str());
35 if (repr !=
"Dirac") {
36 vout.
crucial(
" Error at %s: unsupported gamma-matrix type: %s\n",
37 class_name.c_str(), repr.c_str());
62 m_Nstv = m_Nvol /
VLEN;
64 if (
VLENX * m_Nxv != m_Nx) {
65 vout.
crucial(m_vl,
"%s: Nx must be multiple of VLENX.\n",
69 if (
VLENY * m_Nyv != m_Ny) {
70 vout.
crucial(m_vl,
"%s: Ny must be multiple of VLENY.\n",
85 for (
int mu = 0; mu < m_Ndim; ++mu) {
88 do_comm_any += do_comm[mu];
89 vout.
general(
" do_comm[%d] = %d\n", mu, do_comm[mu]);
95 set_parameters(params);
100 m_U.reset(m_Ndf, m_Nvol, m_Ndim);
102 m_Nbdsize.resize(m_Ndim);
103 int Nvst = (m_Nvcd / 2) * m_Ns * m_Nvol;
104 m_Nbdsize[0] = Nvst / m_Nx;
105 m_Nbdsize[1] = Nvst / m_Ny;
106 m_Nbdsize[2] = Nvst / m_Nz;
107 m_Nbdsize[3] = Nvst / m_Nt;
117 template<
typename AFIELD>
125 template<
typename AFIELD>
128 chsend_up.resize(m_Ndim);
129 chrecv_up.resize(m_Ndim);
130 chsend_dn.resize(m_Ndim);
131 chrecv_dn.resize(m_Ndim);
133 for (
int mu = 0; mu < m_Ndim; ++mu) {
134 size_t Nvsize = m_Nbdsize[mu] *
sizeof(
real_t);
136 chsend_dn[mu].send_init(Nvsize, mu, -1);
137 chsend_up[mu].send_init(Nvsize, mu, 1);
139 chrecv_up[mu].recv_init(Nvsize, mu, 1);
140 chrecv_dn[mu].recv_init(Nvsize, mu, -1);
142 void *buf_up = (
void *)chsend_dn[mu].ptr();
143 chrecv_up[mu].recv_init(Nvsize, mu, 1, buf_up);
144 void *buf_dn = (
void *)chsend_up[mu].ptr();
145 chrecv_dn[mu].recv_init(Nvsize, mu, -1, buf_dn);
148 if (do_comm[mu] == 1) {
149 chset_send.append(chsend_up[mu]);
150 chset_send.append(chsend_dn[mu]);
151 chset_recv.append(chrecv_up[mu]);
152 chset_recv.append(chrecv_dn[mu]);
159 template<
typename AFIELD>
169 int err_optional = 0;
170 err_optional += params.
fetch_string(
"gamma_matrix_type", gmset_type);
172 if (gmset_type != m_repr) {
173 vout.
crucial(m_vl,
"%s: unsupported gamma_matrix_type: %s\n",
174 class_name.c_str(), gmset_type.c_str());
182 err += params.
fetch_int(
"extent_of_5th_dimension", Ns);
188 vout.
crucial(m_vl,
"Error at %s: input parameter not found.\n",
193 set_parameters(mq, M0, Ns, bc, b, c);
198 template<
typename AFIELD>
203 const vector<int> bc,
207 assert(bc.size() == m_Ndim);
217 m_NinF = m_Nvcd * m_Ns;
219 if (m_boundary.size() != m_Ndim) m_boundary.resize(m_Ndim);
220 for (
int mu = 0; mu < m_Ndim; ++mu) {
221 m_boundary[mu] = bc[mu];
224 if (m_b.size() != m_Ns) {
228 for (
int is = 0; is < m_Ns; ++is) {
234 vout.
general(m_vl,
"Parameters of %s:\n", class_name.c_str());
238 for (
int mu = 0; mu < m_Ndim; ++mu) {
239 vout.
general(m_vl,
" boundary[%d] = %2d\n", mu, m_boundary[mu]);
241 vout.
general(m_vl,
" coefficients b = %16.10f c = %16.10f\n",
244 set_precond_parameters();
247 if (m_w1.nin() != m_NinF) {
249 m_w1.reset(m_NinF, m_Nvol, 1);
250 m_v1.reset(m_NinF, m_Nvol, 1);
251 m_v2.reset(m_NinF, m_Nvol, 1);
260 template<
typename AFIELD>
267 if (m_dp.size() != m_Ns) {
270 m_dpinv.resize(m_Ns);
271 m_e.resize(m_Ns - 1);
272 m_f.resize(m_Ns - 1);
275 for (
int is = 0; is < m_Ns; ++is) {
276 m_dp[is] = 1.0 + m_b[is] * (4.0 - m_M0);
277 m_dm[is] = 1.0 - m_c[is] * (4.0 - m_M0);
280 m_e[0] = m_mq * m_dm[m_Ns - 1] / m_dp[0];
281 m_f[0] = m_mq * m_dm[0];
282 for (
int is = 1; is < m_Ns - 1; ++is) {
283 m_e[is] = m_e[is - 1] * m_dm[is - 1] / m_dp[is];
284 m_f[is] = m_f[is - 1] * m_dm[is] / m_dp[is - 1];
287 m_g = m_e[m_Ns - 2] * m_dm[m_Ns - 2];
289 for (
int is = 0; is < m_Ns - 1; ++is) {
290 m_dpinv[is] = 1.0 / m_dp[is];
292 m_dpinv[m_Ns - 1] = 1.0 / (m_dp[m_Ns - 1] + m_g);
300 template<
typename AFIELD>
302 const std::vector<double> vec_b,
303 const std::vector<double> vec_c)
307 if ((vec_b.size() != m_Ns) || (vec_c.size() != m_Ns)) {
308 vout.
crucial(m_vl,
"%s: size of coefficient vectors incorrect.\n",
312 vout.
general(m_vl,
"%s: coefficient vectors are set:\n",
318 for (
int is = 0; is < m_Ns; ++is) {
319 m_b[is] =
real_t(vec_b[is]);
320 m_c[is] =
real_t(vec_c[is]);
326 for (
int is = 0; is < m_Ns; ++is) {
327 vout.
general(m_vl,
"b[%2d] = %16.10f c[%2d] = %16.10f\n",
328 is, m_b[is], is, m_c[is]);
331 set_precond_parameters();
336 template<
typename AFIELD>
341 vout.
detailed(m_vl,
"%s: set_config is called: num_threads = %d\n",
342 class_name.c_str(), nth);
350 vout.
detailed(m_vl,
"%s: set_config finished\n", class_name.c_str());
355 template<
typename AFIELD>
368 template<
typename AFIELD>
383 template<
typename AFIELD>
389 int ith, nth, isite, nsite;
390 set_threadtask_mult(ith, nth, isite, nsite, m_Nvol);
394 for (
int site = isite; site < nsite; ++site) {
395 for (
int is = 0; is < m_Ns; ++is) {
396 for (
int ic = 0; ic <
NC; ++ic) {
397 for (
int id = 0;
id <
ND2; ++id) {
398 for (
int iri = 0; iri < 2; ++iri) {
399 int in_org = iri + 2 * (ic +
NC * id);
400 int in_alt = iri + 2 * (
id +
ND * ic) +
NVCD * is;
401 v.
set(index.idx(in_alt, m_NinF, site, 0),
406 for (
int id =
ND2;
id <
ND; ++id) {
407 for (
int iri = 0; iri < 2; ++iri) {
408 int in_org = iri + 2 * (ic +
NC * id);
409 int in_alt = iri + 2 * (
id +
ND * ic) +
NVCD * is;
410 v.
set(index.idx(in_alt, m_NinF, site, 0),
423 template<
typename AFIELD>
429 int ith, nth, isite, nsite;
430 set_threadtask_mult(ith, nth, isite, nsite, m_Nvol);
434 for (
int site = isite; site < nsite; ++site) {
435 for (
int is = 0; is < m_Ns; ++is) {
436 for (
int ic = 0; ic <
NC; ++ic) {
437 for (
int id = 0;
id <
ND2; ++id) {
438 for (
int iri = 0; iri < 2; ++iri) {
439 int in_org = iri + 2 * (ic +
NC * id);
440 int in_alt = iri + 2 * (
id +
ND * ic) +
NVCD * is;
441 v.
set(in_org, site, is,
442 double( w.
cmp(index.idx(in_alt, m_NinF, site, 0))));
446 for (
int id =
ND2;
id <
ND; ++id) {
447 for (
int iri = 0; iri < 2; ++iri) {
448 int in_org = iri + 2 * (ic +
NC * id);
449 int in_alt = iri + 2 * (
id +
ND * ic) +
NVCD * is;
450 v.
set(in_org, site, is,
451 double( -w.
cmp(index.idx(in_alt, m_NinF, site, 0))));
463 template<
typename AFIELD>
469 if (ith == 0) m_mode = mode;
476 template<
typename AFIELD>
485 }
else if (mode ==
"Ddag") {
487 }
else if (mode ==
"DdagD") {
489 }
else if (mode ==
"D_prec") {
491 }
else if (mode ==
"Ddag_prec") {
493 }
else if (mode ==
"DdagD_prec") {
495 }
else if (mode ==
"Prec") {
497 }
else if (mode ==
"Precdag") {
501 class_name.c_str(), mode.c_str());
508 template<
typename AFIELD>
518 }
else if (mode ==
"Ddag") {
520 }
else if (mode ==
"DdagD") {
522 }
else if (mode ==
"D_prec") {
524 }
else if (mode ==
"Ddag_prec") {
526 }
else if (mode ==
"DdagD_prec") {
528 }
else if (mode ==
"Prec") {
530 }
else if (mode ==
"Precdag") {
533 std::cout <<
"mode undeifined in AFopr_Domainwall_5din.\n";
540 template<
typename AFIELD>
555 template<
typename AFIELD>
564 template<
typename AFIELD>
574 template<
typename AFIELD>
586 template<
typename AFIELD>
597 template<
typename AFIELD>
608 template<
typename AFIELD>
619 template<
typename AFIELD>
627 template<
typename AFIELD>
635 template<
typename AFIELD>
639 int Nin5 = Nin4 * m_Ns;
651 m_mq, m_M0, m_Ns, &m_boundary[0],
657 if (do_comm_any > 0) {
658 if (ith == 0) chset_recv.start();
670 buf1_xp, buf1_xm, buf1_yp, buf1_ym,
671 buf1_zp, buf1_zm, buf1_tp, buf1_tm,
673 m_mq, m_M0, m_Ns, &m_boundary[0],
678 if (ith == 0) chset_send.start();
682 m_mq, m_M0, m_Ns, &m_boundary[0],
686 if (do_comm_any > 0) {
687 if (ith == 0) chset_recv.wait();
701 buf2_xp, buf2_xm, buf2_yp, buf2_ym,
702 buf2_zp, buf2_zm, buf2_tp, buf2_tm,
703 m_mq, m_M0, m_Ns, &m_boundary[0],
706 if (ith == 0) chset_send.wait();
714 template<
typename AFIELD>
728 if (do_comm_any > 0) {
729 if (ith == 0) chset_recv.start();
741 buf1_xp, buf1_xm, buf1_yp, buf1_ym,
742 buf1_zp, buf1_zm, buf1_tp, buf1_tm,
744 m_mq, m_M0, m_Ns, &m_boundary[0],
749 if (ith == 0) chset_send.start();
755 m_mq, m_M0, m_Ns, &m_boundary[0],
759 if (do_comm_any > 0) {
760 if (ith == 0) chset_recv.wait();
774 buf2_xp, buf2_xm, buf2_yp, buf2_ym,
775 buf2_zp, buf2_zm, buf2_tp, buf2_tm,
776 m_mq, m_M0, m_Ns, &m_boundary[0],
779 if (ith == 0) chset_send.wait();
785 m_mq, m_M0, m_Ns, &m_boundary[0],
794 template<
typename AFIELD>
802 &m_e[0], &m_dpinv[0], &m_dm[0]);
807 template<
typename AFIELD>
814 vp, wp, m_Ns, m_Nsize,
815 &m_f[0], &m_dpinv[0], &m_dm[0]);
820 template<
typename AFIELD>
828 vp, wp, m_Ns, m_Nsize,
829 &m_e[0], &m_dpinv[0], &m_dm[0]);
834 template<
typename AFIELD>
842 vp, wp, m_Ns, m_Nsize,
843 &m_f[0], &m_dpinv[0], &m_dm[0]);
848 template<
typename AFIELD>
852 double vsite =
static_cast<double>(Lvol);
853 double vNs =
static_cast<double>(m_Ns);
859 double axpy1 =
static_cast<double>(2 * m_NinF);
860 double scal1 =
static_cast<double>(1 * m_NinF);
861 if (m_repr ==
"Dirac") {
862 flop_Wilson =
static_cast<double>(
863 Nc * Nd * (4 + 6 * (4 * Nc + 2) + 2 * (4 * Nc + 1))) * vsite;
864 flop_LU_inv =
static_cast<double>(Nc * Nd * (2 + (vNs - 1) * 26)) * vsite;
865 }
else if (m_repr ==
"Chiral") {
866 flop_Wilson =
static_cast<double>(
867 Nc * Nd * (4 + 8 * (4 * Nc + 2))) * vsite;
868 flop_LU_inv =
static_cast<double>(Nc * Nd * (2 + (vNs - 1) * 10)) * vsite;
872 double flop_DW = vNs * flop_Wilson + vsite * (6 * axpy1 + 2 * scal1);
879 if (mode ==
"Prec") {
881 }
else if ((mode ==
"D") || (mode ==
"Ddag")) {
883 }
else if (mode ==
"DdagD") {
884 flop = 2.0 * flop_DW;
885 }
else if ((mode ==
"D_prec") || (mode ==
"Ddag_prec")) {
886 flop = flop_LU_inv + flop_DW;
887 }
else if (mode ==
"DdagD_prec") {
888 flop = 2.0 * (flop_LU_inv + flop_DW);
890 vout.
crucial(m_vl,
"Error at %s: input mode is undefined.\n",