13 template<
typename AFIELD>
17 template<
typename AFIELD>
32 vout.
general(m_vl,
"%s: construction\n", class_name.c_str());
38 if (repr !=
"Dirac") {
39 vout.
crucial(
" Error at %s: unsupported gamma-matrix type: %s\n",
40 class_name.c_str(), repr.c_str());
55 m_Ndf = 2 * m_Nc * m_Nc;
66 m_Nx2v = m_Nx2 /
VLENX;
68 m_Nst2v = m_Nst2 /
VLEN;
76 if (m_Nx % (2 *
VLENX) != 0) {
77 vout.
crucial(m_vl,
"%s: Nx must be mulriple of 2*VLEN.\n",
81 if (m_Ny % (
VLENY) != 0) {
82 vout.
crucial(m_vl,
"%s: Ny must be multiple of VLENY.\n",
91 m_Leo.resize(m_Ny * m_Nz * m_Nt);
96 for (
int t = 0; t < m_Nt; ++t) {
97 for (
int z = 0; z < m_Nz; ++z) {
98 for (
int y = 0; y < m_Ny; ++y) {
99 int t2 = ipe3 * m_Nt + t;
100 int z2 = ipe2 * m_Nz + z;
101 int y2 = ipe1 * m_Ny + y;
102 m_Leo[y + m_Ny * (z + m_Nz * t)] = (y2 + z2 + t2) % 2;
113 for (
int mu = 0; mu < m_Ndim; ++mu) {
116 do_comm_any += do_comm[mu];
117 vout.
general(
" do_comm[%d] = %d\n", mu, do_comm[mu]);
121 m_bdsize.resize(m_Ndim);
123 m_bdsize[0] = m_Nvc * Nd2 * ((m_Ny * m_Nz * m_Nt + 1) / 2);
124 m_bdsize[1] = m_Nvc * Nd2 * m_Nx2 * m_Nz * m_Nt;
125 m_bdsize[2] = m_Nvc * Nd2 * m_Nx2 * m_Ny * m_Nt;
126 m_bdsize[3] = m_Nvc * Nd2 * m_Nx2 * m_Ny * m_Nz;
131 m_Ueo.reset(m_Ndf, m_Nst, m_Ndim);
132 m_Ulex.reset(m_Ndf, m_Nst, m_Ndim);
134 int Ndm2 = m_Nd * m_Nd / 2;
140 #ifdef CHIRAL_ROTATION
141 m_Te_inv.reset(
NDF * Ndm2 / 2, m_Nst2, 1);
142 m_To_inv.reset(
NDF * Ndm2 / 2, m_Nst2, 1);
143 params_ct.
set_string(
"gamma_matrix_type",
"Chiral");
146 m_Te_inv.reset(
NDF * Ndm2, m_Nst2, 1);
147 m_To_inv.reset(
NDF * Ndm2, m_Nst2, 1);
151 set_parameters(params);
156 int NinF = 2 * m_Nc * m_Nd;
157 m_v1.reset(NinF, m_Nst2, 1);
158 m_v2.reset(NinF, m_Nst2, 1);
160 m_z1.reset(NinF, m_Nst, 1);
168 template<
typename AFIELD>
171 chsend_up.resize(m_Ndim);
172 chrecv_up.resize(m_Ndim);
173 chsend_dn.resize(m_Ndim);
174 chrecv_dn.resize(m_Ndim);
176 for (
int mu = 0; mu < m_Ndim; ++mu) {
177 size_t Nvsize = m_bdsize[mu] *
sizeof(
real_t);
179 chsend_dn[mu].send_init(Nvsize, mu, -1);
180 chsend_up[mu].send_init(Nvsize, mu, 1);
182 chrecv_up[mu].recv_init(Nvsize, mu, 1);
183 chrecv_dn[mu].recv_init(Nvsize, mu, -1);
185 void *buf_up = (
void *)chsend_dn[mu].ptr();
186 chrecv_up[mu].recv_init(Nvsize, mu, 1, buf_up);
187 void *buf_dn = (
void *)chsend_up[mu].ptr();
188 chrecv_dn[mu].recv_init(Nvsize, mu, -1, buf_dn);
191 if (do_comm[mu] == 1) {
192 chset_send.append(chsend_up[mu]);
193 chset_send.append(chsend_dn[mu]);
194 chset_recv.append(chrecv_up[mu]);
195 chset_recv.append(chrecv_dn[mu]);
202 template<
typename AFIELD>
212 template<
typename AFIELD>
215 const string str_vlevel = params.
get_string(
"verbose_level");
227 vout.
crucial(m_vl,
"Error at %s: input parameter not found.\n",
232 set_parameters(
real_t(kappa), csw, bc);
235 #ifdef CHIRAL_ROTATION
236 params_ct.
set_string(
"gamma_matrix_type",
"Chiral");
238 m_fopr_ct->set_parameters(params_ct);
243 template<
typename AFIELD>
246 const std::vector<int> bc)
248 assert(bc.size() == m_Ndim);
257 m_boundary.resize(m_Ndim);
258 for (
int mu = 0; mu < m_Ndim; ++mu) {
259 m_boundary[mu] = bc[mu];
263 vout.
general(m_vl,
"%s: set parameters\n", class_name.c_str());
264 vout.
general(m_vl,
" gamma-matrix type = %s\n", m_repr.c_str());
267 for (
int mu = 0; mu < m_Ndim; ++mu) {
268 vout.
general(m_vl,
" boundary[%d] = %2d\n", mu, m_boundary[mu]);
276 template<
typename AFIELD>
279 params.
set_double(
"hopping_parameter",
double(m_CKs));
280 params.
set_double(
"clover_coefficient",
double(m_csw));
282 params.
set_string(
"gamma_matrix_type", m_repr);
289 template<
typename AFIELD>
294 vout.
detailed(m_vl,
"%s: set_config is called: num_threads = %d\n",
295 class_name.c_str(), nth);
303 vout.
detailed(m_vl,
"%s: set_config finished\n", class_name.c_str());
308 template<
typename AFIELD>
321 template<
typename AFIELD>
326 int ith, nth, is, ns;
329 if (ith == 0) m_conf = u;
340 set_threadtask_mult(ith, nth, is, ns, m_Nst);
342 for (
int ex = 0; ex < m_Ndim; ++ex) {
343 for (
int site = is; site < ns; ++site) {
344 for (
int in = 0; in < m_Ndf; ++in) {
345 int iv1 = index_lex2.idx(in, m_Ndf, site, ex);
346 int iv2 = index_eo.idx(in, m_Ndf, site, ex);
347 m_Ueo.e(iv2) = m_Ulex.cmp(iv1);
353 m_fopr_ct->set_config(u);
355 #ifdef CHIRAL_ROTATION
356 set_csw_chrot(m_Te_inv, 0);
357 set_csw_chrot(m_To_inv, 1);
367 template<
typename AFIELD>
370 vout.
crucial(m_vl,
"%s: set_csw() has not been implemented.\n",
377 template<
typename AFIELD>
392 const int Nin =
NDF *
ND;
394 m_fopr_ct->set_mode(
"D");
396 int ith, nth, is, ns;
397 set_threadtask_mult(ith, nth, is, ns, m_Nst2);
400 constexpr
int idx[72] = {
401 0, -1, 6, 7, 16, 17, 28, 29, 24, 25, 34, 35,
402 -1, -1, 1, -1, 12, 13, 18, 19, 32, 33, 26, 27,
403 -1, -1, -1, -5, 2, -1, 8, 9, 20, 21, 30, 31,
404 -1, -1, -1, -1, -1, -1, 3, -1, 14, 15, 22, 23,
405 -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, 10, 11,
406 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1,
410 for (
int id = 0;
id < m_Nd / 2; ++id) {
411 for (
int ic = 0; ic < m_Nc; ++ic) {
415 for (
int site = is; site < ns; ++site) {
416 m_w1.set_r(ic,
id, site, 0, 1.0);
417 m_w1.set_r(ic,
id + 2, site, 0, 1.0);
421 m_fopr_ct->mult_csw_inv(m_w2, m_w1, ieo);
424 for (
int site = is; site < ns; ++site) {
425 for (
int id2 = 0; id2 < m_Nd; ++id2) {
426 for (
int ic2 = 0; ic2 < m_Nc; ++ic2) {
427 real_t vt_r = 0.5 * m_w2.cmp_r(ic2, id2, site, 0);
428 real_t vt_i = 0.5 * m_w2.cmp_i(ic2, id2, site, 0);
429 int i = ic2 + m_Nc * (id2 % 2);
430 int j = ic + m_Nc * id;
431 int ij = m_Nc * 2 * i + j;
432 int in_r =
idx[2 * ij];
433 int in_i =
idx[2 * ij + 1];
435 in_r += 36 * (id2 / 2);
436 int idx_r = index.idxh(in_r, Nin, site, 0);
437 m_T_inv.
set(idx_r, vt_r);
440 in_i += 36 * (id2 / 2);
441 int idx_i = index.idxh(in_i, Nin, site, 0);
442 m_T_inv.
set(idx_i, vt_i);
457 template<
typename AFIELD>
468 template<
typename AFIELD>
479 template<
typename AFIELD>
485 if (ith == 0) m_mode = mode;
492 template<
typename AFIELD>
497 }
else if (m_mode ==
"DdagD") {
499 }
else if (m_mode ==
"Ddag") {
502 vout.
crucial(m_vl,
"%s: mode undefined.\n", class_name.c_str());
509 template<
typename AFIELD>
514 }
else if (m_mode ==
"DdagD") {
516 }
else if (m_mode ==
"Ddag") {
519 vout.
crucial(m_vl,
"%s: mode undefined.\n", class_name.c_str());
526 template<
typename AFIELD>
544 template<
typename AFIELD>
548 if (Nst != v.
nvol()) {
549 vout.
crucial(m_vl,
"%s: sizes of fields inconsisutent in mult_gm4.\n",
557 int ith, nth, is, ns;
558 set_threadtask_mult(ith, nth, is, ns, m_Nst);
563 for (
int site = is; site < ns; ++site) {
564 for (
int ic = 0; ic <
NC; ++ic) {
565 vp[m_index.idxh_SPr(ic, 0, site, 0)] = wp[m_index.idxh_SPr(ic, 0, site, 0)];
566 vp[m_index.idxh_SPi(ic, 0, site, 0)] = wp[m_index.idxh_SPi(ic, 0, site, 0)];
567 vp[m_index.idxh_SPr(ic, 1, site, 0)] = wp[m_index.idxh_SPr(ic, 1, site, 0)];
568 vp[m_index.idxh_SPi(ic, 1, site, 0)] = wp[m_index.idxh_SPi(ic, 1, site, 0)];
569 vp[m_index.idxh_SPr(ic, 2, site, 0)] = -wp[m_index.idxh_SPr(ic, 2, site, 0)];
570 vp[m_index.idxh_SPi(ic, 2, site, 0)] = -wp[m_index.idxh_SPi(ic, 2, site, 0)];
571 vp[m_index.idxh_SPr(ic, 3, site, 0)] = -wp[m_index.idxh_SPr(ic, 3, site, 0)];
572 vp[m_index.idxh_SPi(ic, 3, site, 0)] = -wp[m_index.idxh_SPi(ic, 3, site, 0)];
581 template<
typename AFIELD>
583 const std::string mode)
585 if (mode ==
"Dee_inv") {
586 mult_cswinv(v, w, 0);
587 }
else if (mode ==
"Doo_inv") {
588 mult_cswinv(v, w, 1);
589 }
else if (mode ==
"Deo") {
591 }
else if (mode ==
"Doe") {
594 vout.
crucial(m_vl,
"%s: illegal mode is given to mult with mode\n",
602 template<
typename AFIELD>
611 template<
typename AFIELD>
617 mult_cswinv(v, m_v1, 0);
619 mult_cswinv(v, m_v1, 1);
627 template<
typename AFIELD>
632 mult_cswinv(v, m_v1, 1);
634 mult_cswinv(v, m_v1, 0);
640 template<
typename AFIELD>
653 template<
typename AFIELD>
657 mult_Meo_qxs(v, w, w, ieo, 0);
662 template<
typename AFIELD>
665 const int ieo,
const int iflag)
667 mult_Meo_qxs(v, w, x, ieo, iflag);
672 template<
typename AFIELD>
675 const int ieo,
const int iflag)
687 if (do_comm_any > 0) {
688 if (ith == 0) chset_recv.start();
699 buf1_zp, buf1_zm, buf1_tp, buf1_tm,
700 up, v1, &m_boundary[0],
701 m_Nsize, do_comm, &m_Leo[0], ieo, iflag);
705 if (ith == 0) chset_send.start();
709 m_Nsize, do_comm, &m_Leo[0], ieo, iflag);
711 if (do_comm_any > 0) {
712 if (ith == 0) chset_recv.wait();
726 buf2_xp, buf2_xm, buf2_yp, buf2_ym,
727 buf2_zp, buf2_zm, buf2_tp, buf2_tm,
728 m_CKs, &m_boundary[0],
729 m_Nsize, do_comm, &m_Leo[0], ieo, iflag);
731 if (ith == 0) chset_send.wait();
739 template<
typename AFIELD>
747 ct = m_Te_inv.
ptr(0);
749 ct = m_To_inv.ptr(0);
754 #ifdef CHIRAL_ROTATION
763 template<
typename AFIELD>
767 int Ncd = m_Nvc * m_Nd;
771 int ith, nth, is, ns;
772 set_threadtask(ith, nth, is, ns, m_Nst2v);
774 for (
int site = is; site < ns; ++site) {
775 real_t *__restrict v2 = v +
VLEN * Ncd * site;
776 real_t *__restrict w2 = w +
VLEN * Ncd * site;
777 for (
int icd = 0; icd < Ncd / 2; ++icd) {
780 load_vec(pg, wtr, &w2[2 *
VLEN * icd]);
781 load_vec(pg, wti, &w2[2 *
VLEN * icd +
VLEN]);
782 load_vec(pg, vtr, &v2[2 *
VLEN * icd]);
783 load_vec(pg, vti, &v2[2 *
VLEN * icd +
VLEN]);
784 aypx_vec(pg, a, vtr, wtr);
785 aypx_vec(pg, a, vti, wti);
786 save_vec(pg, &v2[2 *
VLEN * icd], vtr);
787 save_vec(pg, &v2[2 *
VLEN * icd +
VLEN], vti);
791 int ith, nth, is, ns;
792 set_threadtask_mult(ith, nth, is, ns, m_Nst2);
793 for (
int i = is; i < ns; ++i) {
794 for (
int icd = 0; icd < Ncd; ++icd) {
795 int i2 = icd + Ncd * i;
796 v[i2] = a * v[i2] + w[i2];
806 template<
typename AFIELD>
809 int ith, nth, is, ns;
810 set_threadtask_mult(ith, nth, is, ns, m_Nst2);
813 int Ncd = m_Nvc * m_Nd;
815 for (
int i = is; i < ns; ++i) {
816 for (
int icd = 0; icd < Ncd; ++icd) {
817 v[icd + Ncd * i] = zero;
826 template<
typename AFIELD>
829 int ith, nth, is, ns;
830 set_threadtask_mult(ith, nth, is, ns, m_Nst2);
832 int Ncd = m_Nvc * m_Nd;
834 for (
int i = is; i < ns; ++i) {
835 for (
int icd = 0; icd < Ncd; ++icd) {
836 v[icd + Ncd * i] *= a;
845 template<
typename AFIELD>
853 double flop_wilson, flop_clover, flop_site, flop;
855 if (m_repr ==
"Dirac") {
856 flop_wilson =
static_cast<double>(
859 + 2 * (4 * m_Nc + 1)));
862 flop_clover =
static_cast<double>(
864 + 2 * (2 * (m_Nc * m_Nd - 1) + 1)
867 }
else if (m_repr ==
"Chiral") {
868 flop_wilson =
static_cast<double>(
869 m_Nc * m_Nd * (4 + 8 * (4 * m_Nc + 2)));
871 flop_clover =
static_cast<double>(
872 m_Nc * m_Nd * (2 * (2 * (m_Nc * m_Nd - 1) + 1)
875 vout.
crucial(m_vl,
"%s: input repr is undefined.\n");
879 flop_site = flop_wilson + flop_clover;
881 flop = flop_site *
static_cast<double>(Lvol);
882 if ((mode ==
"DdagD") || (mode ==
"DDdag")) flop *= 2.0;