12 template<
typename AFIELD>
14 =
"AFopr_Staggered_eo<AFIELD>";
16 template<
typename AFIELD>
32 vout.
general(m_vl,
"%s: construction\n", class_name.c_str());
42 m_Ndf = 2 * m_Nc * m_Nc;
53 m_Nx2v = m_Nx2 /
VLENX;
55 m_Nst2v = m_Nst2 /
VLEN;
63 if (m_Nx % (2 *
VLENX) != 0) {
64 vout.
crucial(m_vl,
"%s: Nx must be mulriple of 2*VLENX.\n",
68 if (m_Ny % (
VLENY) != 0) {
69 vout.
crucial(m_vl,
"%s: Ny must be multiple of VLENY.\n",
78 m_Leo.resize(m_Ny * m_Nz * m_Nt);
83 for (
int t = 0; t < m_Nt; ++t) {
84 for (
int z = 0; z < m_Nz; ++z) {
85 for (
int y = 0; y < m_Ny; ++y) {
86 int t2 = ipe3 * m_Nt + t;
87 int z2 = ipe2 * m_Nz + z;
88 int y2 = ipe1 * m_Ny + y;
89 m_Leo[y + m_Ny * (z + m_Nz * t)] = (y2 + z2 + t2) % 2;
100 for (
int mu = 0; mu < m_Ndim; ++mu) {
103 do_comm_any += do_comm[mu];
104 vout.
general(m_vl,
" do_comm[%d] = %d\n", mu, do_comm[mu]);
107 m_Nbdsize.resize(m_Ndim);
108 m_Nbdsize[0] = m_Nvc * ((m_Ny * m_Nz * m_Nt + 1) / 2);
109 m_Nbdsize[1] = m_Nvc * m_Nx2 * m_Nz * m_Nt;
110 m_Nbdsize[2] = m_Nvc * m_Nx2 * m_Ny * m_Nt;
111 m_Nbdsize[3] = m_Nvc * m_Nx2 * m_Ny * m_Nz;
116 m_Ueo.reset(m_Ndf, m_Nst, m_Ndim);
117 m_Ulex.reset(m_Ndf, m_Nst, m_Ndim);
120 m_w1.reset(m_Nvc, m_Nst2, 1);
121 m_w2.reset(m_Nvc, m_Nst2, 1);
122 m_v1.reset(m_Nvc, m_Nst2, 1);
123 m_v2.reset(m_Nvc, m_Nst2, 1);
131 set_staggered_phase();
135 set_parameters(params);
147 template<
typename AFIELD>
155 template<
typename AFIELD>
158 chsend_up.resize(m_Ndim);
159 chrecv_up.resize(m_Ndim);
160 chsend_dn.resize(m_Ndim);
161 chrecv_dn.resize(m_Ndim);
163 for (
int mu = 0; mu < m_Ndim; ++mu) {
164 int Nvsize = m_Nbdsize[mu] *
sizeof(
real_t);
166 chsend_dn[mu].send_init(Nvsize, mu, -1);
167 chsend_up[mu].send_init(Nvsize, mu, 1);
169 chrecv_up[mu].recv_init(Nvsize, mu, 1);
170 chrecv_dn[mu].recv_init(Nvsize, mu, -1);
172 void *buf_up = (
void *)chsend_dn[mu].ptr();
173 chrecv_up[mu].recv_init(Nvsize, mu, 1, buf_up);
174 void *buf_dn = (
void *)chsend_up[mu].ptr();
175 chrecv_dn[mu].recv_init(Nvsize, mu, -1, buf_dn);
178 if (do_comm[mu] == 1) {
179 chset_send.append(chsend_up[mu]);
180 chset_send.append(chsend_dn[mu]);
181 chset_recv.append(chrecv_up[mu]);
182 chset_recv.append(chrecv_dn[mu]);
189 template<
typename AFIELD>
192 const string str_vlevel = params.
get_string(
"verbose_level");
203 vout.
crucial(m_vl,
"%s: fetch error, input parameter not found.\n",
208 set_parameters(
real_t(mq), bc);
213 template<
typename AFIELD>
215 const std::vector<int> bc)
217 assert(bc.size() == m_Ndim);
225 m_boundary.resize(m_Ndim);
226 for (
int mu = 0; mu < m_Ndim; ++mu) {
227 m_boundary[mu] = bc[mu];
231 vout.
general(m_vl,
"%s: set parameters\n", class_name.c_str());
233 for (
int mu = 0; mu < m_Ndim; ++mu) {
234 vout.
general(m_vl,
" boundary[%d] = %2d\n", mu, m_boundary[mu]);
242 template<
typename AFIELD>
245 params.
set_double(
"quark_mass",
double(m_mq));
253 template<
typename AFIELD>
261 real_t *stgph = m_stg_phase.ptr(0);
262 real_t *prty = m_parity.ptr(0);
266 for (
int t = 0; t < m_Nt; ++t) {
267 int t2 = t + ipet * m_Nt;
268 for (
int z = 0; z < m_Nz; ++z) {
269 int z2 = z + ipez * m_Nz;
270 for (
int y = 0; y < m_Ny; ++y) {
271 int y2 = y + ipey * m_Ny;
272 for (
int x = 0; x < m_Nx; ++x) {
273 int x2 = x + ipex * m_Nx;
274 int is = index.site(x, y, z, t);
276 stgph[index.idx(0, 1, is, 0)] = 1.0;
277 stgph[index.idx(0, 1, is, 1)] = 1.0;
278 stgph[index.idx(0, 1, is, 2)] = 1.0;
279 stgph[index.idx(0, 1, is, 3)] = 1.0;
281 prty[index.idx(0, 1, is, 0)] = 1.0;
284 stgph[index.idx(0, 1, is, 1)] = -1.0;
286 if (((x2 + y2) % 2) == 1) {
287 stgph[index.idx(0, 1, is, 2)] = -1.0;
289 if (((x2 + y2 + z2) % 2) == 1) {
290 stgph[index.idx(0, 1, is, 3)] = -1.0;
292 if (((x2 + y2 + z2 + t2) % 2) == 1) {
293 prty[index.idx(0, 1, is, 0)] = -1.0;
303 template<
typename AFIELD>
308 vout.
detailed(m_vl,
"%s: set_config is called: num_threads = %d\n",
309 class_name.c_str(), nth);
317 vout.
detailed(m_vl,
"%s: set_config finished\n", class_name.c_str());
322 template<
typename AFIELD>
335 template<
typename AFIELD>
341 real_t *Ueo = m_Ueo.ptr(0);
344 Nsize_lex[0] = m_Nsize[0] * 2;
345 Nsize_lex[1] = m_Nsize[1];
346 Nsize_lex[2] = m_Nsize[2];
347 Nsize_lex[3] = m_Nsize[3];
353 for (
int mu = 0; mu < m_Ndim; ++mu) {
355 real_t *stgph = m_stg_phase.ptr(index_lex.idx(0, 1, 0, mu));
356 real_t *up = m_Ulex.ptr(index_lex.idx_G(0, 0, mu));
361 int ith, nth, is, ns;
362 set_threadtask_mult(ith, nth, is, ns, m_Nst);
364 for (
int ex = 0; ex < m_Ndim; ++ex) {
365 for (
int site = is; site < ns; ++site) {
366 for (
int in = 0; in < m_Ndf; ++in) {
367 int iv1 = index_lex.idx(in, m_Ndf, site, ex);
368 int iv2 = index_eo.idx(in, m_Ndf, site, ex);
369 Ueo[iv2] = m_Ulex.cmp(iv1);
379 template<
typename AFIELD>
389 template<
typename AFIELD>
393 scal(v, m_mq * m_mq);
399 template<
typename AFIELD>
405 if (ith == 0) m_mode = mode;
412 template<
typename AFIELD>
417 }
else if (m_mode ==
"Ddag") {
419 }
else if (m_mode ==
"DdagD") {
424 vout.
crucial(m_vl,
"%s: mode undeifined.\n", class_name.c_str());
431 template<
typename AFIELD>
436 }
else if (m_mode ==
"Ddag") {
438 }
else if (m_mode ==
"DdagD") {
443 vout.
crucial(m_vl,
"%s: mode undeifined.\n", class_name.c_str());
450 template<
typename AFIELD>
452 const std::string mode)
459 }
else if (mode ==
"Doo") {
462 }
else if (mode ==
"Dee_inv") {
465 }
else if (mode ==
"Doo_inv") {
468 }
else if (mode ==
"Deo") {
470 }
else if (mode ==
"Doe") {
473 vout.
crucial(m_vl,
"%s: illegal mode is given to mult with mode\n",
483 template<
typename AFIELD>
485 const std::string mode)
492 }
else if (mode ==
"Doo") {
495 }
else if (mode ==
"Dee_inv") {
498 }
else if (mode ==
"Doo_inv") {
501 }
else if (mode ==
"Deo") {
504 }
else if (mode ==
"Doe") {
508 vout.
crucial(m_vl,
"%s: illegal mode is given to mult with mode\n",
518 template<
typename AFIELD>
527 template<
typename AFIELD>
538 template<
typename AFIELD>
544 Meo(m_v1, w, w, 1, 0);
545 Meo(v, m_v1, w, 0, 0);
550 axpy(v, m_mq * m_mq, w);
556 template<
typename AFIELD>
562 Meo(m_v1, w, w, 1, 0);
563 Meo(v, m_v1, w, 0, 0);
566 axpy(v, m_mq * m_mq, w);
572 template<
typename AFIELD>
576 mult_Meo_qxs(v, w, w, ieo, 0);
582 template<
typename AFIELD>
587 mult_Meo_qxs(v, w, x, ieo, iflag);
593 template<
typename AFIELD>
608 if (do_comm_any > 0) {
609 if (ith == 0) chset_recv.start();
624 buf1zp, buf1zm, buf1tp, buf1tm,
626 m_Nsize, do_comm, &m_Leo[0], ieo);
630 if (ith == 0) chset_send.start();
636 &m_Leo[0], ieo, iflag);
638 if (do_comm_any > 0) {
639 if (ith == 0) chset_recv.wait();
656 buf2xp, buf2xm, buf2yp, buf2ym,
657 buf2zp, buf2zm, buf2tp, buf2tm,
658 m_mq, m_Nsize, do_comm,
659 &m_Leo[0], ieo, iflag);
661 if (ith == 0) chset_send.wait();
669 template<
typename AFIELD>
684 for (
int mu = 0; mu < m_Ndim; ++mu) {
685 mult_up(mu, v, w, ieo);
686 mult_dn(mu, v, w, ieo);
693 real_t fac = -1.0 / (m_mq * m_mq);
694 axpby(fac, vp,
real_t(1.0), xp);
702 template<
typename AFIELD>
722 template<
typename AFIELD>
725 real_t *ph = m_parity.ptr(0);
735 template<
typename AFIELD>
743 template<
typename AFIELD>
752 template<
typename AFIELD>
760 template<
typename AFIELD>
768 template<
typename AFIELD>
773 m_shift->backward(m_w1, w, idir, ieo);
776 real_t *up = m_Ueo.ptr(
NDF * m_Nst2 * (ieo + 2 * idir));
790 template<
typename AFIELD>
796 real_t *up = m_Ueo.
ptr(
NDF * m_Nst2 * (1 - ieo + 2 * idir));
805 m_shift->forward(m_w2, m_w1, idir, ieo);
813 template<
typename AFIELD>
816 return flop_count(m_mode);
821 template<
typename AFIELD>
829 double flop_site, flop;
831 flop_site =
static_cast<double>(m_Nvc * (2 + 8 * 2 * m_Nvc));
834 flop = flop_site *
static_cast<double>(Lvol);
836 if ((mode ==
"DdagD") || (mode ==
"DDdag")) flop *= 2.0;