12 template<
typename AFIELD>
14 =
"AFopr_Staggered<AFIELD>";
16 template<
typename AFIELD>
32 vout.
general(m_vl,
"%s: construction\n", class_name.c_str());
43 m_Ndf = 2 * m_Nc * m_Nc;
52 m_Nstv = m_Nst /
VLEN;
54 if (
VLENX * m_Nxv != m_Nx) {
55 vout.
crucial(m_vl,
"%s: Nx must be multiple of VLENX.\n",
59 if (
VLENY * m_Nyv != m_Ny) {
60 vout.
crucial(m_vl,
"%s: Ny must be multiple of VLENY.\n",
75 for (
int mu = 0; mu < m_Ndim; ++mu) {
78 do_comm_any += do_comm[mu];
79 vout.
general(m_vl,
" do_comm[%d] = %d\n", mu, do_comm[mu]);
82 m_Nbdsize.resize(m_Ndim);
83 m_Nbdsize[0] = m_Nvc * m_Ny * m_Nz * m_Nt;
84 m_Nbdsize[1] = m_Nvc * m_Nx * m_Nz * m_Nt;
85 m_Nbdsize[2] = m_Nvc * m_Nx * m_Ny * m_Nt;
86 m_Nbdsize[3] = m_Nvc * m_Nx * m_Ny * m_Nz;
91 m_U.reset(m_Ndf, m_Nst, m_Ndim);
94 m_w1.reset(m_Nvc, m_Nst, 1);
95 m_w2.reset(m_Nvc, m_Nst, 1);
96 m_v2.reset(m_Nvc, m_Nst, 1);
102 set_staggered_phase();
106 set_parameters(params);
118 template<
typename AFIELD>
128 template<
typename AFIELD>
131 chsend_up.resize(m_Ndim);
132 chrecv_up.resize(m_Ndim);
133 chsend_dn.resize(m_Ndim);
134 chrecv_dn.resize(m_Ndim);
136 for (
int mu = 0; mu < m_Ndim; ++mu) {
137 int Nvsize = m_Nbdsize[mu] *
sizeof(
real_t);
139 chsend_dn[mu].send_init(Nvsize, mu, -1);
140 chsend_up[mu].send_init(Nvsize, mu, 1);
142 chrecv_up[mu].recv_init(Nvsize, mu, 1);
143 chrecv_dn[mu].recv_init(Nvsize, mu, -1);
145 void *buf_up = (
void *)chsend_dn[mu].ptr();
146 chrecv_up[mu].recv_init(Nvsize, mu, 1, buf_up);
147 void *buf_dn = (
void *)chsend_up[mu].ptr();
148 chrecv_dn[mu].recv_init(Nvsize, mu, -1, buf_dn);
151 if (do_comm[mu] == 1) {
152 chset_send.append(chsend_up[mu]);
153 chset_send.append(chsend_dn[mu]);
154 chset_recv.append(chrecv_up[mu]);
155 chset_recv.append(chrecv_dn[mu]);
162 template<
typename AFIELD>
173 vout.
crucial(m_vl,
"%s: fetch error, input parameter not found.\n",
178 set_parameters(
real_t(mq), bc);
183 template<
typename AFIELD>
185 const std::vector<int> bc)
187 assert(bc.size() == m_Ndim);
195 m_boundary.resize(m_Ndim);
196 for (
int mu = 0; mu < m_Ndim; ++mu) {
197 m_boundary[mu] = bc[mu];
201 vout.
general(m_vl,
"%s: set parameters\n", class_name.c_str());
203 for (
int mu = 0; mu < m_Ndim; ++mu) {
204 vout.
general(m_vl,
" boundary[%d] = %2d\n", mu, m_boundary[mu]);
212 template<
typename AFIELD>
215 params.
set_double(
"quark_mass",
double(m_mq));
223 template<
typename AFIELD>
231 real_t *stgph = m_stg_phase.ptr(0);
232 real_t *prty = m_parity.ptr(0);
236 for (
int t = 0; t < m_Nt; ++t) {
237 int t2 = t + ipet * m_Nt;
238 for (
int z = 0; z < m_Nz; ++z) {
239 int z2 = z + ipez * m_Nz;
240 for (
int y = 0; y < m_Ny; ++y) {
241 int y2 = y + ipey * m_Ny;
242 for (
int x = 0; x < m_Nx; ++x) {
243 int x2 = x + ipex * m_Nx;
244 int is = index.site(x, y, z, t);
246 stgph[index.idx(0, 1, is, 0)] = 1.0;
247 stgph[index.idx(0, 1, is, 1)] = 1.0;
248 stgph[index.idx(0, 1, is, 2)] = 1.0;
249 stgph[index.idx(0, 1, is, 3)] = 1.0;
251 prty[index.idx(0, 1, is, 0)] = 1.0;
254 stgph[index.idx(0, 1, is, 1)] = -1.0;
256 if (((x2 + y2) % 2) == 1) {
257 stgph[index.idx(0, 1, is, 2)] = -1.0;
259 if (((x2 + y2 + z2) % 2) == 1) {
260 stgph[index.idx(0, 1, is, 3)] = -1.0;
262 if (((x2 + y2 + z2 + t2) % 2) == 1) {
263 prty[index.idx(0, 1, is, 0)] = -1.0;
273 template<
typename AFIELD>
278 vout.
detailed(m_vl,
"%s: set_config is called: num_threads = %d\n",
279 class_name.c_str(), nth);
287 vout.
detailed(m_vl,
"%s: set_config finished\n", class_name.c_str());
292 template<
typename AFIELD>
305 template<
typename AFIELD>
314 for (
int mu = 0; mu < m_Ndim; ++mu) {
316 real_t *stgph = m_stg_phase.ptr(index.idx(0, 1, 0, mu));
317 real_t *up = m_U.ptr(index.idx_G(0, 0, mu));
326 template<
typename AFIELD>
332 if (ith == 0) m_mode = mode;
339 template<
typename AFIELD>
344 }
else if (m_mode ==
"Ddag") {
346 }
else if (m_mode ==
"DdagD") {
348 }
else if (m_mode ==
"H") {
351 vout.
crucial(m_vl,
"%s: mode undeifined.\n", class_name.c_str());
358 template<
typename AFIELD>
363 }
else if (m_mode ==
"Ddag") {
365 }
else if (m_mode ==
"DdagD") {
367 }
else if (m_mode ==
"H") {
370 vout.
crucial(m_vl,
"%s: mode undeifined.\n", class_name.c_str());
377 template<
typename AFIELD>
386 template<
typename AFIELD>
397 template<
typename AFIELD>
406 template<
typename AFIELD>
409 mult_D_qxs(v, w, -1);
415 template<
typename AFIELD>
429 if (do_comm_any > 0) {
430 if (ith == 0) chset_recv.start();
445 buf1zp, buf1zm, buf1tp, buf1tm,
446 up, wp, m_Nsize, do_comm);
450 if (ith == 0) chset_send.start();
456 if (do_comm_any > 0) {
457 if (ith == 0) chset_recv.wait();
474 buf2xp, buf2xm, buf2yp, buf2ym,
475 buf2zp, buf2zm, buf2tp, buf2tm,
476 m_mq, jd, m_Nsize, do_comm);
478 if (ith == 0) chset_send.wait();
486 template<
typename AFIELD>
498 for (
int mu = 0; mu < m_Ndim; ++mu) {
507 axpby(fac, vp, m_mq, wp);
514 template<
typename AFIELD>
522 real_t *ph = m_parity.ptr(0);
532 template<
typename AFIELD>
535 real_t *ph = m_parity.ptr(0);
545 template<
typename AFIELD>
553 template<
typename AFIELD>
562 template<
typename AFIELD>
566 m_shift->backward(m_w1, w, idir);
569 real_t *up = m_U.ptr(m_Ndf * m_Nst * idir);
581 template<
typename AFIELD>
586 real_t *up = m_U.
ptr(m_Ndf * m_Nst * idir);
592 m_shift->forward(m_w2, m_w1, idir);
600 template<
typename AFIELD>
603 return flop_count(m_mode);
608 template<
typename AFIELD>
616 double flop_site, flop;
618 flop_site =
static_cast<double>(m_Nvc * (2 + 8 * 2 * m_Nvc));
621 flop = flop_site *
static_cast<double>(Lvol);
623 if ((mode ==
"DdagD") || (mode ==
"DDdag")) flop *= 2.0;