12 template<
typename AFIELD>
 
   14   = 
"AFopr_Staggered<AFIELD>";
 
   16 template<
typename AFIELD>
 
   32   vout.
general(m_vl, 
"%s: construction\n", class_name.c_str());
 
   43   m_Ndf  = 2 * m_Nc * m_Nc;
 
   52   m_Nstv = m_Nst / 
VLEN;
 
   54   if (
VLENX * m_Nxv != m_Nx) {
 
   55     vout.
crucial(m_vl, 
"%s: Nx must be multiple of VLENX.\n",
 
   59   if (
VLENY * m_Nyv != m_Ny) {
 
   60     vout.
crucial(m_vl, 
"%s: Ny must be multiple of VLENY.\n",
 
   75   for (
int mu = 0; mu < m_Ndim; ++mu) {
 
   78     do_comm_any += do_comm[mu];
 
   79     vout.
general(m_vl, 
"  do_comm[%d] = %d\n", mu, do_comm[mu]);
 
   82   m_Nbdsize.resize(m_Ndim);
 
   83   m_Nbdsize[0] = m_Nvc * m_Ny * m_Nz * m_Nt;
 
   84   m_Nbdsize[1] = m_Nvc * m_Nx * m_Nz * m_Nt;
 
   85   m_Nbdsize[2] = m_Nvc * m_Nx * m_Ny * m_Nt;
 
   86   m_Nbdsize[3] = m_Nvc * m_Nx * m_Ny * m_Nz;
 
   91   m_U.reset(m_Ndf, m_Nst, m_Ndim);
 
   94   m_w1.reset(m_Nvc, m_Nst, 1);
 
   95   m_w2.reset(m_Nvc, m_Nst, 1);
 
   96   m_v2.reset(m_Nvc, m_Nst, 1);
 
  102   set_staggered_phase();
 
  106   set_parameters(params);
 
  118 template<
typename AFIELD>
 
  128 template<
typename AFIELD>
 
  131   chsend_up.resize(m_Ndim);
 
  132   chrecv_up.resize(m_Ndim);
 
  133   chsend_dn.resize(m_Ndim);
 
  134   chrecv_dn.resize(m_Ndim);
 
  136   for (
int mu = 0; mu < m_Ndim; ++mu) {
 
  137     int Nvsize = m_Nbdsize[mu] * 
sizeof(
real_t);
 
  139     chsend_dn[mu].send_init(Nvsize, mu, -1);
 
  140     chsend_up[mu].send_init(Nvsize, mu, 1);
 
  142     chrecv_up[mu].recv_init(Nvsize, mu, 1);
 
  143     chrecv_dn[mu].recv_init(Nvsize, mu, -1);
 
  145     void *buf_up = (
void *)chsend_dn[mu].ptr();
 
  146     chrecv_up[mu].recv_init(Nvsize, mu, 1, buf_up);
 
  147     void *buf_dn = (
void *)chsend_up[mu].ptr();
 
  148     chrecv_dn[mu].recv_init(Nvsize, mu, -1, buf_dn);
 
  151     if (do_comm[mu] == 1) {
 
  152       chset_send.append(chsend_up[mu]);
 
  153       chset_send.append(chsend_dn[mu]);
 
  154       chset_recv.append(chrecv_up[mu]);
 
  155       chset_recv.append(chrecv_dn[mu]);
 
  162 template<
typename AFIELD>
 
  173     vout.
crucial(m_vl, 
"%s: fetch error, input parameter not found.\n",
 
  178   set_parameters(
real_t(mq), bc);
 
  183 template<
typename AFIELD>
 
  185                                              const std::vector<int> bc)
 
  187   assert(bc.size() == m_Ndim);
 
  195     m_boundary.resize(m_Ndim);
 
  196     for (
int mu = 0; mu < m_Ndim; ++mu) {
 
  197       m_boundary[mu] = bc[mu];
 
  201   vout.
general(m_vl, 
"%s: set parameters\n", class_name.c_str());
 
  203   for (
int mu = 0; mu < m_Ndim; ++mu) {
 
  204     vout.
general(m_vl, 
"  boundary[%d] = %2d\n", mu, m_boundary[mu]);
 
  212 template<
typename AFIELD>
 
  215   params.
set_double(
"quark_mass", 
double(m_mq));
 
  223 template<
typename AFIELD>
 
  231   real_t *stgph = m_stg_phase.ptr(0);
 
  232   real_t *prty  = m_parity.ptr(0);
 
  236   for (
int t = 0; t < m_Nt; ++t) {
 
  237     int t2 = t + ipet * m_Nt;
 
  238     for (
int z = 0; z < m_Nz; ++z) {
 
  239       int z2 = z + ipez * m_Nz;
 
  240       for (
int y = 0; y < m_Ny; ++y) {
 
  241         int y2 = y + ipey * m_Ny;
 
  242         for (
int x = 0; x < m_Nx; ++x) {
 
  243           int x2 = x + ipex * m_Nx;
 
  244           int is = index.site(x, y, z, t);
 
  246           stgph[index.idx(0, 1, is, 0)] = 1.0;
 
  247           stgph[index.idx(0, 1, is, 1)] = 1.0;
 
  248           stgph[index.idx(0, 1, is, 2)] = 1.0;
 
  249           stgph[index.idx(0, 1, is, 3)] = 1.0;
 
  251           prty[index.idx(0, 1, is, 0)] = 1.0;
 
  254             stgph[index.idx(0, 1, is, 1)] = -1.0;
 
  256           if (((x2 + y2) % 2) == 1) {
 
  257             stgph[index.idx(0, 1, is, 2)] = -1.0;
 
  259           if (((x2 + y2 + z2) % 2) == 1) {
 
  260             stgph[index.idx(0, 1, is, 3)] = -1.0;
 
  262           if (((x2 + y2 + z2 + t2) % 2) == 1) {
 
  263             prty[index.idx(0, 1, is, 0)] = -1.0;
 
  273 template<
typename AFIELD>
 
  278   vout.
detailed(m_vl, 
"%s: set_config is called: num_threads = %d\n",
 
  279                 class_name.c_str(), nth);
 
  287   vout.
detailed(m_vl, 
"%s: set_config finished\n", class_name.c_str());
 
  292 template<
typename AFIELD>
 
  305 template<
typename AFIELD>
 
  314   for (
int mu = 0; mu < m_Ndim; ++mu) {
 
  316     real_t *stgph = m_stg_phase.ptr(index.idx(0, 1, 0, mu));
 
  317     real_t *up    = m_U.ptr(index.idx_G(0, 0, mu));
 
  326 template<
typename AFIELD>
 
  332   if (ith == 0) m_mode = mode;
 
  339 template<
typename AFIELD>
 
  344   } 
else if (m_mode == 
"Ddag") {
 
  346   } 
else if (m_mode == 
"DdagD") {
 
  348   } 
else if (m_mode == 
"H") {
 
  351     vout.
crucial(m_vl, 
"%s: mode undeifined.\n", class_name.c_str());
 
  358 template<
typename AFIELD>
 
  363   } 
else if (m_mode == 
"Ddag") {
 
  365   } 
else if (m_mode == 
"DdagD") {
 
  367   } 
else if (m_mode == 
"H") {
 
  370     vout.
crucial(m_vl, 
"%s: mode undeifined.\n", class_name.c_str());
 
  377 template<
typename AFIELD>
 
  386 template<
typename AFIELD>
 
  397 template<
typename AFIELD>
 
  406 template<
typename AFIELD>
 
  409   mult_D_qxs(v, w, -1);
 
  415 template<
typename AFIELD>
 
  429   if (do_comm_any > 0) {
 
  430     if (ith == 0) chset_recv.start();
 
  445                                 buf1zp, buf1zm, buf1tp, buf1tm,
 
  446                                 up, wp, m_Nsize, do_comm);
 
  450     if (ith == 0) chset_send.start();
 
  456   if (do_comm_any > 0) {
 
  457     if (ith == 0) chset_recv.wait();
 
  474                                 buf2xp, buf2xm, buf2yp, buf2ym,
 
  475                                 buf2zp, buf2zm, buf2tp, buf2tm,
 
  476                                 m_mq, jd, m_Nsize, do_comm);
 
  478     if (ith == 0) chset_send.wait();
 
  486 template<
typename AFIELD>
 
  498   for (
int mu = 0; mu < m_Ndim; ++mu) {
 
  507   axpby(fac, vp, m_mq, wp);
 
  514 template<
typename AFIELD>
 
  522   real_t *ph = m_parity.ptr(0);
 
  532 template<
typename AFIELD>
 
  535   real_t *ph = m_parity.ptr(0);
 
  545 template<
typename AFIELD>
 
  553 template<
typename AFIELD>
 
  562 template<
typename AFIELD>
 
  566   m_shift->backward(m_w1, w, idir);
 
  569   real_t *up = m_U.ptr(m_Ndf * m_Nst * idir);
 
  581 template<
typename AFIELD>
 
  586   real_t *up = m_U.
ptr(m_Ndf * m_Nst * idir);
 
  592   m_shift->forward(m_w2, m_w1, idir);
 
  600 template<
typename AFIELD>
 
  603   return flop_count(m_mode);
 
  608 template<
typename AFIELD>
 
  616   double flop_site, flop;
 
  618   flop_site = 
static_cast<double>(m_Nvc * (2 + 8 * 2 * m_Nvc));
 
  621   flop = flop_site * 
static_cast<double>(Lvol);
 
  623   if ((mode == 
"DdagD") || (mode == 
"DDdag")) flop *= 2.0;