23 #if defined USE_GROUP_SU3 
   24 #include "fopr_Wilson_impl_SU3.inc" 
   25 #elif defined USE_GROUP_SU2 
   26 #include "fopr_Wilson_impl_SU2.inc" 
   27 #elif defined USE_GROUP_SU_N 
   28 #include "fopr_Wilson_impl_SU_N.inc" 
   49     vout.
crucial(
m_vl, 
"%s:  Nz = %d and Nt = %d do not match Nthread = %d\n",
 
   59     vout.
crucial(
m_vl, 
"%s:  Mz = %d and Ntask_z = %d do not match Nz = %d\n",
 
   65     vout.
crucial(
m_vl, 
"%s:  Mt = %d and Ntask_t = %d do not match Nt = %d\n",
 
   78   for (
int ith_t = 0; ith_t < 
m_Ntask_t; ++ith_t) {
 
   79     for (
int ith_z = 0; ith_z < 
m_Ntask_z; ++ith_z) {
 
   80       int itask = ith_z + m_Ntask_z * ith_t;
 
   88       if (ith_t == 0) 
m_arg[itask].kt0 = 1;
 
   89       if (ith_z == 0) 
m_arg[itask].kz0 = 1;
 
   90       if (ith_t == m_Ntask_t - 1) 
m_arg[itask].kt1 = 1;
 
   91       if (ith_z == m_Ntask_z - 1) 
m_arg[itask].kz1 = 1;
 
   95       m_arg[itask].isite_cpz = ith_t * 
m_Mt * Nxy2;
 
   96       m_arg[itask].isite_cpt = ith_z * 
m_Mz * Nxy2;
 
  104                                                       double *w, 
double fac)
 
  106   int Nvcd = m_Nvc * 
m_Nd;
 
  107   int Nvxy = Nvcd * m_Nx2 * m_Ny;
 
  109   int    isite = m_arg[itask].isite;
 
  110   double *wp   = &w[Nvcd * isite];
 
  112   for (
int it = 0; it < m_Mt; ++it) {
 
  113     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  114       for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
 
  115         int iv = ivxy + Nvxy * (iz + m_Nz * it);
 
  116         wp[iv] = fac * wp[iv];
 
  127   int Nvcd = m_Nvc * 
m_Nd;
 
  128   int Nvxy = Nvcd * m_Nx2 * m_Ny;
 
  130   int    isite = m_arg[itask].isite;
 
  131   double *wp   = &v[Nvcd * isite];
 
  133   for (
int it = 0; it < m_Mt; ++it) {
 
  134     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  135       for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
 
  136         int iv = ivxy + Nvxy * (iz + m_Nz * it);
 
  146   int itask, 
double *vcp1, 
const double *
v1, 
int ieo)
 
  148   int Nvc2  = 2 * m_Nvc;
 
  149   int Nvcd  = m_Nvc * 
m_Nd;
 
  150   int Nvcd2 = Nvcd / 2;
 
  157   int isite    = m_arg[itask].isite;
 
  158   int isite_cp = m_arg[itask].isite_cpx;
 
  159   int iyzt0    = isite / m_Nx2;
 
  161   const double *w1 = &v1[Nvcd * isite];
 
  162   double       *w2 = &vcp1[Nvcd2 * isite_cp];
 
  165   double bc2  = m_boundary2[idir];
 
  170   for (
int it = 0; it < m_Mt; ++it) {
 
  171     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  172       for (
int iy = 0; iy < m_Ny; ++iy) {
 
  173         int iyzt = iy + m_Ny * (iz + m_Nz * it);
 
  174         int Leo  = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
 
  176           int is = ix + m_Nx2 * iyzt;
 
  179           int ix1 = Nvc2 * ibf;
 
  180           int ix2 = ix1 + m_Nvc;
 
  182           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  183             w2[2 * ic + ix1]     = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
 
  184             w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
 
  185             w2[2 * ic + ix2]     = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
 
  186             w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
 
  198   int itask, 
double *
v2, 
const double *vcp2, 
int ieo)
 
  200   int Nvc2  = 2 * m_Nvc;
 
  201   int Nvcd  = m_Nvc * 
m_Nd;
 
  202   int Nvcd2 = Nvcd / 2;
 
  211   double wt1r, wt1i, wt2r, wt2i;
 
  213   int isite    = m_arg[itask].isite;
 
  214   int isite_cp = m_arg[itask].isite_cpx;
 
  215   int iyzt0    = isite / m_Nx2;
 
  217   const double *w1 = &vcp2[Nvcd2 * isite_cp];
 
  218   double       *w2 = &v2[Nvcd * isite];
 
  219   const double *u  = m_U->ptr(m_Ndf * (isite + ieo * 
m_Nvol / 2 + idir * 
m_Nvol));
 
  224   for (
int it = 0; it < m_Mt; ++it) {
 
  225     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  226       for (
int iy = 0; iy < m_Ny; ++iy) {
 
  227         int iyzt = iy + m_Ny * (iz + m_Nz * it);
 
  228         int Leo  = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
 
  231           int is  = ix + m_Nx2 * iyzt;
 
  234           int ix1 = Nvc2 * ibf;
 
  235           int ix2 = ix1 + m_Nvc;
 
  237           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  238             int ic2 = ic * m_Nvc;
 
  239             wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
 
  240             wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
 
  241             wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
 
  242             wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
 
  243             w2[2 * ic + id1 + iv]     += wt1r;
 
  244             w2[2 * ic + 1 + id1 + iv] += wt1i;
 
  245             w2[2 * ic + id2 + iv]     += wt2r;
 
  246             w2[2 * ic + 1 + id2 + iv] += wt2i;
 
  247             w2[2 * ic + id3 + iv]     += wt2i;
 
  248             w2[2 * ic + 1 + id3 + iv] += -wt2r;
 
  249             w2[2 * ic + id4 + iv]     += wt1i;
 
  250             w2[2 * ic + 1 + id4 + iv] += -wt1r;
 
  262   int itask, 
double *
v2, 
const double *
v1, 
int ieo)
 
  264   int Nvcd = m_Nvc * 
m_Nd;
 
  273   double vt1[m_Nvc], vt2[m_Nvc];
 
  274   double wt1r, wt1i, wt2r, wt2i;
 
  276   int isite = m_arg[itask].isite;
 
  277   int iyzt0 = isite / m_Nx2;
 
  279   const double *w1 = &v1[Nvcd * isite];
 
  280   double       *w2 = &v2[Nvcd * isite];
 
  281   const double *u  = m_U->ptr(m_Ndf * (isite + ieo * 
m_Nvol / 2 + idir * 
m_Nvol));
 
  283   for (
int it = 0; it < m_Mt; ++it) {
 
  284     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  285       for (
int iy = 0; iy < m_Ny; ++iy) {
 
  286         int iyzt = iy + m_Ny * (iz + m_Nz * it);
 
  287         int Leo  = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
 
  288         for (
int ix = 0; ix < m_Nx2 - Leo; ++ix) {
 
  289           int is = ix + m_Nx2 * iyzt;
 
  291           int in = Nvcd * (is + Leo);
 
  294           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  295             vt1[2 * ic]     = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
 
  296             vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
 
  297             vt2[2 * ic]     = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
 
  298             vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
 
  301           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  302             int ic2 = ic * m_Nvc;
 
  304             wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
 
  305             wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
 
  306             wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
 
  307             wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
 
  309             w2[2 * ic + id1 + iv]     += wt1r;
 
  310             w2[2 * ic + 1 + id1 + iv] += wt1i;
 
  311             w2[2 * ic + id2 + iv]     += wt2r;
 
  312             w2[2 * ic + 1 + id2 + iv] += wt2i;
 
  313             w2[2 * ic + id3 + iv]     += wt2i;
 
  314             w2[2 * ic + 1 + id3 + iv] += -wt2r;
 
  315             w2[2 * ic + id4 + iv]     += wt1i;
 
  316             w2[2 * ic + 1 + id4 + iv] += -wt1r;
 
  327   int itask, 
double *vcp1, 
const double *
v1, 
int ieo)
 
  329   int Nvc2  = 2 * m_Nvc;
 
  330   int Nvcd  = m_Nvc * 
m_Nd;
 
  331   int Nvcd2 = Nvcd / 2;
 
  340   int isite    = m_arg[itask].isite;
 
  341   int isite_cp = m_arg[itask].isite_cpx;
 
  342   int iyzt0    = isite / m_Nx2;
 
  344   const double *w1 = &v1[Nvcd * isite];
 
  345   double       *w2 = &vcp1[Nvcd2 * isite_cp];
 
  346   const double *u  = m_U->ptr(m_Ndf * (isite + (1 - ieo) * 
m_Nvol / 2 + idir * 
m_Nvol));
 
  348   double vt1[m_Nvc], vt2[m_Nvc];
 
  353   for (
int it = 0; it < m_Mt; ++it) {
 
  354     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  355       for (
int iy = 0; iy < m_Ny; ++iy) {
 
  356         int iyzt = iy + m_Ny * (iz + m_Nz * it);
 
  357         int Leo  = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
 
  359           int is = ix + m_Nx2 * iyzt;
 
  363           int ix1 = Nvc2 * ibf;
 
  364           int ix2 = ix1 + m_Nvc;
 
  366           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  367             vt1[2 * ic]     = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
 
  368             vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
 
  369             vt2[2 * ic]     = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
 
  370             vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
 
  373           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  375             w2[icr + ix1]     = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
 
  376             w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
 
  377             w2[icr + ix2]     = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
 
  378             w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
 
  390   int itask, 
double *
v2, 
const double *vcp2, 
int ieo)
 
  392   int Nvc2  = 2 * m_Nvc;
 
  393   int Nvcd  = m_Nvc * 
m_Nd;
 
  394   int Nvcd2 = Nvcd / 2;
 
  402   double bc2  = m_boundary2[idir];
 
  406   int isite    = m_arg[itask].isite;
 
  407   int isite_cp = m_arg[itask].isite_cpx;
 
  408   int iyzt0    = isite / m_Nx2;
 
  410   const double *w1 = &vcp2[Nvcd2 * isite_cp];
 
  411   double       *w2 = &v2[Nvcd * isite];
 
  416   for (
int it = 0; it < m_Mt; ++it) {
 
  417     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  418       for (
int iy = 0; iy < m_Ny; ++iy) {
 
  419         int iyzt = iy + m_Ny * (iz + m_Nz * it);
 
  420         int Leo  = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
 
  422           int is = ix + m_Nx2 * iyzt;
 
  425           int ix1 = Nvc2 * ibf;
 
  426           int ix2 = ix1 + m_Nvc;
 
  428           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  430             int ici = 2 * ic + 1;
 
  431             w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
 
  432             w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
 
  433             w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
 
  434             w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
 
  435             w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
 
  436             w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
 
  437             w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
 
  438             w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
 
  450   int itask, 
double *
v2, 
const double *
v1, 
int ieo)
 
  452   int Nvcd = m_Nvc * 
m_Nd;
 
  461   double vt1[m_Nvc], vt2[m_Nvc];
 
  462   double wt1r, wt1i, wt2r, wt2i;
 
  464   int isite = m_arg[itask].isite;
 
  465   int iyzt0 = isite / m_Nx2;
 
  467   const double *w1 = &v1[Nvcd * isite];
 
  468   double       *w2 = &v2[Nvcd * isite];
 
  469   const double *u  = m_U->ptr(m_Ndf * (isite + (1 - ieo) * 
m_Nvol / 2 + idir * 
m_Nvol));
 
  471   for (
int it = 0; it < m_Mt; ++it) {
 
  472     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  473       for (
int iy = 0; iy < m_Ny; ++iy) {
 
  474         int iyzt = iy + m_Ny * (iz + m_Nz * it);
 
  475         int Leo  = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
 
  477         for (
int ix = Meo; ix < m_Nx2; ++ix) {
 
  478           int is = ix + m_Nx2 * iyzt;
 
  480           int in = Nvcd * (is - 
Meo);
 
  481           int ig = m_Ndf * (is - 
Meo);
 
  483           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  484             vt1[2 * ic]     = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
 
  485             vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
 
  486             vt2[2 * ic]     = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
 
  487             vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
 
  490           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  493             wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
 
  494             wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
 
  495             wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
 
  496             wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
 
  498             w2[2 * ic + id1 + iv]     += wt1r;
 
  499             w2[2 * ic + 1 + id1 + iv] += wt1i;
 
  500             w2[2 * ic + id2 + iv]     += wt2r;
 
  501             w2[2 * ic + 1 + id2 + iv] += wt2i;
 
  502             w2[2 * ic + id3 + iv]     += -wt2i;
 
  503             w2[2 * ic + 1 + id3 + iv] += +wt2r;
 
  504             w2[2 * ic + id4 + iv]     += -wt1i;
 
  505             w2[2 * ic + 1 + id4 + iv] += +wt1r;
 
  516   int itask, 
double *vcp1, 
const double *
v1, 
int ieo)
 
  518   int Nvc2  = 2 * m_Nvc;
 
  519   int Nvcd  = m_Nvc * 
m_Nd;
 
  520   int Nvcd2 = Nvcd / 2;
 
  527   int isite    = m_arg[itask].isite;
 
  528   int isite_cp = m_arg[itask].isite_cpy;
 
  530   const double *w1 = &v1[Nvcd * isite];
 
  531   double       *w2 = &vcp1[Nvcd2 * isite_cp];
 
  534   double bc2  = m_boundary2[idir];
 
  538   for (
int it = 0; it < m_Mt; ++it) {
 
  539     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  540       for (
int ix = 0; ix < m_Nx2; ++ix) {
 
  541         int is  = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
 
  542         int is2 = ix + m_Nx2 * (iz + m_Mz * it);
 
  544         int ix1 = Nvc2 * is2;
 
  545         int ix2 = ix1 + m_Nvc;
 
  547         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  548           w2[2 * ic + ix1]     = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
 
  549           w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
 
  550           w2[2 * ic + ix2]     = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
 
  551           w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
 
  561   int itask, 
double *
v2, 
const double *vcp2, 
int ieo)
 
  563   int Nvc2  = 2 * m_Nvc;
 
  564   int Nvcd  = m_Nvc * 
m_Nd;
 
  565   int Nvcd2 = Nvcd / 2;
 
  574   double wt1r, wt1i, wt2r, wt2i;
 
  576   int isite    = m_arg[itask].isite;
 
  577   int isite_cp = m_arg[itask].isite_cpy;
 
  579   const double *w1 = &vcp2[Nvcd2 * isite_cp];
 
  580   double       *w2 = &v2[Nvcd * isite];
 
  581   const double *u  = m_U->ptr(m_Ndf * (isite + ieo * 
m_Nvol / 2 + idir * 
m_Nvol));
 
  585   for (
int it = 0; it < m_Mt; ++it) {
 
  586     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  587       for (
int ix = 0; ix < m_Nx2; ++ix) {
 
  588         int is  = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
 
  589         int is2 = ix + m_Nx2 * (iz + m_Mz * it);
 
  592         int ix1 = Nvc2 * is2;
 
  593         int ix2 = ix1 + m_Nvc;
 
  595         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  596           int ic2 = ic * m_Nvc;
 
  598           wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
 
  599           wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
 
  600           wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
 
  601           wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
 
  603           w2[2 * ic + id1 + iv]     += wt1r;
 
  604           w2[2 * ic + 1 + id1 + iv] += wt1i;
 
  605           w2[2 * ic + id2 + iv]     += wt2r;
 
  606           w2[2 * ic + 1 + id2 + iv] += wt2i;
 
  607           w2[2 * ic + id3 + iv]     += -wt2r;
 
  608           w2[2 * ic + 1 + id3 + iv] += -wt2i;
 
  609           w2[2 * ic + id4 + iv]     += wt1r;
 
  610           w2[2 * ic + 1 + id4 + iv] += wt1i;
 
  620   int itask, 
double *
v2, 
const double *
v1, 
int ieo)
 
  622   int Nvcd = m_Nvc * 
m_Nd;
 
  631   double vt1[m_Nvc], vt2[m_Nvc];
 
  632   double wt1r, wt1i, wt2r, wt2i;
 
  634   int isite = m_arg[itask].isite;
 
  636   const double *w1 = &v1[Nvcd * isite];
 
  637   double       *w2 = &v2[Nvcd * isite];
 
  638   const double *u  = m_U->ptr(m_Ndf * (isite + ieo * 
m_Nvol / 2 + idir * 
m_Nvol));
 
  640   for (
int it = 0; it < m_Mt; ++it) {
 
  641     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  642       for (
int iy = 0; iy < m_Ny - 1; ++iy) {
 
  643         for (
int ix = 0; ix < m_Nx2; ++ix) {
 
  644           int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
 
  646           int in = Nvcd * (is + m_Nx2);
 
  649           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  650             vt1[2 * ic]     = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
 
  651             vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
 
  652             vt2[2 * ic]     = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
 
  653             vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
 
  656           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  657             int ic2 = ic * m_Nvc;
 
  659             wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
 
  660             wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
 
  661             wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
 
  662             wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
 
  664             w2[2 * ic + id1 + iv]     += wt1r;
 
  665             w2[2 * ic + 1 + id1 + iv] += wt1i;
 
  666             w2[2 * ic + id2 + iv]     += wt2r;
 
  667             w2[2 * ic + 1 + id2 + iv] += wt2i;
 
  668             w2[2 * ic + id3 + iv]     += -wt2r;
 
  669             w2[2 * ic + 1 + id3 + iv] += -wt2i;
 
  670             w2[2 * ic + id4 + iv]     += wt1r;
 
  671             w2[2 * ic + 1 + id4 + iv] += wt1i;
 
  682   int itask, 
double *vcp1, 
const double *
v1, 
int ieo)
 
  684   int Nvc2  = 2 * m_Nvc;
 
  685   int Nvcd  = m_Nvc * 
m_Nd;
 
  686   int Nvcd2 = Nvcd / 2;
 
  695   int isite    = m_arg[itask].isite;
 
  696   int isite_cp = m_arg[itask].isite_cpy;
 
  698   const double *w1 = &v1[Nvcd * isite];
 
  699   double       *w2 = &vcp1[Nvcd2 * isite_cp];
 
  700   const double *u  = m_U->ptr(m_Ndf * (isite + (1 - ieo) * 
m_Nvol / 2 + idir * 
m_Nvol));
 
  702   double vt1[m_Nvc], vt2[m_Nvc];
 
  706   for (
int it = 0; it < m_Mt; ++it) {
 
  707     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  708       for (
int ix = 0; ix < m_Nx2; ++ix) {
 
  709         int is  = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
 
  710         int is2 = ix + m_Nx2 * (iz + m_Mz * it);
 
  713         int ix1 = Nvc2 * is2;
 
  714         int ix2 = ix1 + m_Nvc;
 
  716         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  717           vt1[2 * ic]     = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
 
  718           vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
 
  719           vt2[2 * ic]     = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
 
  720           vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
 
  723         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  725           w2[icr + ix1]     = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
 
  726           w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
 
  727           w2[icr + ix2]     = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
 
  728           w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
 
  738   int itask, 
double *
v2, 
const double *vcp2, 
int ieo)
 
  740   int Nvc2  = 2 * m_Nvc;
 
  741   int Nvcd  = m_Nvc * 
m_Nd;
 
  742   int Nvcd2 = Nvcd / 2;
 
  750   double bc2  = m_boundary2[idir];
 
  754   int isite    = m_arg[itask].isite;
 
  755   int isite_cp = m_arg[itask].isite_cpy;
 
  757   const double *w1 = &vcp2[Nvcd2 * isite_cp];
 
  758   double       *w2 = &v2[Nvcd * isite];
 
  762   for (
int it = 0; it < m_Mt; ++it) {
 
  763     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  764       for (
int ix = 0; ix < m_Nx2; ++ix) {
 
  765         int is  = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
 
  766         int is2 = ix + m_Nx2 * (iz + m_Mz * it);
 
  768         int ix1 = Nvc2 * is2;
 
  769         int ix2 = ix1 + m_Nvc;
 
  771         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  773           int ici = 2 * ic + 1;
 
  774           w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
 
  775           w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
 
  776           w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
 
  777           w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
 
  778           w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
 
  779           w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
 
  780           w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
 
  781           w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
 
  791   int itask, 
double *
v2, 
const double *
v1, 
int ieo)
 
  793   int Nvcd = m_Nvc * 
m_Nd;
 
  802   double vt1[m_Nvc], vt2[m_Nvc];
 
  803   double wt1r, wt1i, wt2r, wt2i;
 
  805   int isite = m_arg[itask].isite;
 
  807   const double *w1 = &v1[Nvcd * isite];
 
  808   double       *w2 = &v2[Nvcd * isite];
 
  809   const double *u  = m_U->ptr(m_Ndf * (isite + (1 - ieo) * 
m_Nvol / 2 + idir * 
m_Nvol));
 
  811   for (
int it = 0; it < m_Mt; ++it) {
 
  812     for (
int iz = 0; iz < m_Mz; ++iz) {
 
  813       for (
int iy = 1; iy < m_Ny; ++iy) {
 
  814         for (
int ix = 0; ix < m_Nx2; ++ix) {
 
  815           int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
 
  817           int in = Nvcd * (is - m_Nx2);
 
  818           int ig = m_Ndf * (is - m_Nx2);
 
  820           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  821             vt1[2 * ic]     = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
 
  822             vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
 
  823             vt2[2 * ic]     = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
 
  824             vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
 
  827           for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  829             wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
 
  830             wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
 
  831             wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
 
  832             wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
 
  834             w2[ic2 + id1 + iv]     += wt1r;
 
  835             w2[ic2 + 1 + id1 + iv] += wt1i;
 
  836             w2[ic2 + id2 + iv]     += wt2r;
 
  837             w2[ic2 + 1 + id2 + iv] += wt2i;
 
  838             w2[ic2 + id3 + iv]     += wt2r;
 
  839             w2[ic2 + 1 + id3 + iv] += wt2i;
 
  840             w2[ic2 + id4 + iv]     += -wt1r;
 
  841             w2[ic2 + 1 + id4 + iv] += -wt1i;
 
  852   int itask, 
double *vcp1, 
const double *
v1, 
int ieo)
 
  854   int Nvc2  = 2 * m_Nvc;
 
  855   int Nvcd  = m_Nvc * 
m_Nd;
 
  856   int Nvcd2 = Nvcd / 2;
 
  863   int isite    = m_arg[itask].isite;
 
  864   int isite_cp = m_arg[itask].isite_cpz;
 
  866   const double *w1 = &v1[Nvcd * isite];
 
  867   double       *w2 = &vcp1[Nvcd2 * isite_cp];
 
  870   double bc2  = m_boundary2[idir];
 
  872   if (m_arg[itask].kz0 == 1) {
 
  873     int Nxy = m_Nx2 * m_Ny;
 
  875     for (
int it = 0; it < m_Mt; ++it) {
 
  876       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
  877         int is  = ixy + Nxy * (iz + m_Nz * it);
 
  878         int is2 = ixy + Nxy * it;
 
  881         int ix1 = Nvc2 * is2;
 
  882         int ix2 = ix1 + m_Nvc;
 
  884         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  885           w2[2 * ic + ix1]     = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
 
  886           w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
 
  887           w2[2 * ic + ix2]     = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
 
  888           w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
 
  898   int itask, 
double *
v2, 
const double *vcp2, 
int ieo)
 
  900   int Nvc2  = 2 * m_Nvc;
 
  901   int Nvcd  = m_Nvc * 
m_Nd;
 
  902   int Nvcd2 = Nvcd / 2;
 
  911   double wt1r, wt1i, wt2r, wt2i;
 
  913   int isite    = m_arg[itask].isite;
 
  914   int isite_cp = m_arg[itask].isite_cpz;
 
  916   const double *w1 = &vcp2[Nvcd2 * isite_cp];
 
  917   double       *w2 = &v2[Nvcd * isite];
 
  918   const double *u  = m_U->ptr(m_Ndf * (isite + ieo * 
m_Nvol / 2 + idir * 
m_Nvol));
 
  920   if (m_arg[itask].kz1 == 1) {
 
  921     int Nxy = m_Nx2 * m_Ny;
 
  923     for (
int it = 0; it < m_Mt; ++it) {
 
  924       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
  925         int is  = ixy + Nxy * (iz + m_Nz * it);
 
  926         int is2 = ixy + Nxy * it;
 
  929         int ix1 = Nvc2 * is2;
 
  930         int ix2 = ix1 + m_Nvc;
 
  932         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  933           int ic2 = ic * m_Nvc;
 
  935           wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
 
  936           wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
 
  937           wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
 
  938           wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
 
  940           w2[2 * ic + id1 + iv]     += wt1r;
 
  941           w2[2 * ic + 1 + id1 + iv] += wt1i;
 
  942           w2[2 * ic + id2 + iv]     += wt2r;
 
  943           w2[2 * ic + 1 + id2 + iv] += wt2i;
 
  944           w2[2 * ic + id3 + iv]     += wt1i;
 
  945           w2[2 * ic + 1 + id3 + iv] += -wt1r;
 
  946           w2[2 * ic + id4 + iv]     += -wt2i;
 
  947           w2[2 * ic + 1 + id4 + iv] += wt2r;
 
  957   int itask, 
double *
v2, 
const double *
v1, 
int ieo)
 
  959   int Nvcd = m_Nvc * 
m_Nd;
 
  968   double vt1[m_Nvc], vt2[m_Nvc];
 
  969   double wt1r, wt1i, wt2r, wt2i;
 
  971   int isite = m_arg[itask].isite;
 
  973   const double *w1 = &v1[Nvcd * isite];
 
  974   double       *w2 = &v2[Nvcd * isite];
 
  975   const double *u  = m_U->ptr(m_Ndf * (isite + ieo * 
m_Nvol / 2 + idir * 
m_Nvol));
 
  977   int kz1 = m_arg[itask].kz1;
 
  978   int Nxy = m_Nx2 * m_Ny;
 
  980   for (
int it = 0; it < m_Mt; ++it) {
 
  981     for (
int iz = 0; iz < m_Mz - kz1; ++iz) {
 
  982       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
  983         int is = ixy + Nxy * (iz + m_Nz * it);
 
  985         int in = Nvcd * (is + Nxy);
 
  988         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  989           vt1[2 * ic]     = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
 
  990           vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
 
  991           vt2[2 * ic]     = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
 
  992           vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
 
  995         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
  996           int ic2 = ic * m_Nvc;
 
  998           wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
 
  999           wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
 
 1000           wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
 
 1001           wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
 
 1003           w2[2 * ic + id1 + iv]     += wt1r;
 
 1004           w2[2 * ic + 1 + id1 + iv] += wt1i;
 
 1005           w2[2 * ic + id2 + iv]     += wt2r;
 
 1006           w2[2 * ic + 1 + id2 + iv] += wt2i;
 
 1007           w2[2 * ic + id3 + iv]     += wt1i;
 
 1008           w2[2 * ic + 1 + id3 + iv] += -wt1r;
 
 1009           w2[2 * ic + id4 + iv]     += -wt2i;
 
 1010           w2[2 * ic + 1 + id4 + iv] += wt2r;
 
 1020   int itask, 
double *vcp1, 
const double *
v1, 
int ieo)
 
 1022   int Nvc2  = 2 * m_Nvc;
 
 1023   int Nvcd  = m_Nvc * 
m_Nd;
 
 1024   int Nvcd2 = Nvcd / 2;
 
 1028   int id3 = m_Nvc * 2;
 
 1029   int id4 = m_Nvc * 3;
 
 1033   int isite    = m_arg[itask].isite;
 
 1034   int isite_cp = m_arg[itask].isite_cpz;
 
 1036   const double *w1 = &v1[Nvcd * isite];
 
 1037   double       *w2 = &vcp1[Nvcd2 * isite_cp];
 
 1038   const double *u  = m_U->ptr(m_Ndf * (isite + (1 - ieo) * 
m_Nvol / 2 + idir * 
m_Nvol));
 
 1040   double vt1[m_Nvc], vt2[m_Nvc];
 
 1042   if (m_arg[itask].kz1 == 1) {
 
 1043     int Nxy = m_Nx2 * m_Ny;
 
 1045     for (
int it = 0; it < m_Mt; ++it) {
 
 1046       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1047         int is  = ixy + Nxy * (iz + m_Nz * it);
 
 1048         int is2 = ixy + Nxy * it;
 
 1050         int ig  = m_Ndf * is;
 
 1051         int ix1 = Nvc2 * is2;
 
 1052         int ix2 = ix1 + m_Nvc;
 
 1054         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1055           vt1[2 * ic]     = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
 
 1056           vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
 
 1057           vt2[2 * ic]     = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
 
 1058           vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
 
 1061         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1063           w2[icr + ix1]     = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
 
 1064           w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
 
 1065           w2[icr + ix2]     = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
 
 1066           w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
 
 1076   int itask, 
double *
v2, 
const double *vcp2, 
int ieo)
 
 1078   int Nvc2  = 2 * m_Nvc;
 
 1079   int Nvcd  = m_Nvc * 
m_Nd;
 
 1080   int Nvcd2 = Nvcd / 2;
 
 1084   int id3 = m_Nvc * 2;
 
 1085   int id4 = m_Nvc * 3;
 
 1088   double bc2  = m_boundary2[idir];
 
 1092   int isite    = m_arg[itask].isite;
 
 1093   int isite_cp = m_arg[itask].isite_cpz;
 
 1095   const double *w1 = &vcp2[Nvcd2 * isite_cp];
 
 1096   double       *w2 = &v2[Nvcd * isite];
 
 1098   if (m_arg[itask].kz0 == 1) {
 
 1099     int Nxy = m_Nx2 * m_Ny;
 
 1102     for (
int it = 0; it < m_Mt; ++it) {
 
 1103       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1104         int is  = ixy + Nxy * (iz + m_Nz * it);
 
 1105         int is2 = ixy + Nxy * it;
 
 1107         int ix1 = Nvc2 * is2;
 
 1108         int ix2 = ix1 + m_Nvc;
 
 1110         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1112           int ici = 2 * ic + 1;
 
 1113           w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
 
 1114           w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
 
 1115           w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
 
 1116           w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
 
 1117           w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
 
 1118           w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
 
 1119           w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
 
 1120           w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
 
 1130   int itask, 
double *
v2, 
const double *
v1, 
int ieo)
 
 1132   int Nvcd = m_Nvc * 
m_Nd;
 
 1136   int id3 = m_Nvc * 2;
 
 1137   int id4 = m_Nvc * 3;
 
 1141   double vt1[m_Nvc], vt2[m_Nvc];
 
 1142   double wt1r, wt1i, wt2r, wt2i;
 
 1144   int isite = m_arg[itask].isite;
 
 1146   const double *w1 = &v1[Nvcd * isite];
 
 1147   double       *w2 = &v2[Nvcd * isite];
 
 1148   const double *u  = m_U->ptr(m_Ndf * (isite + (1 - ieo) * 
m_Nvol / 2 + idir * 
m_Nvol));
 
 1150   int kz0 = m_arg[itask].kz0;
 
 1151   int Nxy = m_Nx2 * m_Ny;
 
 1153   for (
int it = 0; it < m_Mt; ++it) {
 
 1154     for (
int iz = kz0; iz < m_Mz; ++iz) {
 
 1155       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1156         int is = ixy + Nxy * (iz + m_Nz * it);
 
 1158         int in = Nvcd * (is - Nxy);
 
 1159         int ig = m_Ndf * (is - Nxy);
 
 1161         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1162           vt1[2 * ic]     = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
 
 1163           vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
 
 1164           vt2[2 * ic]     = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
 
 1165           vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
 
 1168         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1170           wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
 
 1171           wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
 
 1172           wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
 
 1173           wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
 
 1175           w2[ic2 + id1 + iv]     += wt1r;
 
 1176           w2[ic2 + 1 + id1 + iv] += wt1i;
 
 1177           w2[ic2 + id2 + iv]     += wt2r;
 
 1178           w2[ic2 + 1 + id2 + iv] += wt2i;
 
 1179           w2[ic2 + id3 + iv]     += -wt1i;
 
 1180           w2[ic2 + 1 + id3 + iv] += wt1r;
 
 1181           w2[ic2 + id4 + iv]     += wt2i;
 
 1182           w2[ic2 + 1 + id4 + iv] += -wt2r;
 
 1192   int itask, 
double *vcp1, 
const double *
v1, 
int ieo)
 
 1194   int Nvc2  = 2 * m_Nvc;
 
 1195   int Nvcd  = m_Nvc * 
m_Nd;
 
 1196   int Nvcd2 = Nvcd / 2;
 
 1200   int id3 = m_Nvc * 2;
 
 1201   int id4 = m_Nvc * 3;
 
 1203   int isite    = m_arg[itask].isite;
 
 1204   int isite_cp = m_arg[itask].isite_cpt;
 
 1206   const double *w1 = &v1[Nvcd * isite];
 
 1207   double       *w2 = &vcp1[Nvcd2 * isite_cp];
 
 1210   double bc2  = m_boundary2[idir];
 
 1212   if (m_arg[itask].kt0 == 1) {
 
 1213     int Nxy = m_Nx2 * m_Ny;
 
 1215     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1216       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1217         int is  = ixy + Nxy * (iz + m_Nz * it);
 
 1218         int is2 = ixy + Nxy * iz;
 
 1221         int ix1 = Nvc2 * is2;
 
 1222         int ix2 = ix1 + m_Nvc;
 
 1224         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1225           w2[2 * ic + ix1]     = 2.0 * bc2 * w1[2 * ic + id3 + in];
 
 1226           w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
 
 1227           w2[2 * ic + ix2]     = 2.0 * bc2 * w1[2 * ic + id4 + in];
 
 1228           w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
 
 1238   int itask, 
double *
v2, 
const double *vcp2, 
int ieo)
 
 1240   int Nvc2  = 2 * m_Nvc;
 
 1241   int Nvcd  = m_Nvc * 
m_Nd;
 
 1242   int Nvcd2 = Nvcd / 2;
 
 1246   int id3 = m_Nvc * 2;
 
 1247   int id4 = m_Nvc * 3;
 
 1251   double wt1r, wt1i, wt2r, wt2i;
 
 1253   int isite    = m_arg[itask].isite;
 
 1254   int isite_cp = m_arg[itask].isite_cpt;
 
 1256   const double *w1 = &vcp2[Nvcd2 * isite_cp];
 
 1257   double       *w2 = &v2[Nvcd * isite];
 
 1258   const double *u  = m_U->ptr(m_Ndf * (isite + ieo * 
m_Nvol / 2 + idir * 
m_Nvol));
 
 1260   if (m_arg[itask].kt1 == 1) {
 
 1261     int Nxy = m_Nx2 * m_Ny;
 
 1263     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1264       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1265         int is  = ixy + Nxy * (iz + m_Nz * it);
 
 1266         int is2 = ixy + Nxy * iz;
 
 1268         int ig  = m_Ndf * is;
 
 1269         int ix1 = Nvc2 * is2;
 
 1270         int ix2 = ix1 + m_Nvc;
 
 1272         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1273           int ic2 = ic * m_Nvc;
 
 1275           wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
 
 1276           wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
 
 1277           wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
 
 1278           wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
 
 1280           w2[2 * ic + id3 + iv]     += wt1r;
 
 1281           w2[2 * ic + 1 + id3 + iv] += wt1i;
 
 1282           w2[2 * ic + id4 + iv]     += wt2r;
 
 1283           w2[2 * ic + 1 + id4 + iv] += wt2i;
 
 1293   int itask, 
double *
v2, 
const double *
v1, 
int ieo)
 
 1295   int Nvcd = m_Nvc * 
m_Nd;
 
 1299   int id3 = m_Nvc * 2;
 
 1300   int id4 = m_Nvc * 3;
 
 1304   double vt1[m_Nvc], vt2[m_Nvc];
 
 1305   double wt1r, wt1i, wt2r, wt2i;
 
 1307   int isite = m_arg[itask].isite;
 
 1309   const double *w1 = &v1[Nvcd * isite];
 
 1310   double       *w2 = &v2[Nvcd * isite];
 
 1311   const double *u  = m_U->ptr(m_Ndf * (isite + ieo * 
m_Nvol / 2 + idir * 
m_Nvol));
 
 1313   int kt1  = m_arg[itask].kt1;
 
 1314   int Nxy  = m_Nx2 * m_Ny;
 
 1315   int Nxyz = Nxy * m_Nz;
 
 1317   for (
int it = 0; it < m_Mt - kt1; ++it) {
 
 1318     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1319       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1320         int is = ixy + Nxy * (iz + m_Nz * it);
 
 1322         int in = Nvcd * (is + Nxyz);
 
 1323         int ig = m_Ndf * is;
 
 1325         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1326           vt1[2 * ic]     = 2.0 * w1[2 * ic + id3 + in];
 
 1327           vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
 
 1328           vt2[2 * ic]     = 2.0 * w1[2 * ic + id4 + in];
 
 1329           vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
 
 1332         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1333           int ic2 = ic * m_Nvc;
 
 1335           wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
 
 1336           wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
 
 1337           wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
 
 1338           wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
 
 1340           w2[2 * ic + id3 + iv]     += wt1r;
 
 1341           w2[2 * ic + 1 + id3 + iv] += wt1i;
 
 1342           w2[2 * ic + id4 + iv]     += wt2r;
 
 1343           w2[2 * ic + 1 + id4 + iv] += wt2i;
 
 1353   int itask, 
double *vcp1, 
const double *
v1, 
int ieo)
 
 1355   int Nvc2  = 2 * m_Nvc;
 
 1356   int Nvcd  = m_Nvc * 
m_Nd;
 
 1357   int Nvcd2 = Nvcd / 2;
 
 1366   int isite    = m_arg[itask].isite;
 
 1367   int isite_cp = m_arg[itask].isite_cpt;
 
 1369   const double *w1 = &v1[Nvcd * isite];
 
 1370   double       *w2 = &vcp1[Nvcd2 * isite_cp];
 
 1371   const double *u  = m_U->ptr(m_Ndf * (isite + (1 - ieo) * 
m_Nvol / 2 + idir * 
m_Nvol));
 
 1373   double vt1[m_Nvc], vt2[m_Nvc];
 
 1375   if (m_arg[itask].kt1 == 1) {
 
 1376     int Nxy = m_Nx2 * m_Ny;
 
 1378     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1379       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1380         int is  = ixy + Nxy * (iz + m_Nz * it);
 
 1381         int is2 = ixy + Nxy * iz;
 
 1383         int ig  = m_Ndf * is;
 
 1384         int ix1 = Nvc2 * is2;
 
 1385         int ix2 = ix1 + m_Nvc;
 
 1387         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1388           vt1[2 * ic]     = 2.0 * w1[2 * ic + id1 + in];
 
 1389           vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
 
 1390           vt2[2 * ic]     = 2.0 * w1[2 * ic + id2 + in];
 
 1391           vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
 
 1394         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1396           w2[icr + ix1]     = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
 
 1397           w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
 
 1398           w2[icr + ix2]     = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
 
 1399           w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
 
 1409   int itask, 
double *
v2, 
const double *vcp2, 
int ieo)
 
 1411   int Nvc2  = 2 * m_Nvc;
 
 1412   int Nvcd  = m_Nvc * 
m_Nd;
 
 1413   int Nvcd2 = Nvcd / 2;
 
 1421   double bc2  = m_boundary2[idir];
 
 1425   int isite    = m_arg[itask].isite;
 
 1426   int isite_cp = m_arg[itask].isite_cpt;
 
 1428   const double *w1 = &vcp2[Nvcd2 * isite_cp];
 
 1429   double       *w2 = &v2[Nvcd * isite];
 
 1431   if (m_arg[itask].kt0 == 1) {
 
 1432     int Nxy = m_Nx2 * m_Ny;
 
 1434     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1435       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1436         int is  = ixy + Nxy * (iz + m_Nz * it);
 
 1437         int is2 = ixy + Nxy * iz;
 
 1439         int ix1 = Nvc2 * is2;
 
 1440         int ix2 = ix1 + m_Nvc;
 
 1442         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1444           int ici = 2 * ic + 1;
 
 1445           w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
 
 1446           w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
 
 1447           w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
 
 1448           w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
 
 1458   int itask, 
double *
v2, 
const double *
v1, 
int ieo)
 
 1460   int Nvcd = m_Nvc * 
m_Nd;
 
 1469   double vt1[m_Nvc], vt2[m_Nvc];
 
 1470   double wt1r, wt1i, wt2r, wt2i;
 
 1472   int isite = m_arg[itask].isite;
 
 1474   const double *w1 = &v1[Nvcd * isite];
 
 1475   double       *w2 = &v2[Nvcd * isite];
 
 1476   const double *u  = m_U->ptr(m_Ndf * (isite + (1 - ieo) * 
m_Nvol / 2 + idir * 
m_Nvol));
 
 1478   int kt0  = m_arg[itask].kt0;
 
 1479   int Nxy  = m_Nx2 * m_Ny;
 
 1480   int Nxyz = Nxy * m_Nz;
 
 1482   for (
int it = kt0; it < m_Mt; ++it) {
 
 1483     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1484       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1485         int is = ixy + Nxy * (iz + m_Nz * it);
 
 1487         int in = Nvcd * (is - Nxyz);
 
 1488         int ig = m_Ndf * (is - Nxyz);
 
 1490         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1491           vt1[2 * ic]     = 2.0 * w1[2 * ic + id1 + in];
 
 1492           vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
 
 1493           vt2[2 * ic]     = 2.0 * w1[2 * ic + id2 + in];
 
 1494           vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
 
 1497         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1499           wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
 
 1500           wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
 
 1501           wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
 
 1502           wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
 
 1504           w2[ic2 + id1 + iv]     += wt1r;
 
 1505           w2[ic2 + 1 + id1 + iv] += wt1i;
 
 1506           w2[ic2 + id2 + iv]     += wt2r;
 
 1507           w2[ic2 + 1 + id2 + iv] += wt2i;
 
 1517   int itask, 
double *vcp1, 
const double *
v1, 
int ieo)
 
 1519   int Nvc2  = 2 * m_Nvc;
 
 1520   int Nvcd  = m_Nvc * 
m_Nd;
 
 1521   int Nvcd2 = Nvcd / 2;
 
 1525   int id3 = m_Nvc * 2;
 
 1526   int id4 = m_Nvc * 3;
 
 1528   int isite    = m_arg[itask].isite;
 
 1529   int isite_cp = m_arg[itask].isite_cpt;
 
 1531   const double *w1 = &v1[Nvcd * isite];
 
 1532   double       *w2 = &vcp1[Nvcd2 * isite_cp];
 
 1535   double bc2  = m_boundary2[idir];
 
 1537   if (m_arg[itask].kt0 == 1) {
 
 1538     int Nxy = m_Nx2 * m_Ny;
 
 1540     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1541       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1542         int is  = ixy + Nxy * (iz + m_Nz * it);
 
 1543         int is2 = ixy + Nxy * iz;
 
 1546         int ix1 = Nvc2 * is2;
 
 1547         int ix2 = ix1 + m_Nvc;
 
 1549         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1550           w2[2 * ic + ix1]     = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
 
 1551           w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
 
 1552           w2[2 * ic + ix2]     = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
 
 1553           w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
 
 1563   int itask, 
double *
v2, 
const double *vcp2, 
int ieo)
 
 1565   int Nvc2  = 2 * m_Nvc;
 
 1566   int Nvcd  = m_Nvc * 
m_Nd;
 
 1567   int Nvcd2 = Nvcd / 2;
 
 1571   int id3 = m_Nvc * 2;
 
 1572   int id4 = m_Nvc * 3;
 
 1576   double wt1r, wt1i, wt2r, wt2i;
 
 1578   int isite    = m_arg[itask].isite;
 
 1579   int isite_cp = m_arg[itask].isite_cpt;
 
 1581   const double *w1 = &vcp2[Nvcd2 * isite_cp];
 
 1582   double       *w2 = &v2[Nvcd * isite];
 
 1583   const double *u  = m_U->ptr(m_Ndf * (isite + ieo * 
m_Nvol / 2 + idir * 
m_Nvol));
 
 1585   if (m_arg[itask].kt1 == 1) {
 
 1586     int Nxy = m_Nx2 * m_Ny;
 
 1588     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1589       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1590         int is  = ixy + Nxy * (iz + m_Nz * it);
 
 1591         int is2 = ixy + Nxy * iz;
 
 1593         int ig  = m_Ndf * is;
 
 1594         int ix1 = Nvc2 * is2;
 
 1595         int ix2 = ix1 + m_Nvc;
 
 1597         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1598           int ic2 = ic * m_Nvc;
 
 1600           wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
 
 1601           wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
 
 1602           wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
 
 1603           wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
 
 1605           w2[2 * ic + id1 + iv]     += wt1r;
 
 1606           w2[2 * ic + 1 + id1 + iv] += wt1i;
 
 1607           w2[2 * ic + id2 + iv]     += wt2r;
 
 1608           w2[2 * ic + 1 + id2 + iv] += wt2i;
 
 1609           w2[2 * ic + id3 + iv]     += wt1r;
 
 1610           w2[2 * ic + 1 + id3 + iv] += wt1i;
 
 1611           w2[2 * ic + id4 + iv]     += wt2r;
 
 1612           w2[2 * ic + 1 + id4 + iv] += wt2i;
 
 1622   int itask, 
double *
v2, 
const double *
v1, 
int ieo)
 
 1624   int Nvcd = m_Nvc * 
m_Nd;
 
 1628   int id3 = m_Nvc * 2;
 
 1629   int id4 = m_Nvc * 3;
 
 1633   double vt1[m_Nvc], vt2[m_Nvc];
 
 1634   double wt1r, wt1i, wt2r, wt2i;
 
 1636   int isite = m_arg[itask].isite;
 
 1638   const double *w1 = &v1[Nvcd * isite];
 
 1639   double       *w2 = &v2[Nvcd * isite];
 
 1640   const double *u  = m_U->ptr(m_Ndf * (isite + ieo * 
m_Nvol / 2 + idir * 
m_Nvol));
 
 1642   int kt1  = m_arg[itask].kt1;
 
 1643   int Nxy  = m_Nx2 * m_Ny;
 
 1644   int Nxyz = Nxy * m_Nz;
 
 1646   for (
int it = 0; it < m_Mt - kt1; ++it) {
 
 1647     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1648       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1649         int is = ixy + Nxy * (iz + m_Nz * it);
 
 1651         int in = Nvcd * (is + Nxyz);
 
 1652         int ig = m_Ndf * is;
 
 1654         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1655           vt1[2 * ic]     = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
 
 1656           vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
 
 1657           vt2[2 * ic]     = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
 
 1658           vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
 
 1661         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1662           int ic2 = ic * m_Nvc;
 
 1664           wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
 
 1665           wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
 
 1666           wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
 
 1667           wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
 
 1669           w2[2 * ic + id1 + iv]     += wt1r;
 
 1670           w2[2 * ic + 1 + id1 + iv] += wt1i;
 
 1671           w2[2 * ic + id2 + iv]     += wt2r;
 
 1672           w2[2 * ic + 1 + id2 + iv] += wt2i;
 
 1673           w2[2 * ic + id3 + iv]     += wt1r;
 
 1674           w2[2 * ic + 1 + id3 + iv] += wt1i;
 
 1675           w2[2 * ic + id4 + iv]     += wt2r;
 
 1676           w2[2 * ic + 1 + id4 + iv] += wt2i;
 
 1686   int itask, 
double *vcp1, 
const double *
v1, 
int ieo)
 
 1688   int Nvc2  = 2 * m_Nvc;
 
 1689   int Nvcd  = m_Nvc * 
m_Nd;
 
 1690   int Nvcd2 = Nvcd / 2;
 
 1694   int id3 = m_Nvc * 2;
 
 1695   int id4 = m_Nvc * 3;
 
 1699   int isite    = m_arg[itask].isite;
 
 1700   int isite_cp = m_arg[itask].isite_cpt;
 
 1702   const double *w1 = &v1[Nvcd * isite];
 
 1703   double       *w2 = &vcp1[Nvcd2 * isite_cp];
 
 1704   const double *u  = m_U->ptr(m_Ndf * (isite + (1 - ieo) * 
m_Nvol / 2 + idir * 
m_Nvol));
 
 1706   double vt1[m_Nvc], vt2[m_Nvc];
 
 1708   if (m_arg[itask].kt1 == 1) {
 
 1709     int Nxy = m_Nx2 * m_Ny;
 
 1711     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1712       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1713         int is  = ixy + Nxy * (iz + m_Nz * it);
 
 1714         int is2 = ixy + Nxy * iz;
 
 1716         int ig  = m_Ndf * is;
 
 1717         int ix1 = Nvc2 * is2;
 
 1718         int ix2 = ix1 + m_Nvc;
 
 1720         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1721           vt1[2 * ic]     = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
 
 1722           vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
 
 1723           vt2[2 * ic]     = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
 
 1724           vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
 
 1727         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1729           w2[icr + ix1]     = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
 
 1730           w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
 
 1731           w2[icr + ix2]     = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
 
 1732           w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
 
 1742   int itask, 
double *
v2, 
const double *vcp2, 
int ieo)
 
 1744   int Nvc2  = 2 * m_Nvc;
 
 1745   int Nvcd  = m_Nvc * 
m_Nd;
 
 1746   int Nvcd2 = Nvcd / 2;
 
 1750   int id3 = m_Nvc * 2;
 
 1751   int id4 = m_Nvc * 3;
 
 1754   double bc2  = m_boundary2[idir];
 
 1758   int isite    = m_arg[itask].isite;
 
 1759   int isite_cp = m_arg[itask].isite_cpt;
 
 1761   const double *w1 = &vcp2[Nvcd2 * isite_cp];
 
 1762   double       *w2 = &v2[Nvcd * isite];
 
 1764   if (m_arg[itask].kt0 == 1) {
 
 1765     int Nxy = m_Nx2 * m_Ny;
 
 1767     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1768       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1769         int is  = ixy + Nxy * (iz + m_Nz * it);
 
 1770         int is2 = ixy + Nxy * iz;
 
 1772         int ix1 = Nvc2 * is2;
 
 1773         int ix2 = ix1 + m_Nvc;
 
 1775         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1777           int ici = 2 * ic + 1;
 
 1778           w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
 
 1779           w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
 
 1780           w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
 
 1781           w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
 
 1782           w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
 
 1783           w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
 
 1784           w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
 
 1785           w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
 
 1795   int itask, 
double *
v2, 
const double *
v1, 
int ieo)
 
 1797   int Nvcd = m_Nvc * 
m_Nd;
 
 1801   int id3 = m_Nvc * 2;
 
 1802   int id4 = m_Nvc * 3;
 
 1806   double vt1[m_Nvc], vt2[m_Nvc];
 
 1807   double wt1r, wt1i, wt2r, wt2i;
 
 1809   int isite = m_arg[itask].isite;
 
 1811   const double *w1 = &v1[Nvcd * isite];
 
 1812   double       *w2 = &v2[Nvcd * isite];
 
 1813   const double *u  = m_U->ptr(m_Ndf * (isite + (1 - ieo) * 
m_Nvol / 2 + idir * 
m_Nvol));
 
 1815   int kt0  = m_arg[itask].kt0;
 
 1816   int Nxy  = m_Nx2 * m_Ny;
 
 1817   int Nxyz = Nxy * m_Nz;
 
 1819   for (
int it = kt0; it < m_Mt; ++it) {
 
 1820     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1821       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1822         int is = ixy + Nxy * (iz + m_Nz * it);
 
 1824         int in = Nvcd * (is - Nxyz);
 
 1825         int ig = m_Ndf * (is - Nxyz);
 
 1827         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1828           vt1[2 * ic]     = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
 
 1829           vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
 
 1830           vt2[2 * ic]     = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
 
 1831           vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
 
 1834         for (
int ic = 0; ic < 
m_Nc; ++ic) {
 
 1836           wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
 
 1837           wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
 
 1838           wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
 
 1839           wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
 
 1841           w2[ic2 + id1 + iv]     += wt1r;
 
 1842           w2[ic2 + 1 + id1 + iv] += wt1i;
 
 1843           w2[ic2 + id2 + iv]     += wt2r;
 
 1844           w2[ic2 + 1 + id2 + iv] += wt2i;
 
 1845           w2[ic2 + id3 + iv]     -= wt1r;
 
 1846           w2[ic2 + 1 + id3 + iv] -= wt1i;
 
 1847           w2[ic2 + id4 + iv]     -= wt2r;
 
 1848           w2[ic2 + 1 + id4 + iv] -= wt2i;
 
 1858   int itask, 
double *
v2, 
const double *
v1)
 
 1860   int Nvcd = m_Nvc * 
m_Nd;
 
 1861   int Nxy  = m_Nx2 * m_Ny;
 
 1865   int id3 = m_Nvc * 2;
 
 1866   int id4 = m_Nvc * 3;
 
 1868   int isite = m_arg[itask].isite;
 
 1870   const double *w1 = &v1[Nvcd * isite];
 
 1871   double       *w2 = &v2[Nvcd * isite];
 
 1873   for (
int it = 0; it < m_Mt; ++it) {
 
 1874     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1875       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1876         int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
 
 1877         for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
 
 1878           w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
 
 1879           w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
 
 1880           w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
 
 1881           w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
 
 1891   int itask, 
double *
v2, 
const double *
v1)
 
 1893   int Nvcd = m_Nvc * 
m_Nd;
 
 1894   int Nxy  = m_Nx2 * m_Ny;
 
 1898   int id3 = m_Nvc * 2;
 
 1899   int id4 = m_Nvc * 3;
 
 1901   int isite = m_arg[itask].isite;
 
 1903   const double *w1 = &v1[Nvcd * isite];
 
 1904   double       *w2 = &v2[Nvcd * isite];
 
 1906   for (
int it = 0; it < m_Mt; ++it) {
 
 1907     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1908       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1909         int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
 
 1910         for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
 
 1911           w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
 
 1912           w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
 
 1913           w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
 
 1914           w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
 
 1926   int Nvcd = m_Nvc * 
m_Nd;
 
 1927   int Nxy  = m_Nx2 * m_Ny;
 
 1931   int id3 = m_Nvc * 2;
 
 1932   int id4 = m_Nvc * 3;
 
 1934   int    isite = m_arg[itask].isite;
 
 1935   double *w1   = &v1[Nvcd * isite];
 
 1937   for (
int it = 0; it < m_Mt; ++it) {
 
 1938     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1939       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1940         int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
 
 1941         for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
 
 1942           double wt1 = w1[ivc + id1 + iv];
 
 1943           double wt2 = w1[ivc + id2 + iv];
 
 1944           w1[ivc + id1 + iv] = w1[ivc + id3 + iv];
 
 1945           w1[ivc + id2 + iv] = w1[ivc + id4 + iv];
 
 1946           w1[ivc + id3 + iv] = wt1;
 
 1947           w1[ivc + id4 + iv] = wt2;
 
 1959   int Nvcd = m_Nvc * 
m_Nd;
 
 1960   int Nxy  = m_Nx2 * m_Ny;
 
 1964   int id3 = m_Nvc * 2;
 
 1965   int id4 = m_Nvc * 3;
 
 1967   int    isite = m_arg[itask].isite;
 
 1968   double *w1   = &v1[Nvcd * isite];
 
 1970   for (
int it = 0; it < m_Mt; ++it) {
 
 1971     for (
int iz = 0; iz < m_Mz; ++iz) {
 
 1972       for (
int ixy = 0; ixy < Nxy; ++ixy) {
 
 1973         int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
 
 1974         for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
 
 1975           w1[ivc + id3 + iv] = -w1[ivc + id3 + iv];
 
 1976           w1[ivc + id4 + iv] = -w1[ivc + id4 + iv];
 
void mult_xpb_thread(int, double *, const double *, int)
 
void mult_tpb_dirac_thread(int, double *, const double *, int)
 
void mult_tmb_chiral_thread(int, double *, const double *, int)
 
void mult_tp2_chiral_thread(int, double *, const double *, int)
 
void mult_ym1_thread(int, double *, const double *, int)
 
static const std::string class_name
 
void mult_ymb_thread(int, double *, const double *, int)
 
void mult_tmb_dirac_thread(int, double *, const double *, int)
 
void general(const char *format,...)
 
void mult_xm2_thread(int, double *, const double *, int)
 
void mult_zm2_thread(int, double *, const double *, int)
 
void mult_zm1_thread(int, double *, const double *, int)
 
void clear_thread(int, double *)
 
void mult_zp1_thread(int, double *, const double *, int)
 
void mult_tm2_chiral_thread(int, double *, const double *, int)
 
void mult_tp2_dirac_thread(int, double *, const double *, int)
 
void mult_ypb_thread(int, double *, const double *, int)
 
void mult_tm1_dirac_thread(int, double *, const double *, int)
 
void mult_tm2_dirac_thread(int, double *, const double *, int)
 
void mult_tp1_chiral_thread(int, double *, const double *, int)
 
void mult_yp1_thread(int, double *, const double *, int)
 
void mult_zmb_thread(int, double *, const double *, int)
 
static int get_num_threads_available()
returns number of threads (works outside of parallel region). 
 
void mult_zpb_thread(int, double *, const double *, int)
 
void mult_xm1_thread(int, double *, const double *, int)
 
void mult_yp2_thread(int, double *, const double *, int)
 
void crucial(const char *format,...)
 
void mult_tm1_chiral_thread(int, double *, const double *, int)
 
std::vector< mult_arg > m_arg
 
void mult_xmb_thread(int, double *, const double *, int)
 
void gm5_dirac_thread(int, double *, const double *)
 
void Meo(Field &, const Field &, const int ieo)
even-odd operatior: ieo=0: even <– odd, ieo=1: odd <– even 
 
void mult_tp1_dirac_thread(int, double *, const double *, int)
 
Bridge::VerboseLevel m_vl
 
void mult_zp2_thread(int, double *, const double *, int)
 
void mult_tpb_chiral_thread(int, double *, const double *, int)
 
void mult_xp2_thread(int, double *, const double *, int)
 
void scal_thread(int, double *, double)
 
void mult_ym2_thread(int, double *, const double *, int)
 
void gm5_chiral_thread(int, double *, const double *)
 
void mult_xp1_thread(int, double *, const double *, int)