20 #if defined USE_GROUP_SU3 
   21 #include "field_F_imp_SU3.inc" 
   22 #elif defined USE_GROUP_SU2 
   23 #include "field_F_imp_SU2.inc" 
   24 #elif defined USE_GROUP_SU_N 
   25 #include "field_F_imp_SU_N.inc" 
   40                    const Field_G& U, 
const int ex1,
 
   41                    const Field_F& x, 
const int ex2)
 
   44   assert(ex1 < U.
nex());
 
   45   assert(ex2 < x.
nex());
 
   49   const int Nd   = x.
nd();
 
   50   const int Nc   = x.
nc();
 
   51   const int Nc2  = x.
nc2();
 
   52   const int Ncd2 = Nc2 * Nd;
 
   53   const int Ndf  = Nc2 * Nc;
 
   60   int is = y.
nvol() * i_thread / Nthread;
 
   61   int ns = y.
nvol() * (i_thread + 1) / Nthread - is;
 
   63   double       *v = y.
ptr(0, is, ex);
 
   64   const double *g = U.
ptr(0, is, ex1);
 
   65   const double *w = x.
ptr(0, is, ex2);
 
   67   for (
int site = 0; site < ns; ++site) {
 
   70     for (
int s = 0; s < Nd; ++s) {
 
   71       for (
int ic = 0; ic < Nc; ++ic) {
 
   72         int ig2 = ic * Nc2 + ig;
 
   73         int iv2 = s * Nc2 + iv;
 
   74         v[2 * ic + iv2]     = mult_Gn_r(&g[ig2], &w[iv2], Nc);
 
   75         v[2 * ic + 1 + iv2] = mult_Gn_i(&g[ig2], &w[iv2], Nc);
 
   84                    const Field_G& U, 
const int ex1,
 
   85                    const Field_F& x, 
const int ex2)
 
   88   assert(ex1 < U.
nex());
 
   89   assert(ex2 < x.
nex());
 
   93   const int Nd   = x.
nd();
 
   94   const int Nc   = x.
nc();
 
   95   const int Nc2  = x.
nc2();
 
   96   const int Ncd2 = Nc2 * Nd;
 
   97   const int Ndf  = Nc2 * Nc;
 
  104   int is = y.
nvol() * i_thread / Nthread;
 
  105   int ns = y.
nvol() * (i_thread + 1) / Nthread - is;
 
  107   double       *v = y.
ptr(0, is, ex);
 
  108   const double *g = U.
ptr(0, is, ex1);
 
  109   const double *w = x.
ptr(0, is, ex2);
 
  111   for (
int site = 0; site < ns; ++site) {
 
  113     int iv = Ncd2 * site;
 
  114     for (
int s = 0; s < Nd; ++s) {
 
  115       for (
int ic = 0; ic < Nc; ++ic) {
 
  116         int ig2 = ic * 2 + ig;
 
  117         int iv2 = s * Nc2 + iv;
 
  118         v[2 * ic + iv2]     = mult_Gd_r(&g[ig2], &w[iv2], Nc);
 
  119         v[2 * ic + 1 + iv2] = mult_Gd_i(&g[ig2], &w[iv2], Nc);
 
  128                       const Field_G& U, 
const int ex1,
 
  129                       const Field_F& x, 
const int ex2,
 
  132   assert(ex < y.
nex());
 
  133   assert(ex1 < U.
nex());
 
  134   assert(ex2 < x.
nex());
 
  138   const int Nd   = x.
nd();
 
  139   const int Nc   = x.
nc();
 
  140   const int Nc2  = x.
nc2();
 
  141   const int Ncd2 = Nc2 * Nd;
 
  142   const int Ndf  = Nc2 * Nc;
 
  149   int is = y.
nvol() * i_thread / Nthread;
 
  150   int ns = y.
nvol() * (i_thread + 1) / Nthread - is;
 
  152   double       *v = y.
ptr(0, is, ex);
 
  153   const double *g = U.
ptr(0, is, ex1);
 
  154   const double *w = x.
ptr(0, is, ex2);
 
  156   for (
int site = 0; site < ns; ++site) {
 
  158     int iv = Ncd2 * site;
 
  159     for (
int s = 0; s < Nd; ++s) {
 
  160       for (
int ic = 0; ic < Nc; ++ic) {
 
  161         int ig2 = ic * Nc2 + ig;
 
  162         int iv2 = s * Nc2 + iv;
 
  163         v[2 * ic + iv2]     += a * mult_Gn_r(&g[ig2], &w[iv2], Nc);
 
  164         v[2 * ic + 1 + iv2] += a * mult_Gn_i(&g[ig2], &w[iv2], Nc);
 
  173                       const Field_G& U, 
const int ex1,
 
  174                       const Field_F& x, 
const int ex2,
 
  177   assert(ex < y.
nex());
 
  178   assert(ex1 < U.
nex());
 
  179   assert(ex2 < x.
nex());
 
  183   const int Nd   = x.
nd();
 
  184   const int Nc   = x.
nc();
 
  185   const int Nc2  = x.
nc2();
 
  186   const int Ncd2 = Nc2 * Nd;
 
  187   const int Ndf  = Nc2 * Nc;
 
  194   int is = y.
nvol() * i_thread / Nthread;
 
  195   int ns = y.
nvol() * (i_thread + 1) / Nthread - is;
 
  197   double       *v = y.
ptr(0, is, ex);
 
  198   const double *g = U.
ptr(0, is, ex1);
 
  199   const double *w = x.
ptr(0, is, ex2);
 
  201   for (
int site = 0; site < ns; ++site) {
 
  203     int iv = Ncd2 * site;
 
  204     for (
int s = 0; s < Nd; ++s) {
 
  205       for (
int ic = 0; ic < Nc; ++ic) {
 
  206         int ig2 = ic * 2 + ig;
 
  207         int iv2 = s * Nc2 + iv;
 
  208         v[2 * ic + iv2]     += a * mult_Gd_r(&g[ig2], &w[iv2], Nc);
 
  209         v[2 * ic + 1 + iv2] += a * mult_Gd_i(&g[ig2], &w[iv2], Nc);
 
  219   assert(x.
nex() == y.
nex());
 
  222   const int Nd   = x.
nd();
 
  223   const int Nc   = x.
nc();
 
  224   const int Nc2  = x.
nc2();
 
  225   const int Ncd2 = Nc2 * Nd;
 
  233   for (
int s = 0; s < Nd; ++s) {
 
  238     idc_i[s] = 1 - idc_r[s];
 
  246   int is = y.
nvol() * i_thread / Nthread;
 
  247   int ns = y.
nvol() * (i_thread + 1) / Nthread - is;
 
  249   const int Nex = y.
nex();
 
  251   for (
int ex = 0; ex < Nex; ++ex) {
 
  252     double       *v = y.
ptr(0, is, ex);
 
  253     const double *w = x.
ptr(0, is, ex);
 
  255     for (
int site = 0; site < ns; ++site) {
 
  256       int iv = Ncd2 * site;
 
  258       for (
int s = 0; s < Nd; ++s) {
 
  259         int iv2 = s * Nc2 + iv;
 
  260         int iw2 = 
id[s] * Nc2 + iv;
 
  262         for (
int ic = 0; ic < Nc; ++ic) {
 
  263           v[2 * ic + iv2]     = gv_r[s] * w[2 * ic + idc_r[s] + iw2];
 
  264           v[2 * ic + 1 + iv2] = gv_i[s] * w[2 * ic + idc_i[s] + iw2];
 
  275   assert(x.
nex() == y.
nex());
 
  278   const int Nd   = x.
nd();
 
  279   const int Nc   = x.
nc();
 
  280   const int Nc2  = x.
nc2();
 
  281   const int Ncd2 = Nc2 * Nd;
 
  289   for (
int s = 0; s < Nd; ++s) {
 
  294     idc_i[s] = 1 - idc_r[s];
 
  302   int is = y.
nvol() * i_thread / Nthread;
 
  303   int ns = y.
nvol() * (i_thread + 1) / Nthread - is;
 
  305   const int Nex = y.
nex();
 
  307   for (
int ex = 0; ex < Nex; ++ex) {
 
  308     double       *v = y.
ptr(0, is, ex);
 
  309     const double *w = x.
ptr(0, is, ex);
 
  311     for (
int site = 0; site < ns; ++site) {
 
  312       int iv = Ncd2 * site;
 
  314       for (
int s = 0; s < Nd; ++s) {
 
  315         int iv2 = s * Nc2 + iv;
 
  316         int iw2 = 
id[s] * Nc2 + iv;
 
  317         for (
int ic = 0; ic < Nc; ++ic) {
 
  318           v[2 * ic + iv2]     = gv_r[s] * w[2 * ic + idc_r[s] + iw2];
 
  319           v[2 * ic + 1 + iv2] = gv_i[s] * w[2 * ic + idc_i[s] + iw2];
 
  333   assert(w.
nex() == y.
nex());
 
  340   } 
else if (pm == -1) {
 
  344     vout.
crucial(
"Error at %s: wrong pm.\n", __func__);
 
  356   assert(w.
nex() == y.
nex());
 
  362   } 
else if (pm == -1) {
 
  366     vout.
crucial(
"Error at %s: wrong pm.\n", __func__);
 
  381   assert(w.
nex() == y.
nex());
 
  388   } 
else if (pm == -1) {
 
  392     vout.
crucial(
"Error at %s: wrong pm = %d\n", __func__, pm);
 
void scal(Field &x, const double a)
scal(x, a): x = a * x 
 
void mult_iGM(Field_F &y, const GammaMatrix &gm, const Field_F &x)
gamma matrix multiplication (i is multiplied) 
 
const double * ptr(const int jin, const int site, const int jex) const 
 
void check()
check several assumptions for performance implementation. 
 
void mult_GM(Field_F &y, const GammaMatrix &gm, const Field_F &x)
gamma matrix multiplication 
 
Wilson-type fermion field. 
 
double value_r(int row) const 
 
void axpy(Field &y, const double a, const Field &x)
axpy(y, a, x): y := a * x + y 
 
void crucial(const char *format,...)
 
void multadd_Field_Gn(Field_F &y, const int ex, const Field_G &U, const int ex1, const Field_F &x, const int ex2, const double a)
 
void mult_Field_Gd(Field_F &y, const int ex, const Field_G &U, const int ex1, const Field_F &x, const int ex2)
 
void mult_GMproj2(Field_F &y, const int pm, const GammaMatrix &gm, const Field_F &w)
projection with gamma matrix: (1  gamma) 
 
int index_c(int row) const 
 
double value_i(int row) const 
 
void mult_GMproj(Field_F &y, const int pm, const GammaMatrix &gm, const Field_F &w)
projection with gamma matrix: (1  gamma)/2 
 
void mult_Field_Gn(Field_F &y, const int ex, const Field_G &U, const int ex1, const Field_F &x, const int ex2)
 
void multadd_Field_Gd(Field_F &y, const int ex, const Field_G &U, const int ex1, const Field_F &x, const int ex2, const double a)