27 void set_threadtask(
int& ith,
int& nth,
int& is,
int& ns,
33 is = size * ith / nth;
34 ns = size * (ith + 1) / nth;
48 double *yp =
const_cast<Field *
>(&y)->ptr(0);
49 double *xp =
const_cast<Field *
>(&x)->ptr(0);
54 set_threadtask(ith, nth, is, ns, x.
ntot());
59 for (
int k = is; k < ns; ++k) {
71 double dot(
const Field& y,
const int exy,
const Field& x,
const int exx)
73 assert(x.
nin() == y.
nin());
76 double *yp =
const_cast<Field *
>(&y)->ptr(0, 0, exy);
77 double *xp =
const_cast<Field *
>(&x)->ptr(0, 0, exx);
81 set_threadtask(ith, nth, is, ns, x.
ntot());
86 for (
int k = is; k < ns; ++k) {
100 double *yp =
const_cast<Field *
>(&y)->ptr(0);
101 double *xp =
const_cast<Field *
>(&x)->ptr(0);
106 int ith, nth, is, ns;
107 set_threadtask(ith, nth, is, ns, x.
ntot());
115 for (
int k = is; k < ns; k += 2) {
116 prdr += yp[k] * xp[k] + yp[k + 1] * xp[k + 1];
117 prdi += yp[k] * xp[k + 1] - yp[k + 1] * xp[k];
132 return cmplx(prdr, prdi);
135 return cmplx(
dot(y, x), 0.0);
137 vout.
crucial(
"%s: unsupported arg types.\n", __func__);
139 return cmplx(0.0, 0.0);
147 double *yp =
const_cast<Field *
>(&y)->ptr(0, 0, exy);
148 double *xp =
const_cast<Field *
>(&x)->ptr(0, 0, exx);
150 assert(x.
nin() == y.
nin());
154 int ith, nth, is, ns;
155 set_threadtask(ith, nth, is, ns, x.
nin() * x.
nvol());
163 for (
int k = is; k < ns; k += 2) {
164 prdr += yp[k] * xp[k] + yp[k + 1] * xp[k + 1];
165 prdi += yp[k] * xp[k + 1] - yp[k + 1] * xp[k];
180 return cmplx(prdr, prdi);
183 return cmplx(
dot(y, exy, x, exx), 0.0);
185 vout.
crucial(
"%s: unsupported arg types.\n", __func__);
187 return cmplx(0.0, 0.0);
195 double *yp =
const_cast<Field *
>(&y)->ptr(0);
196 double *xp =
const_cast<Field *
>(&x)->ptr(0);
199 int ith, nth, is, ns;
201 set_threadtask(ith, nth, is, ns, x.
ntot());
204 for (
int k = is; k < ns; ++k) {
211 void axpy(
Field& y,
const int exy,
const double a,
const Field& x,
const int exx)
213 assert(x.
nin() == y.
nin());
216 double *yp =
const_cast<Field *
>(&y)->ptr(0, 0, exy);
217 double *xp =
const_cast<Field *
>(&x)->ptr(0, 0, exx);
220 int ith, nth, is, ns;
221 set_threadtask(ith, nth, is, ns, x.
nin() * x.
nvol());
224 for (
int k = is; k < ns; ++k) {
233 if (imag(a) == 0.0) {
234 return axpy(y, real(a), x);
237 vout.
crucial(
"%s: real vector and complex parameter.\n", __func__);
240 return axpy(y, real(a), x);
243 double *yp =
const_cast<Field *
>(&y)->ptr(0);
244 double *xp =
const_cast<Field *
>(&x)->ptr(0);
249 int ith, nth, is, ns;
250 set_threadtask(ith, nth, is, ns, x.
ntot());
256 for (
int k = is; k < ns; k += 2) {
257 yp[k] += ar * xp[k] - ai * xp[k + 1];
258 yp[k + 1] += ar * xp[k + 1] + ai * xp[k];
263 vout.
crucial(
"%s: unsupported types.\n", __func__);
270 void axpy(
Field& y,
const int exy,
const dcomplex a,
const Field& x,
const int exx)
272 if (imag(a) == 0.0) {
273 return axpy(y, exy, real(a), x, exx);
276 vout.
crucial(
"%s: real vector and complex parameter.\n", __func__);
279 return axpy(y, real(a), x);
282 double *yp =
const_cast<Field *
>(&y)->ptr(0, 0, exy);
283 double *xp =
const_cast<Field *
>(&x)->ptr(0, 0, exx);
285 assert(x.
nin() == y.
nin());
289 int ith, nth, is, ns;
290 set_threadtask(ith, nth, is, ns, x.
nin() * x.
nvol());
296 for (
int k = is; k < ns; k += 2) {
297 yp[k] += ar * xp[k] - ai * xp[k + 1];
298 yp[k + 1] += ar * xp[k + 1] + ai * xp[k];
303 vout.
crucial(
"%s: unsupported types.\n", __func__);
314 double *xp =
const_cast<Field *
>(&x)->ptr(0, 0, 0);
315 int ith, nth, is, ns;
317 set_threadtask(ith, nth, is, ns, x.
ntot());
319 for (
int k = is; k < ns; ++k) {
328 double *xp =
const_cast<Field *
>(&x)->ptr(0, 0, exx);
330 int ith, nth, is, ns;
332 set_threadtask(ith, nth, is, ns, x.
nin() * x.
nvol());
335 for (
int k = is; k < ns; ++k) {
346 vout.
crucial(
"%s: real vector and complex parameter.\n", __func__);
352 double *xp =
const_cast<Field *
>(&x)->ptr(0);
354 int ith, nth, is, ns;
355 set_threadtask(ith, nth, is, ns, x.
ntot());
361 for (
int k = is; k < ns; k += 2) {
363 double xi = xp[k + 1];
365 xp[k] = ar * xr - ai * xi;
366 xp[k + 1] = ar * xi + ai * xr;
369 vout.
crucial(
"%s: unsupported field type.\n", __func__);
379 vout.
crucial(
"%s: real vector and complex parameter.\n", __func__);
385 double *xp =
const_cast<Field *
>(&x)->ptr(0, 0, exx);
387 int ith, nth, is, ns;
388 set_threadtask(ith, nth, is, ns, x.
nin() * x.
nvol());
394 for (
int k = is; k < ns; k += 2) {
396 double xi = xp[k + 1];
398 xp[k] = ar * xr - ai * xi;
399 xp[k + 1] = ar * xi + ai * xr;
402 vout.
crucial(
"%s: unsupported field type.\n", __func__);
413 assert(x.
nin() == y.
nin());
415 assert(x.
nex() == y.
nex());
417 double *yp =
const_cast<Field *
>(&y)->ptr(0, 0, 0);
418 double *xp =
const_cast<Field *
>(&x)->ptr(0, 0, 0);
420 int ith, nth, is, ns;
421 set_threadtask(ith, nth, is, ns, x.
ntot());
423 for (
int k = is; k < ns; ++k) {
432 assert(x.
nin() == y.
nin());
435 double *yp =
const_cast<Field *
>(&y)->ptr(0, 0, exy);
436 double *xp =
const_cast<Field *
>(&x)->ptr(0, 0, exx);
439 int ith, nth, is, ns;
440 set_threadtask(ith, nth, is, ns, x.
nin() * x.
nvol());
442 for (
int k = is; k < ns; ++k) {
456 int ith, nth, is, ns;
458 set_threadtask(ith, nth, is, ns,
ntot());
460 double *yp =
const_cast<Field *
>(
this)->
ptr(0);
462 for (
int k = is; k < ns; ++k) {
471 int ith, nth, is, ns;
473 set_threadtask(ith, nth, is, ns,
ntot());
475 double *yp =
const_cast<Field *
>(
this)->
ptr(0);
479 for (
int k = is; k < ns; ++k) {
491 int ith, nth, is, ns;
493 set_threadtask(ith, nth, is, ns, x.
ntot());
495 double *yp =
const_cast<Field *
>(&y)->ptr(0);
496 double *xp =
const_cast<Field *
>(&x)->ptr(0);
498 for (
int k = is; k < ns; ++k) {
499 yp[k] = a * yp[k] + xp[k];
507 if (imag(a) == 0.0) {
508 return aypx(real(a), y, x);
511 vout.
crucial(
"%s: real vector and complex parameter.\n", __func__);
514 return aypx(real(a), y, x);
517 double *yp =
const_cast<Field *
>(&y)->ptr(0);
518 double *xp =
const_cast<Field *
>(&x)->ptr(0);
522 int ith, nth, is, ns;
523 set_threadtask(ith, nth, is, ns, x.
ntot());
528 for (
int k = is; k < ns; k += 2) {
530 double ypi = yp[k + 1];
531 yp[k] = ar * ypr - ai * ypi + xp[k];
532 yp[k + 1] = ar * ypi + ai * ypr + xp[k + 1];
537 vout.
crucial(
"%s: unsupported types.\n", __func__);
552 for (
int ex = 0, nnex =
nex(); ex < nnex; ++ex) {
553 for (
int site = 0, nnvol =
nvol(); site < nnvol; ++site) {
555 for (
int in = 0, nnin =
nin(); in < nnin; ++in) {
562 if (fst > Fmax) Fmax = fst;
573 double fval = sum2 * perdeg;
581 const std::string& msg,
const Field& f)
585 double favg, fmax, fdev;
586 f.
stat(favg, fmax, fdev);
589 " %s: avg = %12.6f max = %12.6f dev = %12.6f\n",
void scal(Field &x, const double a)
scal(x, a): x = a * x
void report_field_stat(const Bridge::VerboseLevel vl, const std::string &msg, const Field &f)
static int get_num_threads()
returns available number of threads.
static int reduce_max(int count, double *recv_buf, double *send_buf, int pattern=0)
find a global maximum of an array of double over the communicator. pattern specifies the dimensions t...
void set(const int jin, const int site, const int jex, double v)
double dot(const Field &y, const Field &x)
void general(const char *format,...)
double * ptr(const int jin, const int site, const int jex)
Container of Field-type object.
void copy(Field &y, const Field &x)
copy(y, x): y = x
static void reduce_sum_global(double &value, const int ith, const int nth)
global reduction with summation: value is assumed thread local.
static int get_thread_id()
returns thread id.
dcomplex dotc(const Field &y, const Field &x)
void aypx(const double a, Field &y, const Field &x)
aypx(y, a, x): y := a * y + x
std::valarray< double > field
void axpy(Field &y, const double a, const Field &x)
axpy(y, a, x): y := a * x + y
void crucial(const char *format,...)
element_type field_element_type() const
void stat(double &Fave, double &Fmax, double &Fdev) const
determines the statistics of the field. average, maximum value, and deviation is determined over glob...
static int reduce_sum(int count, double *recv_buf, double *send_buf, int pattern=0)
make a global sum of an array of double over the communicator. pattern specifies the dimensions to be...
int myindex(const int jin, const int site, const int jex) const