27 void set_threadtask(
int& i_thread,
int& Nthread,
int& is,
int& ns,
33 is = size * i_thread / Nthread;
34 ns = size * (i_thread + 1) / Nthread;
48 const double *yp = y.
ptr(0);
49 const double *xp = x.
ptr(0);
52 int i_thread, Nthread, is, ns;
54 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
58 for (
int k = is; k < ns; ++k) {
68 double dot(
const Field& y,
const int exy,
const Field& x,
const int exx)
70 assert(x.
nin() == y.
nin());
73 const double *yp = y.
ptr(0, 0, exy);
74 const double *xp = x.
ptr(0, 0, exx);
77 int i_thread, Nthread, is, ns;
78 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
82 for (
int k = is; k < ns; ++k) {
94 const double *yp = y.
ptr(0);
95 const double *xp = x.
ptr(0);
100 int i_thread, Nthread, is, ns;
101 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
109 for (
int k = is; k < ns; k += 2) {
110 prd_r += yp[k] * xp[k] + yp[k + 1] * xp[k + 1];
111 prd_i += yp[k] * xp[k + 1] - yp[k + 1] * xp[k];
116 return cmplx(prd_r, prd_i);
119 return cmplx(
dot(y, x), 0.0);
121 vout.
crucial(
"%s: unsupported arg types.\n", __func__);
124 return cmplx(0.0, 0.0);
132 const double *yp = y.
ptr(0, 0, exy);
133 const double *xp = x.
ptr(0, 0, exx);
135 assert(x.
nin() == y.
nin());
139 int i_thread, Nthread, is, ns;
140 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
147 for (
int k = is; k < ns; k += 2) {
148 prd_r += yp[k] * xp[k] + yp[k + 1] * xp[k + 1];
149 prd_i += yp[k] * xp[k + 1] - yp[k + 1] * xp[k];
154 return cmplx(prd_r, prd_i);
157 return cmplx(
dot(y, exy, x, exx), 0.0);
159 vout.
crucial(
"%s: unsupported arg types.\n", __func__);
162 return cmplx(0.0, 0.0);
170 double *yp = y.
ptr(0);
171 const double *xp = x.
ptr(0);
174 int i_thread, Nthread, is, ns;
176 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
178 for (
int k = is; k < ns; ++k) {
185 void axpy(
Field& y,
const int exy,
const double a,
const Field& x,
const int exx)
187 assert(x.
nin() == y.
nin());
190 double *yp = y.
ptr(0, 0, exy);
191 const double *xp = x.
ptr(0, 0, exx);
194 int i_thread, Nthread, is, ns;
195 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
197 for (
int k = is; k < ns; ++k) {
206 if (imag(a) == 0.0) {
207 return axpy(y, real(a), x);
210 vout.
crucial(
"%s: real vector and complex parameter.\n", __func__);
213 return axpy(y, real(a), x);
216 double *yp = y.
ptr(0);
217 const double *xp = x.
ptr(0);
222 int i_thread, Nthread, is, ns;
223 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
229 for (
int k = is; k < ns; k += 2) {
230 yp[k] += ar * xp[k] - ai * xp[k + 1];
231 yp[k + 1] += ar * xp[k + 1] + ai * xp[k];
236 vout.
crucial(
"%s: unsupported types.\n", __func__);
243 void axpy(
Field& y,
const int exy,
const dcomplex a,
const Field& x,
const int exx)
245 if (imag(a) == 0.0) {
246 return axpy(y, exy, real(a), x, exx);
249 vout.
crucial(
"%s: real vector and complex parameter.\n", __func__);
252 return axpy(y, real(a), x);
255 double *yp = y.
ptr(0, 0, exy);
256 const double *xp = x.
ptr(0, 0, exx);
258 assert(x.
nin() == y.
nin());
262 int i_thread, Nthread, is, ns;
263 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
268 for (
int k = is; k < ns; k += 2) {
269 yp[k] += ar * xp[k] - ai * xp[k + 1];
270 yp[k + 1] += ar * xp[k + 1] + ai * xp[k];
275 vout.
crucial(
"%s: unsupported types.\n", __func__);
286 double *xp = x.
ptr(0, 0, 0);
287 int i_thread, Nthread, is, ns;
289 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
291 for (
int k = is; k < ns; ++k) {
300 double *xp = x.
ptr(0, 0, exx);
302 int i_thread, Nthread, is, ns;
304 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
307 for (
int k = is; k < ns; ++k) {
318 vout.
crucial(
"%s: real vector and complex parameter.\n", __func__);
324 double *xp = x.
ptr(0);
326 int i_thread, Nthread, is, ns;
327 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
333 for (
int k = is; k < ns; k += 2) {
335 double xi = xp[k + 1];
337 xp[k] = ar * xr - ai * xi;
338 xp[k + 1] = ar * xi + ai * xr;
341 vout.
crucial(
"%s: unsupported field type.\n", __func__);
351 vout.
crucial(
"%s: real vector and complex parameter.\n", __func__);
357 double *xp = x.
ptr(0, 0, exx);
359 int i_thread, Nthread, is, ns;
360 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
366 for (
int k = is; k < ns; k += 2) {
368 double xi = xp[k + 1];
370 xp[k] = ar * xr - ai * xi;
371 xp[k + 1] = ar * xi + ai * xr;
374 vout.
crucial(
"%s: unsupported field type.\n", __func__);
385 assert(x.
nin() == y.
nin());
387 assert(x.
nex() == y.
nex());
389 double *yp = y.
ptr(0, 0, 0);
390 const double *xp = x.
ptr(0, 0, 0);
392 int i_thread, Nthread, is, ns;
393 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
395 for (
int k = is; k < ns; ++k) {
404 assert(x.
nin() == y.
nin());
407 double *yp = y.
ptr(0, 0, exy);
408 const double *xp = x.
ptr(0, 0, exx);
411 int i_thread, Nthread, is, ns;
412 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
414 for (
int k = is; k < ns; ++k) {
428 int i_thread, Nthread, is, ns;
430 set_threadtask(i_thread, Nthread, is, ns,
ntot());
432 double *yp = this->
ptr(0);
434 for (
int k = is; k < ns; ++k) {
443 int i_thread, Nthread, is, ns;
445 set_threadtask(i_thread, Nthread, is, ns,
ntot());
447 const double *yp = this->
ptr(0);
451 for (
int k = is; k < ns; ++k) {
463 int i_thread, Nthread, is, ns;
465 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
467 double *yp = y.
ptr(0);
468 const double *xp = x.
ptr(0);
470 for (
int k = is; k < ns; ++k) {
471 yp[k] = a * yp[k] + xp[k];
479 if (imag(a) == 0.0) {
480 return aypx(real(a), y, x);
483 vout.
crucial(
"%s: real vector and complex parameter.\n", __func__);
486 return aypx(real(a), y, x);
489 double *yp = y.
ptr(0);
490 const double *xp = x.
ptr(0);
494 int i_thread, Nthread, is, ns;
495 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
500 for (
int k = is; k < ns; k += 2) {
502 double ypi = yp[k + 1];
503 yp[k] = ar * ypr - ai * ypi + xp[k];
504 yp[k + 1] = ar * ypi + ai * ypr + xp[k + 1];
509 vout.
crucial(
"%s: unsupported types.\n", __func__);
524 for (
int ex = 0, nnex =
nex(); ex < nnex; ++ex) {
525 for (
int site = 0, nnvol =
nvol(); site < nnvol; ++site) {
527 for (
int in = 0, nnin =
nin(); in < nnin; ++in) {
534 if (fst > Fmax) Fmax = fst;
545 double fval = sum2 * perdeg;
553 const std::string& msg,
const Field& f)
557 double favg, fmax, fdev;
558 f.
stat(favg, fmax, fdev);
561 " %s: avg = %12.6f max = %12.6f dev = %12.6f\n",
void scal(Field &x, const double a)
scal(x, a): x = a * x
void report_field_stat(const Bridge::VerboseLevel vl, const std::string &msg, const Field &f)
static int get_num_threads()
returns available number of threads.
static int reduce_max(int count, double *recv_buf, double *send_buf, int pattern=0)
find a global maximum of an array of double over the communicator. pattern specifies the dimensions t...
const double * ptr(const int jin, const int site, const int jex) const
void set(const int jin, const int site, const int jex, double v)
double dot(const Field &y, const Field &x)
void general(const char *format,...)
Container of Field-type object.
void copy(Field &y, const Field &x)
copy(y, x): y = x
static int get_thread_id()
returns thread id.
static void reduce_sum_global(double &value, const int i_thread, const int Nthread)
global reduction with summation: value is assumed thread local.
dcomplex dotc(const Field &y, const Field &x)
void aypx(const double a, Field &y, const Field &x)
aypx(y, a, x): y := a * y + x
std::valarray< double > field
void axpy(Field &y, const double a, const Field &x)
axpy(y, a, x): y := a * x + y
void crucial(const char *format,...)
element_type field_element_type() const
void stat(double &Fave, double &Fmax, double &Fdev) const
determines the statistics of the field. average, maximum value, and deviation is determined over glob...
static int reduce_sum(int count, double *recv_buf, double *send_buf, int pattern=0)
make a global sum of an array of double over the communicator. pattern specifies the dimensions to be...
int myindex(const int jin, const int site, const int jex) const