27 void set_threadtask(
int& i_thread,
int& Nthread,
int& is,
int& ns,
33 is =
static_cast<long_t>(size) * i_thread / Nthread;
34 ns =
static_cast<long_t>(size) * (i_thread + 1) / Nthread;
48 const double *yp = y.
ptr(0);
49 const double *xp = x.
ptr(0);
52 int i_thread, Nthread, is, ns;
54 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
58 for (
int k = is; k < ns; ++k) {
68 double dot(
const Field& y,
const int exy,
const Field& x,
const int exx)
70 assert(x.
nin() == y.
nin());
73 const double *yp = y.
ptr(0, 0, exy);
74 const double *xp = x.
ptr(0, 0, exx);
77 int i_thread, Nthread, is, ns;
78 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
82 for (
int k = is; k < ns; ++k) {
94 assert(x.
nin() == y.
nin());
97 const double *yp = y.
ptr(0, 0, exy);
98 const double *xp = x.
ptr(0, 0, exx);
101 int i_thread, Nthread;
103 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
109 for (
int k = is; k < ns; ++k) {
110 sum_yx += yp[k] * xp[k];
111 sum_x2 += xp[k] * xp[k];
112 sum_y2 += yp[k] * yp[k];
115 double prd[3]={sum_yx, sum_x2, sum_y2};
125 assert(x.
nin() == y.
nin());
126 assert(x.
nex() == y.
nex());
129 const double *yp = y.
ptr(0);
130 const double *xp = x.
ptr(0);
132 int i_thread, Nthread;
134 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
140 for (
int k = is; k < ns; ++k) {
141 sum_yx += yp[k] * xp[k];
142 sum_x2 += xp[k] * xp[k];
143 sum_y2 += yp[k] * yp[k];
146 double prd[3]={sum_yx, sum_x2, sum_y2};
157 const double *yp = y.
ptr(0);
158 const double *xp = x.
ptr(0);
162 int i_thread, Nthread;
164 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
171 for (
int k = is; k < ns; k += 2) {
172 prd_r += yp[k] * xp[k] + yp[k + 1] * xp[k + 1];
173 prd_i += yp[k] * xp[k + 1] - yp[k + 1] * xp[k];
175 double prd[2]={prd_r, prd_i};
177 return cmplx(prd[0], prd[1]);
181 return cmplx(
dot(y, x), 0.0);
183 vout.
crucial(
"Error at %s: unsupported arg types.\n", __func__);
186 return cmplx(0.0, 0.0);
194 const double *yp = y.
ptr(0, 0, exy);
195 const double *xp = x.
ptr(0, 0, exx);
197 assert(x.
nin() == y.
nin());
201 int i_thread, Nthread;
203 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
210 for (
int k = is; k < ns; k += 2) {
211 prd_r += yp[k] * xp[k] + yp[k + 1] * xp[k + 1];
212 prd_i += yp[k] * xp[k + 1] - yp[k + 1] * xp[k];
214 double prd[2]={prd_r, prd_i};
216 return cmplx(prd[0], prd[1]);
220 return cmplx(
dot(y, exy, x, exx), 0.0);
222 vout.
crucial(
"Error at %s: unsupported arg types.\n", __func__);
225 return cmplx(0.0, 0.0);
230 const Field& y,
const int exy,
const Field& x,
const int exx)
232 const double *yp = y.
ptr(0, 0, exy);
233 const double *xp = x.
ptr(0, 0, exx);
235 assert(x.
nin() == y.
nin());
238 int i_thread, Nthread;
240 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
249 for (
int k = is; k < ns; k += 2) {
250 prd_r += yp[k] * xp[k] + yp[k + 1] * xp[k + 1];
251 prd_i += yp[k] * xp[k + 1] - yp[k + 1] * xp[k];
252 prd_x2 += xp[k] * xp[k] + xp[k + 1] * xp[k + 1];
253 prd_y2 += yp[k] * yp[k] + yp[k + 1] * yp[k + 1];
255 double prd[4]={prd_r, prd_i, prd_x2, prd_y2};
257 yx=cmplx(prd[0], prd[1]);
264 yx=cmplx(yx_re, 0.0);
266 vout.
crucial(
"Error at %s: unsupported arg types.\n", __func__);
277 const double *yp = y.
ptr(0);
278 const double *xp = x.
ptr(0);
280 assert(x.
nin() == y.
nin());
283 int i_thread, Nthread;
285 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
294 for (
int k = is; k < ns; k += 2) {
295 prd_r += yp[k] * xp[k] + yp[k + 1] * xp[k + 1];
296 prd_i += yp[k] * xp[k + 1] - yp[k + 1] * xp[k];
297 prd_x2 += xp[k] * xp[k] + xp[k + 1] * xp[k + 1];
298 prd_y2 += yp[k] * yp[k] + yp[k + 1] * yp[k + 1];
300 double prd[4]={prd_r, prd_i, prd_x2, prd_y2};
302 yx=cmplx(prd[0], prd[1]);
309 yx=cmplx(yx_re, 0.0);
311 vout.
crucial(
"Error at %s: unsupported arg types.\n", __func__);
321 double *yp = y.
ptr(0);
322 const double *xp = x.
ptr(0);
325 int i_thread, Nthread, is, ns;
327 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
329 for (
int k = is; k < ns; ++k) {
336 void axpy(
Field& y,
const int exy,
const double a,
const Field& x,
const int exx)
338 assert(x.
nin() == y.
nin());
341 double *yp = y.
ptr(0, 0, exy);
342 const double *xp = x.
ptr(0, 0, exx);
345 int i_thread, Nthread, is, ns;
346 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
348 for (
int k = is; k < ns; ++k) {
357 if (imag(a) == 0.0) {
358 return axpy(y, real(a), x);
361 vout.
crucial(
"Error at %s: real vector and complex parameter.\n", __func__);
364 return axpy(y, real(a), x);
367 double *yp = y.
ptr(0);
368 const double *xp = x.
ptr(0);
373 int i_thread, Nthread, is, ns;
374 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
380 for (
int k = is; k < ns; k += 2) {
381 yp[k] += ar * xp[k] - ai * xp[k + 1];
382 yp[k + 1] += ar * xp[k + 1] + ai * xp[k];
387 vout.
crucial(
"Error at %s: unsupported types.\n", __func__);
394 void axpy(
Field& y,
const int exy,
const dcomplex a,
const Field& x,
const int exx)
396 if (imag(a) == 0.0) {
397 return axpy(y, exy, real(a), x, exx);
400 vout.
crucial(
"Error at %s: real vector and complex parameter.\n", __func__);
403 return axpy(y, real(a), x);
406 double *yp = y.
ptr(0, 0, exy);
407 const double *xp = x.
ptr(0, 0, exx);
409 assert(x.
nin() == y.
nin());
413 int i_thread, Nthread, is, ns;
414 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
419 for (
int k = is; k < ns; k += 2) {
420 yp[k] += ar * xp[k] - ai * xp[k + 1];
421 yp[k + 1] += ar * xp[k + 1] + ai * xp[k];
426 vout.
crucial(
"Error at %s: unsupported types.\n", __func__);
437 double *xp = x.
ptr(0, 0, 0);
438 int i_thread, Nthread, is, ns;
440 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
442 for (
int k = is; k < ns; ++k) {
451 double *xp = x.
ptr(0, 0, exx);
453 int i_thread, Nthread, is, ns;
455 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
458 for (
int k = is; k < ns; ++k) {
469 vout.
crucial(
"Error at %s: real vector and complex parameter.\n", __func__);
475 double *xp = x.
ptr(0);
477 int i_thread, Nthread, is, ns;
478 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
484 for (
int k = is; k < ns; k += 2) {
486 double xi = xp[k + 1];
488 xp[k] = ar * xr - ai * xi;
489 xp[k + 1] = ar * xi + ai * xr;
492 vout.
crucial(
"Error at %s: unsupported field type.\n", __func__);
502 vout.
crucial(
"Error at %s: real vector and complex parameter.\n", __func__);
508 double *xp = x.
ptr(0, 0, exx);
510 int i_thread, Nthread, is, ns;
511 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
517 for (
int k = is; k < ns; k += 2) {
519 double xi = xp[k + 1];
521 xp[k] = ar * xr - ai * xi;
522 xp[k + 1] = ar * xi + ai * xr;
525 vout.
crucial(
"Error ar %s: unsupported field type.\n", __func__);
536 assert(x.
nin() == y.
nin());
538 assert(x.
nex() == y.
nex());
540 double *yp = y.
ptr(0, 0, 0);
541 const double *xp = x.
ptr(0, 0, 0);
543 int i_thread, Nthread, is, ns;
544 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
546 for (
int k = is; k < ns; ++k) {
555 assert(x.
nin() == y.
nin());
558 double *yp = y.
ptr(0, 0, exy);
559 const double *xp = x.
ptr(0, 0, exx);
562 int i_thread, Nthread, is, ns;
563 set_threadtask(i_thread, Nthread, is, ns, x.
nin() * x.
nvol());
565 for (
int k = is; k < ns; ++k) {
579 int i_thread, Nthread, is, ns;
581 set_threadtask(i_thread, Nthread, is, ns,
ntot());
583 double *yp = this->
ptr(0);
585 for (
int k = is; k < ns; ++k) {
594 int i_thread, Nthread, is, ns;
596 set_threadtask(i_thread, Nthread, is, ns,
ntot());
598 const double *yp = this->
ptr(0);
602 for (
int k = is; k < ns; ++k) {
614 int i_thread, Nthread, is, ns;
616 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
618 double *yp = y.
ptr(0);
619 const double *xp = x.
ptr(0);
621 for (
int k = is; k < ns; ++k) {
622 yp[k] = a * yp[k] + xp[k];
630 if (imag(a) == 0.0) {
631 return aypx(real(a), y, x);
634 vout.
crucial(
"Error at %s: real vector and complex parameter.\n", __func__);
637 return aypx(real(a), y, x);
640 double *yp = y.
ptr(0);
641 const double *xp = x.
ptr(0);
645 int i_thread, Nthread, is, ns;
646 set_threadtask(i_thread, Nthread, is, ns, x.
ntot());
651 for (
int k = is; k < ns; k += 2) {
653 double ypi = yp[k + 1];
654 yp[k] = ar * ypr - ai * ypi + xp[k];
655 yp[k + 1] = ar * ypi + ai * ypr + xp[k + 1];
660 vout.
crucial(
"Error at %s: unsupported types.\n", __func__);
675 for (
int ex = 0, nnex =
nex(); ex < nnex; ++ex) {
676 for (
int site = 0, nnvol =
nvol(); site < nnvol; ++site) {
678 for (
int in = 0, nnin =
nin(); in < nnin; ++in) {
685 if (fst > Fmax) Fmax = fst;
696 double fval = sum2 * perdeg;
704 const std::string& msg,
const Field& f)
708 double favg, fmax, fdev;
709 f.
stat(favg, fmax, fdev);
712 " %s: avg = %12.6f max = %12.6f dev = %12.6f\n",
void scal(Field &x, const double a)
scal(x, a): x = a * x
void report_field_stat(const Bridge::VerboseLevel vl, const std::string &msg, const Field &f)
static int get_num_threads()
returns available number of threads.
static int reduce_max(int count, double *recv_buf, double *send_buf, int pattern=0)
find a global maximum of an array of double over the communicator. pattern specifies the dimensions t...
const double * ptr(const int jin, const int site, const int jex) const
size_t myindex(const int jin, const int site, const int jex) const
void set(const int jin, const int site, const int jex, double v)
double dot(const Field &y, const Field &x)
void dotc_and_norm2(dcomplex &yx, double &y2, double &x2, const Field &y, const int exy, const Field &x, const int exx)
void general(const char *format,...)
Container of Field-type object.
void copy(Field &y, const Field &x)
copy(y, x): y = x
static int get_thread_id()
returns thread id.
static void reduce_sum_global(double &value, const int i_thread, const int Nthread)
global reduction with summation: value is assumed thread local.
dcomplex dotc(const Field &y, const Field &x)
void aypx(const double a, Field &y, const Field &x)
aypx(y, a, x): y := a * y + x
std::valarray< double > field
long long_t
definition of long for Bridge++
void axpy(Field &y, const double a, const Field &x)
axpy(y, a, x): y := a * x + y
void crucial(const char *format,...)
element_type field_element_type() const
void stat(double &Fave, double &Fmax, double &Fdev) const
determines the statistics of the field. average, maximum value, and deviation is determined over glob...
static int reduce_sum(int count, double *recv_buf, double *send_buf, int pattern=0)
make a global sum of an array of double over the communicator. pattern specifies the dimensions to be...
void dot_and_norm2(double &yx, double &y2, double &x2, const Field &y, const int exy, const Field &x, const int exx)