Bridge++  Ver. 2.0.2
field.cpp
Go to the documentation of this file.
1 
14 #include "Field/field.h"
15 
16 #include <cstring>
17 using std::string;
18 
20 #include "IO/bridgeIO.h"
21 using Bridge::vout;
22 
23 #include "Field/field_thread-inc.h"
24 
25 const std::string Field::class_name = "Field";
26 
27 //====================================================================
29 {
30  // ThreadManager::assert_single_thread(class_name);
31  // vout.general("Field was constructed.\n");
32 }
33 
34 
35 //====================================================================
36 void Field::set(double a)
37 {
38  int ith, nth, is, ns;
39  set_threadtask(ith, nth, is, ns, m_Nvol);
40 
41  double *yp = this->ptr(0);
42 
43  for (int ex = 0; ex < m_Nex; ++ex) {
44  for (int site = is; site < ns; ++site) {
45  int kv = m_Nin * (site + m_Nvol * ex);
46  for (int in = 0; in < m_Nin; ++in) {
47  yp[in + kv] = a;
48  }
49  }
50  }
51 }
52 
53 
54 //====================================================================
55 void Field::setc(const double a)
56 {
58  set(a);
59  } else if (m_element_type == Element_type::COMPLEX) {
60  dcomplex c = cmplx(a, 0.0);
61  setc(c);
62  } else {
63  vout.crucial("Error at %s: unsupported arg types.\n", __func__);
64  exit(EXIT_FAILURE);
65  }
66 }
67 
68 
69 //====================================================================
70 void Field::setc(const dcomplex a)
71 {
72  if (imag(a) == 0.0) {
73  return set(real(a));
74  } else if (m_element_type == Element_type::COMPLEX) {
75  double *yp = this->ptr(0);
76  double ar = real(a);
77  double ai = imag(a);
78  int Nin2 = m_Nin / 2;
79 
80  int ith, nth, is, ns;
81  set_threadtask(ith, nth, is, ns, m_Nvol);
82 
83  for (int ex = 0; ex < m_Nex; ++ex) {
84  for (int site = is; site < ns; ++site) {
85  int kv = m_Nin * (site + m_Nvol * ex);
86  for (int k = 0; k < Nin2; ++k) {
87  int kr = 2 * k;
88  int ki = 2 * k + 1;
89  yp[kr + kv] = ar;
90  yp[ki + kv] = ai;
91  }
92  }
93  }
94  } else if (m_element_type == Element_type::REAL) {
95  double ar = real(a);
96  double ai = imag(a);
97 
98  if (fabs(ai) < fabs(ar) * CommonParameters::epsilon_criterion()) {
99  return set(real(a));
100  } else {
101  vout.crucial("Error at %s: real vector and complex parameter.\n",
102  __func__);
103  exit(EXIT_FAILURE);
104  }
105  } else {
106  vout.crucial("Error at %s: unsupported arg types.\n", __func__);
107  exit(EXIT_FAILURE);
108  }
109 }
110 
111 
112 //====================================================================
113 double Field::norm2() const
114 {
115  const double *yp = this->ptr(0);
116 
117  int ith, nth, is, ns;
118  set_threadtask(ith, nth, is, ns, m_Nvol);
119 
120  double a = 0.0;
121 
122  for (int ex = 0; ex < m_Nex; ++ex) {
123  for (int site = is; site < ns; ++site) {
124  int kv = m_Nin * (site + m_Nvol * ex);
125  for (int in = 0; in < m_Nin; ++in) {
126  a += yp[in + kv] * yp[in + kv];
127  }
128  }
129  }
130 
132 
133  return a;
134 }
135 
136 
137 //====================================================================
138 void Field::xI()
139 {
141  vout.crucial("Error at %s: xI() is not available for real field.\n",
142  class_name.c_str());
143  exit(EXIT_FAILURE);
144  }
145 
146  double *yp = this->ptr(0);
147  int Nin2 = m_Nin / 2;
148 
149  int ith, nth, is, ns;
150  set_threadtask(ith, nth, is, ns, m_Nvol);
151 
152  for (int ex = 0; ex < m_Nex; ++ex) {
153  for (int site = is; site < ns; ++site) {
154  int kv = m_Nin * (site + m_Nvol * ex);
155  for (int k = 0; k < Nin2; ++k) {
156  int kr = 2 * k;
157  int ki = 2 * k + 1;
158  double yr = yp[kr + kv];
159  double yi = yp[ki + kv];
160  yp[kr + kv] = -yi;
161  yp[ki + kv] = yr;
162  }
163  }
164  }
165 }
166 
167 
168 //====================================================================
169 void Field::stat(double& Fave, double& Fmax, double& Fdev) const
170 {
171 #pragma omp barrier
172 
173  double sum = 0.0;
174  double sum2 = 0.0;
175  double vmax = 0.0;
176 
177  const double *yp = this->ptr(0);
178 
179  int ith, nth, is, ns;
180  set_threadtask(ith, nth, is, ns, m_Nvol);
181 
182  vmax = 0.0;
183  for (int ex = 0; ex < m_Nex; ++ex) {
184  for (int site = is; site < ns; ++site) {
185  double fst = 0.0;
186  for (int in = 0; in < m_Nin; ++in) {
187  double fv = yp[myindex(in, site, ex)];
188  fst += fv * fv;
189  }
190  sum2 += fst;
191  fst = sqrt(fst);
192  sum += fst;
193  if (fst > vmax) vmax = fst;
194  }
195  }
196 
197  ThreadManager::reduce_sum_global(sum, ith, nth);
198  ThreadManager::reduce_sum_global(sum2, ith, nth);
199  ThreadManager::reduce_max_global(vmax, ith, nth);
200 
201  double vfac = double(m_Nvol) * double(m_Nex)
202  * double(CommonParameters::NPE());
203  Fave = sum / vfac;
204  Fdev = sqrt(sum2 / vfac - Fave * Fave);
205  Fmax = vmax;
206 
207 #pragma omp barrier
208 }
209 
210 
211 //====================================================================
212 void copy(Field& y, const Field& x)
213 {
214  int Nin = y.nin();
215  int Nvol = y.nvol();
216  int Nex = y.nex();
217  assert(x.check_size(Nin, Nvol, Nex));
218 
219  double *yp = y.ptr(0);
220  const double *xp = x.ptr(0);
221 
222  int ith, nth, is, ns;
223  set_threadtask(ith, nth, is, ns, Nvol);
224 
225  for (int ex = 0; ex < Nex; ++ex) {
226  for (int site = is; site < ns; ++site) {
227  int kv = Nin * (site + Nvol * ex);
228  for (int in = 0; in < Nin; ++in) {
229  yp[in + kv] = xp[in + kv];
230  }
231  }
232  }
233 }
234 
235 
236 //====================================================================
237 void copy(Field& y, const int exy, const Field& x, const int exx)
238 {
239  int Nin = y.nin();
240  int Nvol = y.nvol();
241 
242  assert(x.nin() == Nin);
243  assert(x.nvol() == Nvol);
244 
245  double *yp = y.ptr(0, 0, exy);
246  const double *xp = x.ptr(0, 0, exx);
247 
248  int ith, nth, is, ns;
249  set_threadtask(ith, nth, is, ns, Nvol);
250 
251  for (int site = is; site < ns; ++site) {
252  int kv = Nin * site;
253  for (int in = 0; in < Nin; ++in) {
254  yp[in + kv] = xp[in + kv];
255  }
256  }
257 }
258 
259 
260 //====================================================================
261 void scal(Field& x, const double a)
262 {
263  int Nin = x.nin();
264  int Nvol = x.nvol();
265  int Nex = x.nex();
266 
267  double *xp = x.ptr(0);
268 
269  int ith, nth, is, ns;
270  set_threadtask(ith, nth, is, ns, Nvol);
271 
272  for (int ex = 0; ex < Nex; ++ex) {
273  for (int site = is; site < ns; ++site) {
274  int kv = Nin * (site + Nvol * ex);
275  for (int in = 0; in < Nin; ++in) {
276  xp[in + kv] *= a;
277  }
278  }
279  }
280 }
281 
282 
283 //====================================================================
284 void scal(Field& x, const int exx, const double a)
285 {
286  int Nin = x.nin();
287  int Nvol = x.nvol();
288 
289  double *xp = x.ptr(0, 0, exx);
290 
291  int ith, nth, is, ns;
292  set_threadtask(ith, nth, is, ns, Nvol);
293 
294  for (int site = is; site < ns; ++site) {
295  int kv = Nin * site;
296  for (int in = 0; in < Nin; ++in) {
297  xp[in + kv] *= a;
298  }
299  }
300 }
301 
302 
303 //====================================================================
304 void scal(Field& x, const dcomplex a)
305 {
306  if (imag(a) == 0.0) {
307  return scal(x, real(a));
308  } else if (x.field_element_type() == Element_type::COMPLEX) {
309  int Nin = x.nin();
310  int Nvol = x.nvol();
311  int Nex = x.nex();
312 
313  int ith, nth, is, ns;
314  set_threadtask(ith, nth, is, ns, Nvol);
315 
316  double *xp = x.ptr(0);
317  double ar = real(a);
318  double ai = imag(a);
319  int Nin2 = Nin / 2;
320 
321  for (int ex = 0; ex < Nex; ++ex) {
322  for (int site = is; site < ns; ++site) {
323  int kv = Nin * (site + Nvol * ex);
324  for (int k = 0; k < Nin2; ++k) {
325  int kr = 2 * k;
326  int ki = 2 * k + 1;
327  double xr = xp[kr + kv];
328  double xi = xp[ki + kv];
329  xp[kr + kv] = ar * xr - ai * xi;
330  xp[ki + kv] = ar * xi + ai * xr;
331  }
332  }
333  }
334  } else {
335  vout.crucial("Error at %s: real vector and complex parameter.\n",
336  __func__);
337  exit(EXIT_FAILURE);
338  }
339 }
340 
341 
342 //====================================================================
343 void scal(Field& x, const int exx, const dcomplex a)
344 {
345  if (imag(a) == 0.0) {
346  return scal(x, exx, real(a));
347  } else if (x.field_element_type() == Element_type::COMPLEX) {
348  int Nin = x.nin();
349  int Nvol = x.nvol();
350  int Nex = x.nex();
351 
352  int ith, nth, is, ns;
353  set_threadtask(ith, nth, is, ns, Nvol);
354 
355  double *xp = x.ptr(0, 0, exx);
356  double ar = real(a);
357  double ai = imag(a);
358  int Nin2 = Nin / 2;
359 
360  for (int site = is; site < ns; ++site) {
361  int kv = Nin * site;
362  for (int k = 0; k < Nin2; ++k) {
363  int kr = 2 * k;
364  int ki = 2 * k + 1;
365  double xr = xp[kr + kv];
366  double xi = xp[ki + kv];
367  xp[kr + kv] = ar * xr - ai * xi;
368  xp[ki + kv] = ar * xi + ai * xr;
369  }
370  }
371  } else {
372  vout.crucial("Error at %s: real vector and complex parameter.\n",
373  __func__);
374  exit(EXIT_FAILURE);
375  }
376 }
377 
378 
379 //====================================================================
380 void axpy(Field& y, const double a, const Field& x)
381 {
382  int Nin = y.nin();
383  int Nvol = y.nvol();
384  int Nex = y.nex();
385  assert(x.check_size(Nin, Nvol, Nex));
386 
387  double *yp = y.ptr(0);
388  const double *xp = x.ptr(0);
389 
390  int ith, nth, is, ns;
391  set_threadtask(ith, nth, is, ns, Nvol);
392 
393  for (int ex = 0; ex < Nex; ++ex) {
394  for (int site = is; site < ns; ++site) {
395  int kv = Nin * (site + Nvol * ex);
396  for (int in = 0; in < Nin; ++in) {
397  yp[in + kv] += a * xp[in + kv];
398  }
399  }
400  }
401 }
402 
403 
404 //====================================================================
405 void axpy(Field& y, const int exy,
406  const double a, const Field& x, const int exx)
407 {
408  assert(x.nin() == y.nin());
409  assert(x.nvol() == y.nvol());
410 
411  int Nin = y.nin();
412  int Nvol = y.nvol();
413 
414  double *yp = y.ptr(0, 0, exy);
415  const double *xp = x.ptr(0, 0, exx);
416 
417  int ith, nth, is, ns;
418  set_threadtask(ith, nth, is, ns, Nvol);
419 
420  for (int site = is; site < ns; ++site) {
421  int kv = Nin * site;
422  for (int in = 0; in < Nin; ++in) {
423  yp[in + kv] += a * xp[in + kv];
424  }
425  }
426 }
427 
428 
429 //====================================================================
430 void axpy(Field& y, const dcomplex a, const Field& x)
431 {
432  if (imag(a) == 0.0) {
433  return axpy(y, real(a), x);
434  } else if ((y.field_element_type() == Element_type::COMPLEX) &&
436  int Nin = y.nin();
437  int Nvol = y.nvol();
438  int Nex = y.nex();
439  assert(x.check_size(Nin, Nvol, Nex));
440 
441  double *yp = y.ptr(0);
442  const double *xp = x.ptr(0);
443 
444  int ith, nth, is, ns;
445  set_threadtask(ith, nth, is, ns, Nvol);
446 
447  double ar = real(a);
448  double ai = imag(a);
449  int Nin2 = Nin / 2;
450 
451  for (int ex = 0; ex < Nex; ++ex) {
452  for (int site = is; site < ns; ++site) {
453  int kv = Nin * (site + Nvol * ex);
454  for (int k = 0; k < Nin2; ++k) {
455  int kr = 2 * k;
456  int ki = 2 * k + 1;
457  yp[kr + kv] += ar * xp[kr + kv] - ai * xp[ki + kv];
458  yp[ki + kv] += ar * xp[ki + kv] + ai * xp[kr + kv];
459  }
460  }
461  }
462  } else {
463  vout.crucial("Error at %s: unsupported types.\n", __func__);
464  exit(EXIT_FAILURE);
465  }
466 }
467 
468 
469 //====================================================================
470 void axpy(Field& y, const int exy,
471  const dcomplex a, const Field& x, const int exx)
472 {
473  if (imag(a) == 0.0) {
474  return axpy(y, exy, real(a), x, exx);
475  } else if ((y.field_element_type() == Element_type::COMPLEX) &&
477  int Nin = y.nin();
478  int Nvol = y.nvol();
479  assert(x.nin() == Nin);
480  assert(x.nvol() == Nvol);
481 
482  double *yp = y.ptr(0, 0, exy);
483  const double *xp = x.ptr(0, 0, exx);
484 
485  int ith, nth, is, ns;
486  set_threadtask(ith, nth, is, ns, Nvol);
487 
488  double ar = real(a);
489  double ai = imag(a);
490  int Nin2 = Nin / 2;
491 
492  for (int site = is; site < ns; ++site) {
493  int kv = Nin * site;
494  for (int k = 0; k < Nin2; ++k) {
495  int kr = 2 * k;
496  int ki = 2 * k + 1;
497  yp[kr + kv] += ar * xp[kr + kv] - ai * xp[ki + kv];
498  yp[ki + kv] += ar * xp[ki + kv] + ai * xp[kr + kv];
499  }
500  }
501  } else {
502  vout.crucial("Error at %s: unsupported types.\n", __func__);
503  exit(EXIT_FAILURE);
504  }
505 }
506 
507 
508 //====================================================================
509 void aypx(const double a, Field& y, const Field& x)
510 {
511  int Nin = y.nin();
512  int Nvol = y.nvol();
513  int Nex = y.nex();
514  assert(x.check_size(Nin, Nvol, Nex));
515 
516  double *yp = y.ptr(0);
517  const double *xp = x.ptr(0);
518 
519  int ith, nth, is, ns;
520  set_threadtask(ith, nth, is, ns, Nvol);
521 
522  for (int ex = 0; ex < Nex; ++ex) {
523  for (int site = is; site < ns; ++site) {
524  int kv = Nin * (site + Nvol * ex);
525  for (int in = 0; in < Nin; ++in) {
526  yp[in + kv] = a * yp[in + kv] + xp[in + kv];
527  }
528  }
529  }
530 }
531 
532 
533 //====================================================================
534 void aypx(const dcomplex a, Field& y, const Field& x)
535 {
536  if (imag(a) == 0.0) {
537  return aypx(real(a), y, x);
538  } else if ((y.field_element_type() == Element_type::COMPLEX) &&
540  int Nin = y.nin();
541  int Nvol = y.nvol();
542  int Nex = y.nex();
543  assert(x.check_size(Nin, Nvol, Nex));
544 
545  double *yp = y.ptr(0);
546  const double *xp = x.ptr(0);
547 
548  int ith, nth, is, ns;
549  set_threadtask(ith, nth, is, ns, Nvol);
550 
551  double ar = real(a);
552  double ai = imag(a);
553  int Nin2 = Nin / 2;
554 
555  for (int ex = 0; ex < Nex; ++ex) {
556  for (int site = is; site < ns; ++site) {
557  int kv = Nin * (site + Nvol * ex);
558  for (int k = 0; k < Nin2; ++k) {
559  int kr = 2 * k;
560  int ki = 2 * k + 1;
561  double yr = yp[kr + kv];
562  double yi = yp[ki + kv];
563  yp[kr + kv] = ar * yr - ai * yi + xp[kr + kv];
564  yp[ki + kv] = ar * yi + ai * yr + xp[ki + kv];
565  }
566  }
567  }
568  } else {
569  vout.crucial("Error at %s: unsupported types.\n", __func__);
570  exit(EXIT_FAILURE);
571  }
572 }
573 
574 
575 //====================================================================
576 double dot(const Field& y, const Field& x)
577 {
578  int Nin = y.nin();
579  int Nvol = y.nvol();
580  int Nex = y.nex();
581  assert(x.check_size(Nin, Nvol, Nex));
582 
583  const double *yp = y.ptr(0);
584  const double *xp = x.ptr(0);
585 
586  int ith, nth, is, ns;
587  set_threadtask(ith, nth, is, ns, Nvol);
588 
589  double a = 0.0;
590 
591  for (int ex = 0; ex < Nex; ++ex) {
592  for (int site = is; site < ns; ++site) {
593  int kv = Nin * (site + Nvol * ex);
594  for (int in = 0; in < Nin; ++in) {
595  a += yp[in + kv] * xp[in + kv];
596  }
597  }
598  }
599 
601 
602  return a;
603 }
604 
605 
606 //====================================================================
607 double dot(const Field& y, const int exy, const Field& x, const int exx)
608 {
609  assert(x.nin() == y.nin());
610  assert(x.nvol() == y.nvol());
611 
612  int Nin = y.nin();
613  int Nvol = y.nvol();
614 
615  const double *yp = y.ptr(0, 0, exy);
616  const double *xp = x.ptr(0, 0, exx);
617 
618  int ith, nth, is, ns;
619  set_threadtask(ith, nth, is, ns, Nvol);
620 
621  double a = 0.0;
622 
623  for (int site = is; site < ns; ++site) {
624  int kv = Nin * site;
625  for (int in = 0; in < Nin; ++in) {
626  a += yp[in + kv] * xp[in + kv];
627  }
628  }
629 
631 
632  return a;
633 }
634 
635 
636 //====================================================================
637 void dot_and_norm2(double& yx, double& y2, double& x2,
638  const Field& y, const int exy,
639  const Field& x, const int exx)
640 {
641  int Nin = y.nin();
642  int Nvol = y.nvol();
643  assert(x.nin() == Nin);
644  assert(x.nvol() == Nvol);
645 
646  const double *yp = y.ptr(0, 0, exy);
647  const double *xp = x.ptr(0, 0, exx);
648 
649  int ith, nth, is, ns;
650  set_threadtask(ith, nth, is, ns, Nvol);
651 
652  double sum_yx = 0.0;
653  double sum_x2 = 0.0;
654  double sum_y2 = 0.0;
655 
656  for (int site = is; site < ns; ++site) {
657  int kv = Nin * site;
658  for (int in = 0; in < Nin; ++in) {
659  sum_yx += yp[in + kv] * xp[in + kv];
660  sum_x2 += xp[in + kv] * xp[in + kv];
661  sum_y2 += yp[in + kv] * yp[in + kv];
662  }
663  }
664 
665  double prd[3] = { sum_yx, sum_x2, sum_y2 };
666  ThreadManager::reduce_sum_global(prd, 3, ith, nth);
667  yx = prd[0];
668  y2 = prd[1];
669  x2 = prd[2];
670 }
671 
672 
673 //====================================================================
674 void dot_and_norm2(double& yx, double& y2, double& x2,
675  const Field& y, const Field& x)
676 {
677  int Nin = y.nin();
678  int Nvol = y.nvol();
679  int Nex = y.nex();
680  assert(x.check_size(Nin, Nvol, Nex));
681 
682  const double *yp = y.ptr(0);
683  const double *xp = x.ptr(0);
684 
685  int ith, nth, is, ns;
686  set_threadtask(ith, nth, is, ns, Nvol);
687 
688  double sum_yx = 0.0;
689  double sum_x2 = 0.0;
690  double sum_y2 = 0.0;
691 
692  for (int ex = 0; ex < Nex; ++ex) {
693  for (int site = is; site < ns; ++site) {
694  int kv = Nin * (site + Nvol * ex);
695  for (int in = 0; in < Nin; ++in) {
696  sum_yx += yp[in + kv] * xp[in + kv];
697  sum_x2 += xp[in + kv] * xp[in + kv];
698  sum_y2 += yp[in + kv] * yp[in + kv];
699  }
700  }
701  }
702 
703  double prd[3] = { sum_yx, sum_x2, sum_y2 };
704  ThreadManager::reduce_sum_global(prd, 3, ith, nth);
705  yx = prd[0];
706  y2 = prd[1];
707  x2 = prd[2];
708 }
709 
710 
711 //====================================================================
712 dcomplex dotc(const Field& y, const Field& x)
713 {
714  int Nin = y.nin();
715  int Nvol = y.nvol();
716  int Nex = y.nex();
717  assert(x.check_size(Nin, Nvol, Nex));
718 
719  const double *yp = y.ptr(0);
720  const double *xp = x.ptr(0);
721 
722  int ith, nth, is, ns;
723  set_threadtask(ith, nth, is, ns, Nvol);
724 
727  double prdr = 0.0;
728  double prdi = 0.0;
729  int Nin2 = Nin / 2;
730 
731  for (int ex = 0; ex < Nex; ++ex) {
732  for (int site = is; site < ns; ++site) {
733  int kv = Nin * (site + Nvol * ex);
734  for (int k = 0; k < Nin2; ++k) {
735  int kr = 2 * k;
736  int ki = 2 * k + 1;
737  prdr += yp[kr + kv] * xp[kr + kv] + yp[ki + kv] * xp[ki + kv];
738  prdi += yp[kr + kv] * xp[ki + kv] - yp[ki + kv] * xp[kr + kv];
739  }
740  }
741  }
742 
743  double prd[2] = { prdr, prdi };
744  ThreadManager::reduce_sum_global(prd, 2, ith, nth);
745 
746  return cmplx(prd[0], prd[1]);
747  } else if ((y.field_element_type() == Element_type::REAL) &&
749  return cmplx(dot(y, x), 0.0);
750  } else {
751  vout.crucial("Error at %s: unsupported arg types\n", __func__);
752  exit(EXIT_FAILURE);
753 
754  return cmplx(0.0, 0.0); // never reached.
755  }
756 }
757 
758 
759 //====================================================================
760 dcomplex dotc(const Field& y, const int exy, const Field& x, const int exx)
761 {
762  int Nin = y.nin();
763  int Nvol = y.nvol();
764  assert(x.nin() == Nin);
765  assert(x.nvol() == Nvol);
766 
767  const double *yp = y.ptr(0, 0, exy);
768  const double *xp = x.ptr(0, 0, exx);
769 
770  int ith, nth, is, ns;
771  set_threadtask(ith, nth, is, ns, Nvol);
772 
775  double prdr = 0.0;
776  double prdi = 0.0;
777  int Nin2 = Nin / 2;
778 
779  for (int site = is; site < ns; ++site) {
780  int kv = Nin * site;
781  for (int k = 0; k < Nin2; ++k) {
782  int kr = 2 * k;
783  int ki = 2 * k + 1;
784  prdr += yp[kr + kv] * xp[kr + kv] + yp[ki + kv] * xp[ki + kv];
785  prdi += yp[kr + kv] * xp[ki + kv] - yp[ki + kv] * xp[kr + kv];
786  }
787  }
788 
789  double prd[2] = { prdr, prdi };
790  ThreadManager::reduce_sum_global(prd, 2, ith, nth);
791 
792  return cmplx(prd[0], prd[1]);
793  } else if ((y.field_element_type() == Element_type::REAL) &&
795  return cmplx(dot(y, exy, x, exx), 0.0);
796  } else {
797  vout.crucial("Error at %s: unsupported arg types\n", __func__);
798  exit(EXIT_FAILURE);
799 
800  return cmplx(0.0, 0.0); // never reached.
801  }
802 }
803 
804 
805 //====================================================================
806 void dotc_and_norm2(dcomplex& yx, double& y2, double& x2,
807  const Field& y, const Field& x)
808 {
809  int Nin = y.nin();
810  int Nvol = y.nvol();
811  int Nex = y.nex();
812  assert(x.check_size(Nin, Nvol, Nex));
813 
814  const double *yp = y.ptr(0);
815  const double *xp = x.ptr(0);
816 
817  int ith, nth, is, ns;
818  set_threadtask(ith, nth, is, ns, Nvol);
819 
822  double prd_r = 0.0;
823  double prd_i = 0.0;
824  double prd_x2 = 0.0;
825  double prd_y2 = 0.0;
826  int Nin2 = Nin / 2;
827 
828  for (int ex = 0; ex < Nex; ++ex) {
829  for (int site = is; site < ns; ++site) {
830  int kv = Nin * (site + Nvol * ex);
831  for (int k = 0; k < Nin2; ++k) {
832  int kr = 2 * k;
833  int ki = 2 * k + 1;
834  prd_r += yp[kr + kv] * xp[kr + kv] + yp[ki + kv] * xp[ki + kv];
835  prd_i += yp[kr + kv] * xp[ki + kv] - yp[ki + kv] * xp[kr + kv];
836  prd_x2 += xp[kr + kv] * xp[kr + kv] + xp[ki + kv] * xp[ki + kv];
837  prd_y2 += yp[kr + kv] * yp[kr + kv] + yp[ki + kv] * yp[ki + kv];
838  }
839  }
840  }
841 
842  double prd[4] = { prd_r, prd_i, prd_x2, prd_y2 };
843  ThreadManager::reduce_sum_global(prd, 4, ith, nth);
844 
845  yx = cmplx(prd[0], prd[1]);
846  y2 = prd[2];
847  x2 = prd[3];
848  } else if ((y.field_element_type() == Element_type::REAL) &&
850  double yx_re = 0.0;
851  dot_and_norm2(yx_re, y2, x2, y, x);
852  yx = cmplx(yx_re, 0.0);
853  } else {
854  vout.crucial("Error at %s: unsupported arg types.\n", __func__);
855  exit(EXIT_FAILURE);
856  }
857 }
858 
859 
860 //====================================================================
861 void dotc_and_norm2(dcomplex& yx, double& y2, double& x2,
862  const Field& y, const int exy,
863  const Field& x, const int exx)
864 {
865  int Nin = y.nin();
866  int Nvol = y.nvol();
867  assert(x.nin() == Nin);
868  assert(x.nvol() == Nvol);
869 
870  const double *yp = y.ptr(0, 0, exy);
871  const double *xp = x.ptr(0, 0, exx);
872 
873  int ith, nth, is, ns;
874  set_threadtask(ith, nth, is, ns, Nvol);
875 
878  double prd_r = 0.0;
879  double prd_i = 0.0;
880  double prd_x2 = 0.0;
881  double prd_y2 = 0.0;
882  int Nin2 = Nin / 2;
883 
884  for (int site = is; site < ns; ++site) {
885  int kv = Nin * site;
886  for (int k = 0; k < Nin2; ++k) {
887  int kr = 2 * k;
888  int ki = 2 * k + 1;
889  prd_r += yp[kr + kv] * xp[kr + kv] + yp[ki + kv] * xp[ki + kv];
890  prd_i += yp[kr + kv] * xp[ki + kv] - yp[ki + kv] * xp[kr + kv];
891  prd_x2 += xp[kr + kv] * xp[kr + kv] + xp[ki + kv] * xp[ki + kv];
892  prd_y2 += yp[kr + kv] * yp[kr + kv] + yp[ki + kv] * yp[ki + kv];
893  }
894  }
895 
896  double prd[4] = { prd_r, prd_i, prd_x2, prd_y2 };
897  ThreadManager::reduce_sum_global(prd, 4, ith, nth);
898 
899  yx = cmplx(prd[0], prd[1]);
900  y2 = prd[2];
901  x2 = prd[3];
902  } else if ((y.field_element_type() == Element_type::REAL) &&
904  double yx_re = 0.0;
905  dot_and_norm2(yx_re, y2, x2, y, exy, x, exx);
906  yx = cmplx(yx_re, 0.0);
907  } else {
908  vout.crucial("Error at %s: unsupported arg types.\n", __func__);
909  exit(EXIT_FAILURE);
910  }
911 }
912 
913 
914 //============================================================END=====
Field::myindex
size_t myindex(const int jin, const int site, const int jex) const
Definition: field.h:64
bridgeIO.h
dot_and_norm2
void dot_and_norm2(double &yx, double &y2, double &x2, const Field &y, const int exy, const Field &x, const int exx)
Definition: field.cpp:637
Field::m_Nex
int m_Nex
external d.o.f.
Definition: field.h:57
Field::set
void set(const int jin, const int site, const int jex, double v)
Definition: field.h:175
Field::nex
int nex() const
Definition: field.h:128
Field::check_size
bool check_size(const int nin, const int nvol, const int nex) const
checking size parameters. [23 May 2016 H.Matsufuru]
Definition: field.h:135
aypx
void aypx(const double a, Field &y, const Field &x)
aypx(y, a, x): y := a * y + x
Definition: field.cpp:509
Field::stat
void stat(double &Fave, double &Fmax, double &Fdev) const
determines the statistics of the field. average, maximum value, and deviation is determined over glob...
Definition: field.cpp:169
Field::xI
void xI()
Definition: field.cpp:138
axpy
void axpy(Field &y, const double a, const Field &x)
axpy(y, a, x): y := a * x + y
Definition: field.cpp:380
dot
double dot(const Field &y, const Field &x)
Definition: field.cpp:576
dotc_and_norm2
void dotc_and_norm2(dcomplex &yx, double &y2, double &x2, const Field &y, const Field &x)
Definition: field.cpp:806
Field::nin
int nin() const
Definition: field.h:126
Field::m_element_type
element_type m_element_type
field complex type
Definition: field.h:58
copy
void copy(Field &y, const Field &x)
copy(y, x): y = x
Definition: field.cpp:212
Field::norm2
double norm2() const
Definition: field.cpp:113
ThreadManager::reduce_max_global
static void reduce_max_global(double *value, const int num, const int i_thread, const int Nthread)
global reduction with max for an array: double values are assumed thread local.
Definition: threadManager.cpp:361
Field::check
void check()
Definition: field.cpp:28
Field::class_name
static const std::string class_name
Definition: field.h:52
ThreadManager::reduce_sum_global
static void reduce_sum_global(dcomplex &value, const int i_thread, const int Nthread)
global reduction with summation: dcomplex values are assumed thread local.
Definition: threadManager.cpp:288
threadManager.h
field.h
Element_type::REAL
@ REAL
Definition: bridge_defs.h:43
Field::nvol
int nvol() const
Definition: field.h:127
dotc
dcomplex dotc(const Field &y, const Field &x)
Definition: field.cpp:712
CommonParameters::NPE
static int NPE()
Definition: commonParameters.h:101
Field::field_element_type
element_type field_element_type() const
Definition: field.h:129
Field::setc
void setc(double a)
Definition: field.cpp:55
Field::ptr
const double * ptr(const int jin, const int site, const int jex) const
Definition: field.h:153
Element_type::COMPLEX
@ COMPLEX
Definition: bridge_defs.h:43
scal
void scal(Field &x, const double a)
scal(x, a): x = a * x
Definition: field.cpp:261
field_thread-inc.h
Field::m_Nin
int m_Nin
internal d.o.f.
Definition: field.h:55
Bridge::BridgeIO::crucial
void crucial(const char *format,...)
Definition: bridgeIO.cpp:180
Field
Container of Field-type object.
Definition: field.h:46
Field::m_Nvol
int m_Nvol
lattice volume
Definition: field.h:56
Bridge::vout
BridgeIO vout
Definition: bridgeIO.cpp:512
CommonParameters::epsilon_criterion
static double epsilon_criterion()
Definition: commonParameters.h:119