Bridge++  Ver. 2.0.2
shiftAField_lex-tmpl.h
Go to the documentation of this file.
1 
11 template<typename AFIELD>
12 const std::string ShiftAField_lex<AFIELD>::class_name =
13  "ShiftAField_lex<AFIELD>";
14 //====================================================================
// Convenience set-up: builds a boundary-condition vector of length
// CommonParameters::Ndim() with every entry equal to 1 and forwards it,
// together with Nin, to init(Nin, bc).
// NOTE(review): the signature line (listing line 16) is absent from this
// listing; Nin is evidently a parameter declared there -- confirm the exact
// name/signature against the class header.
15 template<typename AFIELD>
17 {
18  int Ndim = CommonParameters::Ndim();
19  std::vector<int> bc(Ndim);
    // default: factor 1 in every direction
20  for (int mu = 0; mu < Ndim; ++mu) {
21  bc[mu] = 1;
22  }
23 
    // delegate the real work to the two-argument init()
24  init(Nin, bc);
25 }
26 
27 
28 //====================================================================
29 template<typename AFIELD>
30 void ShiftAField_lex<AFIELD>::init(int Nin, std::vector<int>& bc)
31 {
32  m_vl = CommonParameters::Vlevel();
33 
34  int req_comm = 0;
35  //int req_comm = 1; // set 1 if communication forced any time
36 
37  vout.general(m_vl, "%s: construction\n", class_name.c_str());
38 
39  m_Nin = Nin;
40  vout.general(m_vl, " Nin = %d\n", m_Nin);
41 
42  m_Nx = CommonParameters::Nx();
43  m_Ny = CommonParameters::Ny();
44  m_Nz = CommonParameters::Nz();
45  m_Nt = CommonParameters::Nt();
46  m_Nvol = m_Nx * m_Ny * m_Nz * m_Nt;
47 
48  m_Ndim = CommonParameters::Ndim();
49 
50  m_Nxv = m_Nx / VLENX;
51  m_Nyv = m_Ny / VLENY;
52  m_Nstv = m_Nvol / VLEN;
53  vout.general(m_vl, " VLENX = %d Nxv = %d\n", VLENX, m_Nxv);
54  vout.general(m_vl, " VLENY = %d Nyv = %d\n", VLENY, m_Nyv);
55  vout.general(m_vl, " VLEN = %d Nstv = %d\n", VLEN, m_Nstv);
56 
57  if (bc.size() != m_Ndim) {
58  vout.crucial(m_vl, "%s: incorrect size of boundary condition\n",
59  class_name.c_str());
60  exit(EXIT_FAILURE);
61  }
62 
63  m_boundary.resize(m_Ndim);
64 
65  for (int mu = 0; mu < m_Ndim; ++mu) {
66  m_boundary[mu] = bc[mu];
67  vout.general(m_vl, " boundary[%d] = %2d\n", mu, m_boundary[mu]);
68  }
69 
70  do_comm_any = 0;
71  for (int mu = 0; mu < m_Ndim; ++mu) {
72  do_comm[mu] = 1;
73  if ((req_comm == 0) && (Communicator::npe(mu) == 1)) do_comm[mu] = 0;
74  do_comm_any += do_comm[mu];
75  vout.general(" do_comm[%d] = %d\n", mu, do_comm[mu]);
76  }
77 
78  m_Nbdsize.resize(m_Ndim);
79  m_Nbdsize[0] = m_Nin * m_Ny * m_Nz * m_Nt;
80  m_Nbdsize[1] = m_Nin * m_Nx * m_Nz * m_Nt;
81  m_Nbdsize[2] = m_Nin * m_Nx * m_Ny * m_Nt;
82  m_Nbdsize[3] = m_Nin * m_Nx * m_Ny * m_Nz;
83 
84  setup_channels();
85 
86  vout.general(m_vl, "%s: construction finished.\n", class_name.c_str());
87 }
88 
89 
90 //====================================================================
// Member function with an empty body: it performs no work.
// NOTE(review): the signature line (listing line 92) is absent from this
// listing, so the function's name and parameters cannot be confirmed here.
91 template<typename AFIELD>
93 {
94 }
95 
96 
97 //====================================================================
// Initialises the per-direction send/receive channels and registers those
// of the communicating directions in chset_send / chset_recv.
//
// For every direction mu the buffer size is m_Nbdsize[mu] * sizeof(real_t).
// With USE_MPI each receive channel is initialised on its own; without it
// each receive channel is initialised on the buffer pointer of the opposite
// send channel (chrecv_up <- chsend_dn, chrecv_dn <- chsend_up).
//
// NOTE(review): the signature line (listing line 99) is absent from this
// listing; init() calls setup_channels(), which is presumably this function.
98 template<typename AFIELD>
100 {
101  chsend_up.resize(m_Ndim);
102  chrecv_up.resize(m_Ndim);
103  chsend_dn.resize(m_Ndim);
104  chrecv_dn.resize(m_Ndim);
105 
106  for (int mu = 0; mu < m_Ndim; ++mu) {
    // NOTE(review): the product is computed in size_t and narrowed to int;
    // very large boundaries could overflow -- confirm the expected range.
107  int Nvsize = m_Nbdsize[mu] * sizeof(real_t);
108 
109  chsend_dn[mu].send_init(Nvsize, mu, -1);
110  chsend_up[mu].send_init(Nvsize, mu, 1);
111 #ifdef USE_MPI
112  chrecv_up[mu].recv_init(Nvsize, mu, 1);
113  chrecv_dn[mu].recv_init(Nvsize, mu, -1);
114 #else
    // no MPI: the receive channels are handed the send buffers directly
115  void *buf_up = (void *)chsend_dn[mu].ptr();
116  chrecv_up[mu].recv_init(Nvsize, mu, 1, buf_up);
117  void *buf_dn = (void *)chsend_up[mu].ptr();
118  chrecv_dn[mu].recv_init(Nvsize, mu, -1, buf_dn);
119 #endif
    // only directions that really communicate are added to the channel sets
120  if (do_comm[mu] == 1) {
121  chset_send.append(chsend_up[mu]);
122  chset_send.append(chsend_dn[mu]);
123  chset_recv.append(chrecv_up[mu]);
124  chset_recv.append(chrecv_dn[mu]);
125  }
126  }
127 }
128 
129 
130 //====================================================================
// Applies the "up" shift in direction mu to every ex component of w and
// writes the result to v.
//
// Both fields must have size (m_Nin, m_Nvol, Nex) with Nex = w.nex(); this
// is only checked by assert. mu = 0,1,2,3 dispatches to up_x_nv, up_y_nv,
// up_z_nv, up_t_nv; any other value logs an error and exits.
//
// NOTE(review): the first signature line (listing line 132) and the
// declaration of `index` (listing line 139) are absent from this listing;
// v and w are evidently parameters -- confirm against the class header.
// NOTE(review): mu is validated inside the ex loop, so an invalid mu goes
// unreported when Nex == 0.
131 template<typename AFIELD>
133  const int mu)
134 {
135  int Nex = w.nex();
136  assert(w.check_size(m_Nin, m_Nvol, Nex));
137  assert(v.check_size(m_Nin, m_Nvol, Nex));
138 
140 
141  for (int ex = 0; ex < Nex; ++ex) {
    // pointers to the first element of component ex in each field
142  real_t *vp = v.ptr(index.idx(0, m_Nin, 0, ex));
    // w is const here; the cast is needed to call ptr() on it
143  real_t *wp = const_cast<AFIELD *>(&w)->ptr(index.idx(0, m_Nin, 0, ex));
144 
145  if (mu == 0) {
146  //up_x(vp, wp);
147  up_x_nv(vp, wp);
148  } else if (mu == 1) {
149  //up_y(vp, wp);
150  up_y_nv(vp, wp);
151  } else if (mu == 2) {
152  //up_z(vp, wp);
153  up_z_nv(vp, wp);
154  } else if (mu == 3) {
155  //up_t(vp, wp);
156  up_t_nv(vp, wp);
157  } else {
158  vout.crucial(m_vl, "Error at %s: wrong parameter\n",
159  class_name.c_str());
160  exit(EXIT_FAILURE);
161  }
162  }
163 }
164 
165 
166 //====================================================================
// Applies the "up" shift in direction mu to the single component ex2 of w
// and writes the result into component ex1 of v.
//
// Sizes and the ranges ex1 < v.nex(), ex2 < w.nex() are only checked by
// assert (negative indices are not checked). mu = 0,1,2,3 dispatches to
// up_x_nv, up_y_nv, up_z_nv, up_t_nv; any other value logs an error and
// exits.
//
// NOTE(review): the first signature line (listing line 168) and the
// declaration of `index` (listing line 179) are absent from this listing;
// v and ex1 are evidently parameters -- confirm against the class header.
167 template<typename AFIELD>
169  const AFIELD& w, const int ex2,
170  const int mu)
171 {
172  int Nex = v.nex();
173  assert(v.check_size(m_Nin, m_Nvol, Nex));
174  assert(ex1 < Nex);
    // Nex is reused for the source field from here on
175  Nex = w.nex();
176  assert(w.check_size(m_Nin, m_Nvol, Nex));
177  assert(ex2 < Nex);
178 
180 
181  real_t *vp = v.ptr(index.idx(0, m_Nin, 0, ex1));
    // w is const here; the cast is needed to call ptr() on it
182  real_t *wp = const_cast<AFIELD *>(&w)->ptr(index.idx(0, m_Nin, 0, ex2));
183 
184  if (mu == 0) {
185  //up_x(vp, wp);
186  up_x_nv(vp, wp);
187  } else if (mu == 1) {
188  //up_y(vp, wp);
189  up_y_nv(vp, wp);
190  } else if (mu == 2) {
191  // up_z(vp, wp);
192  up_z_nv(vp, wp);
193  } else if (mu == 3) {
194  //up_t(vp, wp);
195  up_t_nv(vp, wp);
196  } else {
197  vout.crucial(m_vl, "Error at %s: wrong parameter\n",
198  class_name.c_str());
199  exit(EXIT_FAILURE);
200  }
201 }
202 
203 
204 //====================================================================
// Applies the "down" shift in direction mu to every ex component of w and
// writes the result to v.
//
// Both fields must have size (m_Nin, m_Nvol, Nex) with Nex = w.nex(); this
// is only checked by assert. mu = 0,1,2,3 dispatches to dn_x_nv, dn_y_nv,
// dn_z_nv, dn_t_nv; any other value logs an error and exits.
//
// NOTE(review): the first signature line (listing line 206) and the
// declaration of `index` (listing line 213) are absent from this listing;
// v and w are evidently parameters -- confirm against the class header.
// NOTE(review): mu is validated inside the ex loop, so an invalid mu goes
// unreported when Nex == 0.
205 template<typename AFIELD>
207  const int mu)
208 {
209  int Nex = w.nex();
210  assert(w.check_size(m_Nin, m_Nvol, Nex));
211  assert(v.check_size(m_Nin, m_Nvol, Nex));
212 
214 
215  for (int ex = 0; ex < Nex; ++ex) {
    // pointers to the first element of component ex in each field
216  real_t *vp = v.ptr(index.idx(0, m_Nin, 0, ex));
    // w is const here; the cast is needed to call ptr() on it
217  real_t *wp = const_cast<AFIELD *>(&w)->ptr(index.idx(0, m_Nin, 0, ex));
218 
219  if (mu == 0) {
220  // dn_x(vp, wp);
221  dn_x_nv(vp, wp);
222  } else if (mu == 1) {
223  // dn_y(vp, wp);
224  dn_y_nv(vp, wp);
225  } else if (mu == 2) {
226  // dn_z(vp, wp);
227  dn_z_nv(vp, wp);
228  } else if (mu == 3) {
229  // dn_t(vp, wp);
230  dn_t_nv(vp, wp);
231  } else {
232  vout.crucial(m_vl, "Error at %s: wrong parameter\n",
233  class_name.c_str());
234  exit(EXIT_FAILURE);
235  }
236  }
237 }
238 
239 
240 //====================================================================
// Applies the "down" shift in direction mu to the single component ex2 of
// w and writes the result into component ex1 of v.
//
// Sizes and the ranges ex1 < v.nex(), ex2 < w.nex() are only checked by
// assert (negative indices are not checked). mu = 0,1,2,3 dispatches to
// dn_x_nv, dn_y_nv, dn_z_nv, dn_t_nv; any other value logs an error and
// exits.
//
// NOTE(review): the first signature line (listing line 242) and the
// declaration of `index` (listing line 253) are absent from this listing;
// v and ex1 are evidently parameters -- confirm against the class header.
241 template<typename AFIELD>
243  const AFIELD& w, const int ex2,
244  const int mu)
245 {
246  int Nex = v.nex();
247  assert(v.check_size(m_Nin, m_Nvol, Nex));
248  assert(ex1 < Nex);
    // Nex is reused for the source field from here on
249  Nex = w.nex();
250  assert(w.check_size(m_Nin, m_Nvol, Nex));
251  assert(ex2 < Nex);
252 
254 
255  real_t *vp = v.ptr(index.idx(0, m_Nin, 0, ex1));
    // w is const here; the cast is needed to call ptr() on it
256  real_t *wp = const_cast<AFIELD *>(&w)->ptr(index.idx(0, m_Nin, 0, ex2));
257 
258  if (mu == 0) {
259  //dn_x(vp, wp);
260  dn_x_nv(vp, wp);
261  } else if (mu == 1) {
262  //dn_y(vp, wp);
263  dn_y_nv(vp, wp);
264  } else if (mu == 2) {
265  //dn_z(vp, wp);
266  dn_z_nv(vp, wp);
267  } else if (mu == 3) {
268  // dn_t(vp, wp);
269  dn_t_nv(vp, wp);
270  } else {
271  vout.crucial(m_vl, "Error at %s: wrong parameter\n",
272  class_name.c_str());
273  exit(EXIT_FAILURE);
274  }
275 }
276 
277 
278 //====================================================================
// x-direction "up" shift, one site at a time: the value written to vp at
// (ix, iyzt) is the value of wp at (ix+1, iyzt).
//
// When do_comm[0] == 1 the ix == 0 slice of wp, multiplied by bc2, is packed
// into the chsend_dn[0] buffer and exchanged, and the sites with
// ix == m_Nx-1 are filled from the chrecv_up[0] buffer. Otherwise the
// neighbour index wraps around locally and the wrapped value is multiplied
// by bc2.
//
// The two commented-out blocks are a SIMD-blocked variant of the same
// steps and are kept for reference.
//
// NOTE(review): the signature line (listing line 280) and the declaration
// of `index_alt` (listing line 290) are absent from this listing; vp and wp
// are evidently the output/input pointers declared there -- confirm.
// NOTE(review): the orphaned `omp barrier` / `omp master` pragmas suggest
// this is meant to be called by all threads of a parallel region -- confirm.
279 template<typename AFIELD>
281 {
    // boundary factor: differs from 1 only on the process with ipe(0) == 0
282  real_t bc2 = 1.0;
283  if (Communicator::ipe(0) == 0) bc2 = real_t(m_boundary[0]);
284 
285  int ex = 0; // ex loop is outside this method
286 
    // buf1: data to be sent; buf2: data received
287  real_t *buf1 = (real_t *)chsend_dn[0].ptr();
288  real_t *buf2 = (real_t *)chrecv_up[0].ptr();
289 
291 
    // [is, ns) is this thread's share of the m_Nvol sites
292  int ith, nth, is, ns;
293  set_threadtask(ith, nth, is, ns, m_Nvol);
294 
295 #pragma omp barrier
296 
297  if (do_comm[0] == 1) {
298  /*
299  for(int site = is; site < ns; ++site){
300  int ix = site % m_Nxv;
301  int iyzt = site/m_Nxv;
302  if(ix == 0){
303  real_t buf[m_Nin];
304  load_vec1(buf, &wp[VLEN*Nin2*site], 0, Nin2);
305  scal_th(buf, bc2, m_Nin);
306  copy_th(&buf1[m_Nin*iyzt], buf, m_Nin);
307  }
308  }
309  */
310 
    // pack the ix == 0 slice, scaled by bc2, into the send buffer
311  for (int site = is; site < ns; ++site) {
312  int ix = site % m_Nx;
313  int iyzt = site / m_Nx;
314  if (ix == 0) {
315  for (int in = 0; in < m_Nin; ++in) {
316  int index = index_alt.idx(in, m_Nin, site, ex);
317  buf1[in + m_Nin * iyzt] = bc2 * wp[index];
318  }
319  }
320  }
321 
    // all threads must have finished packing before the exchange starts
322 #pragma omp barrier
323 
324 #pragma omp master
325  {
326  chrecv_up[0].start();
327  chsend_dn[0].start();
328  // communication is not overlapped in x-direction.
329  chrecv_up[0].wait();
330  chsend_dn[0].wait();
331  }
332 
    // the received data must be complete before any thread reads buf2
333 #pragma omp barrier
334  }
335 
336  for (int site = is; site < ns; ++site) {
337  int ix = site % m_Nx;
338  int iyzt = site / m_Nx;
    // interior sites, or every site when there is no communication
339  if ((ix < m_Nx - 1) || (do_comm[0] == 0)) {
340  int ix2 = (ix + 1) % m_Nx;
341  int nei = ix2 + m_Nx * iyzt;
342  real_t bc3 = 1.0;
    // the locally wrapped neighbour picks up the boundary factor
343  if (ix == m_Nx - 1) bc3 = bc2;
344  for (int in = 0; in < m_Nin; ++in) {
345  int iv = index_alt.idx(in, m_Nin, site, ex);
346  int iw = index_alt.idx(in, m_Nin, nei, ex);
347  vp[iv] = bc3 * wp[iw];
348  }
349  } else {
    // last x-slice with communication: take the received values
350  for (int in = 0; in < m_Nin; ++in) {
351  int iv = index_alt.idx(in, m_Nin, site, ex);
352  vp[iv] = buf2[in + m_Nin * iyzt];
353  }
354  }
355  }
356 
357  /*
358 for(int site = is; site < ns; ++site){
359  int ix = site % m_Nxv;
360  int iyzt = site/m_Nxv;
361  int iv = VLEN * Nin2 * site;
362  if(ix < m_Nxv-1){
363  int nei = VLEN * Nin2 * (site + 1);
364  shift_vec2_bw(&vp[iv], &wp[iv], &wp[nei], Nin2);
365  }else if(do_comm[0] == 0){
366  int nei = VLEN * Nin2 * (0 + m_Nxv * iyzt);
367  real_t buf[m_Nin];
368  load_vec1(buf, &wp[nei], 0, Nin2);
369  scal_th(buf, bc2, m_Nin);
370  shift_vec1_bw(&vp[iv], &wp[iv], buf, Nin2);
371  }else{
372  shift_vec1_bw(&vp[iv], &wp[iv], &buf2[m_Nin*iyzt], Nin2);
373  }
374 }
375  */
376 
377 #pragma omp barrier
378 }
379 
380 
381 //====================================================================
// x-direction "up" shift, one site at a time: the value written to vp at
// (ix, iyzt) is the value of wp at (ix+1, iyzt).
//
// When do_comm[0] == 1 the ix == 0 slice of wp, multiplied by bc2, is packed
// into the chsend_dn[0] buffer and exchanged, and the sites with
// ix == m_Nx-1 are filled from the chrecv_up[0] buffer. Otherwise the
// neighbour index wraps around locally and the wrapped value is multiplied
// by bc2.
//
// NOTE(review): the signature line (listing line 383) and the declaration
// of `index_alt` (listing line 393) are absent from this listing; from the
// call sites this is presumably up_x_nv(vp, wp) -- confirm.
// NOTE(review): the orphaned `omp barrier` / `omp master` pragmas suggest
// this is meant to be called by all threads of a parallel region -- confirm.
382 template<typename AFIELD>
384 {
    // boundary factor: differs from 1 only on the process with ipe(0) == 0
385  real_t bc2 = 1.0;
386  if (Communicator::ipe(0) == 0) bc2 = real_t(m_boundary[0]);
387 
388  int ex = 0; // ex loop is outside this method
389 
    // buf1: data to be sent; buf2: data received
390  real_t *buf1 = (real_t *)chsend_dn[0].ptr();
391  real_t *buf2 = (real_t *)chrecv_up[0].ptr();
392 
394 
    // [is, ns) is this thread's share of the m_Nvol sites
395  int ith, nth, is, ns;
396  set_threadtask(ith, nth, is, ns, m_Nvol);
397 
398 #pragma omp barrier
399 
400  if (do_comm[0] == 1) {
    // pack the ix == 0 slice, scaled by bc2, into the send buffer
401  for (int site = is; site < ns; ++site) {
402  int ix = site % m_Nx;
403  int iyzt = site / m_Nx;
404  if (ix == 0) {
405  for (int in = 0; in < m_Nin; ++in) {
406  int index = index_alt.idx(in, m_Nin, site, ex);
407  buf1[in + m_Nin * iyzt] = bc2 * wp[index];
408  }
409  }
410  }
411 
    // all threads must have finished packing before the exchange starts
412 #pragma omp barrier
413 
414 #pragma omp master
415  {
416  chrecv_up[0].start();
417  chsend_dn[0].start();
418  // communication is not overlapped in x-direction.
419  chrecv_up[0].wait();
420  chsend_dn[0].wait();
421  }
422 
    // the received data must be complete before any thread reads buf2
423 #pragma omp barrier
424  }
425 
426  for (int site = is; site < ns; ++site) {
427  int ix = site % m_Nx;
428  int iyzt = site / m_Nx;
    // interior sites, or every site when there is no communication
429  if ((ix < m_Nx - 1) || (do_comm[0] == 0)) {
430  int ix2 = (ix + 1) % m_Nx;
431  int nei = ix2 + m_Nx * iyzt;
432  real_t bc3 = 1.0;
    // the locally wrapped neighbour picks up the boundary factor
433  if (ix == m_Nx - 1) bc3 = bc2;
434  for (int in = 0; in < m_Nin; ++in) {
435  int iv = index_alt.idx(in, m_Nin, site, ex);
436  int iw = index_alt.idx(in, m_Nin, nei, ex);
437  vp[iv] = bc3 * wp[iw];
438  }
439  } else {
    // last x-slice with communication: take the received values
440  for (int in = 0; in < m_Nin; ++in) {
441  int iv = index_alt.idx(in, m_Nin, site, ex);
442  vp[iv] = buf2[in + m_Nin * iyzt];
443  }
444  }
445  }
446 
447 #pragma omp barrier
448 }
449 
450 
451 //====================================================================
// x-direction "down" shift, one site at a time: the value written to vp at
// (ix, iyzt) is the value of wp at (ix-1, iyzt).
//
// When do_comm[0] == 1 the ix == m_Nx-1 slice of wp, multiplied by bc2, is
// packed into the chsend_up[0] buffer and exchanged, and the sites with
// ix == 0 are filled from the chrecv_dn[0] buffer. Otherwise the neighbour
// index wraps around locally and the wrapped value is multiplied by bc2.
//
// The two commented-out blocks are a SIMD-blocked variant of the same
// steps and are kept for reference.
//
// NOTE(review): the signature line (listing line 453) and the declaration
// of `index_alt` (listing line 465) are absent from this listing; vp and wp
// are evidently the output/input pointers declared there -- confirm.
// NOTE(review): the orphaned `omp barrier` / `omp master` pragmas suggest
// this is meant to be called by all threads of a parallel region -- confirm.
452 template<typename AFIELD>
454 {
    // boundary factor: differs from 1 only on the last process in x
455  real_t bc2 = 1.0;
456  if (Communicator::ipe(0) == Communicator::npe(0) - 1) {
457  bc2 = real_t(m_boundary[0]);
458  }
459 
460  int ex = 0; // ex loop is outside this method
461 
    // buf1: data to be sent; buf2: data received
462  real_t *buf1 = (real_t *)chsend_up[0].ptr();
463  real_t *buf2 = (real_t *)chrecv_dn[0].ptr();
464 
466 
    // [is, ns) is this thread's share of the m_Nvol sites
467  int ith, nth, is, ns;
468  set_threadtask(ith, nth, is, ns, m_Nvol);
469 
470 #pragma omp barrier
471 
472  if (do_comm[0] == 1) {
473  /*
474  for(int site = is; site < ns; ++site){
475  int ix = site % m_Nxv;
476  int iyzt = site/m_Nxv;
477  if(ix == m_Nxv-1){
478  real_t buf[m_Nin];
479  load_vec1(buf, &wp[VLEN*Nin2*site], VLEN2-1, Nin2);
480  scal_th(buf, bc2, m_Nin);
481  copy_th(&buf1[m_Nin*iyzt], buf, m_Nin);
482  }
483  }
484  */
485 
    // pack the ix == m_Nx-1 slice, scaled by bc2, into the send buffer
486  for (int site = is; site < ns; ++site) {
487  int ix = site % m_Nx;
488  int iyzt = site / m_Nx;
489  if (ix == m_Nx - 1) {
490  for (int in = 0; in < m_Nin; ++in) {
491  int index = index_alt.idx(in, m_Nin, site, ex);
492  buf1[in + m_Nin * iyzt] = bc2 * wp[index];
493  }
494  }
495  }
496 
    // all threads must have finished packing before the exchange starts
497 #pragma omp barrier
498 
499 #pragma omp master
500  {
501  chrecv_dn[0].start();
502  chsend_up[0].start();
503  // communication is not overlapped in x-direction.
504  chrecv_dn[0].wait();
505  chsend_up[0].wait();
506  }
507 
    // the received data must be complete before any thread reads buf2
508 #pragma omp barrier
509  }
510 
511  for (int site = is; site < ns; ++site) {
512  int ix = site % m_Nx;
513  int iyzt = site / m_Nx;
    // interior sites, or every site when there is no communication
514  if ((ix > 0) || (do_comm[0] == 0)) {
515  int ix2 = (ix - 1 + m_Nx) % m_Nx;
516  int nei = ix2 + m_Nx * iyzt;
517  real_t bc3 = 1.0;
    // the locally wrapped neighbour picks up the boundary factor
518  if (ix == 0) bc3 = bc2;
519  for (int in = 0; in < m_Nin; ++in) {
520  int iv = index_alt.idx(in, m_Nin, site, ex);
521  int iw = index_alt.idx(in, m_Nin, nei, ex);
522  vp[iv] = bc3 * wp[iw];
523  }
524  } else {
    // first x-slice with communication: take the received values
525  for (int in = 0; in < m_Nin; ++in) {
526  int index = index_alt.idx(in, m_Nin, site, ex);
527  vp[index] = buf2[in + m_Nin * iyzt];
528  }
529  }
530  }
531 
532  /*
533  for(int site = is; site < ns; ++site){
534  int ix = site % m_Nxv;
535  int iyzt = site/m_Nxv;
536  int iv = VLEN * Nin2 * site;
537  if(ix > 0){
538  int nei = VLEN * Nin2 * (site - 1);
539  shift_vec2_fw(&vp[iv], &wp[iv], &wp[nei], Nin2);
540  }else if(do_comm[0] == 0){
541  int nei = VLEN * Nin2 * (m_Nxv-1 + m_Nxv * iyzt);
542  real_t buf[m_Nin];
543  load_vec1(buf, &wp[nei], VLEN2-1, Nin2);
544  scal_th(buf, bc2, m_Nin);
545  shift_vec1_fw(&vp[iv], &wp[iv], buf, Nin2);
546  }else{
547  shift_vec1_fw(&vp[iv], &wp[iv], &buf2[m_Nin*iyzt], Nin2);
548  }
549  }
550  */
551 
552 #pragma omp barrier
553 }
554 
555 
556 //====================================================================
// x-direction "down" shift, one site at a time: the value written to vp at
// (ix, iyzt) is the value of wp at (ix-1, iyzt).
//
// When do_comm[0] == 1 the ix == m_Nx-1 slice of wp, multiplied by bc2, is
// packed into the chsend_up[0] buffer and exchanged, and the sites with
// ix == 0 are filled from the chrecv_dn[0] buffer. Otherwise the neighbour
// index wraps around locally and the wrapped value is multiplied by bc2.
//
// NOTE(review): the signature line (listing line 558) and the declaration
// of `index_alt` (listing line 570) are absent from this listing; from the
// call sites this is presumably dn_x_nv(vp, wp) -- confirm.
// NOTE(review): the orphaned `omp barrier` / `omp master` pragmas suggest
// this is meant to be called by all threads of a parallel region -- confirm.
557 template<typename AFIELD>
559 {
    // boundary factor: differs from 1 only on the last process in x
560  real_t bc2 = 1.0;
561  if (Communicator::ipe(0) == Communicator::npe(0) - 1) {
562  bc2 = real_t(m_boundary[0]);
563  }
564 
565  int ex = 0; // ex loop is outside this method
566 
    // buf1: data to be sent; buf2: data received
567  real_t *buf1 = (real_t *)chsend_up[0].ptr();
568  real_t *buf2 = (real_t *)chrecv_dn[0].ptr();
569 
571 
    // [is, ns) is this thread's share of the m_Nvol sites
572  int ith, nth, is, ns;
573  set_threadtask(ith, nth, is, ns, m_Nvol);
574 
575 #pragma omp barrier
576 
577  if (do_comm[0] == 1) {
    // pack the ix == m_Nx-1 slice, scaled by bc2, into the send buffer
578  for (int site = is; site < ns; ++site) {
579  int ix = site % m_Nx;
580  int iyzt = site / m_Nx;
581  if (ix == m_Nx - 1) {
582  for (int in = 0; in < m_Nin; ++in) {
583  int index = index_alt.idx(in, m_Nin, site, ex);
584  buf1[in + m_Nin * iyzt] = bc2 * wp[index];
585  }
586  }
587  }
588 
    // all threads must have finished packing before the exchange starts
589 #pragma omp barrier
590 
591 #pragma omp master
592  {
593  chrecv_dn[0].start();
594  chsend_up[0].start();
595  // communication is not overlapped in x-direction.
596  chrecv_dn[0].wait();
597  chsend_up[0].wait();
598  }
599 
    // the received data must be complete before any thread reads buf2
600 #pragma omp barrier
601  }
602 
603  for (int site = is; site < ns; ++site) {
604  int ix = site % m_Nx;
605  int iyzt = site / m_Nx;
    // interior sites, or every site when there is no communication
606  if ((ix > 0) || (do_comm[0] == 0)) {
607  int ix2 = (ix - 1 + m_Nx) % m_Nx;
608  int nei = ix2 + m_Nx * iyzt;
609  real_t bc3 = 1.0;
    // the locally wrapped neighbour picks up the boundary factor
610  if (ix == 0) bc3 = bc2;
611  for (int in = 0; in < m_Nin; ++in) {
612  int iv = index_alt.idx(in, m_Nin, site, ex);
613  int iw = index_alt.idx(in, m_Nin, nei, ex);
614  vp[iv] = bc3 * wp[iw];
615  }
616  } else {
    // first x-slice with communication: take the received values
617  for (int in = 0; in < m_Nin; ++in) {
618  int index = index_alt.idx(in, m_Nin, site, ex);
619  vp[index] = buf2[in + m_Nin * iyzt];
620  }
621  }
622  }
623 
624 #pragma omp barrier
625 }
626 
627 
628 //====================================================================
// y-direction "up" shift on SIMD blocks: the block written to vp at
// (ix, iy, izt) is the block of wp at (ix, iy+1, izt).
//
// Phases when do_comm[1] == 1:
//   1. master posts the receive; all threads pack the iy == 0 slice
//      (scaled by bc2) into the chsend_dn[1] buffer; master starts the send;
//   2. the bulk (iy < m_Ny-1) is copied while communication is in flight;
//   3. master waits for the receive, then the iy == m_Ny-1 slice is filled
//      from the chrecv_up[1] buffer; master finally waits for the send.
// Without communication the neighbour index wraps locally and the wrapped
// block is multiplied by bc2.
//
// NOTE(review): the signature line (listing line 630) is absent from this
// listing; vp and wp are evidently the output/input pointers -- confirm.
// NOTE(review): sites run over m_Nstv = m_Nvol / VLEN but are decomposed
// with m_Nxv and m_Ny, which matches only if VLEN == VLENX -- confirm.
// NOTE(review): `Vsimd_t vt[Nin2]` has a run-time length (variable-length
// array), which is a compiler extension in C++.
629 template<typename AFIELD>
631 {
    // boundary factor: differs from 1 only on the process with ipe(1) == 0
632  real_t bc2 = 1.0;
633  if (Communicator::ipe(1) == 0) bc2 = real_t(m_boundary[1]);
634 
635  int Nin2 = m_Nin / 2;
636 
    // buf1: data to be sent; buf2: data received
637  real_t *buf1 = (real_t *)chsend_dn[1].ptr();
638  real_t *buf2 = (real_t *)chrecv_up[1].ptr();
639 
    // [is, ns) is this thread's share of the m_Nstv blocked sites
640  int ith, nth, is, ns;
641  set_threadtask(ith, nth, is, ns, m_Nstv);
642 
643 #pragma omp barrier
644 
645  if (do_comm[1] == 1) {
646 #pragma omp master
647  {
648  chrecv_up[1].start();
649  }
    // pack the iy == 0 slice, scaled by bc2, into the send buffer
650  for (int site = is; site < ns; ++site) {
651  int ix = site % m_Nxv;
652  int iy = (site / m_Nxv) % m_Ny;
653  int izt = site / (m_Nxv * m_Ny);
654  if (iy == 0) {
655  int iv = VLEN * Nin2 * site;
656  int ibf = VLEN * Nin2 * (ix + m_Nxv * izt);
657  Vsimd_t vt[Nin2];
658  load_vec(vt, &wp[iv], Nin2);
659  scal_vec(vt, bc2, Nin2);
660  save_vec(&buf1[ibf], vt, Nin2);
661  }
662  }
    // the send may only start once every thread has finished packing
663 #pragma omp barrier
664 
665 #pragma omp master
666  {
667  chsend_dn[1].start();
668  }
669  } // if(do_comm[1] == 1)
670 
    // bulk part; with communication the last y-slice is skipped here
671  for (int site = is; site < ns; ++site) {
672  int ix = site % m_Nxv;
673  int iy = (site / m_Nxv) % m_Ny;
674  int izt = site / (m_Nxv * m_Ny);
675  int iv = VLEN * Nin2 * site;
676  if ((iy < m_Ny - 1) || (do_comm[1] == 0)) {
677  int iyn = (iy + 1) % m_Ny;
678  int nei = VLEN * Nin2 * (ix + m_Nxv * (iyn + m_Ny * izt));
679  Vsimd_t vt[Nin2];
680  load_vec(vt, &wp[nei], Nin2);
681  if (iy == m_Ny - 1) scal_vec(vt, bc2, Nin2);
682  save_vec(&vp[iv], vt, Nin2);
683  }
684  }
685 
686  if (do_comm[1] == 1) {
687 #pragma omp master
688  {
689  chrecv_up[1].wait();
690  }
    // the received data must be complete before any thread reads buf2
691 #pragma omp barrier
692 
    // fill the last y-slice from the receive buffer
693  for (int site = is; site < ns; ++site) {
694  int ix = site % m_Nxv;
695  int iy = (site / m_Nxv) % m_Ny;
696  int izt = site / (m_Nxv * m_Ny);
697  if (iy == m_Ny - 1) {
698  int iv = VLEN * Nin2 * site;
699  int ibf = VLEN * Nin2 * (ix + m_Nxv * izt);
700  Vsimd_t vt[Nin2];
701  load_vec(vt, &buf2[ibf], Nin2);
702  save_vec(&vp[iv], vt, Nin2);
703  }
704  }
705 
706 #pragma omp master
707  {
708  chsend_dn[1].wait();
709  }
710  } // if(do_comm[1] == 1)
711 
712 #pragma omp barrier
713 }
714 
715 
716 //====================================================================
// y-direction "up" shift, one site at a time: the value written to vp at
// (ix, iy, izt) is the value of wp at (ix, iy+1, izt).
//
// Phases when do_comm[1] == 1:
//   1. master posts the receive; all threads pack the iy == 0 slice
//      (scaled by bc2) into the chsend_dn[1] buffer; master starts the send;
//   2. the bulk (iy < m_Ny-1) is copied while communication is in flight;
//   3. master waits for the receive, then the iy == m_Ny-1 slice is filled
//      from the chrecv_up[1] buffer; master finally waits for the send.
// Without communication the neighbour index wraps locally and the wrapped
// value is multiplied by bc2.
//
// NOTE(review): the signature line (listing line 718) and the declaration
// of `index_alt` (listing line 728) are absent from this listing; from the
// call sites this is presumably up_y_nv(vp, wp) -- confirm.
717 template<typename AFIELD>
719 {
    // boundary factor: differs from 1 only on the process with ipe(1) == 0
720  real_t bc2 = 1.0;
721  if (Communicator::ipe(1) == 0) bc2 = real_t(m_boundary[1]);
722 
723  int ex = 0;
724 
    // buf1: data to be sent; buf2: data received
725  real_t *buf1 = (real_t *)chsend_dn[1].ptr();
726  real_t *buf2 = (real_t *)chrecv_up[1].ptr();
727 
729 
    // [is, ns) is this thread's share of the m_Nvol sites
730  int ith, nth, is, ns;
731  set_threadtask(ith, nth, is, ns, m_Nvol);
732 
733 #pragma omp barrier
734 
735  if (do_comm[1] == 1) {
736 #pragma omp master
737  {
738  chrecv_up[1].start();
739  }
740 
    // pack the iy == 0 slice, scaled by bc2, into the send buffer
741  for (int site = is; site < ns; ++site) {
742  int ix = site % m_Nx;
743  int iy = (site / m_Nx) % m_Ny;
744  int izt = site / (m_Nx * m_Ny);
745  if (iy == 0) {
746  for (int in = 0; in < m_Nin; ++in) {
747  int iw = index_alt.idx(in, m_Nin, site, ex);
748  int ixzt = ix + m_Nx * izt;
749  buf1[in + m_Nin * ixzt] = bc2 * wp[iw];
750  }
751  }
752  }
753 
    // the send may only start once every thread has finished packing
754 #pragma omp barrier
755 
756 #pragma omp master
757  {
758  chsend_dn[1].start();
759  }
760  } // if(do_comm[1] == 1)
761 
    // bulk part; with communication the last y-slice is skipped here
762  for (int site = is; site < ns; ++site) {
763  int ix = site % m_Nx;
764  int iy = (site / m_Nx) % m_Ny;
765  int izt = site / (m_Nx * m_Ny);
766  if ((iy < m_Ny - 1) || (do_comm[1] == 0)) {
767  int iy2 = (iy + 1) % m_Ny;
768  int nei = ix + m_Nx * (iy2 + m_Ny * izt);
769  real_t bc3 = 1.0;
770  if (iy == m_Ny - 1) bc3 = bc2;
771  for (int in = 0; in < m_Nin; ++in) {
772  int iv = index_alt.idx(in, m_Nin, site, ex);
773  int iw = index_alt.idx(in, m_Nin, nei, ex);
774  vp[iv] = bc3 * wp[iw];
775  }
776  }
777  }
778 
779  if (do_comm[1] == 1) {
780 #pragma omp master
781  {
782  chrecv_up[1].wait();
783  }
784 
    // the received data must be complete before any thread reads buf2
785 #pragma omp barrier
786 
    // fill the last y-slice from the receive buffer
787  for (int site = is; site < ns; ++site) {
788  int ix = site % m_Nx;
789  int iy = (site / m_Nx) % m_Ny;
790  int izt = site / (m_Nx * m_Ny);
791  int ixzt = ix + m_Nx * izt;
792  if (iy == m_Ny - 1) {
793  for (int in = 0; in < m_Nin; ++in) {
794  int iv = index_alt.idx(in, m_Nin, site, ex);
795  vp[iv] = buf2[in + m_Nin * ixzt];
796  }
797  }
798  }
799 #pragma omp master
800  {
801  chsend_dn[1].wait();
802  }
803  } // if(do_comm[1] == 1)
804 
805 #pragma omp barrier
806 }
807 
808 
809 //====================================================================
// y-direction "down" shift on SIMD blocks: the block written to vp at
// (ix, iy, izt) is the block of wp at (ix, iy-1, izt).
//
// Phases when do_comm[1] == 1:
//   1. master posts the receive; all threads pack the iy == m_Ny-1 slice
//      (unscaled) into the chsend_up[1] buffer; master starts the send;
//   2. the bulk (iy > 0) is copied while communication is in flight;
//   3. master waits for the receive, then the iy == 0 slice is filled from
//      the chrecv_dn[1] buffer and multiplied by bc2 on the receiving
//      side; master finally waits for the send.
// Without communication the neighbour index wraps locally and the wrapped
// block is multiplied by bc2.
//
// NOTE(review): the signature line (listing line 811) is absent from this
// listing; vp and wp are evidently the output/input pointers -- confirm.
// NOTE(review): sites run over m_Nstv = m_Nvol / VLEN but are decomposed
// with m_Nxv and m_Ny, which matches only if VLEN == VLENX -- confirm.
// NOTE(review): `Vsimd_t vt[Nin2]` has a run-time length (variable-length
// array), which is a compiler extension in C++.
810 template<typename AFIELD>
812 {
    // boundary factor: differs from 1 only on the process with ipe(1) == 0
813  real_t bc2 = 1.0;
814  if (Communicator::ipe(1) == 0) bc2 = real_t(m_boundary[1]);
815 
816  int Nin2 = m_Nin / 2;
817 
    // buf1: data to be sent; buf2: data received
818  real_t *buf1 = (real_t *)chsend_up[1].ptr();
819  real_t *buf2 = (real_t *)chrecv_dn[1].ptr();
820 
    // [is, ns) is this thread's share of the m_Nstv blocked sites
821  int ith, nth, is, ns;
822  set_threadtask(ith, nth, is, ns, m_Nstv);
823 
824 #pragma omp barrier
825 
826  if (do_comm[1] == 1) {
827 #pragma omp master
828  {
829  chrecv_dn[1].start();
830  }
    // pack the iy == m_Ny-1 slice into the send buffer (no scaling here)
831  for (int site = is; site < ns; ++site) {
832  int ix = site % m_Nxv;
833  int iy = (site / m_Nxv) % m_Ny;
834  int izt = site / (m_Nxv * m_Ny);
835  if (iy == m_Ny - 1) {
836  int iv = VLEN * Nin2 * site;
837  int ibf = VLEN * Nin2 * (ix + m_Nxv * izt);
838  Vsimd_t vt[Nin2];
839  load_vec(vt, &wp[iv], Nin2);
840  save_vec(&buf1[ibf], vt, Nin2);
841  }
842  }
843 
    // the send may only start once every thread has finished packing
844 #pragma omp barrier
845 
846 #pragma omp master
847  {
848  chsend_up[1].start();
849  }
850  } // if(do_comm[1] == 1)
851 
    // bulk part; with communication the first y-slice is skipped here
852  for (int site = is; site < ns; ++site) {
853  int ix = site % m_Nxv;
854  int iy = (site / m_Nxv) % m_Ny;
855  int izt = site / (m_Nxv * m_Ny);
856  int iv = VLEN * Nin2 * site;
857  if ((iy > 0) || (do_comm[1] == 0)) {
858  int iyn = (iy - 1 + m_Ny) % m_Ny;
859  int nei = VLEN * Nin2 * (ix + m_Nxv * (iyn + m_Ny * izt));
860  Vsimd_t vt[Nin2];
861  load_vec(vt, &wp[nei], Nin2);
862  if (iy == 0) scal_vec(vt, bc2, Nin2);
863  save_vec(&vp[iv], vt, Nin2);
864  }
865  }
866 
867  if (do_comm[1] == 1) {
868 #pragma omp master
869  {
870  chrecv_dn[1].wait();
871  }
872 
    // the received data must be complete before any thread reads buf2
873 #pragma omp barrier
874 
    // fill the first y-slice from the receive buffer, scaled by bc2
875  for (int site = is; site < ns; ++site) {
876  int ix = site % m_Nxv;
877  int iy = (site / m_Nxv) % m_Ny;
878  int izt = site / (m_Nxv * m_Ny);
879  if (iy == 0) {
880  int iv = VLEN * Nin2 * site;
881  int ibf = VLEN * Nin2 * (ix + m_Nxv * izt);
882  Vsimd_t vt[Nin2];
883  load_vec(vt, &buf2[ibf], Nin2);
884  scal_vec(vt, bc2, Nin2);
885  save_vec(&vp[iv], vt, Nin2);
886  }
887  }
888 
889 #pragma omp master
890  {
891  chsend_up[1].wait();
892  }
893  } // if(do_comm[1] == 1)
894 
895 #pragma omp barrier
896 }
897 
898 
899 //====================================================================
// y-direction "down" shift, one site at a time: the value written to vp at
// (ix, iy, izt) is the value of wp at (ix, iy-1, izt).
//
// Phases when do_comm[1] == 1:
//   1. master posts the receive; all threads pack the iy == m_Ny-1 slice
//      (unscaled) into the chsend_up[1] buffer; master starts the send;
//   2. the bulk (iy > 0) is copied while communication is in flight;
//   3. master waits for send and receive, then the iy == 0 slice is filled
//      from the chrecv_dn[1] buffer and multiplied by bc2 on the receiving
//      side.
// Without communication the neighbour index wraps locally and the wrapped
// value is multiplied by bc2.
//
// NOTE(review): the signature line (listing line 901) and the declaration
// of `index_alt` (listing line 913) are absent from this listing; from the
// call sites this is presumably dn_y_nv(vp, wp) -- confirm.
900 template<typename AFIELD>
902 {
    // boundary factor: differs from 1 only on the process with ipe(1) == 0
903  real_t bc2 = 1.0;
904  if (Communicator::ipe(1) == 0) bc2 = real_t(m_boundary[1]);
905 
    // NOTE(review): Nin2 is never used in this function.
906  int Nin2 = m_Nin / 2;
907 
908  int ex = 0;
909 
    // buf1: data to be sent; buf2: data received
910  real_t *buf1 = (real_t *)chsend_up[1].ptr();
911  real_t *buf2 = (real_t *)chrecv_dn[1].ptr();
912 
914 
    // [is, ns) is this thread's share of the m_Nvol sites
915  int ith, nth, is, ns;
916  set_threadtask(ith, nth, is, ns, m_Nvol);
917 
918 #pragma omp barrier
919 
920  if (do_comm[1] == 1) {
921 #pragma omp master
922  {
923  chrecv_dn[1].start();
924  }
925 
    // pack the iy == m_Ny-1 slice into the send buffer (no scaling here)
926  for (int site = is; site < ns; ++site) {
927  int ix = site % m_Nx;
928  int iy = (site / m_Nx) % m_Ny;
929  int izt = site / (m_Nx * m_Ny);
930  if (iy == m_Ny - 1) {
931  for (int in = 0; in < m_Nin; ++in) {
932  int iw = index_alt.idx(in, m_Nin, site, ex);
933  int ixzt = ix + m_Nx * izt;
934  buf1[in + m_Nin * ixzt] = wp[iw];
935  }
936  }
937  }
938 
    // the send may only start once every thread has finished packing
939 #pragma omp barrier
940 
941 #pragma omp master
942  {
943  chsend_up[1].start();
944  }
945  } // if(do_comm[1] == 1)
946 
    // bulk part; with communication the first y-slice is skipped here
947  for (int site = is; site < ns; ++site) {
948  int ix = site % m_Nx;
949  int iy = (site / m_Nx) % m_Ny;
950  int izt = site / (m_Nx * m_Ny);
951  if ((iy > 0) || (do_comm[1] == 0)) {
952  int iy2 = (iy - 1 + m_Ny) % m_Ny;
953  int nei = ix + m_Nx * (iy2 + m_Ny * izt);
954  real_t bc3 = 1.0;
955  if (iy == 0) bc3 = bc2;
956  for (int in = 0; in < m_Nin; ++in) {
957  int iv = index_alt.idx(in, m_Nin, site, ex);
958  int iw = index_alt.idx(in, m_Nin, nei, ex);
959  vp[iv] = bc3 * wp[iw];
960  }
961  }
962  }
963 
964  if (do_comm[1] == 1) {
    // NOTE(review): chsend_up[1].wait() is issued here AND again at the end
    // of this branch; the sibling kernels in this file wait on the send
    // only once, after the unpack loop. One of the two calls looks
    // redundant -- confirm and remove the first one.
965 #pragma omp master
966  {
967  chsend_up[1].wait();
968  chrecv_dn[1].wait();
969  }
970 
    // the received data must be complete before any thread reads buf2
971 #pragma omp barrier
972 
    // fill the first y-slice from the receive buffer, scaled by bc2
973  for (int site = is; site < ns; ++site) {
974  int ix = site % m_Nx;
975  int iy = (site / m_Nx) % m_Ny;
976  int izt = site / (m_Nx * m_Ny);
977  int ixzt = ix + m_Nx * izt;
978  if (iy == 0) {
979  for (int in = 0; in < m_Nin; ++in) {
980  int iv = index_alt.idx(in, m_Nin, site, ex);
981  vp[iv] = bc2 * buf2[in + m_Nin * ixzt];
982  }
983  }
984  }
985 
    // second wait on the same send channel (see the note above)
986 #pragma omp master
987  {
988  chsend_up[1].wait();
989  }
990  } // if(do_comm[1] == 1)
991 
992 #pragma omp barrier
993 }
994 
995 
996 //====================================================================
// z-direction "up" shift on SIMD blocks: the block written to vp at
// (ixy, iz, it) is the block of wp at (ixy, iz+1, it).
//
// Phases when do_comm[2] == 1:
//   1. master posts the receive; all threads pack the iz == 0 slice
//      (scaled by bc2) into the chsend_dn[2] buffer; master starts the send;
//   2. the bulk (iz < m_Nz-1) is copied while communication is in flight;
//   3. master waits for the receive, then the iz == m_Nz-1 slice is filled
//      from the chrecv_up[2] buffer; master finally waits for the send.
// Without communication the neighbour index wraps locally and the wrapped
// block is multiplied by bc2.
//
// NOTE(review): the signature line (listing line 998) is absent from this
// listing; vp and wp are evidently the output/input pointers -- confirm.
// NOTE(review): sites run over m_Nstv = m_Nvol / VLEN but Nxy is built from
// m_Nxv and m_Ny, which matches only if VLEN == VLENX -- confirm.
// NOTE(review): `Vsimd_t vt[Nin2]` has a run-time length (variable-length
// array), which is a compiler extension in C++.
997 template<typename AFIELD>
999 {
    // boundary factor: differs from 1 only on the process with ipe(2) == 0
1000  real_t bc2 = 1.0;
1001  if (Communicator::ipe(2) == 0) bc2 = real_t(m_boundary[2]);
1002 
1003  int Nin2 = m_Nin / 2;
1004 
    // buf1: data to be sent; buf2: data received
1005  real_t *buf1 = (real_t *)chsend_dn[2].ptr();
1006  real_t *buf2 = (real_t *)chrecv_up[2].ptr();
1007 
    // [is, ns) is this thread's share of the m_Nstv blocked sites
1008  int ith, nth, is, ns;
1009  set_threadtask(ith, nth, is, ns, m_Nstv);
1010 
1011  int Nxy = m_Nxv * m_Ny;
1012 #pragma omp barrier
1013 
1014  if (do_comm[2] == 1) {
1015 #pragma omp master
1016  {
1017  chrecv_up[2].start();
1018  }
1019 
    // pack the iz == 0 slice, scaled by bc2, into the send buffer
1020  for (int site = is; site < ns; ++site) {
1021  int ixy = site % Nxy;
1022  int iz = (site / Nxy) % m_Nz;
1023  int it = site / (Nxy * m_Nz);
1024  if (iz == 0) {
1025  int iv = VLEN * Nin2 * site;
1026  int ibf = VLEN * Nin2 * (ixy + Nxy * it);
1027  Vsimd_t vt[Nin2];
1028  load_vec(vt, &wp[iv], Nin2);
1029  scal_vec(vt, bc2, Nin2);
1030  save_vec(&buf1[ibf], vt, Nin2);
1031  }
1032  }
1033 
    // the send may only start once every thread has finished packing
1034 #pragma omp barrier
1035 
1036 #pragma omp master
1037  {
1038  chsend_dn[2].start();
1039  }
1040  } // if(do_comm[2] == 1)
1041 
    // bulk part; with communication the last z-slice is skipped here
1042  for (int site = is; site < ns; ++site) {
1043  int ixy = site % Nxy;
1044  int iz = (site / Nxy) % m_Nz;
1045  int it = site / (Nxy * m_Nz);
1046  int iv = VLEN * Nin2 * site;
1047  if ((iz < m_Nz - 1) || (do_comm[2] == 0)) {
1048  int izn = (iz + 1) % m_Nz;
1049  int nei = VLEN * Nin2 * (ixy + Nxy * (izn + m_Nz * it));
1050  Vsimd_t vt[Nin2];
1051  load_vec(vt, &wp[nei], Nin2);
1052  if (iz == m_Nz - 1) scal_vec(vt, bc2, Nin2);
1053  save_vec(&vp[iv], vt, Nin2);
1054  }
1055  }
1056 
1057  if (do_comm[2] == 1) {
1058 #pragma omp master
1059  {
1060  chrecv_up[2].wait();
1061  }
1062 
    // the received data must be complete before any thread reads buf2
1063 #pragma omp barrier
1064 
    // fill the last z-slice from the receive buffer
1065  for (int site = is; site < ns; ++site) {
1066  int ixy = site % Nxy;
1067  int iz = (site / Nxy) % m_Nz;
1068  int it = site / (Nxy * m_Nz);
1069  if (iz == m_Nz - 1) {
1070  int iv = VLEN * Nin2 * site;
1071  int ibf = VLEN * Nin2 * (ixy + Nxy * it);
1072  Vsimd_t vt[Nin2];
1073  load_vec(vt, &buf2[ibf], Nin2);
1074  save_vec(&vp[iv], vt, Nin2);
1075  }
1076  }
1077 #pragma omp master
1078  {
1079  chsend_dn[2].wait();
1080  }
1081  } // if(do_comm[2] == 1)
1082 
1083 #pragma omp barrier
1084 }
1085 
1086 
1087 //====================================================================
// z-direction "up" shift, one site at a time: the value written to vp at
// (ixy, iz, it) is the value of wp at (ixy, iz+1, it).
//
// Phases when do_comm[2] == 1:
//   1. master posts the receive; all threads pack the iz == 0 slice
//      (scaled by bc2) into the chsend_dn[2] buffer; master starts the send;
//   2. the bulk (iz < m_Nz-1) is copied while communication is in flight;
//   3. master waits for the receive, then the iz == m_Nz-1 slice is filled
//      from the chrecv_up[2] buffer; master finally waits for the send.
// Without communication the neighbour index wraps locally and the wrapped
// value is multiplied by bc2.
//
// NOTE(review): the signature line (listing line 1089) and the declaration
// of `index_alt` (listing line 1099) are absent from this listing; from the
// call sites this is presumably up_z_nv(vp, wp) -- confirm.
1088 template<typename AFIELD>
1090 {
    // boundary factor: differs from 1 only on the process with ipe(2) == 0
1091  real_t bc2 = 1.0;
1092  if (Communicator::ipe(2) == 0) bc2 = real_t(m_boundary[2]);
1093 
1094  int ex = 0;
1095 
    // buf1: data to be sent; buf2: data received
1096  real_t *buf1 = (real_t *)chsend_dn[2].ptr();
1097  real_t *buf2 = (real_t *)chrecv_up[2].ptr();
1098 
1100 
    // [is, ns) is this thread's share of the m_Nvol sites
1101  int ith, nth, is, ns;
1102  set_threadtask(ith, nth, is, ns, m_Nvol);
1103 
1104  int Nxy = m_Nx * m_Ny;
1105 
1106 #pragma omp barrier
1107 
1108  if (do_comm[2] == 1) {
1109 #pragma omp master
1110  {
1111  chrecv_up[2].start();
1112  }
1113 
    // pack the iz == 0 slice, scaled by bc2, into the send buffer
1114  for (int site = is; site < ns; ++site) {
1115  int ixy = site % Nxy;
1116  int iz = (site / Nxy) % m_Nz;
1117  int it = site / (Nxy * m_Nz);
1118  if (iz == 0) {
1119  for (int in = 0; in < m_Nin; ++in) {
1120  int iw = index_alt.idx(in, m_Nin, site, ex);
1121  int ixyt = ixy + Nxy * it;
1122  buf1[in + m_Nin * ixyt] = bc2 * wp[iw];
1123  }
1124  }
1125  }
1126 
    // the send may only start once every thread has finished packing
1127 #pragma omp barrier
1128 
1129 #pragma omp master
1130  {
1131  chsend_dn[2].start();
1132  }
1133  } // if(do_comm[2] == 1)
1134 
    // bulk part; with communication the last z-slice is skipped here
1135  for (int site = is; site < ns; ++site) {
1136  int ixy = site % Nxy;
1137  int iz = (site / Nxy) % m_Nz;
1138  int it = site / (Nxy * m_Nz);
1139  if ((iz < m_Nz - 1) || (do_comm[2] == 0)) {
1140  int iz2 = (iz + 1) % m_Nz;
1141  int nei = ixy + Nxy * (iz2 + m_Nz * it);
1142  real_t bc3 = 1.0;
1143  if (iz == m_Nz - 1) bc3 = bc2;
1144  for (int in = 0; in < m_Nin; ++in) {
1145  int iv = index_alt.idx(in, m_Nin, site, ex);
1146  int iw = index_alt.idx(in, m_Nin, nei, ex);
1147  vp[iv] = bc3 * wp[iw];
1148  }
1149  }
1150  }
1151 
1152  if (do_comm[2] == 1) {
1153 #pragma omp master
1154  {
1155  chrecv_up[2].wait();
1156  }
1157 
    // the received data must be complete before any thread reads buf2
1158 #pragma omp barrier
1159 
    // fill the last z-slice from the receive buffer
1160  for (int site = is; site < ns; ++site) {
1161  int ixy = site % Nxy;
1162  int iz = (site / Nxy) % m_Nz;
1163  int it = site / (Nxy * m_Nz);
1164  if (iz == m_Nz - 1) {
1165  for (int in = 0; in < m_Nin; ++in) {
1166  int iv = index_alt.idx(in, m_Nin, site, ex);
1167  int ixyt = ixy + Nxy * it;
1168  vp[iv] = buf2[in + m_Nin * ixyt];
1169  }
1170  }
1171  }
1172 #pragma omp master
1173  {
1174  chsend_dn[2].wait();
1175  }
1176  } // if(do_comm[2] == 1)
1177 
1178 #pragma omp barrier
1179 }
1180 
1181 
1182 //====================================================================
// z-direction "down" shift on SIMD blocks: the block written to vp at
// (ixy, iz, it) is the block of wp at (ixy, iz-1, it).
//
// Phases when do_comm[2] == 1:
//   1. master posts the receive; all threads pack the iz == m_Nz-1 slice
//      (unscaled) into the chsend_up[2] buffer; master starts the send;
//   2. the bulk (iz > 0) is copied while communication is in flight;
//   3. master waits for the receive, then the iz == 0 slice is filled from
//      the chrecv_dn[2] buffer and multiplied by bc2 on the receiving
//      side; master finally waits for the send.
// Without communication the neighbour index wraps locally and the wrapped
// block is multiplied by bc2.
//
// NOTE(review): the signature line (listing line 1184) is absent from this
// listing; vp and wp are evidently the output/input pointers -- confirm.
// NOTE(review): sites run over m_Nstv = m_Nvol / VLEN but Nxy is built from
// m_Nxv and m_Ny, which matches only if VLEN == VLENX -- confirm.
// NOTE(review): `Vsimd_t vt[Nin2]` has a run-time length (variable-length
// array), which is a compiler extension in C++.
1183 template<typename AFIELD>
1185 {
    // boundary factor: differs from 1 only on the process with ipe(2) == 0
1186  real_t bc2 = 1.0;
1187  if (Communicator::ipe(2) == 0) bc2 = real_t(m_boundary[2]);
1188 
1189  int Nin2 = m_Nin / 2;
1190 
    // buf1: data to be sent; buf2: data received
1191  real_t *buf1 = (real_t *)chsend_up[2].ptr();
1192  real_t *buf2 = (real_t *)chrecv_dn[2].ptr();
1193 
    // [is, ns) is this thread's share of the m_Nstv blocked sites
1194  int ith, nth, is, ns;
1195  set_threadtask(ith, nth, is, ns, m_Nstv);
1196 
1197  int Nxy = m_Nxv * m_Ny;
1198 #pragma omp barrier
1199 
1200  if (do_comm[2] == 1) {
1201 #pragma omp master
1202  {
1203  chrecv_dn[2].start();
1204  }
    // pack the iz == m_Nz-1 slice into the send buffer (no scaling here)
1205  for (int site = is; site < ns; ++site) {
1206  int ixy = site % Nxy;
1207  int iz = (site / Nxy) % m_Nz;
1208  int it = site / (Nxy * m_Nz);
1209  if (iz == m_Nz - 1) {
1210  int iv = VLEN * Nin2 * site;
1211  int ibf = VLEN * Nin2 * (ixy + Nxy * it);
1212  Vsimd_t vt[Nin2];
1213  load_vec(vt, &wp[iv], Nin2);
1214  save_vec(&buf1[ibf], vt, Nin2);
1215  }
1216  }
1217 
    // the send may only start once every thread has finished packing
1218 #pragma omp barrier
1219 
1220 #pragma omp master
1221  {
1222  chsend_up[2].start();
1223  }
1224  } // if(do_comm[2] == 1)
1225 
    // bulk part; with communication the first z-slice is skipped here
1226  for (int site = is; site < ns; ++site) {
1227  int ixy = site % Nxy;
1228  int iz = (site / Nxy) % m_Nz;
1229  int it = site / (Nxy * m_Nz);
1230  int iv = VLEN * Nin2 * site;
1231  if ((iz > 0) || (do_comm[2] == 0)) {
1232  int izn = (iz - 1 + m_Nz) % m_Nz;
1233  int nei = VLEN * Nin2 * (ixy + Nxy * (izn + m_Nz * it));
1234  Vsimd_t vt[Nin2];
1235  load_vec(vt, &wp[nei], Nin2);
1236  if (iz == 0) scal_vec(vt, bc2, Nin2);
1237  save_vec(&vp[iv], vt, Nin2);
1238  }
1239  }
1240 
1241  if (do_comm[2] == 1) {
1242 #pragma omp master
1243  {
1244  chrecv_dn[2].wait();
1245  }
1246 
    // the received data must be complete before any thread reads buf2
1247 #pragma omp barrier
1248 
    // fill the first z-slice from the receive buffer, scaled by bc2
1249  for (int site = is; site < ns; ++site) {
1250  int ixy = site % Nxy;
1251  int iz = (site / Nxy) % m_Nz;
1252  int it = site / (Nxy * m_Nz);
1253  if (iz == 0) {
1254  int iv = VLEN * Nin2 * site;
1255  int ibf = VLEN * Nin2 * (ixy + Nxy * it);
1256  Vsimd_t vt[Nin2];
1257  load_vec(vt, &buf2[ibf], Nin2);
1258  scal_vec(vt, bc2, Nin2);
1259  save_vec(&vp[iv], vt, Nin2);
1260  }
1261  }
1262 #pragma omp master
1263  {
1264  chsend_up[2].wait();
1265  }
1266  } // if(do_comm[2] == 1)
1267 
1268 #pragma omp barrier
1269 }
1270 
1271 
1272 //====================================================================
// z-direction "down" shift, one site at a time: the value written to vp at
// (ixy, iz, it) is the value of wp at (ixy, iz-1, it).
//
// Phases when do_comm[2] == 1:
//   1. master posts the receive; all threads pack the iz == m_Nz-1 slice
//      (unscaled) into the chsend_up[2] buffer; master starts the send;
//   2. the bulk (iz > 0) is copied while communication is in flight;
//   3. master waits for the receive, then the iz == 0 slice is filled from
//      the chrecv_dn[2] buffer and multiplied by bc2 on the receiving
//      side; master finally waits for the send.
// Without communication the neighbour index wraps locally and the wrapped
// value is multiplied by bc2.
//
// NOTE(review): the signature line (listing line 1274) and the declaration
// of `index_alt` (listing line 1286) are absent from this listing; from the
// call sites this is presumably dn_z_nv(vp, wp) -- confirm.
1273 template<typename AFIELD>
1275 {
    // boundary factor: differs from 1 only on the process with ipe(2) == 0
1276  real_t bc2 = 1.0;
1277  if (Communicator::ipe(2) == 0) bc2 = real_t(m_boundary[2]);
1278 
1279 
1280 
1281  int ex = 0;
1282 
    // buf1: data to be sent; buf2: data received
1283  real_t *buf1 = (real_t *)chsend_up[2].ptr();
1284  real_t *buf2 = (real_t *)chrecv_dn[2].ptr();
1285 
1287 
    // [is, ns) is this thread's share of the m_Nvol sites
1288  int ith, nth, is, ns;
1289  set_threadtask(ith, nth, is, ns, m_Nvol);
1290 
1291  int Nxy = m_Nx * m_Ny;
1292 
1293 #pragma omp barrier
1294 
1295  if (do_comm[2] == 1) {
1296 #pragma omp master
1297  {
1298  chrecv_dn[2].start();
1299  }
1300 
    // pack the iz == m_Nz-1 slice into the send buffer (no scaling here)
1301  for (int site = is; site < ns; ++site) {
1302  int ixy = site % Nxy;
1303  int iz = (site / Nxy) % m_Nz;
1304  int it = site / (Nxy * m_Nz);
1305  if (iz == m_Nz - 1) {
1306  for (int in = 0; in < m_Nin; ++in) {
1307  int iw = index_alt.idx(in, m_Nin, site, ex);
1308  int ixyt = ixy + Nxy * it;
1309  buf1[in + m_Nin * ixyt] = wp[iw];
1310  }
1311  }
1312  }
1313 
    // the send may only start once every thread has finished packing
1314 #pragma omp barrier
1315 
1316 #pragma omp master
1317  {
1318  chsend_up[2].start();
1319  }
1320  } // if(do_comm[2] == 1)
1321 
    // bulk part; with communication the first z-slice is skipped here
1322  for (int site = is; site < ns; ++site) {
1323  int ixy = site % Nxy;
1324  int iz = (site / Nxy) % m_Nz;
1325  int it = site / (Nxy * m_Nz);
1326  if ((iz > 0) || (do_comm[2] == 0)) {
1327  int iz2 = (iz - 1 + m_Nz) % m_Nz;
1328  int nei = ixy + Nxy * (iz2 + m_Nz * it);
1329  real_t bc3 = 1.0;
1330  if (iz == 0) bc3 = bc2;
1331  for (int in = 0; in < m_Nin; ++in) {
1332  int iv = index_alt.idx(in, m_Nin, site, ex);
1333  int iw = index_alt.idx(in, m_Nin, nei, ex);
1334  vp[iv] = bc3 * wp[iw];
1335  }
1336  }
1337  }
1338 
1339  if (do_comm[2] == 1) {
1340 #pragma omp master
1341  {
1342  chrecv_dn[2].wait();
1343  }
1344 
    // the received data must be complete before any thread reads buf2
1345 #pragma omp barrier
1346 
    // fill the first z-slice from the receive buffer, scaled by bc2
1347  for (int site = is; site < ns; ++site) {
1348  int ixy = site % Nxy;
1349  int iz = (site / Nxy) % m_Nz;
1350  int it = site / (Nxy * m_Nz);
1351  if (iz == 0) {
1352  for (int in = 0; in < m_Nin; ++in) {
1353  int iv = index_alt.idx(in, m_Nin, site, ex);
1354  int ixyt = ixy + Nxy * it;
1355  vp[iv] = bc2 * buf2[in + m_Nin * ixyt];
1356  }
1357  }
1358  }
1359 #pragma omp master
1360  {
1361  chsend_up[2].wait();
1362  }
1363  } // if(do_comm[2] == 1)
1364 
1365 #pragma omp barrier
1366 }
1367 
1368 
1369 //====================================================================
1370 template<typename AFIELD>
1372 {
1373  real_t bc2 = 1.0;
1374  if (Communicator::ipe(3) == 0) bc2 = real_t(m_boundary[3]);
1375 
1376  int Nin2 = m_Nin / 2;
1377 
1378  real_t *buf1 = (real_t *)chsend_dn[3].ptr();
1379  real_t *buf2 = (real_t *)chrecv_up[3].ptr();
1380 
1381  int ith, nth, is, ns;
1382  set_threadtask(ith, nth, is, ns, m_Nstv);
1383 
1384  int Nxyz = m_Nxv * m_Ny * m_Nz;
1385 
1386 #pragma omp barrier
1387 
1388  if (do_comm[3] == 1) {
1389 #pragma omp master
1390  {
1391  chrecv_up[3].start();
1392  }
1393 
1394  for (int site = is; site < ns; ++site) {
1395  int ixyz = site % Nxyz;
1396  int it = site / Nxyz;
1397  if (it == 0) {
1398  int iv = VLEN * Nin2 * site;
1399  int ibf = VLEN * Nin2 * ixyz;
1400  Vsimd_t vt[Nin2];
1401  load_vec(vt, &wp[iv], Nin2);
1402  scal_vec(vt, bc2, Nin2);
1403  save_vec(&buf1[ibf], vt, Nin2);
1404  }
1405  }
1406 
1407 #pragma omp barrier
1408 
1409 #pragma omp master
1410  {
1411  chsend_dn[3].start();
1412  }
1413  } // if(do_comm[3] == 1)
1414 
1415  for (int site = is; site < ns; ++site) {
1416  int ixyz = site % Nxyz;
1417  int it = site / Nxyz;
1418  int iv = VLEN * Nin2 * site;
1419  if ((it < m_Nt - 1) || (do_comm[3] == 0)) {
1420  int itn = (it + 1) % m_Nt;
1421  int nei = VLEN * Nin2 * (ixyz + Nxyz * itn);
1422  Vsimd_t vt[Nin2];
1423  load_vec(vt, &wp[nei], Nin2);
1424  if (it == m_Nt - 1) scal_vec(vt, bc2, Nin2);
1425  save_vec(&vp[iv], vt, Nin2);
1426  }
1427  }
1428 
1429  if (do_comm[3] == 1) {
1430 #pragma omp master
1431  {
1432  chrecv_up[3].wait();
1433  }
1434 
1435 #pragma omp barrier
1436 
1437  for (int site = is; site < ns; ++site) {
1438  int ixyz = site % Nxyz;
1439  int it = site / Nxyz;
1440  if (it == m_Nt - 1) {
1441  int iv = VLEN * Nin2 * site;
1442  int ibf = VLEN * Nin2 * ixyz;
1443  Vsimd_t vt[Nin2];
1444  load_vec(vt, &buf2[ibf], Nin2);
1445  save_vec(&vp[iv], vt, Nin2);
1446  }
1447  }
1448 #pragma omp master
1449  {
1450  chsend_dn[3].wait();
1451  }
1452  } // if(do_comm[3] == 1)
1453 
1454 #pragma omp barrier
1455 }
1456 
1457 
1458 //====================================================================
1459 template<typename AFIELD>
1461 {
1462  real_t bc2 = 1.0;
1463  if (Communicator::ipe(3) == 0) bc2 = real_t(m_boundary[3]);
1464 
1465  int ex = 0;
1466 
1467  real_t *buf1 = (real_t *)chsend_dn[3].ptr();
1468  real_t *buf2 = (real_t *)chrecv_up[3].ptr();
1469 
1471 
1472  int ith, nth, is, ns;
1473  set_threadtask(ith, nth, is, ns, m_Nvol);
1474 
1475  int Nxyz = m_Nx * m_Ny * m_Nz;
1476 
1477 #pragma omp barrier
1478 
1479  if (do_comm[3] == 1) {
1480 #pragma omp master
1481  {
1482  chrecv_up[3].start();
1483  }
1484  for (int site = is; site < ns; ++site) {
1485  int ixyz = site % Nxyz;
1486  int it = site / Nxyz;
1487  if (it == 0) {
1488  for (int in = 0; in < m_Nin; ++in) {
1489  int iw = index_alt.idx(in, m_Nin, site, ex);
1490  buf1[in + m_Nin * ixyz] = bc2 * wp[iw];
1491  }
1492  }
1493  }
1494 
1495 #pragma omp barrier
1496 
1497 #pragma omp master
1498  {
1499  chsend_dn[3].start();
1500  }
1501  } // if(do_comm[3] == 1)
1502 
1503 
1504  for (int site = is; site < ns; ++site) {
1505  int ixyz = site % Nxyz;
1506  int it = site / Nxyz;
1507  if ((it < m_Nt - 1) || (do_comm[3] == 0)) {
1508  int it2 = (it + 1) % m_Nt;
1509  int nei = ixyz + Nxyz * it2;
1510  real_t bc3 = 1.0;
1511  if (it == m_Nt - 1) bc3 = bc2;
1512  for (int in = 0; in < m_Nin; ++in) {
1513  int iv = index_alt.idx(in, m_Nin, site, ex);
1514  int iw = index_alt.idx(in, m_Nin, nei, ex);
1515  vp[iv] = bc3 * wp[iw];
1516  }
1517  }
1518  }
1519 
1520  if (do_comm[3] == 1) {
1521 #pragma omp master
1522  {
1523  chrecv_up[3].wait();
1524  }
1525 
1526 #pragma omp barrier
1527 
1528  for (int site = is; site < ns; ++site) {
1529  int ixyz = site % Nxyz;
1530  int it = site / Nxyz;
1531  if (it == m_Nt - 1) {
1532  for (int in = 0; in < m_Nin; ++in) {
1533  int iv = index_alt.idx(in, m_Nin, site, ex);
1534  vp[iv] = buf2[in + m_Nin * ixyz];
1535  }
1536  }
1537  }
1538 
1539 #pragma omp master
1540  {
1541  chsend_dn[3].wait();
1542  }
1543  } // if(do_comm[3] == 1)
1544 
1545 #pragma omp barrier
1546 }
1547 
1548 
1549 //====================================================================
1550 template<typename AFIELD>
1552 {
1553  real_t bc2 = 1.0;
1554  if (Communicator::ipe(3) == 0) bc2 = real_t(m_boundary[3]);
1555 
1556  int Nin2 = m_Nin / 2;
1557 
1558  real_t *buf1 = (real_t *)chsend_up[3].ptr();
1559  real_t *buf2 = (real_t *)chrecv_dn[3].ptr();
1560 
1561  int ith, nth, is, ns;
1562  set_threadtask(ith, nth, is, ns, m_Nstv);
1563 
1564  int Nxyz = m_Nxv * m_Ny * m_Nz;
1565 #pragma omp barrier
1566 
1567  if (do_comm[3] == 1) {
1568 #pragma omp master
1569  {
1570  chrecv_dn[3].start();
1571  }
1572  for (int site = is; site < ns; ++site) {
1573  int ixyz = site % Nxyz;
1574  int it = site / Nxyz;
1575  if (it == m_Nt - 1) {
1576  int iv = VLEN * Nin2 * site;
1577  int ibf = VLEN * Nin2 * ixyz;
1578  Vsimd_t vt[Nin2];
1579  load_vec(vt, &wp[iv], Nin2);
1580  save_vec(&buf1[ibf], vt, Nin2);
1581  }
1582  }
1583 
1584 #pragma omp barrier
1585 
1586 #pragma omp master
1587  {
1588  chsend_up[3].start();
1589  }
1590  } // if(do_comm[3] == 1)
1591 
1592  for (int site = is; site < ns; ++site) {
1593  int ixyz = site % Nxyz;
1594  int it = site / Nxyz;
1595  int iv = VLEN * Nin2 * site;
1596  if ((it > 0) || (do_comm[3] == 0)) {
1597  int itn = (it - 1 + m_Nt) % m_Nt;
1598  int nei = VLEN * Nin2 * (ixyz + Nxyz * itn);
1599  Vsimd_t vt[Nin2];
1600  load_vec(vt, &wp[nei], Nin2);
1601  if (it == 0) scal_vec(vt, bc2, Nin2);
1602  save_vec(&vp[iv], vt, Nin2);
1603  }
1604  }
1605 
1606  if (do_comm[3] == 1) {
1607 #pragma omp master
1608  {
1609  chrecv_dn[3].wait();
1610  }
1611 
1612 #pragma omp barrier
1613 
1614  for (int site = is; site < ns; ++site) {
1615  int ixyz = site % Nxyz;
1616  int it = site / Nxyz;
1617  if (it == 0) {
1618  int iv = VLEN * Nin2 * site;
1619  int ibf = VLEN * Nin2 * ixyz;
1620  Vsimd_t vt[Nin2];
1621  load_vec(vt, &buf2[ibf], Nin2);
1622  scal_vec(vt, bc2, Nin2);
1623  save_vec(&vp[iv], vt, Nin2);
1624  }
1625  }
1626 #pragma omp master
1627  {
1628  chsend_up[3].wait();
1629  }
1630  } // if(do_comm[3] == 1)
1631 
1632 #pragma omp barrier
1633 }
1634 
1635 
1636 //====================================================================
1637 template<typename AFIELD>
1639 {
1640  real_t bc2 = 1.0;
1641  if (Communicator::ipe(3) == 0) bc2 = real_t(m_boundary[3]);
1642 
1643  int ex = 0;
1644 
1645  real_t *buf1 = (real_t *)chsend_up[3].ptr();
1646  real_t *buf2 = (real_t *)chrecv_dn[3].ptr();
1647 
1649 
1650  int ith, nth, is, ns;
1651  set_threadtask(ith, nth, is, ns, m_Nvol);
1652 
1653  int Nxyz = m_Nx * m_Ny * m_Nz;
1654 
1655 #pragma omp barrier
1656 
1657  if (do_comm[3] == 1) {
1658 #pragma omp master
1659  {
1660  chrecv_dn[3].start();
1661  }
1662 
1663  for (int site = is; site < ns; ++site) {
1664  int ixyz = site % Nxyz;
1665  int it = site / Nxyz;
1666  if (it == m_Nt - 1) {
1667  for (int in = 0; in < m_Nin; ++in) {
1668  int iw = index_alt.idx(in, m_Nin, site, ex);
1669  buf1[in + m_Nin * ixyz] = wp[iw];
1670  }
1671  }
1672  }
1673 
1674 #pragma omp barrier
1675 
1676 #pragma omp master
1677  {
1678  chsend_up[3].start();
1679  }
1680  } // if(do_comm[3] == 1)
1681 
1682  for (int site = is; site < ns; ++site) {
1683  int ixyz = site % Nxyz;
1684  int it = site / Nxyz;
1685  if ((it > 0) || (do_comm[3] == 0)) {
1686  int it2 = (it - 1 + m_Nt) % m_Nt;
1687  int nei = ixyz + Nxyz * it2;
1688  real_t bc3 = 1.0;
1689  if (it == 0) bc3 = bc2;
1690  for (int in = 0; in < m_Nin; ++in) {
1691  int iv = index_alt.idx(in, m_Nin, site, ex);
1692  int iw = index_alt.idx(in, m_Nin, nei, ex);
1693  vp[iv] = bc3 * wp[iw];
1694  }
1695  }
1696  }
1697 
1698  if (do_comm[3] == 1) {
1699 #pragma omp master
1700  {
1701  chrecv_dn[3].wait();
1702  }
1703 
1704 #pragma omp barrier
1705 
1706  for (int site = is; site < ns; ++site) {
1707  int ixyz = site % Nxyz;
1708  int it = site / Nxyz;
1709  if (it == 0) {
1710  for (int in = 0; in < m_Nin; ++in) {
1711  int iv = index_alt.idx(in, m_Nin, site, ex);
1712  vp[iv] = bc2 * buf2[in + m_Nin * ixyz];
1713  }
1714  }
1715  }
1716 #pragma omp master
1717  {
1718  chsend_up[3].wait();
1719  }
1720  } // if(do_comm[3] == 1)
1721 
1722 #pragma omp barrier
1723 }
1724 
1725 
1726 //============================================================END=====
ShiftAField_lex::setup_channels
void setup_channels()
setup channels for communication.
Definition: shiftAField_lex-tmpl.h:99
CommonParameters::Ny
static int Ny()
Definition: commonParameters.h:106
CommonParameters::Nz
static int Nz()
Definition: commonParameters.h:107
ShiftAField_lex::tidyup
void tidyup()
Definition: shiftAField_lex-tmpl.h:92
ShiftAField_lex::up_z_nv
void up_z_nv(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:1089
ShiftAField_lex::backward
void backward(AFIELD &, const AFIELD &, const int mu)
Definition: shiftAField_lex-tmpl.h:132
CommonParameters::Ndim
static int Ndim()
Definition: commonParameters.h:117
AIndex_lex
Definition: aindex_lex_base.h:17
ShiftAField_lex::dn_z
void dn_z(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:1184
ShiftAField_lex::up_y
void up_y(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:630
VLEN
#define VLEN
Definition: bridgeQXS_Clover_coarse_double.cpp:12
Field::nex
int nex() const
Definition: field.h:128
Field::check_size
bool check_size(const int nin, const int nvol, const int nex) const
checking size parameters. [23 May 2016 H.Matsufuru]
Definition: field.h:135
Vsimd_t
Definition: vsimd_double-inc.h:13
ShiftAField_lex< Field >::real_t
Field ::real_t real_t
Definition: shiftAField_lex.h:35
ShiftAField_lex
Methods to shift a field in the lexical site index.
Definition: shiftAField_lex.h:33
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
ShiftAField_lex::init
void init(int Nin)
Definition: shiftAField_lex-tmpl.h:16
CommonParameters::Nx
static int Nx()
Definition: commonParameters.h:105
ShiftAField_lex::up_y_nv
void up_y_nv(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:718
ShiftAField_lex::dn_t
void dn_t(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:1551
CommonParameters::Nt
static int Nt()
Definition: commonParameters.h:108
Communicator::npe
static int npe(const int dir)
logical grid extent
Definition: communicator.cpp:112
ShiftAField_lex::dn_x
void dn_x(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:453
ShiftAField_lex::up_t
void up_t(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:1371
ShiftAField_lex::up_x_nv
void up_x_nv(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:383
ShiftAField_lex::dn_x_nv
void dn_x_nv(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:558
ShiftAField_lex::up_t_nv
void up_t_nv(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:1460
VLENY
#define VLENY
Definition: bridgeQXS_Clover_coarse_double.cpp:14
Field::ptr
const double * ptr(const int jin, const int site, const int jex) const
Definition: field.h:153
CommonParameters::Vlevel
static Bridge::VerboseLevel Vlevel()
Definition: commonParameters.h:122
ShiftAField_lex::dn_y
void dn_y(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:811
VLENX
#define VLENX
Definition: bridgeQXS_Clover_coarse_double.cpp:13
ShiftAField_lex::dn_z_nv
void dn_z_nv(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:1274
Communicator::ipe
static int ipe(const int dir)
logical coordinate of current proc.
Definition: communicator.cpp:105
ShiftAField_lex::up_x
void up_x(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:280
ShiftAField_lex::up_z
void up_z(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:998
ShiftAField_lex::forward
void forward(AFIELD &, const AFIELD &, const int mu)
Definition: shiftAField_lex-tmpl.h:206
Bridge::BridgeIO::crucial
void crucial(const char *format,...)
Definition: bridgeIO.cpp:180
Field
Container of Field-type object.
Definition: field.h:46
ShiftAField_lex::dn_t_nv
void dn_t_nv(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:1638
Bridge::BridgeIO::general
void general(const char *format,...)
Definition: bridgeIO.cpp:200
Bridge::vout
BridgeIO vout
Definition: bridgeIO.cpp:512
ShiftAField_lex::dn_y_nv
void dn_y_nv(real_t *, real_t *)
Definition: shiftAField_lex-tmpl.h:901