Bridge++  Ver. 1.2.x
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fopr_Wilson_impl_thread.cpp
Go to the documentation of this file.
1 
14 #include <valarray>
15 using std::valarray;
16 #include <string>
17 using std::string;
18 
19 #include "fopr_Wilson_impl.h"
20 
21 #include "gammaMatrixSet_Dirac.h"
22 #include "gammaMatrixSet_Chiral.h"
23 
24 #include "bridgeIO.h"
25 using Bridge::vout;
26 
27 #include "threadManager_OpenMP.h"
28 
29 
30 #if defined USE_GROUP_SU3
31 #include "fopr_Wilson_impl_SU3.inc"
32 #elif defined USE_GROUP_SU2
33 #include "fopr_Wilson_impl_SU2.inc"
34 #elif defined USE_GROUP_SU_N
35 #include "fopr_Wilson_impl_SU_N.inc"
36 #endif
37 
38 //====================================================================
40 {
42 
43  // The following setup corresponds to unifirm division of volume.
44  if (m_Nthread <= m_Nt) {
46  } else if (m_Nthread <= m_Nz * m_Nt) {
47  m_Ntask_t = m_Nt;
48  } else {
49  vout.general(m_vl, " Too large Nthread: %d\n", m_Nthread);
50  abort();
51  }
53  if (m_Ntask_z * m_Ntask_t != m_Nthread) {
54  vout.general(m_vl, " Nz(%d) and Nt(%d) do not mach Nthread: %d\n",
55  m_Nz, m_Nt, m_Nthread);
56  abort();
57  }
59  m_Mz = m_Nz / m_Ntask_z;
60  m_Mt = m_Nt / m_Ntask_t;
61 
62  // The following setup is not monotonic division, and requires
63  // barrier at the beginning and end of mult (D and gamma5).
64  // [H.Matsufuru 22 Oct 2013]
65  // if(m_Nthread >= 64){
66  // m_Ntask_z = 8;
67  // }else if(m_Nthread >= 16){
68  // m_Ntask_z = 4;
69  // }else if(m_Nthread >= 4){
70  // m_Ntask_z = 2;
71  // }else{
72  // m_Ntask_z = 1;
73  // }
74  // m_Ntask_t = m_Nthread/m_Ntask_z;
75  // m_Ntask = m_Ntask_t * m_Ntask_z;
76  // m_Mz = m_Nz/m_Ntask_z;
77  // m_Mt = m_Nt/m_Ntask_t;
78 
79  vout.general(m_vl, " Nthread = %d\n", m_Nthread);
80  vout.general(m_vl, " Ntask = %d\n", m_Ntask);
81  vout.general(m_vl, " Ntask_z = %d Ntask_t = %d\n", m_Ntask_z, m_Ntask_t);
82  vout.general(m_vl, " Mz = %d Mt = %d\n", m_Mz, m_Mt);
83 
84  // setup of arguments
85  int Nxy = m_Nx * m_Ny;
86  m_arg.resize(m_Ntask);
87  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
88  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
89  int itask = ith_z + m_Ntask_z * ith_t;
90 
91  m_arg[itask].isite = (ith_z * m_Mz + ith_t * (m_Nz * m_Mt)) * Nxy;
92 
93  m_arg[itask].kt0 = 0;
94  m_arg[itask].kt1 = 0;
95  m_arg[itask].kz0 = 0;
96  m_arg[itask].kz1 = 0;
97  if (ith_t == 0) m_arg[itask].kt0 = 1;
98  if (ith_z == 0) m_arg[itask].kz0 = 1;
99  if (ith_t == m_Ntask_t - 1) m_arg[itask].kt1 = 1;
100  if (ith_z == m_Ntask_z - 1) m_arg[itask].kz1 = 1;
101 
102  m_arg[itask].isite_cpx = itask * m_Mz * m_Mt * m_Ny;
103  m_arg[itask].isite_cpy = itask * m_Mz * m_Mt * m_Nx;
104  m_arg[itask].isite_cpz = ith_t * m_Mt * Nxy;
105  m_arg[itask].isite_cpt = ith_z * m_Mz * Nxy;
106  }
107  }
108 
109  // setup for async data transfer
110  int Nc = CommonParameters::Nc();
111  int Nd = CommonParameters::Nd();
112  int Nvcd2 = 2 * Nc * Nd / 2;
113 
114  valarray<int> destid(m_Ntask);
115  valarray<int> offset(m_Ntask);
116  valarray<int> datasize(m_Ntask);
117  valarray<int> offset_up(m_Ntask);
118  valarray<int> offset_lw(m_Ntask);
119  valarray<int> datasize_up(m_Ntask);
120  valarray<int> datasize_lw(m_Ntask);
121 
122  int imu = 0;
123  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
124  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
125  int itask = ith_z + ith_t * m_Ntask_z;
126  int isite_cp = itask * m_Mz * m_Mt * m_Ny;
127  destid[itask] = itask;
128  offset[itask] = sizeof(double) * Nvcd2 * isite_cp;
129  datasize[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Mt * m_Ny;
130  }
131  }
132  m_bw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
133  m_fw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
134  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
135  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
136 
137  imu = 1;
138  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
139  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
140  int itask = ith_z + ith_t * m_Ntask_z;
141  int isite_cp = itask * m_Mz * m_Mt * m_Nx;
142  destid[itask] = itask;
143  offset[itask] = sizeof(double) * Nvcd2 * isite_cp;
144  datasize[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Mt * m_Nx;
145  }
146  }
147  m_bw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
148  m_fw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
149  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
150  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
151 
152  imu = 2;
153  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
154  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
155  int itask = ith_z + m_Ntask_z * ith_t;
156  destid[itask] = -1;
157  offset_up[itask] = 0;
158  offset_lw[itask] = 0;
159  datasize_up[itask] = 0;
160  datasize_lw[itask] = 0;
161  if (ith_z == 0) {
162  destid[itask] = (m_Ntask_z - 1) + ith_t * m_Ntask_z;
163  offset_lw[itask] = sizeof(double) * Nvcd2 * ith_t * m_Mt * m_Nx * m_Ny;
164  datasize_lw[itask] = sizeof(double) * Nvcd2 * m_Mt * m_Nx * m_Ny;
165  }
166  if (ith_z == m_Ntask_z - 1) {
167  destid[itask] = ith_t * m_Ntask_z;
168  offset_up[itask] = sizeof(double) * Nvcd2 * ith_t * m_Mt * m_Nx * m_Ny;
169  datasize_up[itask] = sizeof(double) * Nvcd2 * m_Mt * m_Nx * m_Ny;
170  }
171  }
172  }
173  m_bw_send[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
174  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
175  m_fw_send[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
176  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
177 
178  imu = 3;
179  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
180  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
181  int itask = ith_z + m_Ntask_z * ith_t;
182  destid[itask] = -1;
183  offset_up[itask] = 0;
184  offset_lw[itask] = 0;
185  datasize_up[itask] = 0;
186  datasize_lw[itask] = 0;
187  if (ith_t == 0) {
188  destid[itask] = ith_z + (m_Ntask_t - 1) * m_Ntask_z;
189  offset_lw[itask] = sizeof(double) * Nvcd2 * ith_z * m_Mz * m_Nx * m_Ny;
190  datasize_lw[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Nx * m_Ny;
191  }
192  if (ith_t == m_Ntask_t - 1) {
193  destid[itask] = ith_z;
194  offset_up[itask] = sizeof(double) * Nvcd2 * ith_z * m_Mz * m_Nx * m_Ny;
195  datasize_up[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Nx * m_Ny;
196  }
197  }
198  }
199  m_bw_send[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
200  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
201  m_fw_send[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
202  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
203 }
204 
205 //====================================================================
207  double *v2, double fac, double *v1)
208 {
209  int Nvcd = m_Nvc * m_Nd;
210  int Nvxy = Nvcd * m_Nx * m_Ny;
211 
212  int isite = m_arg[itask].isite;
213  double *w2 = &v2[Nvcd * isite];
214  double *w1 = &v1[Nvcd * isite];
215 
216  for (int it = 0; it < m_Mt; ++it) {
217  for (int iz = 0; iz < m_Mz; ++iz) {
218  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
219  int iv = ivxy + Nvxy * (iz + m_Nz * it);
220  w2[iv] = fac * w2[iv] + w1[iv];
221  }
222  }
223  }
224 
225 }
226 
227 //====================================================================
229  double *v2)
230 {
231  int Nvcd = m_Nvc * m_Nd;
232  int Nvxy = Nvcd * m_Nx * m_Ny;
233 
234  int isite = m_arg[itask].isite;
235  double *w2 = &v2[Nvcd * isite];
236 
237  for (int it = 0; it < m_Mt; ++it) {
238  for (int iz = 0; iz < m_Mz; ++iz) {
239  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
240  int iv = ivxy + Nvxy * (iz + m_Nz * it);
241  w2[iv] = 0.0;
242  }
243  }
244  }
245 
246 }
247 
248 //====================================================================
250  double *vcp1, double *v1)
251 {
252  int Nvc2 = 2 * m_Nvc;
253  int Nvcd = m_Nvc * m_Nd;
254  int Nvcd2 = Nvcd / 2;
255 
256  int id1 = 0;
257  int id2 = m_Nvc;
258  int id3 = m_Nvc * 2;
259  int id4 = m_Nvc * 3;
260 
261  int idir = 0;
262  double bc2 = m_boundary2[idir];
263 
264  int isite = m_arg[itask].isite;
265  int isite_cp = m_arg[itask].isite_cpx;
266 
267  // double* w2 = &vcp1[Nvcd2*isite_cp];
268  double *w2
269  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
270  double *w1 = &v1[Nvcd * isite];
271 
272 
273  int ix = 0;
274 
275  for (int it = 0; it < m_Mt; ++it) {
276  for (int iz = 0; iz < m_Mz; ++iz) {
277  for (int iy = 0; iy < m_Ny; ++iy) {
278  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
279  int is2 = iy + m_Ny * (iz + m_Mz * it);
280  int in = Nvcd * is;
281  int ix1 = Nvc2 * is2;
282  int ix2 = ix1 + m_Nvc;
283 
284  for (int ic = 0; ic < m_Nc; ++ic) {
285  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
286  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
287  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
288  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
289  }
290  }
291  }
292  }
293 
294  m_bw_send[idir]->start_thread(itask);
295 }
296 
297 //====================================================================
299  double *v2, double *vcp2)
300 {
301  int Nvc2 = 2 * m_Nvc;
302  int Nvcd = m_Nvc * m_Nd;
303  int Nvcd2 = Nvcd / 2;
304 
305  int id1 = 0;
306  int id2 = m_Nvc;
307  int id3 = m_Nvc * 2;
308  int id4 = m_Nvc * 3;
309 
310  int idir = 0;
311 
312  double wt1r, wt1i, wt2r, wt2i;
313 
314  int isite = m_arg[itask].isite;
315  int isite_cp = m_arg[itask].isite_cpx;
316 
317  double *w2 = &v2[Nvcd * isite];
318  // double* w1 = &vcp2[Nvcd2*isite_cp];
319  double *w1
320  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
321  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
322 
323  m_bw_recv[idir]->wait_thread(itask);
324 
325  int ix = m_Nx - 1;
326  for (int it = 0; it < m_Mt; ++it) {
327  for (int iz = 0; iz < m_Mz; ++iz) {
328  for (int iy = 0; iy < m_Ny; ++iy) {
329  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
330  int is2 = iy + m_Ny * (iz + m_Mz * it);
331  int iv = Nvcd * is;
332  int ig = m_Ndf * is;
333  int ix1 = Nvc2 * is2;
334  int ix2 = ix1 + m_Nvc;
335 
336  for (int ic = 0; ic < m_Nc; ++ic) {
337  int ic2 = ic * m_Nvc;
338 
339  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
340  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
341  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
342  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
343 
344  w2[2 * ic + id1 + iv] += wt1r;
345  w2[2 * ic + 1 + id1 + iv] += wt1i;
346  w2[2 * ic + id2 + iv] += wt2r;
347  w2[2 * ic + 1 + id2 + iv] += wt2i;
348  w2[2 * ic + id3 + iv] += wt2i;
349  w2[2 * ic + 1 + id3 + iv] += -wt2r;
350  w2[2 * ic + id4 + iv] += wt1i;
351  w2[2 * ic + 1 + id4 + iv] += -wt1r;
352  }
353  }
354  }
355  }
356 
357 }
358 
359 //====================================================================
361  double *v2, double *v1)
362 {
363  int Nvcd = m_Nvc * m_Nd;
364 
365  int id1 = 0;
366  int id2 = m_Nvc;
367  int id3 = m_Nvc * 2;
368  int id4 = m_Nvc * 3;
369 
370  int idir = 0;
371 
372  double vt1[m_Nvc], vt2[m_Nvc];
373  double wt1r, wt1i, wt2r, wt2i;
374 
375  int isite = m_arg[itask].isite;
376 
377  double *w2 = &v2[Nvcd * isite];
378  double *w1 = &v1[Nvcd * isite];
379  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
380 
381  for (int it = 0; it < m_Mt; ++it) {
382  for (int iz = 0; iz < m_Mz; ++iz) {
383  for (int iy = 0; iy < m_Ny; ++iy) {
384  for (int ix = 0; ix < m_Nx - 1; ++ix) {
385  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
386  int iv = Nvcd * is;
387  int in = Nvcd * (is + 1);
388  int ig = m_Ndf * is;
389 
390  for (int ic = 0; ic < m_Nc; ++ic) {
391  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
392  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
393  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
394  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
395  }
396 
397  for (int ic = 0; ic < m_Nc; ++ic) {
398  int ic2 = ic * m_Nvc;
399 
400  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
401  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
402  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
403  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
404 
405  w2[2 * ic + id1 + iv] += wt1r;
406  w2[2 * ic + 1 + id1 + iv] += wt1i;
407  w2[2 * ic + id2 + iv] += wt2r;
408  w2[2 * ic + 1 + id2 + iv] += wt2i;
409  w2[2 * ic + id3 + iv] += wt2i;
410  w2[2 * ic + 1 + id3 + iv] += -wt2r;
411  w2[2 * ic + id4 + iv] += wt1i;
412  w2[2 * ic + 1 + id4 + iv] += -wt1r;
413  }
414  }
415  }
416  }
417  }
418 
419 }
420 
421 //====================================================================
423  double *vcp1, double *v1)
424 {
425  int Nvc2 = 2 * m_Nvc;
426  int Nvcd = m_Nvc * m_Nd;
427  int Nvcd2 = Nvcd / 2;
428 
429  int id1 = 0;
430  int id2 = m_Nvc;
431  int id3 = m_Nvc * 2;
432  int id4 = m_Nvc * 3;
433 
434  int idir = 0;
435 
436  int isite = m_arg[itask].isite;
437  int isite_cp = m_arg[itask].isite_cpx;
438 
439  // double* w2 = &vcp1[Nvcd2*isite_cp];
440  double *w2
441  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
442  double *w1 = &v1[Nvcd * isite];
443  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
444 
445  double vt1[m_Nvc], vt2[m_Nvc];
446 
447  int ix = m_Nx - 1;
448 
449  for (int it = 0; it < m_Mt; ++it) {
450  for (int iz = 0; iz < m_Mz; ++iz) {
451  for (int iy = 0; iy < m_Ny; ++iy) {
452  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
453  int is2 = iy + m_Ny * (iz + m_Mz * it);
454  int in = Nvcd * is;
455  int ig = m_Ndf * is;
456  int ix1 = Nvc2 * is2;
457  int ix2 = ix1 + m_Nvc;
458 
459  for (int ic = 0; ic < m_Nc; ++ic) {
460  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
461  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
462  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
463  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
464  }
465 
466  for (int ic = 0; ic < m_Nc; ++ic) {
467  int icr = 2 * ic;
468  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
469  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
470  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
471  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
472  }
473  }
474  }
475  }
476 
477  m_fw_send[idir]->start_thread(itask);
478 }
479 
480 //====================================================================
482  double *v2, double *vcp2)
483 {
484  int Nvc2 = 2 * m_Nvc;
485  int Nvcd = m_Nvc * m_Nd;
486  int Nvcd2 = Nvcd / 2;
487 
488  int id1 = 0;
489  int id2 = m_Nvc;
490  int id3 = m_Nvc * 2;
491  int id4 = m_Nvc * 3;
492 
493  int idir = 0;
494  double bc2 = m_boundary2[idir];
495 
496  double wt1r, wt1i, wt2r, wt2i;
497 
498  int isite = m_arg[itask].isite;
499  int isite_cp = m_arg[itask].isite_cpx;
500 
501  double *w2 = &v2[Nvcd * isite];
502  // double* w1 = &vcp2[Nvcd2*isite_cp];
503  double *w1
504  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
505 
506  m_fw_recv[idir]->wait_thread(itask);
507 
508  int ix = 0;
509  for (int it = 0; it < m_Mt; ++it) {
510  for (int iz = 0; iz < m_Mz; ++iz) {
511  for (int iy = 0; iy < m_Ny; ++iy) {
512  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
513  int is2 = iy + m_Ny * (iz + m_Mz * it);
514  int iv = Nvcd * is;
515  int ix1 = Nvc2 * is2;
516  int ix2 = ix1 + m_Nvc;
517 
518  for (int ic = 0; ic < m_Nc; ++ic) {
519  int icr = 2 * ic;
520  int ici = 2 * ic + 1;
521  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
522  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
523  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
524  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
525  w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
526  w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
527  w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
528  w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
529  }
530  }
531  }
532  }
533 
534 }
535 
536 //====================================================================
538  double *v2, double *v1)
539 {
540  int Nvcd = m_Nvc * m_Nd;
541 
542  int id1 = 0;
543  int id2 = m_Nvc;
544  int id3 = m_Nvc * 2;
545  int id4 = m_Nvc * 3;
546 
547  int idir = 0;
548 
549  double vt1[m_Nvc], vt2[m_Nvc];
550  double wt1r, wt1i, wt2r, wt2i;
551 
552  int isite = m_arg[itask].isite;
553 
554  double *w2 = &v2[Nvcd * isite];
555  double *w1 = &v1[Nvcd * isite];
556  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
557 
558  for (int it = 0; it < m_Mt; ++it) {
559  for (int iz = 0; iz < m_Mz; ++iz) {
560  for (int iy = 0; iy < m_Ny; ++iy) {
561  for (int ix = 1; ix < m_Nx; ++ix) {
562  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
563  int iv = Nvcd * is;
564  int in = Nvcd * (is - 1);
565  int ig = m_Ndf * (is - 1);
566 
567  for (int ic = 0; ic < m_Nc; ++ic) {
568  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
569  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
570  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
571  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
572  }
573 
574  for (int ic = 0; ic < m_Nc; ++ic) {
575  int ic2 = 2 * ic;
576 
577  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
578  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
579  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
580  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
581 
582  w2[2 * ic + id1 + iv] += wt1r;
583  w2[2 * ic + 1 + id1 + iv] += wt1i;
584  w2[2 * ic + id2 + iv] += wt2r;
585  w2[2 * ic + 1 + id2 + iv] += wt2i;
586  w2[2 * ic + id3 + iv] += -wt2i;
587  w2[2 * ic + 1 + id3 + iv] += +wt2r;
588  w2[2 * ic + id4 + iv] += -wt1i;
589  w2[2 * ic + 1 + id4 + iv] += +wt1r;
590  }
591  }
592  }
593  }
594  }
595 
596 }
597 
598 //====================================================================
600  double *vcp1, double *v1)
601 {
602  int Nvc2 = 2 * m_Nvc;
603  int Nvcd = m_Nvc * m_Nd;
604  int Nvcd2 = Nvcd / 2;
605 
606  int id1 = 0;
607  int id2 = m_Nvc;
608  int id3 = m_Nvc * 2;
609  int id4 = m_Nvc * 3;
610 
611  int isite = m_arg[itask].isite;
612  int isite_cp = m_arg[itask].isite_cpy;
613 
614  int idir = 1;
615  double bc2 = m_boundary2[idir];
616 
617  // double* w2 = &vcp1[Nvcd2*isite_cp];
618  double *w2
619  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
620  double *w1 = &v1[Nvcd * isite];
621 
622 
623  int iy = 0;
624 
625  for (int it = 0; it < m_Mt; ++it) {
626  for (int iz = 0; iz < m_Mz; ++iz) {
627  for (int ix = 0; ix < m_Nx; ++ix) {
628  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
629  int is2 = ix + m_Nx * (iz + m_Mz * it);
630  int in = Nvcd * is;
631  int ix1 = Nvc2 * is2;
632  int ix2 = ix1 + m_Nvc;
633 
634  for (int ic = 0; ic < m_Nc; ++ic) {
635  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
636  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
637  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
638  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
639  }
640  }
641  }
642  }
643 
644  m_bw_send[idir]->start_thread(itask);
645 }
646 
647 //====================================================================
649  double *v2, double *vcp2)
650 {
651  int Nvc2 = 2 * m_Nvc;
652  int Nvcd = m_Nvc * m_Nd;
653  int Nvcd2 = Nvcd / 2;
654 
655  int id1 = 0;
656  int id2 = m_Nvc;
657  int id3 = m_Nvc * 2;
658  int id4 = m_Nvc * 3;
659 
660  int idir = 1;
661 
662  double wt1r, wt1i, wt2r, wt2i;
663 
664  int isite = m_arg[itask].isite;
665  int isite_cp = m_arg[itask].isite_cpy;
666 
667  double *w2 = &v2[Nvcd * isite];
668  // double* w1 = &vcp2[Nvcd2*isite_cp];
669  double *w1
670  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
671  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
672 
673  m_bw_recv[idir]->wait_thread(itask);
674 
675  int iy = m_Ny - 1;
676  for (int it = 0; it < m_Mt; ++it) {
677  for (int iz = 0; iz < m_Mz; ++iz) {
678  for (int ix = 0; ix < m_Nx; ++ix) {
679  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
680  int is2 = ix + m_Nx * (iz + m_Mz * it);
681  int iv = Nvcd * is;
682  int ig = m_Ndf * is;
683  int ix1 = Nvc2 * is2;
684  int ix2 = ix1 + m_Nvc;
685 
686  for (int ic = 0; ic < m_Nc; ++ic) {
687  int ic2 = ic * m_Nvc;
688 
689  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
690  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
691  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
692  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
693 
694  w2[2 * ic + id1 + iv] += wt1r;
695  w2[2 * ic + 1 + id1 + iv] += wt1i;
696  w2[2 * ic + id2 + iv] += wt2r;
697  w2[2 * ic + 1 + id2 + iv] += wt2i;
698  w2[2 * ic + id3 + iv] += -wt2r;
699  w2[2 * ic + 1 + id3 + iv] += -wt2i;
700  w2[2 * ic + id4 + iv] += wt1r;
701  w2[2 * ic + 1 + id4 + iv] += wt1i;
702  }
703  }
704  }
705  }
706 
707 }
708 
709 //====================================================================
711  double *v2, double *v1)
712 {
713  int Nvcd = m_Nvc * m_Nd;
714 
715  int id1 = 0;
716  int id2 = m_Nvc;
717  int id3 = m_Nvc * 2;
718  int id4 = m_Nvc * 3;
719 
720  int idir = 1;
721 
722  double vt1[m_Nvc], vt2[m_Nvc];
723  double wt1r, wt1i, wt2r, wt2i;
724 
725  int isite = m_arg[itask].isite;
726 
727  double *w2 = &v2[Nvcd * isite];
728  double *w1 = &v1[Nvcd * isite];
729  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
730 
731  for (int it = 0; it < m_Mt; ++it) {
732  for (int iz = 0; iz < m_Mz; ++iz) {
733  for (int iy = 0; iy < m_Ny - 1; ++iy) {
734  for (int ix = 0; ix < m_Nx; ++ix) {
735  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
736  int iv = Nvcd * is;
737  int in = Nvcd * (is + m_Nx);
738  int ig = m_Ndf * is;
739 
740  for (int ic = 0; ic < m_Nc; ++ic) {
741  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
742  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
743  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
744  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
745  }
746 
747  for (int ic = 0; ic < m_Nc; ++ic) {
748  int ic2 = ic * m_Nvc;
749 
750  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
751  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
752  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
753  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
754 
755  w2[2 * ic + id1 + iv] += wt1r;
756  w2[2 * ic + 1 + id1 + iv] += wt1i;
757  w2[2 * ic + id2 + iv] += wt2r;
758  w2[2 * ic + 1 + id2 + iv] += wt2i;
759  w2[2 * ic + id3 + iv] += -wt2r;
760  w2[2 * ic + 1 + id3 + iv] += -wt2i;
761  w2[2 * ic + id4 + iv] += wt1r;
762  w2[2 * ic + 1 + id4 + iv] += wt1i;
763  }
764  }
765  }
766  }
767  }
768 
769 }
770 
771 //====================================================================
773  double *vcp1, double *v1)
774 {
775  int Nvc2 = 2 * m_Nvc;
776  int Nvcd = m_Nvc * m_Nd;
777  int Nvcd2 = Nvcd / 2;
778 
779  int id1 = 0;
780  int id2 = m_Nvc;
781  int id3 = m_Nvc * 2;
782  int id4 = m_Nvc * 3;
783 
784  int idir = 1;
785 
786  int isite = m_arg[itask].isite;
787  int isite_cp = m_arg[itask].isite_cpy;
788 
789  // double* w2 = &vcp1[Nvcd2*isite_cp];
790  double *w2
791  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
792 
793  double *w1 = &v1[Nvcd * isite];
794  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
795 
796  double vt1[m_Nvc], vt2[m_Nvc];
797 
798  int iy = m_Ny - 1;
799 
800  for (int it = 0; it < m_Mt; ++it) {
801  for (int iz = 0; iz < m_Mz; ++iz) {
802  for (int ix = 0; ix < m_Nx; ++ix) {
803  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
804  int is2 = ix + m_Nx * (iz + m_Mz * it);
805  int in = Nvcd * is;
806  int ig = m_Ndf * is;
807  int ix1 = Nvc2 * is2;
808  int ix2 = ix1 + m_Nvc;
809 
810  for (int ic = 0; ic < m_Nc; ++ic) {
811  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
812  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
813  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
814  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
815  }
816 
817  for (int ic = 0; ic < m_Nc; ++ic) {
818  int icr = 2 * ic;
819  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
820  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
821  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
822  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
823  }
824  }
825  }
826  }
827 
828  m_fw_send[idir]->start_thread(itask);
829 }
830 
831 //====================================================================
833  double *v2, double *vcp2)
834 {
835  int Nvc2 = 2 * m_Nvc;
836  int Nvcd = m_Nvc * m_Nd;
837  int Nvcd2 = Nvcd / 2;
838 
839  int id1 = 0;
840  int id2 = m_Nvc;
841  int id3 = m_Nvc * 2;
842  int id4 = m_Nvc * 3;
843 
844  int idir = 1;
845  double bc2 = m_boundary2[idir];
846 
847  double wt1r, wt1i, wt2r, wt2i;
848 
849  int isite = m_arg[itask].isite;
850  int isite_cp = m_arg[itask].isite_cpy;
851 
852  double *w2 = &v2[Nvcd * isite];
853  // double* w1 = &vcp2[Nvcd2*isite_cp];
854  double *w1
855  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
856 
857  m_fw_recv[idir]->wait_thread(itask);
858 
859  int iy = 0;
860  for (int it = 0; it < m_Mt; ++it) {
861  for (int iz = 0; iz < m_Mz; ++iz) {
862  for (int ix = 0; ix < m_Nx; ++ix) {
863  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
864  int is2 = ix + m_Nx * (iz + m_Mz * it);
865  int iv = Nvcd * is;
866  int ix1 = Nvc2 * is2;
867  int ix2 = ix1 + m_Nvc;
868 
869  for (int ic = 0; ic < m_Nc; ++ic) {
870  int icr = 2 * ic;
871  int ici = 2 * ic + 1;
872  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
873  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
874  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
875  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
876  w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
877  w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
878  w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
879  w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
880  }
881  }
882  }
883  }
884 
885 }
886 
887 //====================================================================
889  double *v2, double *v1)
890 {
891  int Nvcd = m_Nvc * m_Nd;
892 
893  int id1 = 0;
894  int id2 = m_Nvc;
895  int id3 = m_Nvc * 2;
896  int id4 = m_Nvc * 3;
897 
898  int idir = 1;
899 
900  double vt1[m_Nvc], vt2[m_Nvc];
901  double wt1r, wt1i, wt2r, wt2i;
902 
903  int isite = m_arg[itask].isite;
904 
905  double *w2 = &v2[Nvcd * isite];
906  double *w1 = &v1[Nvcd * isite];
907  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
908 
909  for (int it = 0; it < m_Mt; ++it) {
910  for (int iz = 0; iz < m_Mz; ++iz) {
911  for (int iy = 1; iy < m_Ny; ++iy) {
912  for (int ix = 0; ix < m_Nx; ++ix) {
913  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
914  int iv = Nvcd * is;
915  int in = Nvcd * (is - m_Nx);
916  int ig = m_Ndf * (is - m_Nx);
917 
918  for (int ic = 0; ic < m_Nc; ++ic) {
919  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
920  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
921  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
922  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
923  }
924 
925  for (int ic = 0; ic < m_Nc; ++ic) {
926  int ic2 = 2 * ic;
927  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
928  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
929  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
930  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
931 
932  w2[ic2 + id1 + iv] += wt1r;
933  w2[ic2 + 1 + id1 + iv] += wt1i;
934  w2[ic2 + id2 + iv] += wt2r;
935  w2[ic2 + 1 + id2 + iv] += wt2i;
936  w2[ic2 + id3 + iv] += wt2r;
937  w2[ic2 + 1 + id3 + iv] += wt2i;
938  w2[ic2 + id4 + iv] += -wt1r;
939  w2[ic2 + 1 + id4 + iv] += -wt1i;
940  }
941  }
942  }
943  }
944  }
945 
946 }
947 
948 //====================================================================
950  double *vcp1, double *v1)
951 {
952  int Nvc2 = 2 * m_Nvc;
953  int Nvcd = m_Nvc * m_Nd;
954  int Nvcd2 = Nvcd / 2;
955 
956  int id1 = 0;
957  int id2 = m_Nvc;
958  int id3 = m_Nvc * 2;
959  int id4 = m_Nvc * 3;
960 
961  int isite = m_arg[itask].isite;
962  int isite_cp = m_arg[itask].isite_cpz;
963 
964  int idir = 2;
965  double bc2 = m_boundary2[idir];
966 
967  // double* w2 = &vcp1[Nvcd2*isite_cp];
968  double *w2
969  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
970  double *w1 = &v1[Nvcd * isite];
971 
972  if (m_arg[itask].kz0 == 1) {
973  int Nxy = m_Nx * m_Ny;
974  int iz = 0;
975  for (int it = 0; it < m_Mt; ++it) {
976  for (int ixy = 0; ixy < Nxy; ++ixy) {
977  int is = ixy + Nxy * (iz + m_Nz * it);
978  int is2 = ixy + Nxy * it;
979 
980  int in = Nvcd * is;
981  int ix1 = Nvc2 * is2;
982  int ix2 = ix1 + m_Nvc;
983 
984  for (int ic = 0; ic < m_Nc; ++ic) {
985  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
986  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
987  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
988  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
989  }
990  }
991  }
992  }
993 
994  m_bw_send[idir]->start_thread(itask);
995 }
996 
997 //====================================================================
999  double *v2, double *vcp2)
1000 {
1001  int Nvc2 = 2 * m_Nvc;
1002  int Nvcd = m_Nvc * m_Nd;
1003  int Nvcd2 = Nvcd / 2;
1004 
1005  int id1 = 0;
1006  int id2 = m_Nvc;
1007  int id3 = m_Nvc * 2;
1008  int id4 = m_Nvc * 3;
1009 
1010  int idir = 2;
1011 
1012  double wt1r, wt1i, wt2r, wt2i;
1013 
1014  int isite = m_arg[itask].isite;
1015  int isite_cp = m_arg[itask].isite_cpz;
1016 
1017  double *w2 = &v2[Nvcd * isite];
1018  // double* w1 = &vcp2[Nvcd2*isite_cp];
1019  double *w1
1020  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1021  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1022 
1023  m_bw_recv[idir]->wait_thread(itask);
1024 
1025  if (m_arg[itask].kz1 == 1) {
1026  int Nxy = m_Nx * m_Ny;
1027  int iz = m_Mz - 1;
1028  for (int it = 0; it < m_Mt; ++it) {
1029  for (int ixy = 0; ixy < Nxy; ++ixy) {
1030  int is = ixy + Nxy * (iz + m_Nz * it);
1031  int is2 = ixy + Nxy * it;
1032  int iv = Nvcd * is;
1033  int ig = m_Ndf * is;
1034  int ix1 = Nvc2 * is2;
1035  int ix2 = ix1 + m_Nvc;
1036 
1037  for (int ic = 0; ic < m_Nc; ++ic) {
1038  int ic2 = ic * m_Nvc;
1039 
1040  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1041  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1042  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1043  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1044 
1045  w2[2 * ic + id1 + iv] += wt1r;
1046  w2[2 * ic + 1 + id1 + iv] += wt1i;
1047  w2[2 * ic + id2 + iv] += wt2r;
1048  w2[2 * ic + 1 + id2 + iv] += wt2i;
1049  w2[2 * ic + id3 + iv] += wt1i;
1050  w2[2 * ic + 1 + id3 + iv] += -wt1r;
1051  w2[2 * ic + id4 + iv] += -wt2i;
1052  w2[2 * ic + 1 + id4 + iv] += wt2r;
1053  }
1054  }
1055  }
1056  }
1057 
1058 }
1059 
1060 //====================================================================
1062  double *v2, double *v1)
1063 {
1064  int Nvcd = m_Nvc * m_Nd;
1065 
1066  int id1 = 0;
1067  int id2 = m_Nvc;
1068  int id3 = m_Nvc * 2;
1069  int id4 = m_Nvc * 3;
1070 
1071  int idir = 2;
1072 
1073  double vt1[m_Nvc], vt2[m_Nvc];
1074  double wt1r, wt1i, wt2r, wt2i;
1075 
1076  int isite = m_arg[itask].isite;
1077 
1078  double *w2 = &v2[Nvcd * isite];
1079  double *w1 = &v1[Nvcd * isite];
1080  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1081 
1082  int kz1 = m_arg[itask].kz1;
1083  int Nxy = m_Nx * m_Ny;
1084 
1085  for (int it = 0; it < m_Mt; ++it) {
1086  for (int iz = 0; iz < m_Mz - kz1; ++iz) {
1087  for (int ixy = 0; ixy < Nxy; ++ixy) {
1088  int is = ixy + Nxy * (iz + m_Nz * it);
1089  int iv = Nvcd * is;
1090  int in = Nvcd * (is + Nxy);
1091  int ig = m_Ndf * is;
1092 
1093  for (int ic = 0; ic < m_Nc; ++ic) {
1094  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
1095  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
1096  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
1097  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
1098  }
1099 
1100  for (int ic = 0; ic < m_Nc; ++ic) {
1101  int ic2 = ic * m_Nvc;
1102 
1103  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1104  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1105  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1106  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1107 
1108  w2[2 * ic + id1 + iv] += wt1r;
1109  w2[2 * ic + 1 + id1 + iv] += wt1i;
1110  w2[2 * ic + id2 + iv] += wt2r;
1111  w2[2 * ic + 1 + id2 + iv] += wt2i;
1112  w2[2 * ic + id3 + iv] += wt1i;
1113  w2[2 * ic + 1 + id3 + iv] += -wt1r;
1114  w2[2 * ic + id4 + iv] += -wt2i;
1115  w2[2 * ic + 1 + id4 + iv] += wt2r;
1116  }
1117  }
1118  }
1119  }
1120 
1121 }
1122 
1123 //====================================================================
1125  double *vcp1, double *v1)
1126 {
1127  int Nvc2 = 2 * m_Nvc;
1128  int Nvcd = m_Nvc * m_Nd;
1129  int Nvcd2 = Nvcd / 2;
1130 
1131  int id1 = 0;
1132  int id2 = m_Nvc;
1133  int id3 = m_Nvc * 2;
1134  int id4 = m_Nvc * 3;
1135 
1136  int idir = 2;
1137 
1138  int isite = m_arg[itask].isite;
1139  int isite_cp = m_arg[itask].isite_cpz;
1140 
1141  // double* w2 = &vcp1[Nvcd2*isite_cp];
1142  double *w2
1143  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1144  double *w1 = &v1[Nvcd * isite];
1145  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1146 
1147  double vt1[m_Nvc], vt2[m_Nvc];
1148 
1149  if (m_arg[itask].kz1 == 1) {
1150  int Nxy = m_Nx * m_Ny;
1151  int iz = m_Mz - 1;
1152  for (int it = 0; it < m_Mt; ++it) {
1153  for (int ixy = 0; ixy < Nxy; ++ixy) {
1154  int is = ixy + Nxy * (iz + m_Nz * it);
1155  int is2 = ixy + Nxy * it;
1156  int in = Nvcd * is;
1157  int ig = m_Ndf * is;
1158  int ix1 = Nvc2 * is2;
1159  int ix2 = ix1 + m_Nvc;
1160 
1161  for (int ic = 0; ic < m_Nc; ++ic) {
1162  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1163  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1164  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1165  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1166  }
1167 
1168  for (int ic = 0; ic < m_Nc; ++ic) {
1169  int icr = 2 * ic;
1170  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1171  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1172  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1173  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1174  }
1175  }
1176  }
1177  }
1178 
1179  m_fw_send[idir]->start_thread(itask);
1180 }
1181 
1182 //====================================================================
1184  double *v2, double *vcp2)
1185 {
1186  int Nvc2 = 2 * m_Nvc;
1187  int Nvcd = m_Nvc * m_Nd;
1188  int Nvcd2 = Nvcd / 2;
1189 
1190  int id1 = 0;
1191  int id2 = m_Nvc;
1192  int id3 = m_Nvc * 2;
1193  int id4 = m_Nvc * 3;
1194 
1195  int idir = 2;
1196  double bc2 = m_boundary2[idir];
1197 
1198  double wt1r, wt1i, wt2r, wt2i;
1199 
1200  int isite = m_arg[itask].isite;
1201  int isite_cp = m_arg[itask].isite_cpz;
1202 
1203  double *w2 = &v2[Nvcd * isite];
1204  // double* w1 = &vcp2[Nvcd2*isite_cp];
1205  double *w1
1206  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1207 
1208  m_fw_recv[idir]->wait_thread(itask);
1209 
1210  if (m_arg[itask].kz0 == 1) {
1211  int Nxy = m_Nx * m_Ny;
1212 
1213  int iz = 0;
1214  for (int it = 0; it < m_Mt; ++it) {
1215  for (int ixy = 0; ixy < Nxy; ++ixy) {
1216  int is = ixy + Nxy * (iz + m_Nz * it);
1217  int is2 = ixy + Nxy * it;
1218  int iv = Nvcd * is;
1219  int ix1 = Nvc2 * is2;
1220  int ix2 = ix1 + m_Nvc;
1221 
1222  for (int ic = 0; ic < m_Nc; ++ic) {
1223  int icr = 2 * ic;
1224  int ici = 2 * ic + 1;
1225  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1226  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1227  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1228  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1229  w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1230  w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1231  w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1232  w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
1233  }
1234  }
1235  }
1236  }
1237 
1238 }
1239 
1240 //====================================================================
1242  double *v2, double *v1)
1243 {
1244  int Nvcd = m_Nvc * m_Nd;
1245 
1246  int id1 = 0;
1247  int id2 = m_Nvc;
1248  int id3 = m_Nvc * 2;
1249  int id4 = m_Nvc * 3;
1250 
1251  int idir = 2;
1252 
1253  double vt1[m_Nvc], vt2[m_Nvc];
1254  double wt1r, wt1i, wt2r, wt2i;
1255 
1256  int isite = m_arg[itask].isite;
1257 
1258  double *w2 = &v2[Nvcd * isite];
1259  double *w1 = &v1[Nvcd * isite];
1260  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1261 
1262  int kz0 = m_arg[itask].kz0;
1263  int Nxy = m_Nx * m_Ny;
1264 
1265  for (int it = 0; it < m_Mt; ++it) {
1266  for (int iz = kz0; iz < m_Mz; ++iz) {
1267  for (int ixy = 0; ixy < Nxy; ++ixy) {
1268  int is = ixy + Nxy * (iz + m_Nz * it);
1269  int iv = Nvcd * is;
1270  int in = Nvcd * (is - Nxy);
1271  int ig = m_Ndf * (is - Nxy);
1272 
1273  for (int ic = 0; ic < m_Nc; ++ic) {
1274  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1275  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1276  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1277  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1278  }
1279 
1280  for (int ic = 0; ic < m_Nc; ++ic) {
1281  int ic2 = 2 * ic;
1282  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1283  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1284  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1285  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1286 
1287  w2[ic2 + id1 + iv] += wt1r;
1288  w2[ic2 + 1 + id1 + iv] += wt1i;
1289  w2[ic2 + id2 + iv] += wt2r;
1290  w2[ic2 + 1 + id2 + iv] += wt2i;
1291  w2[ic2 + id3 + iv] += -wt1i;
1292  w2[ic2 + 1 + id3 + iv] += wt1r;
1293  w2[ic2 + id4 + iv] += wt2i;
1294  w2[ic2 + 1 + id4 + iv] += -wt2r;
1295  }
1296  }
1297  }
1298  }
1299 
1300 }
1301 
1302 //====================================================================
1304  double *vcp1, double *v1)
1305 {
1306  int Nvc2 = 2 * m_Nvc;
1307  int Nvcd = m_Nvc * m_Nd;
1308  int Nvcd2 = Nvcd / 2;
1309 
1310  int id1 = 0;
1311  int id2 = m_Nvc;
1312  int id3 = m_Nvc * 2;
1313  int id4 = m_Nvc * 3;
1314 
1315  int isite = m_arg[itask].isite;
1316  int isite_cp = m_arg[itask].isite_cpt;
1317 
1318  int idir = 3;
1319  double bc2 = m_boundary2[idir];
1320 
1321  // double* w2 = &vcp1[Nvcd2*isite_cp];
1322  double *w2
1323  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1324  double *w1 = &v1[Nvcd * isite];
1325 
1326  if (m_arg[itask].kt0 == 1) {
1327  int Nxy = m_Nx * m_Ny;
1328  int it = 0;
1329  for (int iz = 0; iz < m_Mz; ++iz) {
1330  for (int ixy = 0; ixy < Nxy; ++ixy) {
1331  int is = ixy + Nxy * (iz + m_Nz * it);
1332  int is2 = ixy + Nxy * iz;
1333 
1334  int in = Nvcd * is;
1335  int ix1 = Nvc2 * is2;
1336  int ix2 = ix1 + m_Nvc;
1337 
1338  for (int ic = 0; ic < m_Nc; ++ic) {
1339  w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1340  w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1341  w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1342  w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1343  }
1344  }
1345  }
1346  }
1347 
1348  m_bw_send[idir]->start_thread(itask);
1349 }
1350 
1351 //====================================================================
1353  double *v2, double *vcp2)
1354 {
1355  int Nvc2 = 2 * m_Nvc;
1356  int Nvcd = m_Nvc * m_Nd;
1357  int Nvcd2 = Nvcd / 2;
1358 
1359  int id1 = 0;
1360  int id2 = m_Nvc;
1361  int id3 = m_Nvc * 2;
1362  int id4 = m_Nvc * 3;
1363 
1364  int idir = 3;
1365 
1366  double wt1r, wt1i, wt2r, wt2i;
1367 
1368  int isite = m_arg[itask].isite;
1369  int isite_cp = m_arg[itask].isite_cpt;
1370 
1371  double *w2 = &v2[Nvcd * isite];
1372  // double* w1 = &vcp2[Nvcd2*isite_cp];
1373  double *w1
1374  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1375  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1376 
1377  m_bw_recv[idir]->wait_thread(itask);
1378 
1379  if (m_arg[itask].kt1 == 1) {
1380  int Nxy = m_Nx * m_Ny;
1381  int it = m_Mt - 1;
1382  for (int iz = 0; iz < m_Mz; ++iz) {
1383  for (int ixy = 0; ixy < Nxy; ++ixy) {
1384  int is = ixy + Nxy * (iz + m_Nz * it);
1385  int is2 = ixy + Nxy * iz;
1386  int iv = Nvcd * is;
1387  int ig = m_Ndf * is;
1388  int ix1 = Nvc2 * is2;
1389  int ix2 = ix1 + m_Nvc;
1390 
1391  for (int ic = 0; ic < m_Nc; ++ic) {
1392  int ic2 = ic * m_Nvc;
1393 
1394  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1395  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1396  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1397  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1398 
1399  w2[2 * ic + id3 + iv] += wt1r;
1400  w2[2 * ic + 1 + id3 + iv] += wt1i;
1401  w2[2 * ic + id4 + iv] += wt2r;
1402  w2[2 * ic + 1 + id4 + iv] += wt2i;
1403  }
1404  }
1405  }
1406  }
1407 
1408 }
1409 
1410 //====================================================================
1412  double *v2, double *v1)
1413 {
1414  int Nvcd = m_Nvc * m_Nd;
1415 
1416  int id1 = 0;
1417  int id2 = m_Nvc;
1418  int id3 = m_Nvc * 2;
1419  int id4 = m_Nvc * 3;
1420 
1421  int idir = 3;
1422 
1423  double vt1[m_Nvc], vt2[m_Nvc];
1424  double wt1r, wt1i, wt2r, wt2i;
1425 
1426  int isite = m_arg[itask].isite;
1427 
1428  double *w2 = &v2[Nvcd * isite];
1429  double *w1 = &v1[Nvcd * isite];
1430  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1431 
1432  int kt1 = m_arg[itask].kt1;
1433  int Nxy = m_Nx * m_Ny;
1434  int Nxyz = Nxy * m_Nz;
1435 
1436  for (int it = 0; it < m_Mt - kt1; ++it) {
1437  for (int iz = 0; iz < m_Mz; ++iz) {
1438  for (int ixy = 0; ixy < Nxy; ++ixy) {
1439  int is = ixy + Nxy * (iz + m_Nz * it);
1440  int iv = Nvcd * is;
1441  int in = Nvcd * (is + Nxyz);
1442  int ig = m_Ndf * is;
1443 
1444  for (int ic = 0; ic < m_Nc; ++ic) {
1445  vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1446  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1447  vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1448  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1449  }
1450 
1451  for (int ic = 0; ic < m_Nc; ++ic) {
1452  int ic2 = ic * m_Nvc;
1453 
1454  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1455  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1456  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1457  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1458 
1459  w2[2 * ic + id3 + iv] += wt1r;
1460  w2[2 * ic + 1 + id3 + iv] += wt1i;
1461  w2[2 * ic + id4 + iv] += wt2r;
1462  w2[2 * ic + 1 + id4 + iv] += wt2i;
1463  }
1464  }
1465  }
1466  }
1467 
1468 }
1469 
1470 //====================================================================
1472  double *vcp1, double *v1)
1473 {
1474  int Nvc2 = 2 * m_Nvc;
1475  int Nvcd = m_Nvc * m_Nd;
1476  int Nvcd2 = Nvcd / 2;
1477 
1478  int id1 = 0;
1479  int id2 = m_Nvc;
1480  int id3 = m_Nvc * 2;
1481  int id4 = m_Nvc * 3;
1482 
1483  int idir = 3;
1484 
1485  int isite = m_arg[itask].isite;
1486  int isite_cp = m_arg[itask].isite_cpt;
1487 
1488  // double* w2 = &vcp1[Nvcd2*isite_cp];
1489  double *w2
1490  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1491  double *w1 = &v1[Nvcd * isite];
1492  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1493 
1494  double vt1[m_Nvc], vt2[m_Nvc];
1495 
1496  if (m_arg[itask].kt1 == 1) {
1497  int Nxy = m_Nx * m_Ny;
1498  int it = m_Mt - 1;
1499  for (int iz = 0; iz < m_Mz; ++iz) {
1500  for (int ixy = 0; ixy < Nxy; ++ixy) {
1501  int is = ixy + Nxy * (iz + m_Nz * it);
1502  int is2 = ixy + Nxy * iz;
1503  int in = Nvcd * is;
1504  int ig = m_Ndf * is;
1505  int ix1 = Nvc2 * is2;
1506  int ix2 = ix1 + m_Nvc;
1507 
1508  for (int ic = 0; ic < m_Nc; ++ic) {
1509  vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1510  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1511  vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1512  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1513  }
1514 
1515  for (int ic = 0; ic < m_Nc; ++ic) {
1516  int icr = 2 * ic;
1517  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1518  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1519  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1520  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1521  }
1522  }
1523  }
1524  }
1525 
1526  m_fw_send[idir]->start_thread(itask);
1527 }
1528 
1529 //====================================================================
1531  double *v2, double *vcp2)
1532 {
1533  int Nvc2 = 2 * m_Nvc;
1534  int Nvcd = m_Nvc * m_Nd;
1535  int Nvcd2 = Nvcd / 2;
1536 
1537  int id1 = 0;
1538  int id2 = m_Nvc;
1539  int id3 = m_Nvc * 2;
1540  int id4 = m_Nvc * 3;
1541 
1542  int idir = 3;
1543  double bc2 = m_boundary2[idir];
1544 
1545  double wt1r, wt1i, wt2r, wt2i;
1546 
1547  int isite = m_arg[itask].isite;
1548  int isite_cp = m_arg[itask].isite_cpt;
1549 
1550  double *w2 = &v2[Nvcd * isite];
1551  // double* w1 = &vcp2[Nvcd2*isite_cp];
1552  double *w1
1553  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1554 
1555  m_fw_recv[idir]->wait_thread(itask);
1556 
1557  if (m_arg[itask].kt0 == 1) {
1558  int Nxy = m_Nx * m_Ny;
1559  int it = 0;
1560  for (int iz = 0; iz < m_Mz; ++iz) {
1561  for (int ixy = 0; ixy < Nxy; ++ixy) {
1562  int is = ixy + Nxy * (iz + m_Nz * it);
1563  int is2 = ixy + Nxy * iz;
1564  int iv = Nvcd * is;
1565  int ix1 = Nvc2 * is2;
1566  int ix2 = ix1 + m_Nvc;
1567 
1568  for (int ic = 0; ic < m_Nc; ++ic) {
1569  int icr = 2 * ic;
1570  int ici = 2 * ic + 1;
1571  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1572  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1573  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1574  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1575  }
1576  }
1577  }
1578  }
1579 
1580 }
1581 
1582 //====================================================================
1584  double *v2, double *v1)
1585 {
1586  int Nvcd = m_Nvc * m_Nd;
1587 
1588  int id1 = 0;
1589  int id2 = m_Nvc;
1590  int id3 = m_Nvc * 2;
1591  int id4 = m_Nvc * 3;
1592 
1593  int idir = 3;
1594 
1595  double vt1[m_Nvc], vt2[m_Nvc];
1596  double wt1r, wt1i, wt2r, wt2i;
1597 
1598  int isite = m_arg[itask].isite;
1599 
1600  double *w2 = &v2[Nvcd * isite];
1601  double *w1 = &v1[Nvcd * isite];
1602  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1603 
1604  int kt0 = m_arg[itask].kt0;
1605  int Nxy = m_Nx * m_Ny;
1606  int Nxyz = Nxy * m_Nz;
1607 
1608  for (int it = kt0; it < m_Mt; ++it) {
1609  for (int iz = 0; iz < m_Mz; ++iz) {
1610  for (int ixy = 0; ixy < Nxy; ++ixy) {
1611  int is = ixy + Nxy * (iz + m_Nz * it);
1612  int iv = Nvcd * is;
1613  int in = Nvcd * (is - Nxyz);
1614  int ig = m_Ndf * (is - Nxyz);
1615 
1616  for (int ic = 0; ic < m_Nc; ++ic) {
1617  vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1618  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1619  vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1620  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1621  }
1622 
1623  for (int ic = 0; ic < m_Nc; ++ic) {
1624  int ic2 = 2 * ic;
1625  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1626  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1627  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1628  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1629 
1630  w2[ic2 + id1 + iv] += wt1r;
1631  w2[ic2 + 1 + id1 + iv] += wt1i;
1632  w2[ic2 + id2 + iv] += wt2r;
1633  w2[ic2 + 1 + id2 + iv] += wt2i;
1634  }
1635  }
1636  }
1637  }
1638 
1639 }
1640 
1641 //====================================================================
1643  double *vcp1, double *v1)
1644 {
1645  int Nvc2 = 2 * m_Nvc;
1646  int Nvcd = m_Nvc * m_Nd;
1647  int Nvcd2 = Nvcd / 2;
1648 
1649  int id1 = 0;
1650  int id2 = m_Nvc;
1651  int id3 = m_Nvc * 2;
1652  int id4 = m_Nvc * 3;
1653 
1654  int isite = m_arg[itask].isite;
1655  int isite_cp = m_arg[itask].isite_cpt;
1656 
1657  int idir = 3;
1658  double bc2 = m_boundary2[idir];
1659 
1660  // double* w2 = &vcp1[Nvcd2*isite_cp];
1661  double *w2
1662  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1663  double *w1 = &v1[Nvcd * isite];
1664 
1665  if (m_arg[itask].kt0 == 1) {
1666  int Nxy = m_Nx * m_Ny;
1667  int it = 0;
1668  for (int iz = 0; iz < m_Mz; ++iz) {
1669  for (int ixy = 0; ixy < Nxy; ++ixy) {
1670  int is = ixy + Nxy * (iz + m_Nz * it);
1671  int is2 = ixy + Nxy * iz;
1672 
1673  int in = Nvcd * is;
1674  int ix1 = Nvc2 * is2;
1675  int ix2 = ix1 + m_Nvc;
1676 
1677  for (int ic = 0; ic < m_Nc; ++ic) {
1678  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1679  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1680  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1681  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1682  }
1683  }
1684  }
1685  }
1686 
1687  m_bw_send[idir]->start_thread(itask);
1688 }
1689 
1690 //====================================================================
1692  double *v2, double *vcp2)
1693 {
1694  int Nvc2 = 2 * m_Nvc;
1695  int Nvcd = m_Nvc * m_Nd;
1696  int Nvcd2 = Nvcd / 2;
1697 
1698  int id1 = 0;
1699  int id2 = m_Nvc;
1700  int id3 = m_Nvc * 2;
1701  int id4 = m_Nvc * 3;
1702 
1703  int idir = 3;
1704 
1705  double wt1r, wt1i, wt2r, wt2i;
1706 
1707  int isite = m_arg[itask].isite;
1708  int isite_cp = m_arg[itask].isite_cpt;
1709 
1710  double *w2 = &v2[Nvcd * isite];
1711  // double* w1 = &vcp2[Nvcd2*isite_cp];
1712  double *w1
1713  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1714  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1715 
1716  m_bw_recv[idir]->wait_thread(itask);
1717 
1718  if (m_arg[itask].kt1 == 1) {
1719  int Nxy = m_Nx * m_Ny;
1720  int it = m_Mt - 1;
1721  for (int iz = 0; iz < m_Mz; ++iz) {
1722  for (int ixy = 0; ixy < Nxy; ++ixy) {
1723  int is = ixy + Nxy * (iz + m_Nz * it);
1724  int is2 = ixy + Nxy * iz;
1725  int iv = Nvcd * is;
1726  int ig = m_Ndf * is;
1727  int ix1 = Nvc2 * is2;
1728  int ix2 = ix1 + m_Nvc;
1729 
1730  for (int ic = 0; ic < m_Nc; ++ic) {
1731  int ic2 = ic * m_Nvc;
1732 
1733  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1734  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1735  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1736  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1737 
1738  w2[2 * ic + id1 + iv] += wt1r;
1739  w2[2 * ic + 1 + id1 + iv] += wt1i;
1740  w2[2 * ic + id2 + iv] += wt2r;
1741  w2[2 * ic + 1 + id2 + iv] += wt2i;
1742  w2[2 * ic + id3 + iv] += wt1r;
1743  w2[2 * ic + 1 + id3 + iv] += wt1i;
1744  w2[2 * ic + id4 + iv] += wt2r;
1745  w2[2 * ic + 1 + id4 + iv] += wt2i;
1746  }
1747  }
1748  }
1749  }
1750 
1751 }
1752 
1753 //====================================================================
1755  double *v2, double *v1)
1756 {
1757  int Nvcd = m_Nvc * m_Nd;
1758 
1759  int id1 = 0;
1760  int id2 = m_Nvc;
1761  int id3 = m_Nvc * 2;
1762  int id4 = m_Nvc * 3;
1763 
1764  int idir = 3;
1765 
1766  double vt1[m_Nvc], vt2[m_Nvc];
1767  double wt1r, wt1i, wt2r, wt2i;
1768 
1769  int isite = m_arg[itask].isite;
1770 
1771  double *w2 = &v2[Nvcd * isite];
1772  double *w1 = &v1[Nvcd * isite];
1773  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1774 
1775  int kt1 = m_arg[itask].kt1;
1776  int Nxy = m_Nx * m_Ny;
1777  int Nxyz = Nxy * m_Nz;
1778 
1779  for (int it = 0; it < m_Mt - kt1; ++it) {
1780  for (int iz = 0; iz < m_Mz; ++iz) {
1781  for (int ixy = 0; ixy < Nxy; ++ixy) {
1782  int is = ixy + Nxy * (iz + m_Nz * it);
1783  int iv = Nvcd * is;
1784  int in = Nvcd * (is + Nxyz);
1785  int ig = m_Ndf * is;
1786 
1787  for (int ic = 0; ic < m_Nc; ++ic) {
1788  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1789  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1790  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1791  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
1792  }
1793 
1794  for (int ic = 0; ic < m_Nc; ++ic) {
1795  int ic2 = ic * m_Nvc;
1796 
1797  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1798  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1799  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1800  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1801 
1802  w2[2 * ic + id1 + iv] += wt1r;
1803  w2[2 * ic + 1 + id1 + iv] += wt1i;
1804  w2[2 * ic + id2 + iv] += wt2r;
1805  w2[2 * ic + 1 + id2 + iv] += wt2i;
1806  w2[2 * ic + id3 + iv] += wt1r;
1807  w2[2 * ic + 1 + id3 + iv] += wt1i;
1808  w2[2 * ic + id4 + iv] += wt2r;
1809  w2[2 * ic + 1 + id4 + iv] += wt2i;
1810  }
1811  }
1812  }
1813  }
1814 
1815 }
1816 
1817 //====================================================================
1819  double *vcp1, double *v1)
1820 {
1821  int Nvc2 = 2 * m_Nvc;
1822  int Nvcd = m_Nvc * m_Nd;
1823  int Nvcd2 = Nvcd / 2;
1824 
1825  int id1 = 0;
1826  int id2 = m_Nvc;
1827  int id3 = m_Nvc * 2;
1828  int id4 = m_Nvc * 3;
1829 
1830  int idir = 3;
1831 
1832  int isite = m_arg[itask].isite;
1833  int isite_cp = m_arg[itask].isite_cpt;
1834 
1835  // double* w2 = &vcp1[Nvcd2*isite_cp];
1836  double *w2
1837  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1838  double *w1 = &v1[Nvcd * isite];
1839  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1840 
1841  double vt1[m_Nvc], vt2[m_Nvc];
1842 
1843  if (m_arg[itask].kt1 == 1) {
1844  int Nxy = m_Nx * m_Ny;
1845  int it = m_Mt - 1;
1846  for (int iz = 0; iz < m_Mz; ++iz) {
1847  for (int ixy = 0; ixy < Nxy; ++ixy) {
1848  int is = ixy + Nxy * (iz + m_Nz * it);
1849  int is2 = ixy + Nxy * iz;
1850  int in = Nvcd * is;
1851  int ig = m_Ndf * is;
1852  int ix1 = Nvc2 * is2;
1853  int ix2 = ix1 + m_Nvc;
1854 
1855  for (int ic = 0; ic < m_Nc; ++ic) {
1856  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1857  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1858  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1859  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1860  }
1861 
1862  for (int ic = 0; ic < m_Nc; ++ic) {
1863  int icr = 2 * ic;
1864  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1865  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1866  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1867  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1868  }
1869  }
1870  }
1871  }
1872 
1873  m_fw_send[idir]->start_thread(itask);
1874 }
1875 
1876 //====================================================================
1878  double *v2, double *vcp2)
1879 {
1880  int Nvc2 = 2 * m_Nvc;
1881  int Nvcd = m_Nvc * m_Nd;
1882  int Nvcd2 = Nvcd / 2;
1883 
1884  int id1 = 0;
1885  int id2 = m_Nvc;
1886  int id3 = m_Nvc * 2;
1887  int id4 = m_Nvc * 3;
1888 
1889  int idir = 3;
1890  double bc2 = m_boundary2[idir];
1891 
1892  double wt1r, wt1i, wt2r, wt2i;
1893 
1894  int isite = m_arg[itask].isite;
1895  int isite_cp = m_arg[itask].isite_cpt;
1896 
1897  double *w2 = &v2[Nvcd * isite];
1898  // double* w1 = &vcp2[Nvcd2*isite_cp];
1899  double *w1
1900  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1901 
1902  m_fw_recv[idir]->wait_thread(itask);
1903 
1904  if (m_arg[itask].kt0 == 1) {
1905  int Nxy = m_Nx * m_Ny;
1906  int it = 0;
1907  for (int iz = 0; iz < m_Mz; ++iz) {
1908  for (int ixy = 0; ixy < Nxy; ++ixy) {
1909  int is = ixy + Nxy * (iz + m_Nz * it);
1910  int is2 = ixy + Nxy * iz;
1911  int iv = Nvcd * is;
1912  int ix1 = Nvc2 * is2;
1913  int ix2 = ix1 + m_Nvc;
1914 
1915  for (int ic = 0; ic < m_Nc; ++ic) {
1916  int icr = 2 * ic;
1917  int ici = 2 * ic + 1;
1918  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1919  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1920  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1921  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1922  w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1923  w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1924  w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1925  w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
1926  }
1927  }
1928  }
1929  }
1930 
1931 }
1932 
1933 //====================================================================
1935  double *v2, double *v1)
1936 {
1937  int Nvcd = m_Nvc * m_Nd;
1938 
1939  int id1 = 0;
1940  int id2 = m_Nvc;
1941  int id3 = m_Nvc * 2;
1942  int id4 = m_Nvc * 3;
1943 
1944  int idir = 3;
1945 
1946  double vt1[m_Nvc], vt2[m_Nvc];
1947  double wt1r, wt1i, wt2r, wt2i;
1948 
1949  int isite = m_arg[itask].isite;
1950 
1951  double *w2 = &v2[Nvcd * isite];
1952  double *w1 = &v1[Nvcd * isite];
1953  double *u = const_cast<Field_G *>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1954 
1955  int kt0 = m_arg[itask].kt0;
1956  int Nxy = m_Nx * m_Ny;
1957  int Nxyz = Nxy * m_Nz;
1958 
1959  for (int it = kt0; it < m_Mt; ++it) {
1960  for (int iz = 0; iz < m_Mz; ++iz) {
1961  for (int ixy = 0; ixy < Nxy; ++ixy) {
1962  int is = ixy + Nxy * (iz + m_Nz * it);
1963  int iv = Nvcd * is;
1964  int in = Nvcd * (is - Nxyz);
1965  int ig = m_Ndf * (is - Nxyz);
1966 
1967  for (int ic = 0; ic < m_Nc; ++ic) {
1968  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1969  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1970  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1971  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1972  }
1973 
1974  for (int ic = 0; ic < m_Nc; ++ic) {
1975  int ic2 = 2 * ic;
1976  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1977  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1978  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1979  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1980 
1981  w2[ic2 + id1 + iv] += wt1r;
1982  w2[ic2 + 1 + id1 + iv] += wt1i;
1983  w2[ic2 + id2 + iv] += wt2r;
1984  w2[ic2 + 1 + id2 + iv] += wt2i;
1985  w2[ic2 + id3 + iv] -= wt1r;
1986  w2[ic2 + 1 + id3 + iv] -= wt1i;
1987  w2[ic2 + id4 + iv] -= wt2r;
1988  w2[ic2 + 1 + id4 + iv] -= wt2i;
1989  }
1990  }
1991  }
1992  }
1993 
1994 }
1995 
1996 //====================================================================
1998  double *v2, double *v1)
1999 {
2000  int Nvcd = m_Nvc * m_Nd;
2001  int Nxy = m_Nx * m_Ny;
2002 
2003  int id1 = 0;
2004  int id2 = m_Nvc;
2005  int id3 = m_Nvc * 2;
2006  int id4 = m_Nvc * 3;
2007 
2008  int isite = m_arg[itask].isite;
2009  double *w2 = &v2[Nvcd * isite];
2010  double *w1 = &v1[Nvcd * isite];
2011 
2012  for (int it = 0; it < m_Mt; ++it) {
2013  for (int iz = 0; iz < m_Mz; ++iz) {
2014  for (int ixy = 0; ixy < Nxy; ++ixy) {
2015  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2016  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2017  w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2018  w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2019  w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2020  w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
2021  }
2022  }
2023  }
2024  }
2025 
2026 }
2027 
2028 //====================================================================
2030  double *v2, double *v1)
2031 {
2032  int Nvcd = m_Nvc * m_Nd;
2033  int Nxy = m_Nx * m_Ny;
2034 
2035  int id1 = 0;
2036  int id2 = m_Nvc;
2037  int id3 = m_Nvc * 2;
2038  int id4 = m_Nvc * 3;
2039 
2040  int isite = m_arg[itask].isite;
2041  double *w2 = &v2[Nvcd * isite];
2042  double *w1 = &v1[Nvcd * isite];
2043 
2044  for (int it = 0; it < m_Mt; ++it) {
2045  for (int iz = 0; iz < m_Mz; ++iz) {
2046  for (int ixy = 0; ixy < Nxy; ++ixy) {
2047  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2048  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2049  w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2050  w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2051  w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2052  w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2053  }
2054  }
2055  }
2056  }
2057 
2058 }
2059 
2060 //====================================================================
2061 //============================================================END=====
void mult_tpb_dirac_thread(int, double *, double *)
void mult_tm1_chiral_thread(int, double *, double *)
BridgeIO vout
Definition: bridgeIO.cpp:207
void general(const char *format,...)
Definition: bridgeIO.cpp:38
void mult_xp1_thread(int, double *, double *)
void daypx_thread(int, double *, double, double *)
void mult_xm2_thread(int, double *, double *)
void mult_zm2_thread(int, double *, double *)
void mult_xp2_thread(int, double *, double *)
std::valarray< Channel * > m_fw_recv
void mult_ymb_thread(int, double *, double *)
std::valarray< Channel * > m_bw_send
void mult_tm2_chiral_thread(int, double *, double *)
void gm5_dirac_thread(int, double *, double *)
void mult_zmb_thread(int, double *, double *)
void mult_tm2_dirac_thread(int, double *, double *)
std::valarray< Channel * > m_bw_recv
void mult_zp1_thread(int, double *, double *)
void mult_ym1_thread(int, double *, double *)
void mult_yp2_thread(int, double *, double *)
SU(N) gauge field.
Definition: field_G.h:36
void mult_zp2_thread(int, double *, double *)
void mult_tm1_dirac_thread(int, double *, double *)
void mult_tp1_chiral_thread(int, double *, double *)
void mult_xmb_thread(int, double *, double *)
void mult_xm1_thread(int, double *, double *)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_tp1_dirac_thread(int, double *, double *)
void mult_zm1_thread(int, double *, double *)
void mult_xpb_thread(int, double *, double *)
void mult_tmb_dirac_thread(int, double *, double *)
void mult_tp2_chiral_thread(int, double *, double *)
void mult_ypb_thread(int, double *, double *)
void mult_tp2_dirac_thread(int, double *, double *)
void gm5_chiral_thread(int, double *, double *)
void mult_zpb_thread(int, double *, double *)
void mult_ym2_thread(int, double *, double *)
void mult_tpb_chiral_thread(int, double *, double *)
void mult_yp1_thread(int, double *, double *)
void mult_tmb_chiral_thread(int, double *, double *)
std::valarray< Channel * > m_fw_send