Bridge++  Ver. 1.3.x
fopr_Wilson_eo_impl_thread.cpp
Go to the documentation of this file.
1 
14 #include "fopr_Wilson_eo_impl.h"
15 
16 #include "bridgeIO.h"
17 using Bridge::vout;
18 
19 #include "threadManager_OpenMP.h"
20 
21 
22 #if defined USE_GROUP_SU3
23 #include "fopr_Wilson_impl_SU3.inc"
24 #elif defined USE_GROUP_SU2
25 #include "fopr_Wilson_impl_SU2.inc"
26 #elif defined USE_GROUP_SU_N
27 #include "fopr_Wilson_impl_SU_N.inc"
28 #endif
29 
30 //====================================================================
// Thread/task layout setup.
// NOTE(review): the embedded listing numbers skip 31, 33, 37, 44 and 50, so the
// function-name line and whatever those other lines contain (presumably some of
// the m_Nthread / m_Ntask_* assignments) are not visible in this extract.
// Visible work:
//  - rejects thread counts larger than m_Nz * m_Nt, and task grids whose
//    product differs from m_Nthread (both paths call exit(EXIT_FAILURE));
//  - derives the per-task extents m_Mz and m_Mt;
//  - fills m_arg[itask] with a starting site index, four edge flags and four
//    copy-buffer offsets;
//  - passes per-task (destid, byte offset, byte size) tables to the
//    m_bw_send / m_fw_send / m_bw_recv / m_fw_recv objects for imu = 0..3.
32 {
34 
35  // The following setup corresponds to uniform division of volume.
36  if (m_Nthread <= m_Nt) {
38  } else if (m_Nthread <= m_Nz * m_Nt) {
39  m_Ntask_t = m_Nt;
40  } else {
41  vout.crucial(m_vl, " Too large Nthread: %d\n", m_Nthread);
42  exit(EXIT_FAILURE);
43  }
    // The z-by-t task grid must account for every thread exactly.
45  if (m_Ntask_z * m_Ntask_t != m_Nthread) {
46  vout.crucial(m_vl, " Nz(%d) and Nt(%d) do not mach Nthread: %d\n",
47  m_Nz, m_Nt, m_Nthread);
48  exit(EXIT_FAILURE);
49  }
    // Number of z-slices and t-slices handled by one task (integer division).
51  m_Mz = m_Nz / m_Ntask_z;
52  m_Mt = m_Nt / m_Ntask_t;
53 
54  vout.general(m_vl, " Nthread = %d\n", m_Nthread);
55  vout.general(m_vl, " Ntask = %d\n", m_Ntask);
56  vout.general(m_vl, " Ntask_z = %d Ntask_t = %d\n", m_Ntask_z, m_Ntask_t);
57  vout.general(m_vl, " Mz = %d Mt = %d\n", m_Mz, m_Mt);
58 
59  // setup of arguments
    // Nxy2: number of sites in one (x,y) plane, using the x-extent m_Nx2.
60  int Nxy2 = m_Nx2 * m_Ny;
61  m_arg.resize(m_Ntask);
62  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
63  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
    // Tasks are numbered with the z index running fastest.
64  int itask = ith_z + m_Ntask_z * ith_t;
65 
    // First site of this task's block: skip ith_z * m_Mz z-slices and
    // ith_t * m_Mt t-slices (each t-slice holds m_Nz z-slices).
66  m_arg[itask].isite = (ith_z * m_Mz + ith_t * (m_Nz * m_Mt)) * Nxy2;
67 
    // Edge flags: k?0 is 1 for the first task along that axis, k?1 for the last.
68  m_arg[itask].kt0 = 0;
69  m_arg[itask].kt1 = 0;
70  m_arg[itask].kz0 = 0;
71  m_arg[itask].kz1 = 0;
72  if (ith_t == 0) m_arg[itask].kt0 = 1;
73  if (ith_z == 0) m_arg[itask].kz0 = 1;
74  if (ith_t == m_Ntask_t - 1) m_arg[itask].kt1 = 1;
75  if (ith_z == m_Ntask_z - 1) m_arg[itask].kz1 = 1;
76 
    // Per-direction offsets (in sites) of this task's slice inside the
    // communication buffers; the mult_* routines multiply them by
    // sizeof(double) * Nvcd2 before calling ptr().
77  m_arg[itask].isite_cpx = itask * m_Mz * m_Mt * (m_Ny / 2);
78  m_arg[itask].isite_cpy = itask * m_Mz * m_Mt * m_Nx2;
79  m_arg[itask].isite_cpz = ith_t * m_Mt * Nxy2;
80  m_arg[itask].isite_cpt = ith_z * m_Mz * Nxy2;
81  }
82  }
83 
84  // setup for async data transfer
85  int Nc = CommonParameters::Nc();
86  int Nd = CommonParameters::Nd();
    // Doubles per site in the communication buffers: 2 * Nc * Nd / 2.
87  int Nvcd2 = 2 * Nc * Nd / 2;
88 
    // NOTE(review): the offsets/sizes below are computed with sizeof(double)
    // (a size_t) and stored into std::vector<int>; the implicit narrowing could
    // overflow for very large local volumes - confirm the expected ranges.
89  std::vector<int> destid(m_Ntask);
90  std::vector<int> offset(m_Ntask);
91  std::vector<int> datasize(m_Ntask);
92  std::vector<int> offset_up(m_Ntask);
93  std::vector<int> offset_lw(m_Ntask);
94  std::vector<int> datasize_up(m_Ntask);
95  std::vector<int> datasize_lw(m_Ntask);
96 
    // imu = 0: every task is paired with itself and owns an equal-size slice
    // of m_Mz * m_Mt * (m_Ny / 2) sites; the same table is used for all four
    // send/recv objects.
97  int imu = 0;
98  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
99  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
100  int itask = ith_z + ith_t * m_Ntask_z;
101  int isite_cp = itask * m_Mz * m_Mt * (m_Ny / 2);
102  destid[itask] = itask;
103  offset[itask] = sizeof(double) * Nvcd2 * isite_cp;
104  datasize[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Mt * (m_Ny / 2);
105  }
106  }
107  m_bw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
108  m_fw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
109  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
110  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
111 
    // imu = 1: same scheme, with slices of m_Mz * m_Mt * m_Nx2 sites.
112  imu = 1;
113  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
114  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
115  int itask = ith_z + ith_t * m_Ntask_z;
116  int isite_cp = itask * m_Mz * m_Mt * m_Nx2;
117  destid[itask] = itask;
118  offset[itask] = sizeof(double) * Nvcd2 * isite_cp;
119  datasize[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Mt * m_Nx2;
120  }
121  }
122  m_bw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
123  m_fw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
124  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
125  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
126 
    // imu = 2: only tasks on the z edges of the task grid get a non-zero
    // size. The "lw" table is filled for ith_z == 0 and the "up" table for
    // ith_z == m_Ntask_z - 1; all other tasks keep destid = -1 and size 0
    // (presumably "no transfer" - confirm against set_thread()).
127  imu = 2;
128  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
129  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
130  int itask = ith_z + m_Ntask_z * ith_t;
131  destid[itask] = -1;
132  offset_up[itask] = 0;
133  offset_lw[itask] = 0;
134  datasize_up[itask] = 0;
135  datasize_lw[itask] = 0;
136  if (ith_z == 0) {
    // Partner is the last z-task in the same t-row.
137  destid[itask] = (m_Ntask_z - 1) + ith_t * m_Ntask_z;
138  offset_lw[itask] = sizeof(double) * Nvcd2 * ith_t * m_Mt * m_Nx2 * m_Ny;
139  datasize_lw[itask] = sizeof(double) * Nvcd2 * m_Mt * m_Nx2 * m_Ny;
140  }
141  if (ith_z == m_Ntask_z - 1) {
    // Partner is the first z-task in the same t-row.
142  destid[itask] = ith_t * m_Ntask_z;
143  offset_up[itask] = sizeof(double) * Nvcd2 * ith_t * m_Mt * m_Nx2 * m_Ny;
144  datasize_up[itask] = sizeof(double) * Nvcd2 * m_Mt * m_Nx2 * m_Ny;
145  }
146  }
147  }
    // bw_send and fw_recv use the "lw" table; bw_recv and fw_send use "up".
148  m_bw_send[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
149  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
150  m_fw_send[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
151  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
152 
    // imu = 3: as for imu = 2, but on the t edges of the task grid.
153  imu = 3;
154  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
155  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
156  int itask = ith_z + m_Ntask_z * ith_t;
157  destid[itask] = -1;
158  offset_up[itask] = 0;
159  offset_lw[itask] = 0;
160  datasize_up[itask] = 0;
161  datasize_lw[itask] = 0;
162  if (ith_t == 0) {
    // Partner is the task with the same ith_z in the last t-row.
163  destid[itask] = ith_z + (m_Ntask_t - 1) * m_Ntask_z;
164  offset_lw[itask] = sizeof(double) * Nvcd2 * ith_z * m_Mz * m_Nx2 * m_Ny;
165  datasize_lw[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Nx2 * m_Ny;
166  }
167  if (ith_t == m_Ntask_t - 1) {
    // Partner is the task with the same ith_z in the first t-row.
168  destid[itask] = ith_z;
169  offset_up[itask] = sizeof(double) * Nvcd2 * ith_z * m_Mz * m_Nx2 * m_Ny;
170  datasize_up[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Nx2 * m_Ny;
171  }
172  }
173  }
174  m_bw_send[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
175  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
176  m_fw_send[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
177  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
178 }
179 
180 
181 //====================================================================
// In-place scaling of one task's block of w by fac.
// NOTE(review): the first line of the signature (function name and the
// declaration of itask) is missing from this extract.
//   w   : field data; the task's block starts at w[Nvcd * m_arg[itask].isite]
//   fac : factor applied to every double in the block
183  double *w, double fac)
184 {
    // Nvcd: doubles per site; Nvxy: doubles per (x,y) plane.
185  int Nvcd = m_Nvc * m_Nd;
186  int Nvxy = Nvcd * m_Nx2 * m_Ny;
187 
188  int isite = m_arg[itask].isite;
189  double *wp = &w[Nvcd * isite];
190 
    // The task covers m_Mt x m_Mz planes; consecutive t-slices are m_Nz
    // planes apart in the full field, hence the m_Nz (not m_Mz) stride.
191  for (int it = 0; it < m_Mt; ++it) {
192  for (int iz = 0; iz < m_Mz; ++iz) {
193  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
194  int iv = ivxy + Nvxy * (iz + m_Nz * it);
195  wp[iv] = fac * wp[iv];
196  }
197  }
198  }
199 }
200 
201 
202 //====================================================================
// Sets one task's block of v to 0.0.
// NOTE(review): the first line of the signature (function name and the
// declaration of itask) is missing from this extract.
//   v : field data; the task's block starts at v[Nvcd * m_arg[itask].isite]
204  double *v)
205 {
    // Nvcd: doubles per site; Nvxy: doubles per (x,y) plane.
206  int Nvcd = m_Nvc * m_Nd;
207  int Nvxy = Nvcd * m_Nx2 * m_Ny;
208 
209  int isite = m_arg[itask].isite;
210  double *wp = &v[Nvcd * isite];
211 
    // Same traversal as the scaling routine: m_Mt x m_Mz planes with an
    // m_Nz-plane stride between consecutive t-slices.
212  for (int it = 0; it < m_Mt; ++it) {
213  for (int iz = 0; iz < m_Mz; ++iz) {
214  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
215  int iv = ivxy + Nvxy * (iz + m_Nz * it);
216  wp[iv] = 0.0;
217  }
218  }
219  }
220 }
221 
222 
223 //====================================================================
// Direction idir = 0, step 1 of the "bw" exchange: packs task itask's slice of
// the m_bw_send[0] buffer from the ix = 0 sites and starts the transfer.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   vcp1  : not referenced in the visible body (the buffer comes from
//           m_bw_send[0]->ptr(); the direct use of vcp1 is commented out)
//   v1    : input field, read from offset Nvcd * isite
//   ieo   : combined with m_Leo to choose which (y,z,t) rows are packed
225  int itask, double *vcp1, const double *v1, int ieo)
226 {
227  int Nvc2 = 2 * m_Nvc;
228  int Nvcd = m_Nvc * m_Nd;
229  int Nvcd2 = Nvcd / 2;
230 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
231  int id1 = 0;
232  int id2 = m_Nvc;
233  int id3 = m_Nvc * 2;
234  int id4 = m_Nvc * 3;
235 
236  int idir = 0;
237 
238  int isite = m_arg[itask].isite;
239  int isite_cp = m_arg[itask].isite_cpx;
    // Index of the task's first (y,z,t) row, used to offset into m_Leo.
240  int iyzt0 = isite / m_Nx2;
241 
242  // double* w2 = &vcp1[Nvcd2*isite_cp];
243  double *w2
244  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
245  const double *w1 = &v1[Nvcd * isite];
246 
    // Factor applied to everything packed for this direction.
247  double bc2 = m_boundary2[idir];
248 
249  int ix = 0;
    // ibf counts packed rows; each packed row occupies Nvc2 doubles in w2.
250  int ibf = 0;
251 
252  for (int it = 0; it < m_Mt; ++it) {
253  for (int iz = 0; iz < m_Mz; ++iz) {
254  for (int iy = 0; iy < m_Ny; ++iy) {
255  int iyzt = iy + m_Ny * (iz + m_Nz * it);
    // Leo equals m_Leo[...] when ieo == 0 and 1 - m_Leo[...] when ieo == 1.
256  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
257  if (Leo == 1) {
258  int is = ix + m_Nx2 * iyzt;
259  int in = Nvcd * is;
260 
261  int ix1 = Nvc2 * ibf;
262  int ix2 = ix1 + m_Nvc;
263 
    // Reading each (2*ic, 2*ic+1) pair as (re, im) - an assumption,
    // confirm - the two packed vectors are bc2 * (block1 + i*block4)
    // and bc2 * (block2 + i*block3).
264  for (int ic = 0; ic < m_Nc; ++ic) {
265  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
266  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
267  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
268  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
269  }
270  ++ibf;
271  }
272  }
273  }
274  }
275 
    // The task's slice is complete: start its part of the transfer.
276  m_bw_send[idir]->start_thread(itask);
277 }
278 
279 
280 //====================================================================
// Direction idir = 0, step 2 of the "bw" exchange: waits for task itask's slice
// of m_bw_recv[0], multiplies the received vectors with the link data u via
// mult_uv_r / mult_uv_i, and accumulates into v2 at the ix = m_Nx2 - 1 sites.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   v2    : output field, accumulated into (+=)
//   vcp2  : not referenced in the visible body (data comes from
//           m_bw_recv[0]->ptr(); the direct use of vcp2 is commented out)
//   ieo   : selects the half of m_U that is read and, with m_Leo, the rows used
282  int itask, double *v2, const double *vcp2, int ieo)
283 {
284  int Nvc2 = 2 * m_Nvc;
285  int Nvcd = m_Nvc * m_Nd;
286  int Nvcd2 = Nvcd / 2;
287 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
288  int id1 = 0;
289  int id2 = m_Nvc;
290  int id3 = m_Nvc * 2;
291  int id4 = m_Nvc * 3;
292 
293  int idir = 0;
294 
295  double wt1r, wt1i, wt2r, wt2i;
296 
297  int isite = m_arg[itask].isite;
298  int isite_cp = m_arg[itask].isite_cpx;
299  int iyzt0 = isite / m_Nx2;
300 
301  double *w2 = &v2[Nvcd * isite];
302  // double* w1 = &vcp2[Nvcd2*isite_cp];
303  const double *w1
304  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
    // Link data for this direction, offset by ieo * m_Nvol / 2 sites.
305  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
306 
    // Wait before w1 is read.
307  m_bw_recv[idir]->wait_thread(itask);
308 
309  int ix = m_Nx2 - 1;
    // ibf counts consumed rows, in the same order in which the packing step fills them.
310  int ibf = 0;
311  for (int it = 0; it < m_Mt; ++it) {
312  for (int iz = 0; iz < m_Mz; ++iz) {
313  for (int iy = 0; iy < m_Ny; ++iy) {
314  int iyzt = iy + m_Ny * (iz + m_Nz * it);
    // Leo equals m_Leo[...] when ieo == 0 and 1 - m_Leo[...] when ieo == 1.
315  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
316 
317  if (Leo == 1) {
318  int is = ix + m_Nx2 * iyzt;
319  int iv = Nvcd * is;
320  int ig = m_Ndf * is;
321  int ix1 = Nvc2 * ibf;
322  int ix2 = ix1 + m_Nvc;
323 
324  for (int ic = 0; ic < m_Nc; ++ic) {
    // Start of row ic of the link matrix (m_Nvc doubles per row).
325  int ic2 = ic * m_Nvc;
326  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
327  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
328  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
329  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
    // Blocks 1 and 2 receive the products directly; blocks 3 and 4
    // receive (im, -re) of product 2 and product 1 respectively.
330  w2[2 * ic + id1 + iv] += wt1r;
331  w2[2 * ic + 1 + id1 + iv] += wt1i;
332  w2[2 * ic + id2 + iv] += wt2r;
333  w2[2 * ic + 1 + id2 + iv] += wt2i;
334  w2[2 * ic + id3 + iv] += wt2i;
335  w2[2 * ic + 1 + id3 + iv] += -wt2r;
336  w2[2 * ic + id4 + iv] += wt1i;
337  w2[2 * ic + 1 + id4 + iv] += -wt1r;
338  }
339  ++ibf;
340  }
341  }
342  }
343  }
344 }
345 
346 
347 //====================================================================
// Direction idir = 0, interior ("bulk") part matching the two "bw" exchange
// steps: for every site of the task whose neighbour at is + Leo lies inside the
// local x-range, builds the two combined vectors from v1 at the neighbour,
// multiplies them with the link at the site itself, and accumulates into v2.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   v2    : output field, accumulated into (+=)
//   v1    : input field
//   ieo   : selects the half of m_U that is read and, with m_Leo, the x-shift
349  int itask, double *v2, const double *v1, int ieo)
350 {
351  int Nvcd = m_Nvc * m_Nd;
352 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
353  int id1 = 0;
354  int id2 = m_Nvc;
355  int id3 = m_Nvc * 2;
356  int id4 = m_Nvc * 3;
357 
358  int idir = 0;
359 
    // NOTE(review): unless m_Nvc is a compile-time constant these are
    // variable-length arrays, which standard C++ does not provide (compiler
    // extension) - confirm how m_Nvc is declared.
360  double vt1[m_Nvc], vt2[m_Nvc];
361  double wt1r, wt1i, wt2r, wt2i;
362 
363  int isite = m_arg[itask].isite;
364  int iyzt0 = isite / m_Nx2;
365 
366  double *w2 = &v2[Nvcd * isite];
367  const double *w1 = &v1[Nvcd * isite];
368  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
369 
370  for (int it = 0; it < m_Mt; ++it) {
371  for (int iz = 0; iz < m_Mz; ++iz) {
372  for (int iy = 0; iy < m_Ny; ++iy) {
373  int iyzt = iy + m_Ny * (iz + m_Nz * it);
    // Leo equals m_Leo[...] when ieo == 0 and 1 - m_Leo[...] when ieo == 1.
374  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
    // When Leo == 1 the last ix is left out: its neighbour is + 1 would
    // fall outside this row.
375  for (int ix = 0; ix < m_Nx2 - Leo; ++ix) {
376  int is = ix + m_Nx2 * iyzt;
377  int iv = Nvcd * is;
378  int in = Nvcd * (is + Leo);
379  int ig = m_Ndf * is;
380 
    // Same block combination as the packing step, without the bc2 factor.
381  for (int ic = 0; ic < m_Nc; ++ic) {
382  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
383  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
384  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
385  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
386  }
387 
388  for (int ic = 0; ic < m_Nc; ++ic) {
    // Start of row ic of the link matrix (m_Nvc doubles per row).
389  int ic2 = ic * m_Nvc;
390 
391  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
392  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
393  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
394  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
395 
396  w2[2 * ic + id1 + iv] += wt1r;
397  w2[2 * ic + 1 + id1 + iv] += wt1i;
398  w2[2 * ic + id2 + iv] += wt2r;
399  w2[2 * ic + 1 + id2 + iv] += wt2i;
400  w2[2 * ic + id3 + iv] += wt2i;
401  w2[2 * ic + 1 + id3 + iv] += -wt2r;
402  w2[2 * ic + id4 + iv] += wt1i;
403  w2[2 * ic + 1 + id4 + iv] += -wt1r;
404  }
405  }
406  }
407  }
408  }
409 }
410 
411 
412 //====================================================================
// Direction idir = 0, step 1 of the "fw" exchange: for the ix = m_Nx2 - 1 sites
// of rows with Leo == 0, combines blocks of v1, multiplies them with the link
// via mult_udagv_r / mult_udagv_i, writes the result into task itask's slice of
// the m_fw_send[0] buffer, and starts the transfer.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   vcp1  : not referenced in the visible body (buffer comes from
//           m_fw_send[0]->ptr(); the direct use of vcp1 is commented out)
//   v1    : input field
//   ieo   : selects the half of m_U that is read (offset (1 - ieo) * m_Nvol / 2)
//           and, with m_Leo, the rows that are packed
414  int itask, double *vcp1, const double *v1, int ieo)
415 {
416  int Nvc2 = 2 * m_Nvc;
417  int Nvcd = m_Nvc * m_Nd;
418  int Nvcd2 = Nvcd / 2;
419 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
420  int id1 = 0;
421  int id2 = m_Nvc;
422  int id3 = m_Nvc * 2;
423  int id4 = m_Nvc * 3;
424 
425  int idir = 0;
426 
427  int isite = m_arg[itask].isite;
428  int isite_cp = m_arg[itask].isite_cpx;
429  int iyzt0 = isite / m_Nx2;
430 
431  // double* w2 = &vcp1[Nvcd2*isite_cp];
432  double *w2
433  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
434  const double *w1 = &v1[Nvcd * isite];
435  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
436 
    // NOTE(review): variable-length arrays unless m_Nvc is a compile-time
    // constant (compiler extension in C++) - confirm.
437  double vt1[m_Nvc], vt2[m_Nvc];
438 
439  int ix = m_Nx2 - 1;
    // ibf counts packed rows; each packed row occupies Nvc2 doubles in w2.
440  int ibf = 0;
441 
442  for (int it = 0; it < m_Mt; ++it) {
443  for (int iz = 0; iz < m_Mz; ++iz) {
444  for (int iy = 0; iy < m_Ny; ++iy) {
445  int iyzt = iy + m_Ny * (iz + m_Nz * it);
    // Leo equals m_Leo[...] when ieo == 0 and 1 - m_Leo[...] when ieo == 1.
446  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
447  if (Leo == 0) {
448  int is = ix + m_Nx2 * iyzt;
449  int in = Nvcd * is;
450  int ig = m_Ndf * is;
451 
452  int ix1 = Nvc2 * ibf;
453  int ix2 = ix1 + m_Nvc;
454 
    // Opposite signs to the "bw" packing step: reading pairs as
    // (re, im) - an assumption, confirm - these are block1 - i*block4
    // and block2 - i*block3.
455  for (int ic = 0; ic < m_Nc; ++ic) {
456  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
457  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
458  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
459  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
460  }
461 
462  for (int ic = 0; ic < m_Nc; ++ic) {
    // The link is indexed by 2 * ic here, not by ic * m_Nvc as in the
    // mult_uv_* calls elsewhere in this file.
463  int icr = 2 * ic;
464  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
465  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
466  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
467  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
468  }
469  ++ibf;
470  }
471  }
472  }
473  }
474 
    // The task's slice is complete: start its part of the transfer.
475  m_fw_send[idir]->start_thread(itask);
476 }
477 
478 
479 //====================================================================
// Direction idir = 0, step 2 of the "fw" exchange: waits for task itask's slice
// of m_fw_recv[0] and accumulates it, scaled by bc2 = m_boundary2[0], into v2
// at the ix = 0 sites of rows with Leo == 0. No link multiplication happens
// here; the sending step already applies it.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   v2    : output field, accumulated into (+=)
//   vcp2  : not referenced in the visible body (data comes from
//           m_fw_recv[0]->ptr(); the direct use of vcp2 is commented out)
//   ieo   : combined with m_Leo to choose which rows are updated
481  int itask, double *v2, const double *vcp2, int ieo)
482 {
483  int Nvc2 = 2 * m_Nvc;
484  int Nvcd = m_Nvc * m_Nd;
485  int Nvcd2 = Nvcd / 2;
486 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
487  int id1 = 0;
488  int id2 = m_Nvc;
489  int id3 = m_Nvc * 2;
490  int id4 = m_Nvc * 3;
491 
492  int idir = 0;
493  double bc2 = m_boundary2[idir];
494 
    // NOTE(review): wt1r..wt2i are declared but never used in this body.
495  double wt1r, wt1i, wt2r, wt2i;
496 
497  int isite = m_arg[itask].isite;
498  int isite_cp = m_arg[itask].isite_cpx;
499  int iyzt0 = isite / m_Nx2;
500 
501  double *w2 = &v2[Nvcd * isite];
502  // double* w1 = &vcp2[Nvcd2*isite_cp];
503  const double *w1
504  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
505 
    // Wait before w1 is read.
506  m_fw_recv[idir]->wait_thread(itask);
507 
508  int ix = 0;
    // ibf counts consumed rows, in the same order in which the packing step fills them.
509  int ibf = 0;
510  for (int it = 0; it < m_Mt; ++it) {
511  for (int iz = 0; iz < m_Mz; ++iz) {
512  for (int iy = 0; iy < m_Ny; ++iy) {
513  int iyzt = iy + m_Ny * (iz + m_Nz * it);
    // Leo equals m_Leo[...] when ieo == 0 and 1 - m_Leo[...] when ieo == 1.
514  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
515  if (Leo == 0) {
516  int is = ix + m_Nx2 * iyzt;
517  int iv = Nvcd * is;
518 
519  int ix1 = Nvc2 * ibf;
520  int ix2 = ix1 + m_Nvc;
521 
522  for (int ic = 0; ic < m_Nc; ++ic) {
523  int icr = 2 * ic;
524  int ici = 2 * ic + 1;
    // Blocks 1 and 2 take the received vectors directly; blocks 3 and 4
    // take (-im, +re) of vector 2 and vector 1 respectively.
525  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
526  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
527  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
528  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
529  w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
530  w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
531  w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
532  w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
533  }
534  ++ibf;
535  }
536  }
537  }
538  }
539 }
540 
541 
542 //====================================================================
// Direction idir = 0, interior ("bulk") part matching the two "fw" exchange
// steps: for every site of the task whose neighbour at is - Meo lies inside the
// local x-range, combines blocks of v1 at the neighbour, multiplies with the
// link stored at the neighbour via mult_udagv_r / mult_udagv_i, and accumulates
// into v2.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   v2    : output field, accumulated into (+=)
//   v1    : input field
//   ieo   : selects the half of m_U that is read (offset (1 - ieo) * m_Nvol / 2)
//           and, with m_Leo, the x-shift
544  int itask, double *v2, const double *v1, int ieo)
545 {
546  int Nvcd = m_Nvc * m_Nd;
547 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
548  int id1 = 0;
549  int id2 = m_Nvc;
550  int id3 = m_Nvc * 2;
551  int id4 = m_Nvc * 3;
552 
553  int idir = 0;
554 
    // NOTE(review): variable-length arrays unless m_Nvc is a compile-time
    // constant (compiler extension in C++) - confirm.
555  double vt1[m_Nvc], vt2[m_Nvc];
556  double wt1r, wt1i, wt2r, wt2i;
557 
558  int isite = m_arg[itask].isite;
559  int iyzt0 = isite / m_Nx2;
560 
561  double *w2 = &v2[Nvcd * isite];
562  const double *w1 = &v1[Nvcd * isite];
563  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
564 
565  for (int it = 0; it < m_Mt; ++it) {
566  for (int iz = 0; iz < m_Mz; ++iz) {
567  for (int iy = 0; iy < m_Ny; ++iy) {
568  int iyzt = iy + m_Ny * (iz + m_Nz * it);
    // Leo equals m_Leo[...] when ieo == 0 and 1 - m_Leo[...] when ieo == 1.
569  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
    // Meo is the x-shift to the neighbour; when it is 1 the first ix is
    // left out because is - 1 would fall outside this row.
570  int Meo = 1 - Leo;
571  for (int ix = Meo; ix < m_Nx2; ++ix) {
572  int is = ix + m_Nx2 * iyzt;
573  int iv = Nvcd * is;
574  int in = Nvcd * (is - Meo);
575  int ig = m_Ndf * (is - Meo);
576 
577  for (int ic = 0; ic < m_Nc; ++ic) {
578  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
579  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
580  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
581  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
582  }
583 
584  for (int ic = 0; ic < m_Nc; ++ic) {
    // Here ic2 is 2 * ic, unlike the ic * m_Nvc used with mult_uv_*.
585  int ic2 = 2 * ic;
586 
587  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
588  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
589  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
590  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
591 
592  w2[2 * ic + id1 + iv] += wt1r;
593  w2[2 * ic + 1 + id1 + iv] += wt1i;
594  w2[2 * ic + id2 + iv] += wt2r;
595  w2[2 * ic + 1 + id2 + iv] += wt2i;
596  w2[2 * ic + id3 + iv] += -wt2i;
597  w2[2 * ic + 1 + id3 + iv] += +wt2r;
598  w2[2 * ic + id4 + iv] += -wt1i;
599  w2[2 * ic + 1 + id4 + iv] += +wt1r;
600  }
601  }
602  }
603  }
604  }
605 }
606 
607 
608 //====================================================================
// Direction idir = 1, step 1 of the "bw" exchange: packs task itask's slice of
// the m_bw_send[1] buffer from the iy = 0 sites and starts the transfer.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   vcp1  : not referenced in the visible body (buffer comes from
//           m_bw_send[1]->ptr(); the direct use of vcp1 is commented out)
//   v1    : input field
//   ieo   : not referenced in the visible body
610  int itask, double *vcp1, const double *v1, int ieo)
611 {
612  int Nvc2 = 2 * m_Nvc;
613  int Nvcd = m_Nvc * m_Nd;
614  int Nvcd2 = Nvcd / 2;
615 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
616  int id1 = 0;
617  int id2 = m_Nvc;
618  int id3 = m_Nvc * 2;
619  int id4 = m_Nvc * 3;
620 
621  int idir = 1;
622 
623  int isite = m_arg[itask].isite;
624  int isite_cp = m_arg[itask].isite_cpy;
625 
626  // double* w2 = &vcp1[Nvcd2*isite_cp];
627  double *w2
628  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
629  const double *w1 = &v1[Nvcd * isite];
630 
    // Factor applied to everything packed for this direction.
631  double bc2 = m_boundary2[idir];
632 
633  int iy = 0;
634 
635  for (int it = 0; it < m_Mt; ++it) {
636  for (int iz = 0; iz < m_Mz; ++iz) {
637  for (int ix = 0; ix < m_Nx2; ++ix) {
    // is: site in the field (t stride m_Nz); is2: slot in the task's
    // buffer slice (t stride m_Mz, no y index).
638  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
639  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
640  int in = Nvcd * is;
641  int ix1 = Nvc2 * is2;
642  int ix2 = ix1 + m_Nvc;
643 
    // Element-wise combinations: block1 + block4 and block2 - block3.
644  for (int ic = 0; ic < m_Nc; ++ic) {
645  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
646  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
647  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
648  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
649  }
650  }
651  }
652  }
653 
    // The task's slice is complete: start its part of the transfer.
654  m_bw_send[idir]->start_thread(itask);
655 }
656 
657 
658 //====================================================================
// Direction idir = 1, step 2 of the "bw" exchange: waits for task itask's slice
// of m_bw_recv[1], multiplies the received vectors with the link data via
// mult_uv_r / mult_uv_i, and accumulates into v2 at the iy = m_Ny - 1 sites.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   v2    : output field, accumulated into (+=)
//   vcp2  : not referenced in the visible body (data comes from
//           m_bw_recv[1]->ptr(); the direct use of vcp2 is commented out)
//   ieo   : selects the half of m_U that is read (offset ieo * m_Nvol / 2)
660  int itask, double *v2, const double *vcp2, int ieo)
661 {
662  int Nvc2 = 2 * m_Nvc;
663  int Nvcd = m_Nvc * m_Nd;
664  int Nvcd2 = Nvcd / 2;
665 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
666  int id1 = 0;
667  int id2 = m_Nvc;
668  int id3 = m_Nvc * 2;
669  int id4 = m_Nvc * 3;
670 
671  int idir = 1;
672 
673  double wt1r, wt1i, wt2r, wt2i;
674 
675  int isite = m_arg[itask].isite;
676  int isite_cp = m_arg[itask].isite_cpy;
677 
678  double *w2 = &v2[Nvcd * isite];
679  // double* w1 = &vcp2[Nvcd2*isite_cp];
680  const double *w1
681  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
682  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
683 
    // Wait before w1 is read.
684  m_bw_recv[idir]->wait_thread(itask);
685 
686  int iy = m_Ny - 1;
687  for (int it = 0; it < m_Mt; ++it) {
688  for (int iz = 0; iz < m_Mz; ++iz) {
689  for (int ix = 0; ix < m_Nx2; ++ix) {
    // is: site in the field; is2: slot in the task's buffer slice.
690  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
691  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
692  int iv = Nvcd * is;
693  int ig = m_Ndf * is;
694  int ix1 = Nvc2 * is2;
695  int ix2 = ix1 + m_Nvc;
696 
697  for (int ic = 0; ic < m_Nc; ++ic) {
    // Start of row ic of the link matrix (m_Nvc doubles per row).
698  int ic2 = ic * m_Nvc;
699 
700  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
701  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
702  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
703  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
704 
    // Block 3 receives minus product 2; block 4 receives plus product 1.
705  w2[2 * ic + id1 + iv] += wt1r;
706  w2[2 * ic + 1 + id1 + iv] += wt1i;
707  w2[2 * ic + id2 + iv] += wt2r;
708  w2[2 * ic + 1 + id2 + iv] += wt2i;
709  w2[2 * ic + id3 + iv] += -wt2r;
710  w2[2 * ic + 1 + id3 + iv] += -wt2i;
711  w2[2 * ic + id4 + iv] += wt1r;
712  w2[2 * ic + 1 + id4 + iv] += wt1i;
713  }
714  }
715  }
716  }
717 }
718 
719 
720 //====================================================================
// Direction idir = 1, interior ("bulk") part matching the two "bw" exchange
// steps: for sites with iy in [0, m_Ny - 1), reads v1 at the neighbour one
// y-row up (is + m_Nx2), multiplies the combined vectors with the link at the
// site itself via mult_uv_r / mult_uv_i, and accumulates into v2.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   v2    : output field, accumulated into (+=)
//   v1    : input field
//   ieo   : selects the half of m_U that is read (offset ieo * m_Nvol / 2)
722  int itask, double *v2, const double *v1, int ieo)
723 {
724  int Nvcd = m_Nvc * m_Nd;
725 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
726  int id1 = 0;
727  int id2 = m_Nvc;
728  int id3 = m_Nvc * 2;
729  int id4 = m_Nvc * 3;
730 
731  int idir = 1;
732 
    // NOTE(review): variable-length arrays unless m_Nvc is a compile-time
    // constant (compiler extension in C++) - confirm.
733  double vt1[m_Nvc], vt2[m_Nvc];
734  double wt1r, wt1i, wt2r, wt2i;
735 
736  int isite = m_arg[itask].isite;
737 
738  double *w2 = &v2[Nvcd * isite];
739  const double *w1 = &v1[Nvcd * isite];
740  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
741 
742  for (int it = 0; it < m_Mt; ++it) {
743  for (int iz = 0; iz < m_Mz; ++iz) {
    // The last y-row is excluded; it is the row updated by the receive step.
744  for (int iy = 0; iy < m_Ny - 1; ++iy) {
745  for (int ix = 0; ix < m_Nx2; ++ix) {
746  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
747  int iv = Nvcd * is;
748  int in = Nvcd * (is + m_Nx2);
749  int ig = m_Ndf * is;
750 
    // Element-wise combinations: block1 + block4 and block2 - block3.
751  for (int ic = 0; ic < m_Nc; ++ic) {
752  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
753  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
754  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
755  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
756  }
757 
758  for (int ic = 0; ic < m_Nc; ++ic) {
759  int ic2 = ic * m_Nvc;
760 
761  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
762  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
763  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
764  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
765 
766  w2[2 * ic + id1 + iv] += wt1r;
767  w2[2 * ic + 1 + id1 + iv] += wt1i;
768  w2[2 * ic + id2 + iv] += wt2r;
769  w2[2 * ic + 1 + id2 + iv] += wt2i;
770  w2[2 * ic + id3 + iv] += -wt2r;
771  w2[2 * ic + 1 + id3 + iv] += -wt2i;
772  w2[2 * ic + id4 + iv] += wt1r;
773  w2[2 * ic + 1 + id4 + iv] += wt1i;
774  }
775  }
776  }
777  }
778  }
779 }
780 
781 
782 //====================================================================
// Direction idir = 1, step 1 of the "fw" exchange: for the iy = m_Ny - 1 sites,
// combines blocks of v1, multiplies them with the link via mult_udagv_r /
// mult_udagv_i, writes the result into task itask's slice of the m_fw_send[1]
// buffer, and starts the transfer.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   vcp1  : not referenced in the visible body (buffer comes from
//           m_fw_send[1]->ptr(); the direct use of vcp1 is commented out)
//   v1    : input field
//   ieo   : selects the half of m_U that is read (offset (1 - ieo) * m_Nvol / 2)
784  int itask, double *vcp1, const double *v1, int ieo)
785 {
786  int Nvc2 = 2 * m_Nvc;
787  int Nvcd = m_Nvc * m_Nd;
788  int Nvcd2 = Nvcd / 2;
789 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
790  int id1 = 0;
791  int id2 = m_Nvc;
792  int id3 = m_Nvc * 2;
793  int id4 = m_Nvc * 3;
794 
795  int idir = 1;
796 
797  int isite = m_arg[itask].isite;
798  int isite_cp = m_arg[itask].isite_cpy;
799 
800  // double* w2 = &vcp1[Nvcd2*isite_cp];
801  double *w2
802  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
803  const double *w1 = &v1[Nvcd * isite];
804  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
805 
    // NOTE(review): variable-length arrays unless m_Nvc is a compile-time
    // constant (compiler extension in C++) - confirm.
806  double vt1[m_Nvc], vt2[m_Nvc];
807 
808  int iy = m_Ny - 1;
809 
810  for (int it = 0; it < m_Mt; ++it) {
811  for (int iz = 0; iz < m_Mz; ++iz) {
812  for (int ix = 0; ix < m_Nx2; ++ix) {
    // is: site in the field; is2: slot in the task's buffer slice.
813  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
814  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
815  int in = Nvcd * is;
816  int ig = m_Ndf * is;
817  int ix1 = Nvc2 * is2;
818  int ix2 = ix1 + m_Nvc;
819 
    // Element-wise combinations: block1 - block4 and block2 + block3.
820  for (int ic = 0; ic < m_Nc; ++ic) {
821  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
822  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
823  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
824  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
825  }
826 
827  for (int ic = 0; ic < m_Nc; ++ic) {
    // The link is indexed by 2 * ic here, not by ic * m_Nvc.
828  int icr = 2 * ic;
829  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
830  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
831  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
832  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
833  }
834  }
835  }
836  }
837 
    // The task's slice is complete: start its part of the transfer.
838  m_fw_send[idir]->start_thread(itask);
839 }
840 
841 
842 //====================================================================
// Direction idir = 1, step 2 of the "fw" exchange: waits for task itask's slice
// of m_fw_recv[1] and accumulates it, scaled by bc2 = m_boundary2[1], into v2
// at the iy = 0 sites. No link multiplication happens here.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   v2    : output field, accumulated into (+=)
//   vcp2  : not referenced in the visible body (data comes from
//           m_fw_recv[1]->ptr(); the direct use of vcp2 is commented out)
//   ieo   : not referenced in the visible body
844  int itask, double *v2, const double *vcp2, int ieo)
845 {
846  int Nvc2 = 2 * m_Nvc;
847  int Nvcd = m_Nvc * m_Nd;
848  int Nvcd2 = Nvcd / 2;
849 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
850  int id1 = 0;
851  int id2 = m_Nvc;
852  int id3 = m_Nvc * 2;
853  int id4 = m_Nvc * 3;
854 
855  int idir = 1;
856  double bc2 = m_boundary2[idir];
857 
    // NOTE(review): wt1r..wt2i are declared but never used in this body.
858  double wt1r, wt1i, wt2r, wt2i;
859 
860  int isite = m_arg[itask].isite;
861  int isite_cp = m_arg[itask].isite_cpy;
862 
863  double *w2 = &v2[Nvcd * isite];
864  // double* w1 = &vcp2[Nvcd2*isite_cp];
865  const double *w1
866  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
867 
    // Wait before w1 is read.
868  m_fw_recv[idir]->wait_thread(itask);
869 
870  int iy = 0;
871  for (int it = 0; it < m_Mt; ++it) {
872  for (int iz = 0; iz < m_Mz; ++iz) {
873  for (int ix = 0; ix < m_Nx2; ++ix) {
    // is: site in the field; is2: slot in the task's buffer slice.
874  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
875  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
876  int iv = Nvcd * is;
877  int ix1 = Nvc2 * is2;
878  int ix2 = ix1 + m_Nvc;
879 
880  for (int ic = 0; ic < m_Nc; ++ic) {
881  int icr = 2 * ic;
882  int ici = 2 * ic + 1;
    // Block 3 receives plus vector 2; block 4 receives minus vector 1.
883  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
884  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
885  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
886  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
887  w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
888  w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
889  w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
890  w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
891  }
892  }
893  }
894  }
895 }
896 
897 
898 //====================================================================
// Direction idir = 1, interior ("bulk") part matching the two "fw" exchange
// steps: for sites with iy in [1, m_Ny), reads v1 and the link at the neighbour
// one y-row down (is - m_Nx2), multiplies via mult_udagv_r / mult_udagv_i, and
// accumulates into v2.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   v2    : output field, accumulated into (+=)
//   v1    : input field
//   ieo   : selects the half of m_U that is read (offset (1 - ieo) * m_Nvol / 2)
900  int itask, double *v2, const double *v1, int ieo)
901 {
902  int Nvcd = m_Nvc * m_Nd;
903 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
904  int id1 = 0;
905  int id2 = m_Nvc;
906  int id3 = m_Nvc * 2;
907  int id4 = m_Nvc * 3;
908 
909  int idir = 1;
910 
    // NOTE(review): variable-length arrays unless m_Nvc is a compile-time
    // constant (compiler extension in C++) - confirm.
911  double vt1[m_Nvc], vt2[m_Nvc];
912  double wt1r, wt1i, wt2r, wt2i;
913 
914  int isite = m_arg[itask].isite;
915 
916  double *w2 = &v2[Nvcd * isite];
917  const double *w1 = &v1[Nvcd * isite];
918  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
919 
920  for (int it = 0; it < m_Mt; ++it) {
921  for (int iz = 0; iz < m_Mz; ++iz) {
    // The first y-row is excluded; it is the row updated by the receive step.
922  for (int iy = 1; iy < m_Ny; ++iy) {
923  for (int ix = 0; ix < m_Nx2; ++ix) {
924  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
925  int iv = Nvcd * is;
926  int in = Nvcd * (is - m_Nx2);
927  int ig = m_Ndf * (is - m_Nx2);
928 
    // Element-wise combinations: block1 - block4 and block2 + block3.
929  for (int ic = 0; ic < m_Nc; ++ic) {
930  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
931  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
932  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
933  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
934  }
935 
936  for (int ic = 0; ic < m_Nc; ++ic) {
    // Here ic2 is 2 * ic and is used for both the link and the output index.
937  int ic2 = 2 * ic;
938  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
939  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
940  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
941  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
942 
943  w2[ic2 + id1 + iv] += wt1r;
944  w2[ic2 + 1 + id1 + iv] += wt1i;
945  w2[ic2 + id2 + iv] += wt2r;
946  w2[ic2 + 1 + id2 + iv] += wt2i;
947  w2[ic2 + id3 + iv] += wt2r;
948  w2[ic2 + 1 + id3 + iv] += wt2i;
949  w2[ic2 + id4 + iv] += -wt1r;
950  w2[ic2 + 1 + id4 + iv] += -wt1i;
951  }
952  }
953  }
954  }
955  }
956 }
957 
958 
959 //====================================================================
// Direction idir = 2, step 1 of the "bw" exchange: tasks flagged kz0 == 1 pack
// their iz = 0 plane into the m_bw_send[2] buffer; every task then calls
// start_thread(itask), whether or not it packed anything.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   vcp1  : not referenced in the visible body (buffer comes from
//           m_bw_send[2]->ptr(); the direct use of vcp1 is commented out)
//   v1    : input field
//   ieo   : not referenced in the visible body
961  int itask, double *vcp1, const double *v1, int ieo)
962 {
963  int Nvc2 = 2 * m_Nvc;
964  int Nvcd = m_Nvc * m_Nd;
965  int Nvcd2 = Nvcd / 2;
966 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
967  int id1 = 0;
968  int id2 = m_Nvc;
969  int id3 = m_Nvc * 2;
970  int id4 = m_Nvc * 3;
971 
972  int idir = 2;
973 
974  int isite = m_arg[itask].isite;
975  int isite_cp = m_arg[itask].isite_cpz;
976 
977  // double* w2 = &vcp1[Nvcd2*isite_cp];
978  double *w2
979  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
980  const double *w1 = &v1[Nvcd * isite];
981 
    // Factor applied to everything packed for this direction.
982  double bc2 = m_boundary2[idir];
983 
    // Only the first task along z owns the iz = 0 plane.
984  if (m_arg[itask].kz0 == 1) {
985  int Nxy = m_Nx2 * m_Ny;
986  int iz = 0;
987  for (int it = 0; it < m_Mt; ++it) {
988  for (int ixy = 0; ixy < Nxy; ++ixy) {
    // is: site in the field; is2: slot in the buffer (one plane per it).
989  int is = ixy + Nxy * (iz + m_Nz * it);
990  int is2 = ixy + Nxy * it;
991 
992  int in = Nvcd * is;
993  int ix1 = Nvc2 * is2;
994  int ix2 = ix1 + m_Nvc;
995 
    // Reading each (2*ic, 2*ic+1) pair as (re, im) - an assumption,
    // confirm - the packed vectors are bc2 * (block1 + i*block3) and
    // bc2 * (block2 - i*block4).
996  for (int ic = 0; ic < m_Nc; ++ic) {
997  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
998  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
999  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1000  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
1001  }
1002  }
1003  }
1004  }
1005 
    // Called unconditionally, also by tasks that did not pack anything.
1006  m_bw_send[idir]->start_thread(itask);
1007 }
1008 
1009 
1010 //====================================================================
// Direction idir = 2, step 2 of the "bw" exchange: every task calls
// wait_thread(itask); tasks flagged kz1 == 1 then multiply the received vectors
// with the link data via mult_uv_r / mult_uv_i and accumulate into v2 on their
// iz = m_Mz - 1 plane.
// NOTE(review): the line carrying the function name is missing from this extract.
//   itask : index into m_arg selecting the task's site block
//   v2    : output field, accumulated into (+=)
//   vcp2  : not referenced in the visible body (data comes from
//           m_bw_recv[2]->ptr(); the direct use of vcp2 is commented out)
//   ieo   : selects the half of m_U that is read (offset ieo * m_Nvol / 2)
1012  int itask, double *v2, const double *vcp2, int ieo)
1013 {
1014  int Nvc2 = 2 * m_Nvc;
1015  int Nvcd = m_Nvc * m_Nd;
1016  int Nvcd2 = Nvcd / 2;
1017 
    // Offsets of the four m_Nvc-sized blocks inside one site's Nvcd doubles.
1018  int id1 = 0;
1019  int id2 = m_Nvc;
1020  int id3 = m_Nvc * 2;
1021  int id4 = m_Nvc * 3;
1022 
1023  int idir = 2;
1024 
1025  double wt1r, wt1i, wt2r, wt2i;
1026 
1027  int isite = m_arg[itask].isite;
1028  int isite_cp = m_arg[itask].isite_cpz;
1029 
1030  double *w2 = &v2[Nvcd * isite];
1031  // double* w1 = &vcp2[Nvcd2*isite_cp];
1032  const double *w1
1033  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1034  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1035 
    // Called unconditionally, also by tasks that skip the update below.
1036  m_bw_recv[idir]->wait_thread(itask);
1037 
    // Only the last task along z owns the plane that needs received data.
1038  if (m_arg[itask].kz1 == 1) {
1039  int Nxy = m_Nx2 * m_Ny;
1040  int iz = m_Mz - 1;
1041  for (int it = 0; it < m_Mt; ++it) {
1042  for (int ixy = 0; ixy < Nxy; ++ixy) {
    // is: site in the field; is2: slot in the buffer (one plane per it).
1043  int is = ixy + Nxy * (iz + m_Nz * it);
1044  int is2 = ixy + Nxy * it;
1045  int iv = Nvcd * is;
1046  int ig = m_Ndf * is;
1047  int ix1 = Nvc2 * is2;
1048  int ix2 = ix1 + m_Nvc;
1049 
1050  for (int ic = 0; ic < m_Nc; ++ic) {
    // Start of row ic of the link matrix (m_Nvc doubles per row).
1051  int ic2 = ic * m_Nvc;
1052 
1053  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1054  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1055  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1056  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1057 
    // Block 3 receives (im, -re) of product 1; block 4 receives
    // (-im, re) of product 2.
1058  w2[2 * ic + id1 + iv] += wt1r;
1059  w2[2 * ic + 1 + id1 + iv] += wt1i;
1060  w2[2 * ic + id2 + iv] += wt2r;
1061  w2[2 * ic + 1 + id2 + iv] += wt2i;
1062  w2[2 * ic + id3 + iv] += wt1i;
1063  w2[2 * ic + 1 + id3 + iv] += -wt1r;
1064  w2[2 * ic + id4 + iv] += -wt2i;
1065  w2[2 * ic + 1 + id4 + iv] += wt2r;
1066  }
1067  }
1068  }
1069  }
1070 }
1071 
1072 
1073 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1074) is missing from this listing; presumably mult_zpb_thread -- confirm.
// Interior (non-buffer) part for idir = 2: for each site of this task it
// reads v1 at the site one z-slice further (is + Nxy), combines the four
// per-site blocks into vt1 / vt2, multiplies by the link variable at the
// site itself and accumulates into v2. When m_arg[itask].kz1 == 1 the last
// local z-slice is left out of the loop.
1075  int itask, double *v2, const double *v1, int ieo)
1076 {
1077  int Nvcd = m_Nvc * m_Nd;
1078 
1079  int id1 = 0;
1080  int id2 = m_Nvc;
1081  int id3 = m_Nvc * 2;
1082  int id4 = m_Nvc * 3;
1083 
1084  int idir = 2;
1085 
1086  double vt1[m_Nvc], vt2[m_Nvc];
1087  double wt1r, wt1i, wt2r, wt2i;
1088 
1089  int isite = m_arg[itask].isite;
1090 
1091  double *w2 = &v2[Nvcd * isite];
1092  const double *w1 = &v1[Nvcd * isite];
1093  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1094 
1095  int kz1 = m_arg[itask].kz1;
1096  int Nxy = m_Nx2 * m_Ny;
1097 
1098  for (int it = 0; it < m_Mt; ++it) {
// Upper bound shrinks by one slice when kz1 is set.
1099  for (int iz = 0; iz < m_Mz - kz1; ++iz) {
1100  for (int ixy = 0; ixy < Nxy; ++ixy) {
1101  int is = ixy + Nxy * (iz + m_Nz * it);
1102  int iv = Nvcd * is;
// in addresses the neighbouring site one z-slice up (is + Nxy).
1103  int in = Nvcd * (is + Nxy);
1104  int ig = m_Ndf * is;
1105 
1106  for (int ic = 0; ic < m_Nc; ++ic) {
1107  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
1108  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
1109  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
1110  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
1111  }
1112 
1113  for (int ic = 0; ic < m_Nc; ++ic) {
1114  int ic2 = ic * m_Nvc;
1115 
1116  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1117  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1118  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1119  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1120 
1121  w2[2 * ic + id1 + iv] += wt1r;
1122  w2[2 * ic + 1 + id1 + iv] += wt1i;
1123  w2[2 * ic + id2 + iv] += wt2r;
1124  w2[2 * ic + 1 + id2 + iv] += wt2i;
1125  w2[2 * ic + id3 + iv] += wt1i;
1126  w2[2 * ic + 1 + id3 + iv] += -wt1r;
1127  w2[2 * ic + id4 + iv] += -wt2i;
1128  w2[2 * ic + 1 + id4 + iv] += wt2r;
1129  }
1130  }
1131  }
1132  }
1133 }
1134 
1135 
1136 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1137) is missing from this listing; presumably mult_zm1_thread -- confirm.
// Fills the m_fw_send[2] buffer: when m_arg[itask].kz1 == 1, the last local
// z-slice of v1 is combined into vt1 / vt2, multiplied through
// mult_udagv_r / mult_udagv_i with the link variable and stored in the
// buffer. start_thread(itask) is called whether or not the slice was packed.
// vcp1 is not written in this body.
1138  int itask, double *vcp1, const double *v1, int ieo)
1139 {
1140  int Nvc2 = 2 * m_Nvc;
1141  int Nvcd = m_Nvc * m_Nd;
1142  int Nvcd2 = Nvcd / 2;
1143 
1144  int id1 = 0;
1145  int id2 = m_Nvc;
1146  int id3 = m_Nvc * 2;
1147  int id4 = m_Nvc * 3;
1148 
1149  int idir = 2;
1150 
1151  int isite = m_arg[itask].isite;
1152  int isite_cp = m_arg[itask].isite_cpz;
1153 
1154  // double* w2 = &vcp1[Nvcd2*isite_cp];
1155  double *w2
1156  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1157  const double *w1 = &v1[Nvcd * isite];
// Link data is taken from the (1 - ieo) half of direction idir.
1158  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1159 
1160  double vt1[m_Nvc], vt2[m_Nvc];
1161 
1162  if (m_arg[itask].kz1 == 1) {
1163  int Nxy = m_Nx2 * m_Ny;
1164  int iz = m_Mz - 1;
1165  for (int it = 0; it < m_Mt; ++it) {
1166  for (int ixy = 0; ixy < Nxy; ++ixy) {
1167  int is = ixy + Nxy * (iz + m_Nz * it);
1168  int is2 = ixy + Nxy * it;
1169  int in = Nvcd * is;
1170  int ig = m_Ndf * is;
1171  int ix1 = Nvc2 * is2;
1172  int ix2 = ix1 + m_Nvc;
1173 
1174  for (int ic = 0; ic < m_Nc; ++ic) {
1175  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1176  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1177  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1178  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1179  }
1180 
1181  for (int ic = 0; ic < m_Nc; ++ic) {
1182  int icr = 2 * ic;
1183  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1184  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1185  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1186  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1187  }
1188  }
1189  }
1190  }
1191 
// Presumably signals that this task's part of the send buffer is ready -- confirm.
1192  m_fw_send[idir]->start_thread(itask);
1193 }
1194 
1195 
1196 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1197) is missing from this listing; presumably mult_zm2_thread -- confirm.
// Accumulates into v2 the data read from the m_fw_recv[2] buffer, scaled by
// m_boundary2[2]. Only the first local z-slice (iz = 0) is touched, and only
// when m_arg[itask].kz0 == 1. No link multiplication happens here.
// vcp2, ieo and wt1r..wt2i are not used in this body.
1198  int itask, double *v2, const double *vcp2, int ieo)
1199 {
1200  int Nvc2 = 2 * m_Nvc;
1201  int Nvcd = m_Nvc * m_Nd;
1202  int Nvcd2 = Nvcd / 2;
1203 
1204  int id1 = 0;
1205  int id2 = m_Nvc;
1206  int id3 = m_Nvc * 2;
1207  int id4 = m_Nvc * 3;
1208 
1209  int idir = 2;
1210  double bc2 = m_boundary2[idir];
1211 
1212  double wt1r, wt1i, wt2r, wt2i;
1213 
1214  int isite = m_arg[itask].isite;
1215  int isite_cp = m_arg[itask].isite_cpz;
1216 
1217  double *w2 = &v2[Nvcd * isite];
1218  // double* w1 = &vcp2[Nvcd2*isite_cp];
1219  const double *w1
1220  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1221 
// Called before w1 is read; presumably waits for the received data -- confirm.
1222  m_fw_recv[idir]->wait_thread(itask);
1223 
1224  if (m_arg[itask].kz0 == 1) {
1225  int Nxy = m_Nx2 * m_Ny;
1226 
1227  int iz = 0;
1228  for (int it = 0; it < m_Mt; ++it) {
1229  for (int ixy = 0; ixy < Nxy; ++ixy) {
1230  int is = ixy + Nxy * (iz + m_Nz * it);
1231  int is2 = ixy + Nxy * it;
1232  int iv = Nvcd * is;
1233  int ix1 = Nvc2 * is2;
1234  int ix2 = ix1 + m_Nvc;
1235 
1236  for (int ic = 0; ic < m_Nc; ++ic) {
// icr / ici: offsets of the real and imaginary parts for colour ic.
1237  int icr = 2 * ic;
1238  int ici = 2 * ic + 1;
1239  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1240  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1241  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1242  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1243  w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1244  w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1245  w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1246  w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
1247  }
1248  }
1249  }
1250  }
1251 }
1252 
1253 
1254 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1255) is missing from this listing; presumably mult_zmb_thread -- confirm.
// Interior (non-buffer) part for idir = 2 in the opposite direction: for
// each site it reads v1 and the link variable at the site one z-slice lower
// (is - Nxy), applies mult_udagv_r / mult_udagv_i and accumulates into v2.
// When m_arg[itask].kz0 == 1 the first local z-slice is left out of the loop.
1256  int itask, double *v2, const double *v1, int ieo)
1257 {
1258  int Nvcd = m_Nvc * m_Nd;
1259 
1260  int id1 = 0;
1261  int id2 = m_Nvc;
1262  int id3 = m_Nvc * 2;
1263  int id4 = m_Nvc * 3;
1264 
1265  int idir = 2;
1266 
1267  double vt1[m_Nvc], vt2[m_Nvc];
1268  double wt1r, wt1i, wt2r, wt2i;
1269 
1270  int isite = m_arg[itask].isite;
1271 
1272  double *w2 = &v2[Nvcd * isite];
1273  const double *w1 = &v1[Nvcd * isite];
1274  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1275 
1276  int kz0 = m_arg[itask].kz0;
1277  int Nxy = m_Nx2 * m_Ny;
1278 
1279  for (int it = 0; it < m_Mt; ++it) {
// Starts at iz = 1 when kz0 is set, so is - Nxy stays within this task's slices.
1280  for (int iz = kz0; iz < m_Mz; ++iz) {
1281  for (int ixy = 0; ixy < Nxy; ++ixy) {
1282  int is = ixy + Nxy * (iz + m_Nz * it);
1283  int iv = Nvcd * is;
1284  int in = Nvcd * (is - Nxy);
1285  int ig = m_Ndf * (is - Nxy);
1286 
1287  for (int ic = 0; ic < m_Nc; ++ic) {
1288  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1289  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1290  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1291  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1292  }
1293 
1294  for (int ic = 0; ic < m_Nc; ++ic) {
1295  int ic2 = 2 * ic;
1296  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1297  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1298  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1299  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1300 
1301  w2[ic2 + id1 + iv] += wt1r;
1302  w2[ic2 + 1 + id1 + iv] += wt1i;
1303  w2[ic2 + id2 + iv] += wt2r;
1304  w2[ic2 + 1 + id2 + iv] += wt2i;
1305  w2[ic2 + id3 + iv] += -wt1i;
1306  w2[ic2 + 1 + id3 + iv] += wt1r;
1307  w2[ic2 + id4 + iv] += wt2i;
1308  w2[ic2 + 1 + id4 + iv] += -wt2r;
1309  }
1310  }
1311  }
1312  }
1313 }
1314 
1315 
1316 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1317) is missing from this listing; presumably mult_tp1_dirac_thread --
// confirm.
// Fills the m_bw_send[3] buffer: when m_arg[itask].kt0 == 1, the id3 and id4
// blocks of v1 on the first local t-slice (it = 0) are copied into the
// buffer scaled by 2.0 * m_boundary2[3]. start_thread(itask) is called
// whether or not the slice was packed. vcp1, ieo, id1 and id2 are not used.
1318  int itask, double *vcp1, const double *v1, int ieo)
1319 {
1320  int Nvc2 = 2 * m_Nvc;
1321  int Nvcd = m_Nvc * m_Nd;
1322  int Nvcd2 = Nvcd / 2;
1323 
1324  int id1 = 0;
1325  int id2 = m_Nvc;
1326  int id3 = m_Nvc * 2;
1327  int id4 = m_Nvc * 3;
1328 
1329  int idir = 3;
1330 
1331  int isite = m_arg[itask].isite;
1332  int isite_cp = m_arg[itask].isite_cpt;
1333 
1334  // double* w2 = &vcp1[Nvcd2*isite_cp];
1335  double *w2
1336  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1337  const double *w1 = &v1[Nvcd * isite];
1338 
1339  double bc2 = m_boundary2[idir];
1340 
1341  if (m_arg[itask].kt0 == 1) {
1342  int Nxy = m_Nx2 * m_Ny;
1343  int it = 0;
1344  for (int iz = 0; iz < m_Mz; ++iz) {
1345  for (int ixy = 0; ixy < Nxy; ++ixy) {
// is indexes the full-field site; is2 indexes the (ixy, iz) boundary buffer.
1346  int is = ixy + Nxy * (iz + m_Nz * it);
1347  int is2 = ixy + Nxy * iz;
1348 
1349  int in = Nvcd * is;
1350  int ix1 = Nvc2 * is2;
1351  int ix2 = ix1 + m_Nvc;
1352 
1353  for (int ic = 0; ic < m_Nc; ++ic) {
1354  w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1355  w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1356  w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1357  w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1358  }
1359  }
1360  }
1361  }
1362 
// Presumably signals that this task's part of the send buffer is ready -- confirm.
1363  m_bw_send[idir]->start_thread(itask);
1364 }
1365 
1366 
1367 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1368) is missing from this listing; presumably mult_tp2_dirac_thread --
// confirm.
// Accumulates into the id3 and id4 blocks of v2 the data read from the
// m_bw_recv[3] buffer, multiplied by the link variable through
// mult_uv_r / mult_uv_i. Only the last local t-slice (it = m_Mt - 1) is
// touched, and only when m_arg[itask].kt1 == 1. vcp2 is not read here.
1369  int itask, double *v2, const double *vcp2, int ieo)
1370 {
1371  int Nvc2 = 2 * m_Nvc;
1372  int Nvcd = m_Nvc * m_Nd;
1373  int Nvcd2 = Nvcd / 2;
1374 
1375  int id1 = 0;
1376  int id2 = m_Nvc;
1377  int id3 = m_Nvc * 2;
1378  int id4 = m_Nvc * 3;
1379 
1380  int idir = 3;
1381 
1382  double wt1r, wt1i, wt2r, wt2i;
1383 
1384  int isite = m_arg[itask].isite;
1385  int isite_cp = m_arg[itask].isite_cpt;
1386 
1387  double *w2 = &v2[Nvcd * isite];
1388  // double* w1 = &vcp2[Nvcd2*isite_cp];
1389  const double *w1
1390  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1391  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1392 
// Called before w1 is read; presumably waits for the received data -- confirm.
1393  m_bw_recv[idir]->wait_thread(itask);
1394 
1395  if (m_arg[itask].kt1 == 1) {
1396  int Nxy = m_Nx2 * m_Ny;
1397  int it = m_Mt - 1;
1398  for (int iz = 0; iz < m_Mz; ++iz) {
1399  for (int ixy = 0; ixy < Nxy; ++ixy) {
1400  int is = ixy + Nxy * (iz + m_Nz * it);
1401  int is2 = ixy + Nxy * iz;
1402  int iv = Nvcd * is;
1403  int ig = m_Ndf * is;
1404  int ix1 = Nvc2 * is2;
1405  int ix2 = ix1 + m_Nvc;
1406 
1407  for (int ic = 0; ic < m_Nc; ++ic) {
1408  int ic2 = ic * m_Nvc;
1409 
1410  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1411  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1412  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1413  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1414 
1415  w2[2 * ic + id3 + iv] += wt1r;
1416  w2[2 * ic + 1 + id3 + iv] += wt1i;
1417  w2[2 * ic + id4 + iv] += wt2r;
1418  w2[2 * ic + 1 + id4 + iv] += wt2i;
1419  }
1420  }
1421  }
1422  }
1423 }
1424 
1425 
1426 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1427) is missing from this listing; presumably mult_tpb_dirac_thread --
// confirm.
// Interior (non-buffer) part for idir = 3: for each site it reads the id3
// and id4 blocks of v1 at the site one t-slice further (is + Nxyz), doubles
// them, multiplies by the link variable at the site itself and accumulates
// into the id3 and id4 blocks of v2. When m_arg[itask].kt1 == 1 the last
// local t-slice is left out of the loop.
1428  int itask, double *v2, const double *v1, int ieo)
1429 {
1430  int Nvcd = m_Nvc * m_Nd;
1431 
1432  int id1 = 0;
1433  int id2 = m_Nvc;
1434  int id3 = m_Nvc * 2;
1435  int id4 = m_Nvc * 3;
1436 
1437  int idir = 3;
1438 
1439  double vt1[m_Nvc], vt2[m_Nvc];
1440  double wt1r, wt1i, wt2r, wt2i;
1441 
1442  int isite = m_arg[itask].isite;
1443 
1444  double *w2 = &v2[Nvcd * isite];
1445  const double *w1 = &v1[Nvcd * isite];
1446  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1447 
1448  int kt1 = m_arg[itask].kt1;
1449  int Nxy = m_Nx2 * m_Ny;
// Nxyz is the site stride between consecutive t-slices.
1450  int Nxyz = Nxy * m_Nz;
1451 
1452  for (int it = 0; it < m_Mt - kt1; ++it) {
1453  for (int iz = 0; iz < m_Mz; ++iz) {
1454  for (int ixy = 0; ixy < Nxy; ++ixy) {
1455  int is = ixy + Nxy * (iz + m_Nz * it);
1456  int iv = Nvcd * is;
1457  int in = Nvcd * (is + Nxyz);
1458  int ig = m_Ndf * is;
1459 
1460  for (int ic = 0; ic < m_Nc; ++ic) {
1461  vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1462  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1463  vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1464  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1465  }
1466 
1467  for (int ic = 0; ic < m_Nc; ++ic) {
1468  int ic2 = ic * m_Nvc;
1469 
1470  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1471  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1472  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1473  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1474 
1475  w2[2 * ic + id3 + iv] += wt1r;
1476  w2[2 * ic + 1 + id3 + iv] += wt1i;
1477  w2[2 * ic + id4 + iv] += wt2r;
1478  w2[2 * ic + 1 + id4 + iv] += wt2i;
1479  }
1480  }
1481  }
1482  }
1483 }
1484 
1485 
1486 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1487) is missing from this listing; presumably mult_tm1_dirac_thread --
// confirm.
// Fills the m_fw_send[3] buffer: when m_arg[itask].kt1 == 1, the id1 and id2
// blocks of v1 on the last local t-slice are doubled, multiplied through
// mult_udagv_r / mult_udagv_i with the link variable and stored in the
// buffer. start_thread(itask) is called whether or not the slice was packed.
// vcp1 is not written in this body.
1488  int itask, double *vcp1, const double *v1, int ieo)
1489 {
1490  int Nvc2 = 2 * m_Nvc;
1491  int Nvcd = m_Nvc * m_Nd;
1492  int Nvcd2 = Nvcd / 2;
1493 
1494  int id1 = 0;
1495  int id2 = m_Nvc;
1496  int id3 = m_Nvc * 2;
1497  int id4 = m_Nvc * 3;
1498 
1499  int idir = 3;
1500 
1501  int isite = m_arg[itask].isite;
1502  int isite_cp = m_arg[itask].isite_cpt;
1503 
1504  // double* w2 = &vcp1[Nvcd2*isite_cp];
1505  double *w2
1506  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1507  const double *w1 = &v1[Nvcd * isite];
1508  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1509 
1510  double vt1[m_Nvc], vt2[m_Nvc];
1511 
1512  if (m_arg[itask].kt1 == 1) {
1513  int Nxy = m_Nx2 * m_Ny;
1514  int it = m_Mt - 1;
1515  for (int iz = 0; iz < m_Mz; ++iz) {
1516  for (int ixy = 0; ixy < Nxy; ++ixy) {
1517  int is = ixy + Nxy * (iz + m_Nz * it);
1518  int is2 = ixy + Nxy * iz;
1519  int in = Nvcd * is;
1520  int ig = m_Ndf * is;
1521  int ix1 = Nvc2 * is2;
1522  int ix2 = ix1 + m_Nvc;
1523 
1524  for (int ic = 0; ic < m_Nc; ++ic) {
1525  vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1526  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1527  vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1528  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1529  }
1530 
1531  for (int ic = 0; ic < m_Nc; ++ic) {
1532  int icr = 2 * ic;
1533  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1534  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1535  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1536  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1537  }
1538  }
1539  }
1540  }
1541 
// Presumably signals that this task's part of the send buffer is ready -- confirm.
1542  m_fw_send[idir]->start_thread(itask);
1543 }
1544 
1545 
1546 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1547) is missing from this listing; presumably mult_tm2_dirac_thread --
// confirm.
// Accumulates into the id1 and id2 blocks of v2 the data read from the
// m_fw_recv[3] buffer, scaled by m_boundary2[3]. Only the first local
// t-slice (it = 0) is touched, and only when m_arg[itask].kt0 == 1.
// vcp2, ieo, id3, id4 and wt1r..wt2i are not used in this body.
1548  int itask, double *v2, const double *vcp2, int ieo)
1549 {
1550  int Nvc2 = 2 * m_Nvc;
1551  int Nvcd = m_Nvc * m_Nd;
1552  int Nvcd2 = Nvcd / 2;
1553 
1554  int id1 = 0;
1555  int id2 = m_Nvc;
1556  int id3 = m_Nvc * 2;
1557  int id4 = m_Nvc * 3;
1558 
1559  int idir = 3;
1560  double bc2 = m_boundary2[idir];
1561 
1562  double wt1r, wt1i, wt2r, wt2i;
1563 
1564  int isite = m_arg[itask].isite;
1565  int isite_cp = m_arg[itask].isite_cpt;
1566 
1567  double *w2 = &v2[Nvcd * isite];
1568  // double* w1 = &vcp2[Nvcd2*isite_cp];
1569  const double *w1
1570  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1571 
// Called before w1 is read; presumably waits for the received data -- confirm.
1572  m_fw_recv[idir]->wait_thread(itask);
1573 
1574  if (m_arg[itask].kt0 == 1) {
1575  int Nxy = m_Nx2 * m_Ny;
1576  int it = 0;
1577  for (int iz = 0; iz < m_Mz; ++iz) {
1578  for (int ixy = 0; ixy < Nxy; ++ixy) {
1579  int is = ixy + Nxy * (iz + m_Nz * it);
1580  int is2 = ixy + Nxy * iz;
1581  int iv = Nvcd * is;
1582  int ix1 = Nvc2 * is2;
1583  int ix2 = ix1 + m_Nvc;
1584 
1585  for (int ic = 0; ic < m_Nc; ++ic) {
1586  int icr = 2 * ic;
1587  int ici = 2 * ic + 1;
1588  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1589  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1590  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1591  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1592  }
1593  }
1594  }
1595  }
1596 }
1597 
1598 
1599 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1600) is missing from this listing; presumably mult_tmb_dirac_thread --
// confirm.
// Interior (non-buffer) part for idir = 3 in the opposite direction: for
// each site it reads the id1 and id2 blocks of v1 and the link variable at
// the site one t-slice lower (is - Nxyz), doubles the data, applies
// mult_udagv_r / mult_udagv_i and accumulates into the id1 and id2 blocks of
// v2. When m_arg[itask].kt0 == 1 the first local t-slice is left out.
1601  int itask, double *v2, const double *v1, int ieo)
1602 {
1603  int Nvcd = m_Nvc * m_Nd;
1604 
1605  int id1 = 0;
1606  int id2 = m_Nvc;
1607  int id3 = m_Nvc * 2;
1608  int id4 = m_Nvc * 3;
1609 
1610  int idir = 3;
1611 
1612  double vt1[m_Nvc], vt2[m_Nvc];
1613  double wt1r, wt1i, wt2r, wt2i;
1614 
1615  int isite = m_arg[itask].isite;
1616 
1617  double *w2 = &v2[Nvcd * isite];
1618  const double *w1 = &v1[Nvcd * isite];
1619  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1620 
1621  int kt0 = m_arg[itask].kt0;
1622  int Nxy = m_Nx2 * m_Ny;
1623  int Nxyz = Nxy * m_Nz;
1624 
// Starts at it = 1 when kt0 is set, so is - Nxyz stays within this task's slices.
1625  for (int it = kt0; it < m_Mt; ++it) {
1626  for (int iz = 0; iz < m_Mz; ++iz) {
1627  for (int ixy = 0; ixy < Nxy; ++ixy) {
1628  int is = ixy + Nxy * (iz + m_Nz * it);
1629  int iv = Nvcd * is;
1630  int in = Nvcd * (is - Nxyz);
1631  int ig = m_Ndf * (is - Nxyz);
1632 
1633  for (int ic = 0; ic < m_Nc; ++ic) {
1634  vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1635  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1636  vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1637  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1638  }
1639 
1640  for (int ic = 0; ic < m_Nc; ++ic) {
1641  int ic2 = 2 * ic;
1642  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1643  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1644  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1645  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1646 
1647  w2[ic2 + id1 + iv] += wt1r;
1648  w2[ic2 + 1 + id1 + iv] += wt1i;
1649  w2[ic2 + id2 + iv] += wt2r;
1650  w2[ic2 + 1 + id2 + iv] += wt2i;
1651  }
1652  }
1653  }
1654  }
1655 }
1656 
1657 
1658 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1659) is missing from this listing; presumably mult_tp1_chiral_thread --
// confirm.
// Fills the m_bw_send[3] buffer: when m_arg[itask].kt0 == 1, the sums
// (id1 + id3) and (id2 + id4) of the v1 blocks on the first local t-slice
// are stored in the buffer scaled by m_boundary2[3]. start_thread(itask) is
// called whether or not the slice was packed. vcp1 and ieo are not used.
1660  int itask, double *vcp1, const double *v1, int ieo)
1661 {
1662  int Nvc2 = 2 * m_Nvc;
1663  int Nvcd = m_Nvc * m_Nd;
1664  int Nvcd2 = Nvcd / 2;
1665 
1666  int id1 = 0;
1667  int id2 = m_Nvc;
1668  int id3 = m_Nvc * 2;
1669  int id4 = m_Nvc * 3;
1670 
1671  int idir = 3;
1672 
1673  int isite = m_arg[itask].isite;
1674  int isite_cp = m_arg[itask].isite_cpt;
1675 
1676  // double* w2 = &vcp1[Nvcd2*isite_cp];
1677  double *w2
1678  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1679  const double *w1 = &v1[Nvcd * isite];
1680 
1681  double bc2 = m_boundary2[idir];
1682 
1683  if (m_arg[itask].kt0 == 1) {
1684  int Nxy = m_Nx2 * m_Ny;
1685  int it = 0;
1686  for (int iz = 0; iz < m_Mz; ++iz) {
1687  for (int ixy = 0; ixy < Nxy; ++ixy) {
1688  int is = ixy + Nxy * (iz + m_Nz * it);
1689  int is2 = ixy + Nxy * iz;
1690 
1691  int in = Nvcd * is;
1692  int ix1 = Nvc2 * is2;
1693  int ix2 = ix1 + m_Nvc;
1694 
1695  for (int ic = 0; ic < m_Nc; ++ic) {
1696  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1697  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1698  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1699  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1700  }
1701  }
1702  }
1703  }
1704 
// Presumably signals that this task's part of the send buffer is ready -- confirm.
1705  m_bw_send[idir]->start_thread(itask);
1706 }
1707 
1708 
1709 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1710) is missing from this listing; presumably mult_tp2_chiral_thread --
// confirm.
// Accumulates into v2 the data read from the m_bw_recv[3] buffer, multiplied
// by the link variable through mult_uv_r / mult_uv_i. The same product is
// added to the id1/id3 pair and to the id2/id4 pair. Only the last local
// t-slice is touched, and only when m_arg[itask].kt1 == 1.
// vcp2 is not read in this body.
1711  int itask, double *v2, const double *vcp2, int ieo)
1712 {
1713  int Nvc2 = 2 * m_Nvc;
1714  int Nvcd = m_Nvc * m_Nd;
1715  int Nvcd2 = Nvcd / 2;
1716 
1717  int id1 = 0;
1718  int id2 = m_Nvc;
1719  int id3 = m_Nvc * 2;
1720  int id4 = m_Nvc * 3;
1721 
1722  int idir = 3;
1723 
1724  double wt1r, wt1i, wt2r, wt2i;
1725 
1726  int isite = m_arg[itask].isite;
1727  int isite_cp = m_arg[itask].isite_cpt;
1728 
1729  double *w2 = &v2[Nvcd * isite];
1730  // double* w1 = &vcp2[Nvcd2*isite_cp];
1731  const double *w1
1732  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1733  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1734 
// Called before w1 is read; presumably waits for the received data -- confirm.
1735  m_bw_recv[idir]->wait_thread(itask);
1736 
1737  if (m_arg[itask].kt1 == 1) {
1738  int Nxy = m_Nx2 * m_Ny;
1739  int it = m_Mt - 1;
1740  for (int iz = 0; iz < m_Mz; ++iz) {
1741  for (int ixy = 0; ixy < Nxy; ++ixy) {
1742  int is = ixy + Nxy * (iz + m_Nz * it);
1743  int is2 = ixy + Nxy * iz;
1744  int iv = Nvcd * is;
1745  int ig = m_Ndf * is;
1746  int ix1 = Nvc2 * is2;
1747  int ix2 = ix1 + m_Nvc;
1748 
1749  for (int ic = 0; ic < m_Nc; ++ic) {
1750  int ic2 = ic * m_Nvc;
1751 
1752  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1753  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1754  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1755  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1756 
1757  w2[2 * ic + id1 + iv] += wt1r;
1758  w2[2 * ic + 1 + id1 + iv] += wt1i;
1759  w2[2 * ic + id2 + iv] += wt2r;
1760  w2[2 * ic + 1 + id2 + iv] += wt2i;
1761  w2[2 * ic + id3 + iv] += wt1r;
1762  w2[2 * ic + 1 + id3 + iv] += wt1i;
1763  w2[2 * ic + id4 + iv] += wt2r;
1764  w2[2 * ic + 1 + id4 + iv] += wt2i;
1765  }
1766  }
1767  }
1768  }
1769 }
1770 
1771 
1772 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1773) is missing from this listing; presumably mult_tpb_chiral_thread --
// confirm.
// Interior (non-buffer) part for idir = 3: for each site it forms the sums
// (id1 + id3) and (id2 + id4) of the v1 blocks at the site one t-slice
// further (is + Nxyz), multiplies by the link variable at the site itself
// and adds the result to all four blocks of v2. When m_arg[itask].kt1 == 1
// the last local t-slice is left out of the loop.
1774  int itask, double *v2, const double *v1, int ieo)
1775 {
1776  int Nvcd = m_Nvc * m_Nd;
1777 
1778  int id1 = 0;
1779  int id2 = m_Nvc;
1780  int id3 = m_Nvc * 2;
1781  int id4 = m_Nvc * 3;
1782 
1783  int idir = 3;
1784 
1785  double vt1[m_Nvc], vt2[m_Nvc];
1786  double wt1r, wt1i, wt2r, wt2i;
1787 
1788  int isite = m_arg[itask].isite;
1789 
1790  double *w2 = &v2[Nvcd * isite];
1791  const double *w1 = &v1[Nvcd * isite];
1792  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1793 
1794  int kt1 = m_arg[itask].kt1;
1795  int Nxy = m_Nx2 * m_Ny;
1796  int Nxyz = Nxy * m_Nz;
1797 
1798  for (int it = 0; it < m_Mt - kt1; ++it) {
1799  for (int iz = 0; iz < m_Mz; ++iz) {
1800  for (int ixy = 0; ixy < Nxy; ++ixy) {
1801  int is = ixy + Nxy * (iz + m_Nz * it);
1802  int iv = Nvcd * is;
1803  int in = Nvcd * (is + Nxyz);
1804  int ig = m_Ndf * is;
1805 
1806  for (int ic = 0; ic < m_Nc; ++ic) {
1807  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1808  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1809  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1810  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
1811  }
1812 
1813  for (int ic = 0; ic < m_Nc; ++ic) {
1814  int ic2 = ic * m_Nvc;
1815 
1816  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1817  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1818  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1819  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1820 
1821  w2[2 * ic + id1 + iv] += wt1r;
1822  w2[2 * ic + 1 + id1 + iv] += wt1i;
1823  w2[2 * ic + id2 + iv] += wt2r;
1824  w2[2 * ic + 1 + id2 + iv] += wt2i;
1825  w2[2 * ic + id3 + iv] += wt1r;
1826  w2[2 * ic + 1 + id3 + iv] += wt1i;
1827  w2[2 * ic + id4 + iv] += wt2r;
1828  w2[2 * ic + 1 + id4 + iv] += wt2i;
1829  }
1830  }
1831  }
1832  }
1833 }
1834 
1835 
1836 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1837) is missing from this listing; presumably mult_tm1_chiral_thread --
// confirm.
// Fills the m_fw_send[3] buffer: when m_arg[itask].kt1 == 1, the differences
// (id1 - id3) and (id2 - id4) of the v1 blocks on the last local t-slice are
// multiplied through mult_udagv_r / mult_udagv_i with the link variable and
// stored in the buffer. start_thread(itask) is called whether or not the
// slice was packed. vcp1 is not written in this body.
1838  int itask, double *vcp1, const double *v1, int ieo)
1839 {
1840  int Nvc2 = 2 * m_Nvc;
1841  int Nvcd = m_Nvc * m_Nd;
1842  int Nvcd2 = Nvcd / 2;
1843 
1844  int id1 = 0;
1845  int id2 = m_Nvc;
1846  int id3 = m_Nvc * 2;
1847  int id4 = m_Nvc * 3;
1848 
1849  int idir = 3;
1850 
1851  int isite = m_arg[itask].isite;
1852  int isite_cp = m_arg[itask].isite_cpt;
1853 
1854  // double* w2 = &vcp1[Nvcd2*isite_cp];
1855  double *w2
1856  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1857  const double *w1 = &v1[Nvcd * isite];
1858  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1859 
1860  double vt1[m_Nvc], vt2[m_Nvc];
1861 
1862  if (m_arg[itask].kt1 == 1) {
1863  int Nxy = m_Nx2 * m_Ny;
1864  int it = m_Mt - 1;
1865  for (int iz = 0; iz < m_Mz; ++iz) {
1866  for (int ixy = 0; ixy < Nxy; ++ixy) {
1867  int is = ixy + Nxy * (iz + m_Nz * it);
1868  int is2 = ixy + Nxy * iz;
1869  int in = Nvcd * is;
1870  int ig = m_Ndf * is;
1871  int ix1 = Nvc2 * is2;
1872  int ix2 = ix1 + m_Nvc;
1873 
1874  for (int ic = 0; ic < m_Nc; ++ic) {
1875  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1876  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1877  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1878  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1879  }
1880 
1881  for (int ic = 0; ic < m_Nc; ++ic) {
1882  int icr = 2 * ic;
1883  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1884  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1885  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1886  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1887  }
1888  }
1889  }
1890  }
1891 
// Presumably signals that this task's part of the send buffer is ready -- confirm.
1892  m_fw_send[idir]->start_thread(itask);
1893 }
1894 
1895 
1896 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1897) is missing from this listing; presumably mult_tm2_chiral_thread --
// confirm.
// Applies the data read from the m_fw_recv[3] buffer, scaled by
// m_boundary2[3], to v2: added to the id1 and id2 blocks, subtracted from
// the id3 and id4 blocks. Only the first local t-slice (it = 0) is touched,
// and only when m_arg[itask].kt0 == 1.
// vcp2, ieo and wt1r..wt2i are not used in this body.
1898  int itask, double *v2, const double *vcp2, int ieo)
1899 {
1900  int Nvc2 = 2 * m_Nvc;
1901  int Nvcd = m_Nvc * m_Nd;
1902  int Nvcd2 = Nvcd / 2;
1903 
1904  int id1 = 0;
1905  int id2 = m_Nvc;
1906  int id3 = m_Nvc * 2;
1907  int id4 = m_Nvc * 3;
1908 
1909  int idir = 3;
1910  double bc2 = m_boundary2[idir];
1911 
1912  double wt1r, wt1i, wt2r, wt2i;
1913 
1914  int isite = m_arg[itask].isite;
1915  int isite_cp = m_arg[itask].isite_cpt;
1916 
1917  double *w2 = &v2[Nvcd * isite];
1918  // double* w1 = &vcp2[Nvcd2*isite_cp];
1919  const double *w1
1920  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1921 
// Called before w1 is read; presumably waits for the received data -- confirm.
1922  m_fw_recv[idir]->wait_thread(itask);
1923 
1924  if (m_arg[itask].kt0 == 1) {
1925  int Nxy = m_Nx2 * m_Ny;
1926  int it = 0;
1927  for (int iz = 0; iz < m_Mz; ++iz) {
1928  for (int ixy = 0; ixy < Nxy; ++ixy) {
1929  int is = ixy + Nxy * (iz + m_Nz * it);
1930  int is2 = ixy + Nxy * iz;
1931  int iv = Nvcd * is;
1932  int ix1 = Nvc2 * is2;
1933  int ix2 = ix1 + m_Nvc;
1934 
1935  for (int ic = 0; ic < m_Nc; ++ic) {
1936  int icr = 2 * ic;
1937  int ici = 2 * ic + 1;
1938  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1939  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1940  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1941  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1942  w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1943  w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1944  w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1945  w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
1946  }
1947  }
1948  }
1949  }
1950 }
1951 
1952 
1953 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 1954) is missing from this listing; presumably mult_tmb_chiral_thread --
// confirm.
// Interior (non-buffer) part for idir = 3 in the opposite direction: for
// each site it forms the differences (id1 - id3) and (id2 - id4) of the v1
// blocks at the site one t-slice lower (is - Nxyz), applies
// mult_udagv_r / mult_udagv_i with the link variable at that same lower site,
// adds the result to the id1/id2 blocks of v2 and subtracts it from the
// id3/id4 blocks. When m_arg[itask].kt0 == 1 the first local t-slice is
// left out of the loop.
1955  int itask, double *v2, const double *v1, int ieo)
1956 {
1957  int Nvcd = m_Nvc * m_Nd;
1958 
1959  int id1 = 0;
1960  int id2 = m_Nvc;
1961  int id3 = m_Nvc * 2;
1962  int id4 = m_Nvc * 3;
1963 
1964  int idir = 3;
1965 
1966  double vt1[m_Nvc], vt2[m_Nvc];
1967  double wt1r, wt1i, wt2r, wt2i;
1968 
1969  int isite = m_arg[itask].isite;
1970 
1971  double *w2 = &v2[Nvcd * isite];
1972  const double *w1 = &v1[Nvcd * isite];
1973  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1974 
1975  int kt0 = m_arg[itask].kt0;
1976  int Nxy = m_Nx2 * m_Ny;
1977  int Nxyz = Nxy * m_Nz;
1978 
1979  for (int it = kt0; it < m_Mt; ++it) {
1980  for (int iz = 0; iz < m_Mz; ++iz) {
1981  for (int ixy = 0; ixy < Nxy; ++ixy) {
1982  int is = ixy + Nxy * (iz + m_Nz * it);
1983  int iv = Nvcd * is;
1984  int in = Nvcd * (is - Nxyz);
1985  int ig = m_Ndf * (is - Nxyz);
1986 
1987  for (int ic = 0; ic < m_Nc; ++ic) {
1988  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1989  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1990  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1991  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1992  }
1993 
1994  for (int ic = 0; ic < m_Nc; ++ic) {
1995  int ic2 = 2 * ic;
1996  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1997  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1998  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1999  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2000 
2001  w2[ic2 + id1 + iv] += wt1r;
2002  w2[ic2 + 1 + id1 + iv] += wt1i;
2003  w2[ic2 + id2 + iv] += wt2r;
2004  w2[ic2 + 1 + id2 + iv] += wt2i;
2005  w2[ic2 + id3 + iv] -= wt1r;
2006  w2[ic2 + 1 + id3 + iv] -= wt1i;
2007  w2[ic2 + id4 + iv] -= wt2r;
2008  w2[ic2 + 1 + id4 + iv] -= wt2i;
2009  }
2010  }
2011  }
2012  }
2013 }
2014 
2015 
2016 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 2017) is missing from this listing; presumably the three-argument
// gm5_dirac_thread -- confirm.
// Out-of-place block swap over this task's sites: for every site, v2
// receives the id3/id4 blocks of v1 in its id1/id2 blocks and the id1/id2
// blocks of v1 in its id3/id4 blocks.
// NOTE(review): v1 and v2 presumably must not alias, since each site's
// blocks are read after others have been written -- confirm with callers.
2018  int itask, double *v2, const double *v1)
2019 {
2020  int Nvcd = m_Nvc * m_Nd;
2021  int Nxy = m_Nx2 * m_Ny;
2022 
2023  int id1 = 0;
2024  int id2 = m_Nvc;
2025  int id3 = m_Nvc * 2;
2026  int id4 = m_Nvc * 3;
2027 
2028  int isite = m_arg[itask].isite;
2029  double *w2 = &v2[Nvcd * isite];
2030  const double *w1 = &v1[Nvcd * isite];
2031 
2032  for (int it = 0; it < m_Mt; ++it) {
2033  for (int iz = 0; iz < m_Mz; ++iz) {
2034  for (int ixy = 0; ixy < Nxy; ++ixy) {
// iv is the offset of this site's data relative to the task base pointers.
2035  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2036  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2037  w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2038  w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2039  w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2040  w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
2041  }
2042  }
2043  }
2044  }
2045 }
2046 
2047 
2048 //====================================================================
// NOTE(review): the signature line carrying the function name (listing line
// 2049) is missing from this listing; presumably the three-argument
// gm5_chiral_thread -- confirm.
// Out-of-place copy with sign flip over this task's sites: for every site,
// the id1 and id2 blocks of v1 are copied to v2 unchanged and the id3 and
// id4 blocks are copied with their sign reversed.
2050  int itask, double *v2, const double *v1)
2051 {
2052  int Nvcd = m_Nvc * m_Nd;
2053  int Nxy = m_Nx2 * m_Ny;
2054 
2055  int id1 = 0;
2056  int id2 = m_Nvc;
2057  int id3 = m_Nvc * 2;
2058  int id4 = m_Nvc * 3;
2059 
2060  int isite = m_arg[itask].isite;
2061  double *w2 = &v2[Nvcd * isite];
2062  const double *w1 = &v1[Nvcd * isite];
2063 
2064  for (int it = 0; it < m_Mt; ++it) {
2065  for (int iz = 0; iz < m_Mz; ++iz) {
2066  for (int ixy = 0; ixy < Nxy; ++ixy) {
// iv is the offset of this site's data relative to the task base pointers.
2067  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2068  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2069  w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2070  w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2071  w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2072  w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2073  }
2074  }
2075  }
2076  }
2077 }
2078 
2079 
2080 //====================================================================
// NOTE(review): the signature line carrying the function name and the first
// parameter (listing line 2081) is missing from this listing; the body uses
// itask, so that parameter is presumably declared there. Presumably the
// two-argument (in-place) gm5_dirac_thread -- confirm.
// In-place block swap over this task's sites: for every site, the id1/id2
// blocks of v1 are exchanged with its id3/id4 blocks.
2082  double *v1)
2083 {
2084  int Nvcd = m_Nvc * m_Nd;
2085  int Nxy = m_Nx2 * m_Ny;
2086 
2087  int id1 = 0;
2088  int id2 = m_Nvc;
2089  int id3 = m_Nvc * 2;
2090  int id4 = m_Nvc * 3;
2091 
2092  int isite = m_arg[itask].isite;
2093  double *w1 = &v1[Nvcd * isite];
2094 
2095  for (int it = 0; it < m_Mt; ++it) {
2096  for (int iz = 0; iz < m_Mz; ++iz) {
2097  for (int ixy = 0; ixy < Nxy; ++ixy) {
2098  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2099  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
// Save the id1/id2 values before they are overwritten.
2100  double wt1 = w1[ivc + id1 + iv];
2101  double wt2 = w1[ivc + id2 + iv];
2102  w1[ivc + id1 + iv] = w1[ivc + id3 + iv];
2103  w1[ivc + id2 + iv] = w1[ivc + id4 + iv];
2104  w1[ivc + id3 + iv] = wt1;
2105  w1[ivc + id4 + iv] = wt2;
2106  }
2107  }
2108  }
2109  }
2110 }
2111 
2112 
2113 //====================================================================
// NOTE(review): the signature line carrying the function name and the first
// parameter (listing line 2114) is missing from this listing; the body uses
// itask, so that parameter is presumably declared there. Presumably the
// two-argument (in-place) gm5_chiral_thread -- confirm.
// In-place sign flip over this task's sites: for every site, the id3 and id4
// blocks of v1 are negated; the id1 and id2 blocks are left untouched.
2115  double *v1)
2116 {
2117  int Nvcd = m_Nvc * m_Nd;
2118  int Nxy = m_Nx2 * m_Ny;
2119 
2120  int id1 = 0;
2121  int id2 = m_Nvc;
2122  int id3 = m_Nvc * 2;
2123  int id4 = m_Nvc * 3;
2124 
2125  int isite = m_arg[itask].isite;
2126  double *w1 = &v1[Nvcd * isite];
2127 
2128  for (int it = 0; it < m_Mt; ++it) {
2129  for (int iz = 0; iz < m_Mz; ++iz) {
2130  for (int ixy = 0; ixy < Nxy; ++ixy) {
2131  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2132  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2133  w1[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2134  w1[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2135  }
2136  }
2137  }
2138  }
2139 }
2140 
2141 
2142 //====================================================================
2143 //============================================================END=====
void mult_xpb_thread(int, double *, const double *, int)
BridgeIO vout
Definition: bridgeIO.cpp:278
void mult_tpb_dirac_thread(int, double *, const double *, int)
void mult_tmb_chiral_thread(int, double *, const double *, int)
void mult_tp2_chiral_thread(int, double *, const double *, int)
void mult_ym1_thread(int, double *, const double *, int)
void mult_ymb_thread(int, double *, const double *, int)
void mult_tmb_dirac_thread(int, double *, const double *, int)
void general(const char *format,...)
Definition: bridgeIO.cpp:65
void mult_xm2_thread(int, double *, const double *, int)
void mult_zm2_thread(int, double *, const double *, int)
void mult_zm1_thread(int, double *, const double *, int)
void mult_zp1_thread(int, double *, const double *, int)
void mult_tm2_chiral_thread(int, double *, const double *, int)
void mult_tp2_dirac_thread(int, double *, const double *, int)
void mult_ypb_thread(int, double *, const double *, int)
void mult_tm1_dirac_thread(int, double *, const double *, int)
void mult_tm2_dirac_thread(int, double *, const double *, int)
void mult_tp1_chiral_thread(int, double *, const double *, int)
void mult_yp1_thread(int, double *, const double *, int)
void mult_zmb_thread(int, double *, const double *, int)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_zpb_thread(int, double *, const double *, int)
void mult_xm1_thread(int, double *, const double *, int)
void mult_yp2_thread(int, double *, const double *, int)
void crucial(const char *format,...)
Definition: bridgeIO.cpp:48
void mult_tm1_chiral_thread(int, double *, const double *, int)
void mult_xmb_thread(int, double *, const double *, int)
void gm5_dirac_thread(int, double *, const double *)
void Meo(Field &, const Field &, const int ieo)
even-odd operator: ieo=0: even <– odd, ieo=1: odd <– even
void mult_tp1_dirac_thread(int, double *, const double *, int)
void mult_zp2_thread(int, double *, const double *, int)
void mult_tpb_chiral_thread(int, double *, const double *, int)
void mult_xp2_thread(int, double *, const double *, int)
void mult_ym2_thread(int, double *, const double *, int)
void gm5_chiral_thread(int, double *, const double *)
void mult_xp1_thread(int, double *, const double *, int)