Bridge++  Ver. 1.2.x
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fopr_Wilson_eo_impl_thread.cpp
Go to the documentation of this file.
1 
14 #include "fopr_Wilson_eo_impl.h"
15 
16 #include "bridgeIO.h"
17 using Bridge::vout;
18 
19 #include "threadManager_OpenMP.h"
20 
21 
22 #if defined USE_GROUP_SU3
23 #include "fopr_Wilson_impl_SU3.inc"
24 #elif defined USE_GROUP_SU2
25 #include "fopr_Wilson_impl_SU2.inc"
26 #elif defined USE_GROUP_SU_N
27 #include "fopr_Wilson_impl_SU_N.inc"
28 #endif
29 
30 //====================================================================
32 {
  // Sets up the division of work into tasks and the per-task message layout:
  //  - derives m_Mz and m_Mt (extent of one task in z and t) from
  //    m_Ntask_z and m_Ntask_t,
  //  - fills m_arg[itask] with the first site, the edge flags and the
  //    buffer start sites of every task,
  //  - registers (destid, offset, datasize) per task with the send/recv
  //    channels of the four directions through set_thread().
  // Calls abort() when m_Nthread exceeds m_Nz * m_Nt or when
  // m_Ntask_z * m_Ntask_t differs from m_Nthread.
  // NOTE(review): listing lines 31, 33, 37, 44 and 50 are absent from this
  // view (including the signature); the assignments of m_Ntask_z, m_Ntask
  // and the first-branch m_Ntask_t presumably live there -- confirm against
  // the original file.
34 
35  // The following setup corresponds to uniform division of volume.
36  if (m_Nthread <= m_Nt) {
38  } else if (m_Nthread <= m_Nz * m_Nt) {
39  m_Ntask_t = m_Nt;
40  } else {
41  vout.general(m_vl, " Too large Nthread: %d\n", m_Nthread);
42  abort();
43  }
45  if (m_Ntask_z * m_Ntask_t != m_Nthread) {
46  vout.general(m_vl, " Nz(%d) and Nt(%d) do not mach Nthread: %d\n",
47  m_Nz, m_Nt, m_Nthread);
48  abort();
49  }
  // Integer division: any remainder of m_Nz / m_Ntask_z or m_Nt / m_Ntask_t
  // is silently dropped here.
51  m_Mz = m_Nz / m_Ntask_z;
52  m_Mt = m_Nt / m_Ntask_t;
53 
54  vout.general(m_vl, " Nthread = %d\n", m_Nthread);
55  vout.general(m_vl, " Ntask = %d\n", m_Ntask);
56  vout.general(m_vl, " Ntask_z = %d Ntask_t = %d\n", m_Ntask_z, m_Ntask_t);
57  vout.general(m_vl, " Mz = %d Mt = %d\n", m_Mz, m_Mt);
58 
59  // setup of arguments
60  int Nxy2 = m_Nx2 * m_Ny;
61  m_arg.resize(m_Ntask);
62  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
63  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
  // Tasks are numbered with ith_z running fastest.
64  int itask = ith_z + m_Ntask_z * ith_t;
65 
  // First site of the task: ith_z blocks of m_Mz z-slices plus ith_t blocks
  // of m_Mt t-slices (each t-slice spans m_Nz z-slices), Nxy2 sites per slice.
66  m_arg[itask].isite = (ith_z * m_Mz + ith_t * (m_Nz * m_Mt)) * Nxy2;
67 
  // kt0/kz0 (kt1/kz1) are set to 1 for tasks on the lower (upper) edge of
  // the task grid in t/z, and 0 otherwise.
68  m_arg[itask].kt0 = 0;
69  m_arg[itask].kt1 = 0;
70  m_arg[itask].kz0 = 0;
71  m_arg[itask].kz1 = 0;
72  if (ith_t == 0) m_arg[itask].kt0 = 1;
73  if (ith_z == 0) m_arg[itask].kz0 = 1;
74  if (ith_t == m_Ntask_t - 1) m_arg[itask].kt1 = 1;
75  if (ith_z == m_Ntask_z - 1) m_arg[itask].kz1 = 1;
76 
  // Start site of the task inside the buffer of each direction. These match
  // the offsets registered with set_thread() below.
77  m_arg[itask].isite_cpx = itask * m_Mz * m_Mt * (m_Ny / 2);
78  m_arg[itask].isite_cpy = itask * m_Mz * m_Mt * m_Nx2;
79  m_arg[itask].isite_cpz = ith_t * m_Mt * Nxy2;
80  m_arg[itask].isite_cpt = ith_z * m_Mz * Nxy2;
81  }
82  }
83 
84  // setup for async data transfer
85  int Nc = CommonParameters::Nc();
86  int Nd = CommonParameters::Nd();
  // Number of doubles per buffer site; used as the per-site multiplier in
  // all offsets and sizes below.
87  int Nvcd2 = 2 * Nc * Nd / 2;
88 
89  valarray<int> destid(m_Ntask);
90  valarray<int> offset(m_Ntask);
91  valarray<int> datasize(m_Ntask);
92  valarray<int> offset_up(m_Ntask);
93  valarray<int> offset_lw(m_Ntask);
94  valarray<int> datasize_up(m_Ntask);
95  valarray<int> datasize_lw(m_Ntask);
96 
  // imu = 0: every task has its own slice of m_Mz * m_Mt * (m_Ny / 2) sites
  // and is its own destid. Offsets and sizes carry a sizeof(double) factor,
  // i.e. they are byte counts stored in int.
97  int imu = 0;
98  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
99  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
100  int itask = ith_z + ith_t * m_Ntask_z;
101  int isite_cp = itask * m_Mz * m_Mt * (m_Ny / 2);
102  destid[itask] = itask;
103  offset[itask] = sizeof(double) * Nvcd2 * isite_cp;
104  datasize[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Mt * (m_Ny / 2);
105  }
106  }
107  m_bw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
108  m_fw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
109  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
110  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
111 
  // imu = 1: same scheme, with m_Mz * m_Mt * m_Nx2 sites per task.
112  imu = 1;
113  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
114  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
115  int itask = ith_z + ith_t * m_Ntask_z;
116  int isite_cp = itask * m_Mz * m_Mt * m_Nx2;
117  destid[itask] = itask;
118  offset[itask] = sizeof(double) * Nvcd2 * isite_cp;
119  datasize[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Mt * m_Nx2;
120  }
121  }
122  m_bw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
123  m_fw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
124  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
125  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
126 
  // imu = 2: only tasks on the z edges of the task grid get a non-zero size.
  // destid is the task on the opposite z edge with the same ith_t; all other
  // tasks keep destid = -1 and zero sizes.
127  imu = 2;
128  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
129  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
130  int itask = ith_z + m_Ntask_z * ith_t;
131  destid[itask] = -1;
132  offset_up[itask] = 0;
133  offset_lw[itask] = 0;
134  datasize_up[itask] = 0;
135  datasize_lw[itask] = 0;
136  if (ith_z == 0) {
137  destid[itask] = (m_Ntask_z - 1) + ith_t * m_Ntask_z;
138  offset_lw[itask] = sizeof(double) * Nvcd2 * ith_t * m_Mt * m_Nx2 * m_Ny;
139  datasize_lw[itask] = sizeof(double) * Nvcd2 * m_Mt * m_Nx2 * m_Ny;
140  }
141  if (ith_z == m_Ntask_z - 1) {
142  destid[itask] = ith_t * m_Ntask_z;
143  offset_up[itask] = sizeof(double) * Nvcd2 * ith_t * m_Mt * m_Nx2 * m_Ny;
144  datasize_up[itask] = sizeof(double) * Nvcd2 * m_Mt * m_Nx2 * m_Ny;
145  }
146  }
147  }
  // bw_send and fw_recv use the lower-edge layout, bw_recv and fw_send the
  // upper-edge layout.
148  m_bw_send[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
149  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
150  m_fw_send[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
151  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
152 
  // imu = 3: same scheme as imu = 2 with the roles of z and t exchanged.
153  imu = 3;
154  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
155  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
156  int itask = ith_z + m_Ntask_z * ith_t;
157  destid[itask] = -1;
158  offset_up[itask] = 0;
159  offset_lw[itask] = 0;
160  datasize_up[itask] = 0;
161  datasize_lw[itask] = 0;
162  if (ith_t == 0) {
163  destid[itask] = ith_z + (m_Ntask_t - 1) * m_Ntask_z;
164  offset_lw[itask] = sizeof(double) * Nvcd2 * ith_z * m_Mz * m_Nx2 * m_Ny;
165  datasize_lw[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Nx2 * m_Ny;
166  }
167  if (ith_t == m_Ntask_t - 1) {
168  destid[itask] = ith_z;
169  offset_up[itask] = sizeof(double) * Nvcd2 * ith_z * m_Mz * m_Nx2 * m_Ny;
170  datasize_up[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Nx2 * m_Ny;
171  }
172  }
173  }
174  m_bw_send[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
175  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
176  m_fw_send[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
177  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
178 }
179 
180 //====================================================================
182  double *w, double fac)
183 {
  // Multiplies by fac, in place, every element of w that belongs to the
  // sub-block of task itask: m_Mt x m_Mz slabs of Nvxy doubles each.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
184  int Nvcd = m_Nvc * m_Nd;
185  int Nvxy = Nvcd * m_Nx2 * m_Ny;
186 
  // wp points at the first element of the task's first site.
187  int isite = m_arg[itask].isite;
188  double *wp = &w[Nvcd * isite];
189 
190  for (int it = 0; it < m_Mt; ++it) {
191  for (int iz = 0; iz < m_Mz; ++iz) {
192  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
  // The t index is strided by m_Nz although iz only runs over m_Mz.
193  int iv = ivxy + Nvxy * (iz + m_Nz * it);
194  wp[iv] = fac * wp[iv];
195  }
196  }
197  }
198 
199 }
200 
201 //====================================================================
203  double *v)
204 {
  // Sets to 0.0 every element of v that belongs to the sub-block of task
  // itask: m_Mt x m_Mz slabs of Nvxy doubles each.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
205  int Nvcd = m_Nvc * m_Nd;
206  int Nvxy = Nvcd * m_Nx2 * m_Ny;
207 
  // wp points at the first element of the task's first site.
208  int isite = m_arg[itask].isite;
209  double *wp = &v[Nvcd * isite];
210 
211  for (int it = 0; it < m_Mt; ++it) {
212  for (int iz = 0; iz < m_Mz; ++iz) {
213  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
  // The t index is strided by m_Nz although iz only runs over m_Mz.
214  int iv = ivxy + Nvxy * (iz + m_Nz * it);
215  wp[iv] = 0.0;
216  }
217  }
218  }
219 
220 }
221 
222 //====================================================================
224  double *vcp1, double *v1, int ieo)
225 {
  // Packs the ix = 0 sites of this task's sub-block of v1 into the
  // m_bw_send[0] buffer, then calls start_thread(itask) on that channel.
  // Only rows (iy, iz, it) with Leo == 1 are packed; ibf counts the packed
  // sites and indexes the buffer.
  // vcp1 is not read: the buffer pointer comes from m_bw_send (see the
  // commented-out line below).
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
226  int Nvc2 = 2 * m_Nvc;
227  int Nvcd = m_Nvc * m_Nd;
228  int Nvcd2 = Nvcd / 2;
229 
  // Offsets of the four m_Nvc-sized components inside one site of v1.
230  int id1 = 0;
231  int id2 = m_Nvc;
232  int id3 = m_Nvc * 2;
233  int id4 = m_Nvc * 3;
234 
235  int idir = 0;
236 
237  int isite = m_arg[itask].isite;
238  int isite_cp = m_arg[itask].isite_cpx;
  // Index of the task's first (iy, iz, it) row, used to address m_Leo.
239  int iyzt0 = isite / m_Nx2;
240 
241  // double* w2 = &vcp1[Nvcd2*isite_cp];
242  double *w2
243  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
244  double *w1 = &v1[Nvcd * isite];
245 
246  double bc2 = m_boundary2[idir];
247 
248  int ix = 0;
249  int ibf = 0;
250 
251  for (int it = 0; it < m_Mt; ++it) {
252  for (int iz = 0; iz < m_Mz; ++iz) {
253  for (int iy = 0; iy < m_Ny; ++iy) {
254  int iyzt = iy + m_Ny * (iz + m_Nz * it);
  // Leo equals m_Leo[...] for ieo == 0 and 1 - m_Leo[...] for ieo == 1.
255  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
256  if (Leo == 1) {
257  int is = ix + m_Nx2 * iyzt;
258  int in = Nvcd * is;
259 
  // Each packed site holds two m_Nvc-sized vectors, at ix1 and ix2.
260  int ix1 = Nvc2 * ibf;
261  int ix2 = ix1 + m_Nvc;
262 
  // Components id1/id4 and id2/id3 are combined pairwise and scaled by bc2.
263  for (int ic = 0; ic < m_Nc; ++ic) {
264  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
265  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
266  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
267  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
268  }
269  ++ibf;
270  }
271  }
272  }
273  }
274 
275  m_bw_send[idir]->start_thread(itask);
276 }
277 
278 //====================================================================
280  double *v2, double *vcp2, int ieo)
281 {
  // Waits for this task's part of the m_bw_recv[0] buffer, then adds its
  // contribution to the ix = m_Nx2 - 1 sites of the task's sub-block of v2.
  // Only rows with Leo == 1 are touched; ibf walks the buffer in the same
  // row order.
  // vcp2 is not read: the buffer pointer comes from m_bw_recv (see the
  // commented-out line below).
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
282  int Nvc2 = 2 * m_Nvc;
283  int Nvcd = m_Nvc * m_Nd;
284  int Nvcd2 = Nvcd / 2;
285 
286  int id1 = 0;
287  int id2 = m_Nvc;
288  int id3 = m_Nvc * 2;
289  int id4 = m_Nvc * 3;
290 
291  int idir = 0;
292 
293  double wt1r, wt1i, wt2r, wt2i;
294 
295  int isite = m_arg[itask].isite;
296  int isite_cp = m_arg[itask].isite_cpx;
297  int iyzt0 = isite / m_Nx2;
298 
299  double *w2 = &v2[Nvcd * isite];
300  // double* w1 = &vcp2[Nvcd2*isite_cp];
301  double *w1
302  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
  // u points into m_U at the task's first site, offset by ieo * m_Nvol / 2
  // and by idir * m_Nvol.
303  double *u = const_cast<Field_G *>(m_U)->ptr(
304  m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
305 
  // The buffer is read only after wait_thread() returns.
306  m_bw_recv[idir]->wait_thread(itask);
307 
308  int ix = m_Nx2 - 1;
309  int ibf = 0;
310  for (int it = 0; it < m_Mt; ++it) {
311  for (int iz = 0; iz < m_Mz; ++iz) {
312  for (int iy = 0; iy < m_Ny; ++iy) {
313  int iyzt = iy + m_Ny * (iz + m_Nz * it);
  // Leo equals m_Leo[...] for ieo == 0 and 1 - m_Leo[...] for ieo == 1.
314  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
315 
316  if (Leo == 1) {
317  int is = ix + m_Nx2 * iyzt;
318  int iv = Nvcd * is;
319  int ig = m_Ndf * is;
320  int ix1 = Nvc2 * ibf;
321  int ix2 = ix1 + m_Nvc;
322 
  // mult_uv_r / mult_uv_i are defined outside this listing; presumably the
  // real and imaginary parts of a matrix-row times vector product -- confirm.
323  for (int ic = 0; ic < m_Nc; ++ic) {
324  int ic2 = ic * m_Nvc;
325  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
326  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
327  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
328  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
329  w2[2 * ic + id1 + iv] += wt1r;
330  w2[2 * ic + 1 + id1 + iv] += wt1i;
331  w2[2 * ic + id2 + iv] += wt2r;
332  w2[2 * ic + 1 + id2 + iv] += wt2i;
333  w2[2 * ic + id3 + iv] += wt2i;
334  w2[2 * ic + 1 + id3 + iv] += -wt2r;
335  w2[2 * ic + id4 + iv] += wt1i;
336  w2[2 * ic + 1 + id4 + iv] += -wt1r;
337  }
338  ++ibf;
339  }
340  }
341  }
342  }
343 
344 }
345 
346 //====================================================================
348  double *v2, double *v1, int ieo)
349 {
  // Interior part of the idir = 0 term that reads the neighbour at is + Leo:
  // for each site of the task's sub-block, combines the components of v1 at
  // the neighbour, multiplies by u at the site itself and accumulates into v2.
  // Rows with Leo == 1 stop one site early (loop bound m_Nx2 - Leo), so the
  // last ix of those rows is left to another routine.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
350  int Nvcd = m_Nvc * m_Nd;
351 
352  int id1 = 0;
353  int id2 = m_Nvc;
354  int id3 = m_Nvc * 2;
355  int id4 = m_Nvc * 3;
356 
357  int idir = 0;
358 
  // NOTE(review): if m_Nvc is not a compile-time constant these are
  // variable-length arrays, which is a compiler extension in C++.
359  double vt1[m_Nvc], vt2[m_Nvc];
360  double wt1r, wt1i, wt2r, wt2i;
361 
362  int isite = m_arg[itask].isite;
363  int iyzt0 = isite / m_Nx2;
364 
365  double *w2 = &v2[Nvcd * isite];
366  double *w1 = &v1[Nvcd * isite];
367  double *u = const_cast<Field_G *>(m_U)->ptr(
368  m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
369 
370  for (int it = 0; it < m_Mt; ++it) {
371  for (int iz = 0; iz < m_Mz; ++iz) {
372  for (int iy = 0; iy < m_Ny; ++iy) {
373  int iyzt = iy + m_Ny * (iz + m_Nz * it);
  // Leo equals m_Leo[...] for ieo == 0 and 1 - m_Leo[...] for ieo == 1.
374  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
375  for (int ix = 0; ix < m_Nx2 - Leo; ++ix) {
376  int is = ix + m_Nx2 * iyzt;
377  int iv = Nvcd * is;
378  int in = Nvcd * (is + Leo);
379  int ig = m_Ndf * is;
380 
  // Same pairwise combination of components id1/id4 and id2/id3 as in the
  // ix = 0 packing routine for this direction, without the bc2 factor.
381  for (int ic = 0; ic < m_Nc; ++ic) {
382  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
383  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
384  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
385  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
386  }
387 
388  for (int ic = 0; ic < m_Nc; ++ic) {
389  int ic2 = ic * m_Nvc;
390 
391  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
392  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
393  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
394  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
395 
396  w2[2 * ic + id1 + iv] += wt1r;
397  w2[2 * ic + 1 + id1 + iv] += wt1i;
398  w2[2 * ic + id2 + iv] += wt2r;
399  w2[2 * ic + 1 + id2 + iv] += wt2i;
400  w2[2 * ic + id3 + iv] += wt2i;
401  w2[2 * ic + 1 + id3 + iv] += -wt2r;
402  w2[2 * ic + id4 + iv] += wt1i;
403  w2[2 * ic + 1 + id4 + iv] += -wt1r;
404  }
405  }
406  }
407  }
408  }
409 
410 }
411 
412 //====================================================================
414  double *vcp1, double *v1, int ieo)
415 {
  // Packs the ix = m_Nx2 - 1 sites of this task's sub-block of v1 into the
  // m_fw_send[0] buffer, then calls start_thread(itask) on that channel.
  // Only rows with Leo == 0 are packed. Unlike the backward packing routine,
  // the link multiplication (mult_udagv_r / mult_udagv_i) is applied before
  // sending.
  // vcp1 is not read: the buffer pointer comes from m_fw_send.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
416  int Nvc2 = 2 * m_Nvc;
417  int Nvcd = m_Nvc * m_Nd;
418  int Nvcd2 = Nvcd / 2;
419 
420  int id1 = 0;
421  int id2 = m_Nvc;
422  int id3 = m_Nvc * 2;
423  int id4 = m_Nvc * 3;
424 
425  int idir = 0;
426 
427  int isite = m_arg[itask].isite;
428  int isite_cp = m_arg[itask].isite_cpx;
429  int iyzt0 = isite / m_Nx2;
430 
431  // double* w2 = &vcp1[Nvcd2*isite_cp];
432  double *w2
433  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
434  double *w1 = &v1[Nvcd * isite];
  // Here u is offset by (1 - ieo) * m_Nvol / 2, i.e. the other half of m_U
  // compared with the routines that use ieo * m_Nvol / 2.
435  double *u = const_cast<Field_G *>(m_U)->ptr(
436  m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
437 
438  double vt1[m_Nvc], vt2[m_Nvc];
439 
440  int ix = m_Nx2 - 1;
441  int ibf = 0;
442 
443  for (int it = 0; it < m_Mt; ++it) {
444  for (int iz = 0; iz < m_Mz; ++iz) {
445  for (int iy = 0; iy < m_Ny; ++iy) {
446  int iyzt = iy + m_Ny * (iz + m_Nz * it);
  // Leo equals m_Leo[...] for ieo == 0 and 1 - m_Leo[...] for ieo == 1.
447  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
448  if (Leo == 0) {
449  int is = ix + m_Nx2 * iyzt;
450  int in = Nvcd * is;
451  int ig = m_Ndf * is;
452 
453  int ix1 = Nvc2 * ibf;
454  int ix2 = ix1 + m_Nvc;
455 
456  for (int ic = 0; ic < m_Nc; ++ic) {
457  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
458  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
459  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
460  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
461  }
462 
  // mult_udagv_* is indexed with 2 * ic into u, whereas mult_uv_* elsewhere
  // uses ic * m_Nvc.
463  for (int ic = 0; ic < m_Nc; ++ic) {
464  int icr = 2 * ic;
465  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
466  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
467  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
468  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
469  }
470  ++ibf;
471  }
472  }
473  }
474  }
475 
476  m_fw_send[idir]->start_thread(itask);
477 }
478 
479 //====================================================================
481  double *v2, double *vcp2, int ieo)
482 {
  // Waits for this task's part of the m_fw_recv[0] buffer, then adds the
  // received vectors, scaled by bc2, to the ix = 0 sites of the task's
  // sub-block of v2. Only rows with Leo == 0 are touched.
  // No link multiplication happens here; the buffer is used as received.
  // vcp2 is not read, and wt1r, wt1i, wt2r, wt2i are declared but unused.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
483  int Nvc2 = 2 * m_Nvc;
484  int Nvcd = m_Nvc * m_Nd;
485  int Nvcd2 = Nvcd / 2;
486 
487  int id1 = 0;
488  int id2 = m_Nvc;
489  int id3 = m_Nvc * 2;
490  int id4 = m_Nvc * 3;
491 
492  int idir = 0;
493  double bc2 = m_boundary2[idir];
494 
495  double wt1r, wt1i, wt2r, wt2i;
496 
497  int isite = m_arg[itask].isite;
498  int isite_cp = m_arg[itask].isite_cpx;
499  int iyzt0 = isite / m_Nx2;
500 
501  double *w2 = &v2[Nvcd * isite];
502  // double* w1 = &vcp2[Nvcd2*isite_cp];
503  double *w1
504  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
505 
  // The buffer is read only after wait_thread() returns.
506  m_fw_recv[idir]->wait_thread(itask);
507 
508  int ix = 0;
509  int ibf = 0;
510  for (int it = 0; it < m_Mt; ++it) {
511  for (int iz = 0; iz < m_Mz; ++iz) {
512  for (int iy = 0; iy < m_Ny; ++iy) {
513  int iyzt = iy + m_Ny * (iz + m_Nz * it);
  // Leo equals m_Leo[...] for ieo == 0 and 1 - m_Leo[...] for ieo == 1.
514  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
515  if (Leo == 0) {
516  int is = ix + m_Nx2 * iyzt;
517  int iv = Nvcd * is;
518 
519  int ix1 = Nvc2 * ibf;
520  int ix2 = ix1 + m_Nvc;
521 
  // icr / ici address the even / odd element of each pair (2*ic, 2*ic+1).
522  for (int ic = 0; ic < m_Nc; ++ic) {
523  int icr = 2 * ic;
524  int ici = 2 * ic + 1;
525  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
526  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
527  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
528  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
529  w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
530  w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
531  w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
532  w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
533  }
534  ++ibf;
535  }
536  }
537  }
538  }
539 
540 }
541 
542 //====================================================================
544  double *v2, double *v1, int ieo)
545 {
  // Interior part of the idir = 0 term that reads the neighbour at is - Meo
  // (Meo = 1 - Leo): combines the components of v1 at the neighbour,
  // multiplies by u taken at the neighbour with mult_udagv_r/i and
  // accumulates into v2 at the site.
  // Rows with Meo == 1 start at ix = 1, so ix = 0 of those rows is left to
  // another routine.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
546  int Nvcd = m_Nvc * m_Nd;
547 
548  int id1 = 0;
549  int id2 = m_Nvc;
550  int id3 = m_Nvc * 2;
551  int id4 = m_Nvc * 3;
552 
553  int idir = 0;
554 
555  double vt1[m_Nvc], vt2[m_Nvc];
556  double wt1r, wt1i, wt2r, wt2i;
557 
558  int isite = m_arg[itask].isite;
559  int iyzt0 = isite / m_Nx2;
560 
561  double *w2 = &v2[Nvcd * isite];
562  double *w1 = &v1[Nvcd * isite];
  // u is offset by (1 - ieo) * m_Nvol / 2, matching the fact that the link
  // is read at the neighbour site below.
563  double *u = const_cast<Field_G *>(m_U)->ptr(
564  m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
565 
566  for (int it = 0; it < m_Mt; ++it) {
567  for (int iz = 0; iz < m_Mz; ++iz) {
568  for (int iy = 0; iy < m_Ny; ++iy) {
569  int iyzt = iy + m_Ny * (iz + m_Nz * it);
570  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
571  int Meo = 1 - Leo;
572  for (int ix = Meo; ix < m_Nx2; ++ix) {
573  int is = ix + m_Nx2 * iyzt;
574  int iv = Nvcd * is;
  // Both the source vector and the link are taken at the neighbour is - Meo.
575  int in = Nvcd * (is - Meo);
576  int ig = m_Ndf * (is - Meo);
577 
578  for (int ic = 0; ic < m_Nc; ++ic) {
579  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
580  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
581  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
582  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
583  }
584 
585  for (int ic = 0; ic < m_Nc; ++ic) {
586  int ic2 = 2 * ic;
587 
588  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
589  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
590  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
591  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
592 
593  w2[2 * ic + id1 + iv] += wt1r;
594  w2[2 * ic + 1 + id1 + iv] += wt1i;
595  w2[2 * ic + id2 + iv] += wt2r;
596  w2[2 * ic + 1 + id2 + iv] += wt2i;
597  w2[2 * ic + id3 + iv] += -wt2i;
598  w2[2 * ic + 1 + id3 + iv] += +wt2r;
599  w2[2 * ic + id4 + iv] += -wt1i;
600  w2[2 * ic + 1 + id4 + iv] += +wt1r;
601  }
602  }
603  }
604  }
605  }
606 
607 }
608 
609 //====================================================================
611  double *vcp1, double *v1, int ieo)
612 {
  // Packs the iy = 0 sites of this task's sub-block of v1 into the
  // m_bw_send[1] buffer, then calls start_thread(itask) on that channel.
  // All ix of every (iz, it) are packed; there is no Leo selection here and
  // ieo is not used.
  // vcp1 is not read: the buffer pointer comes from m_bw_send.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
613  int Nvc2 = 2 * m_Nvc;
614  int Nvcd = m_Nvc * m_Nd;
615  int Nvcd2 = Nvcd / 2;
616 
617  int id1 = 0;
618  int id2 = m_Nvc;
619  int id3 = m_Nvc * 2;
620  int id4 = m_Nvc * 3;
621 
622  int idir = 1;
623 
624  int isite = m_arg[itask].isite;
625  int isite_cp = m_arg[itask].isite_cpy;
626 
627  // double* w2 = &vcp1[Nvcd2*isite_cp];
628  double *w2
629  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
630  double *w1 = &v1[Nvcd * isite];
631 
632  double bc2 = m_boundary2[idir];
633 
634  int iy = 0;
635 
636  for (int it = 0; it < m_Mt; ++it) {
637  for (int iz = 0; iz < m_Mz; ++iz) {
638  for (int ix = 0; ix < m_Nx2; ++ix) {
  // is indexes the field (t strided by m_Nz); is2 indexes the task-local
  // buffer (t strided by m_Mz).
639  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
640  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
641  int in = Nvcd * is;
642  int ix1 = Nvc2 * is2;
643  int ix2 = ix1 + m_Nvc;
644 
  // Components id1/id4 are added and id2/id3 subtracted element by element,
  // then scaled by bc2.
645  for (int ic = 0; ic < m_Nc; ++ic) {
646  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
647  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
648  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
649  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
650  }
651  }
652  }
653  }
654 
655  m_bw_send[idir]->start_thread(itask);
656 }
657 
658 //====================================================================
660  double *v2, double *vcp2, int ieo)
661 {
  // Waits for this task's part of the m_bw_recv[1] buffer, multiplies each
  // received vector by u at the iy = m_Ny - 1 site and accumulates the
  // result into v2 at that site.
  // vcp2 is not read: the buffer pointer comes from m_bw_recv.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
662  int Nvc2 = 2 * m_Nvc;
663  int Nvcd = m_Nvc * m_Nd;
664  int Nvcd2 = Nvcd / 2;
665 
666  int id1 = 0;
667  int id2 = m_Nvc;
668  int id3 = m_Nvc * 2;
669  int id4 = m_Nvc * 3;
670 
671  int idir = 1;
672 
673  double wt1r, wt1i, wt2r, wt2i;
674 
675  int isite = m_arg[itask].isite;
676  int isite_cp = m_arg[itask].isite_cpy;
677 
678  double *w2 = &v2[Nvcd * isite];
679  // double* w1 = &vcp2[Nvcd2*isite_cp];
680  double *w1
681  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
682  double *u = const_cast<Field_G *>(m_U)->ptr(
683  m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
684 
  // The buffer is read only after wait_thread() returns.
685  m_bw_recv[idir]->wait_thread(itask);
686 
687  int iy = m_Ny - 1;
688  for (int it = 0; it < m_Mt; ++it) {
689  for (int iz = 0; iz < m_Mz; ++iz) {
690  for (int ix = 0; ix < m_Nx2; ++ix) {
  // is indexes the field; is2 indexes the task-local buffer.
691  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
692  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
693  int iv = Nvcd * is;
694  int ig = m_Ndf * is;
695  int ix1 = Nvc2 * is2;
696  int ix2 = ix1 + m_Nvc;
697 
698  for (int ic = 0; ic < m_Nc; ++ic) {
699  int ic2 = ic * m_Nvc;
700 
701  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
702  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
703  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
704  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
705 
706  w2[2 * ic + id1 + iv] += wt1r;
707  w2[2 * ic + 1 + id1 + iv] += wt1i;
708  w2[2 * ic + id2 + iv] += wt2r;
709  w2[2 * ic + 1 + id2 + iv] += wt2i;
710  w2[2 * ic + id3 + iv] += -wt2r;
711  w2[2 * ic + 1 + id3 + iv] += -wt2i;
712  w2[2 * ic + id4 + iv] += wt1r;
713  w2[2 * ic + 1 + id4 + iv] += wt1i;
714  }
715  }
716  }
717  }
718 
719 }
720 
721 //====================================================================
723  double *v2, double *v1, int ieo)
724 {
  // Interior part of the idir = 1 term that reads the neighbour one y-step
  // up (is + m_Nx2): combines the components of v1 at the neighbour,
  // multiplies by u at the site and accumulates into v2.
  // iy stops at m_Ny - 2, so the iy = m_Ny - 1 sites are left to another
  // routine.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
725  int Nvcd = m_Nvc * m_Nd;
726 
727  int id1 = 0;
728  int id2 = m_Nvc;
729  int id3 = m_Nvc * 2;
730  int id4 = m_Nvc * 3;
731 
732  int idir = 1;
733 
734  double vt1[m_Nvc], vt2[m_Nvc];
735  double wt1r, wt1i, wt2r, wt2i;
736 
737  int isite = m_arg[itask].isite;
738 
739  double *w2 = &v2[Nvcd * isite];
740  double *w1 = &v1[Nvcd * isite];
741  double *u = const_cast<Field_G *>(m_U)->ptr(
742  m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
743 
744  for (int it = 0; it < m_Mt; ++it) {
745  for (int iz = 0; iz < m_Mz; ++iz) {
746  for (int iy = 0; iy < m_Ny - 1; ++iy) {
747  for (int ix = 0; ix < m_Nx2; ++ix) {
748  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
749  int iv = Nvcd * is;
  // Neighbour is m_Nx2 sites further on, i.e. same ix at iy + 1.
750  int in = Nvcd * (is + m_Nx2);
751  int ig = m_Ndf * is;
752 
753  for (int ic = 0; ic < m_Nc; ++ic) {
754  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
755  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
756  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
757  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
758  }
759 
760  for (int ic = 0; ic < m_Nc; ++ic) {
761  int ic2 = ic * m_Nvc;
762 
763  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
764  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
765  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
766  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
767 
768  w2[2 * ic + id1 + iv] += wt1r;
769  w2[2 * ic + 1 + id1 + iv] += wt1i;
770  w2[2 * ic + id2 + iv] += wt2r;
771  w2[2 * ic + 1 + id2 + iv] += wt2i;
772  w2[2 * ic + id3 + iv] += -wt2r;
773  w2[2 * ic + 1 + id3 + iv] += -wt2i;
774  w2[2 * ic + id4 + iv] += wt1r;
775  w2[2 * ic + 1 + id4 + iv] += wt1i;
776  }
777  }
778  }
779  }
780  }
781 
782 }
783 
784 //====================================================================
786  double *vcp1, double *v1, int ieo)
787 {
  // Packs the iy = m_Ny - 1 sites of this task's sub-block of v1 into the
  // m_fw_send[1] buffer, then calls start_thread(itask) on that channel.
  // The link multiplication (mult_udagv_r / mult_udagv_i) is applied before
  // sending.
  // vcp1 is not read: the buffer pointer comes from m_fw_send.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
788  int Nvc2 = 2 * m_Nvc;
789  int Nvcd = m_Nvc * m_Nd;
790  int Nvcd2 = Nvcd / 2;
791 
792  int id1 = 0;
793  int id2 = m_Nvc;
794  int id3 = m_Nvc * 2;
795  int id4 = m_Nvc * 3;
796 
797  int idir = 1;
798 
799  int isite = m_arg[itask].isite;
800  int isite_cp = m_arg[itask].isite_cpy;
801 
802  // double* w2 = &vcp1[Nvcd2*isite_cp];
803  double *w2
804  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
805  double *w1 = &v1[Nvcd * isite];
  // u is offset by (1 - ieo) * m_Nvol / 2 here.
806  double *u = const_cast<Field_G *>(m_U)->ptr(
807  m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
808 
809  double vt1[m_Nvc], vt2[m_Nvc];
810 
811  int iy = m_Ny - 1;
812 
813  for (int it = 0; it < m_Mt; ++it) {
814  for (int iz = 0; iz < m_Mz; ++iz) {
815  for (int ix = 0; ix < m_Nx2; ++ix) {
  // is indexes the field; is2 indexes the task-local buffer.
816  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
817  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
818  int in = Nvcd * is;
819  int ig = m_Ndf * is;
820  int ix1 = Nvc2 * is2;
821  int ix2 = ix1 + m_Nvc;
822 
823  for (int ic = 0; ic < m_Nc; ++ic) {
824  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
825  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
826  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
827  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
828  }
829 
830  for (int ic = 0; ic < m_Nc; ++ic) {
831  int icr = 2 * ic;
832  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
833  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
834  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
835  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
836  }
837  }
838  }
839  }
840 
841  m_fw_send[idir]->start_thread(itask);
842 }
843 
844 //====================================================================
846  double *v2, double *vcp2, int ieo)
847 {
  // Waits for this task's part of the m_fw_recv[1] buffer, then adds the
  // received vectors, scaled by bc2, to the iy = 0 sites of the task's
  // sub-block of v2. No link multiplication happens here.
  // vcp2 and ieo are not read, and wt1r, wt1i, wt2r, wt2i are declared but
  // unused.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
848  int Nvc2 = 2 * m_Nvc;
849  int Nvcd = m_Nvc * m_Nd;
850  int Nvcd2 = Nvcd / 2;
851 
852  int id1 = 0;
853  int id2 = m_Nvc;
854  int id3 = m_Nvc * 2;
855  int id4 = m_Nvc * 3;
856 
857  int idir = 1;
858  double bc2 = m_boundary2[idir];
859 
860  double wt1r, wt1i, wt2r, wt2i;
861 
862  int isite = m_arg[itask].isite;
863  int isite_cp = m_arg[itask].isite_cpy;
864 
865  double *w2 = &v2[Nvcd * isite];
866  // double* w1 = &vcp2[Nvcd2*isite_cp];
867  double *w1
868  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
869 
  // The buffer is read only after wait_thread() returns.
870  m_fw_recv[idir]->wait_thread(itask);
871 
872  int iy = 0;
873  for (int it = 0; it < m_Mt; ++it) {
874  for (int iz = 0; iz < m_Mz; ++iz) {
875  for (int ix = 0; ix < m_Nx2; ++ix) {
  // is indexes the field; is2 indexes the task-local buffer.
876  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
877  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
878  int iv = Nvcd * is;
879  int ix1 = Nvc2 * is2;
880  int ix2 = ix1 + m_Nvc;
881 
  // Components id3 and id4 receive +ix2 and -ix1 respectively.
882  for (int ic = 0; ic < m_Nc; ++ic) {
883  int icr = 2 * ic;
884  int ici = 2 * ic + 1;
885  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
886  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
887  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
888  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
889  w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
890  w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
891  w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
892  w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
893  }
894  }
895  }
896  }
897 
898 }
899 
900 //====================================================================
902  double *v2, double *v1, int ieo)
903 {
  // Interior part of the idir = 1 term that reads the neighbour one y-step
  // down (is - m_Nx2): combines the components of v1 at the neighbour,
  // multiplies by u taken at the neighbour with mult_udagv_r/i and
  // accumulates into v2 at the site.
  // iy starts at 1, so the iy = 0 sites are left to another routine.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
904  int Nvcd = m_Nvc * m_Nd;
905 
906  int id1 = 0;
907  int id2 = m_Nvc;
908  int id3 = m_Nvc * 2;
909  int id4 = m_Nvc * 3;
910 
911  int idir = 1;
912 
913  double vt1[m_Nvc], vt2[m_Nvc];
914  double wt1r, wt1i, wt2r, wt2i;
915 
916  int isite = m_arg[itask].isite;
917 
918  double *w2 = &v2[Nvcd * isite];
919  double *w1 = &v1[Nvcd * isite];
  // u is offset by (1 - ieo) * m_Nvol / 2 here.
920  double *u = const_cast<Field_G *>(m_U)->ptr(
921  m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
922 
923  for (int it = 0; it < m_Mt; ++it) {
924  for (int iz = 0; iz < m_Mz; ++iz) {
925  for (int iy = 1; iy < m_Ny; ++iy) {
926  for (int ix = 0; ix < m_Nx2; ++ix) {
927  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
928  int iv = Nvcd * is;
  // Both the source vector and the link are taken at the neighbour
  // is - m_Nx2, i.e. same ix at iy - 1.
929  int in = Nvcd * (is - m_Nx2);
930  int ig = m_Ndf * (is - m_Nx2);
931 
932  for (int ic = 0; ic < m_Nc; ++ic) {
933  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
934  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
935  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
936  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
937  }
938 
939  for (int ic = 0; ic < m_Nc; ++ic) {
940  int ic2 = 2 * ic;
941  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
942  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
943  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
944  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
945 
946  w2[ic2 + id1 + iv] += wt1r;
947  w2[ic2 + 1 + id1 + iv] += wt1i;
948  w2[ic2 + id2 + iv] += wt2r;
949  w2[ic2 + 1 + id2 + iv] += wt2i;
950  w2[ic2 + id3 + iv] += wt2r;
951  w2[ic2 + 1 + id3 + iv] += wt2i;
952  w2[ic2 + id4 + iv] += -wt1r;
953  w2[ic2 + 1 + id4 + iv] += -wt1i;
954  }
955  }
956  }
957  }
958  }
959 
960 }
961 
962 //====================================================================
964  double *vcp1, double *v1, int ieo)
965 {
  // For tasks flagged kz0 == 1, packs the iz = 0 slice of the task's
  // sub-block of v1 into the m_bw_send[2] buffer. Every task, flagged or
  // not, then calls start_thread(itask) on that channel.
  // vcp1 and ieo are not read.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
966  int Nvc2 = 2 * m_Nvc;
967  int Nvcd = m_Nvc * m_Nd;
968  int Nvcd2 = Nvcd / 2;
969 
970  int id1 = 0;
971  int id2 = m_Nvc;
972  int id3 = m_Nvc * 2;
973  int id4 = m_Nvc * 3;
974 
975  int idir = 2;
976 
977  int isite = m_arg[itask].isite;
978  int isite_cp = m_arg[itask].isite_cpz;
979 
980  // double* w2 = &vcp1[Nvcd2*isite_cp];
981  double *w2
982  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
983  double *w1 = &v1[Nvcd * isite];
984 
985  double bc2 = m_boundary2[idir];
986 
987  if (m_arg[itask].kz0 == 1) {
988  int Nxy = m_Nx2 * m_Ny;
989  int iz = 0;
990  for (int it = 0; it < m_Mt; ++it) {
991  for (int ixy = 0; ixy < Nxy; ++ixy) {
  // is indexes the field; is2 indexes the buffer (one xy-plane per it).
992  int is = ixy + Nxy * (iz + m_Nz * it);
993  int is2 = ixy + Nxy * it;
994 
995  int in = Nvcd * is;
996  int ix1 = Nvc2 * is2;
997  int ix2 = ix1 + m_Nvc;
998 
  // Here id1 pairs with id3 and id2 with id4, scaled by bc2.
999  for (int ic = 0; ic < m_Nc; ++ic) {
1000  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
1001  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
1002  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1003  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
1004  }
1005  }
1006  }
1007  }
1008 
1009  m_bw_send[idir]->start_thread(itask);
1010 }
1011 
1012 //====================================================================
1014  double *v2, double *vcp2, int ieo)
1015 {
  // Every task calls wait_thread(itask) on m_bw_recv[2]. Tasks flagged
  // kz1 == 1 then multiply each received vector by u at the iz = m_Mz - 1
  // site and accumulate the result into v2 at that site.
  // vcp2 is not read: the buffer pointer comes from m_bw_recv.
  // NOTE(review): itask is presumably declared on the signature line that is
  // missing from this listing -- confirm.
1016  int Nvc2 = 2 * m_Nvc;
1017  int Nvcd = m_Nvc * m_Nd;
1018  int Nvcd2 = Nvcd / 2;
1019 
1020  int id1 = 0;
1021  int id2 = m_Nvc;
1022  int id3 = m_Nvc * 2;
1023  int id4 = m_Nvc * 3;
1024 
1025  int idir = 2;
1026 
1027  double wt1r, wt1i, wt2r, wt2i;
1028 
1029  int isite = m_arg[itask].isite;
1030  int isite_cp = m_arg[itask].isite_cpz;
1031 
1032  double *w2 = &v2[Nvcd * isite];
1033  // double* w1 = &vcp2[Nvcd2*isite_cp];
1034  double *w1
1035  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1036  double *u = const_cast<Field_G *>(m_U)->ptr(
1037  m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1038 
1039  m_bw_recv[idir]->wait_thread(itask);
1040 
1041  if (m_arg[itask].kz1 == 1) {
1042  int Nxy = m_Nx2 * m_Ny;
1043  int iz = m_Mz - 1;
1044  for (int it = 0; it < m_Mt; ++it) {
1045  for (int ixy = 0; ixy < Nxy; ++ixy) {
  // is indexes the field; is2 indexes the buffer (one xy-plane per it).
1046  int is = ixy + Nxy * (iz + m_Nz * it);
1047  int is2 = ixy + Nxy * it;
1048  int iv = Nvcd * is;
1049  int ig = m_Ndf * is;
1050  int ix1 = Nvc2 * is2;
1051  int ix2 = ix1 + m_Nvc;
1052 
1053  for (int ic = 0; ic < m_Nc; ++ic) {
1054  int ic2 = ic * m_Nvc;
1055 
1056  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1057  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1058  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1059  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1060 
1061  w2[2 * ic + id1 + iv] += wt1r;
1062  w2[2 * ic + 1 + id1 + iv] += wt1i;
1063  w2[2 * ic + id2 + iv] += wt2r;
1064  w2[2 * ic + 1 + id2 + iv] += wt2i;
1065  w2[2 * ic + id3 + iv] += wt1i;
1066  w2[2 * ic + 1 + id3 + iv] += -wt1r;
1067  w2[2 * ic + id4 + iv] += -wt2i;
1068  w2[2 * ic + 1 + id4 + iv] += wt2r;
1069  }
1070  }
1071  }
1072  }
1073 
1074 }
1075 
1076 //====================================================================
1078  double *v2, double *v1, int ieo)
1079 {
// Local (no channel access) step for direction idir = 2: for each site of the
// task, combines the component blocks of the v1 site at is + Nxy, multiplies
// by the link at is, and accumulates into v2. When kz1 == 1 the last local z
// slice is left out of the loop.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_zpb_thread(int itask, ...) - confirm.
1080  int Nvcd = m_Nvc * m_Nd;
1081 
// Offsets of the four m_Nvc-sized component blocks inside one site.
1082  int id1 = 0;
1083  int id2 = m_Nvc;
1084  int id3 = m_Nvc * 2;
1085  int id4 = m_Nvc * 3;
1086 
1087  int idir = 2;
1088 
// NOTE(review): these are variable-length arrays unless m_Nvc is a
// compile-time constant - confirm.
1089  double vt1[m_Nvc], vt2[m_Nvc];
1090  double wt1r, wt1i, wt2r, wt2i;
1091 
1092  int isite = m_arg[itask].isite;
1093 
1094  double *w2 = &v2[Nvcd * isite];
1095  double *w1 = &v1[Nvcd * isite];
1096  double *u = const_cast<Field_G *>(m_U)->ptr(
1097  m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1098 
1099  int kz1 = m_arg[itask].kz1;
1100  int Nxy = m_Nx2 * m_Ny;
1101 
1102  for (int it = 0; it < m_Mt; ++it) {
1103  for (int iz = 0; iz < m_Mz - kz1; ++iz) {
1104  for (int ixy = 0; ixy < Nxy; ++ixy) {
1105  int is = ixy + Nxy * (iz + m_Nz * it);
1106  int iv = Nvcd * is;
// in addresses the site one z step forward (is + Nxy).
1107  int in = Nvcd * (is + Nxy);
1108  int ig = m_Ndf * is;
1109 
// Reading (even, odd) entries as (re, im): vt1 = id1 + i * id3,
// vt2 = id2 - i * id4.
1110  for (int ic = 0; ic < m_Nc; ++ic) {
1111  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
1112  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
1113  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
1114  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
1115  }
1116 
1117  for (int ic = 0; ic < m_Nc; ++ic) {
1118  int ic2 = ic * m_Nvc;
1119 
1120  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1121  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1122  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1123  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1124 
// id1 += wt1, id2 += wt2, id3 += -i * wt1, id4 += +i * wt2.
1125  w2[2 * ic + id1 + iv] += wt1r;
1126  w2[2 * ic + 1 + id1 + iv] += wt1i;
1127  w2[2 * ic + id2 + iv] += wt2r;
1128  w2[2 * ic + 1 + id2 + iv] += wt2i;
1129  w2[2 * ic + id3 + iv] += wt1i;
1130  w2[2 * ic + 1 + id3 + iv] += -wt1r;
1131  w2[2 * ic + id4 + iv] += -wt2i;
1132  w2[2 * ic + 1 + id4 + iv] += wt2r;
1133  }
1134  }
1135  }
1136  }
1137 
1138 }
1139 
1140 //====================================================================
1142  double *vcp1, double *v1, int ieo)
1143 {
// Send-side step for direction idir = 2: for a task with kz1 == 1, builds two
// m_Nvc-sized vectors per site of the last local z slice of v1, applies the
// link through mult_udagv_r/_i, and stores the result in the m_fw_send[idir]
// buffer. start_thread(itask) is called by every task.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_zm1_thread(int itask, ...) - confirm.
// vcp1 is only referenced in the commented-out line below.
1144  int Nvc2 = 2 * m_Nvc;
1145  int Nvcd = m_Nvc * m_Nd;
1146  int Nvcd2 = Nvcd / 2;
1147 
// Offsets of the four m_Nvc-sized component blocks inside one site.
1148  int id1 = 0;
1149  int id2 = m_Nvc;
1150  int id3 = m_Nvc * 2;
1151  int id4 = m_Nvc * 3;
1152 
1153  int idir = 2;
1154 
1155  int isite = m_arg[itask].isite;
1156  int isite_cp = m_arg[itask].isite_cpz;
1157 
1158  // double* w2 = &vcp1[Nvcd2*isite_cp];
// The offset is scaled by sizeof(double); presumably ptr() takes a byte
// offset - confirm against the Channel interface.
1159  double *w2
1160  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1161  double *w1 = &v1[Nvcd * isite];
// The link is taken from the (1 - ieo) half of the gauge field.
1162  double *u = const_cast<Field_G *>(m_U)->ptr(
1163  m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1164 
// NOTE(review): these are variable-length arrays unless m_Nvc is a
// compile-time constant - confirm.
1165  double vt1[m_Nvc], vt2[m_Nvc];
1166 
1167  if (m_arg[itask].kz1 == 1) {
1168  int Nxy = m_Nx2 * m_Ny;
1169  int iz = m_Mz - 1;
1170  for (int it = 0; it < m_Mt; ++it) {
1171  for (int ixy = 0; ixy < Nxy; ++ixy) {
1172  int is = ixy + Nxy * (iz + m_Nz * it);
// is2 indexes the (ixy, it) plane of the send buffer.
1173  int is2 = ixy + Nxy * it;
1174  int in = Nvcd * is;
1175  int ig = m_Ndf * is;
1176  int ix1 = Nvc2 * is2;
1177  int ix2 = ix1 + m_Nvc;
1178 
// Reading (even, odd) entries as (re, im): vt1 = id1 - i * id3,
// vt2 = id2 + i * id4.
1179  for (int ic = 0; ic < m_Nc; ++ic) {
1180  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1181  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1182  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1183  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1184  }
1185 
1186  for (int ic = 0; ic < m_Nc; ++ic) {
1187  int icr = 2 * ic;
1188  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1189  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1190  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1191  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1192  }
1193  }
1194  }
1195  }
1196 
1197  m_fw_send[idir]->start_thread(itask);
1198 }
1199 
1200 //====================================================================
1202  double *v2, double *vcp2, int ieo)
1203 {
// Receive-side step for direction idir = 2: waits on m_fw_recv[idir] and, for
// a task with kz0 == 1, accumulates the received data scaled by
// bc2 = m_boundary2[idir] into the first local z slice (iz = 0) of v2.
// No link multiplication happens here.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_zm2_thread(int itask, ...) - confirm.
// NOTE(review): wt1r/wt1i/wt2r/wt2i and ieo are not used in this body, and
// vcp2 is only referenced in the commented-out line below.
1204  int Nvc2 = 2 * m_Nvc;
1205  int Nvcd = m_Nvc * m_Nd;
1206  int Nvcd2 = Nvcd / 2;
1207 
// Offsets of the four m_Nvc-sized component blocks inside one site.
1208  int id1 = 0;
1209  int id2 = m_Nvc;
1210  int id3 = m_Nvc * 2;
1211  int id4 = m_Nvc * 3;
1212 
1213  int idir = 2;
1214  double bc2 = m_boundary2[idir];
1215 
1216  double wt1r, wt1i, wt2r, wt2i;
1217 
1218  int isite = m_arg[itask].isite;
1219  int isite_cp = m_arg[itask].isite_cpz;
1220 
1221  double *w2 = &v2[Nvcd * isite];
1222  // double* w1 = &vcp2[Nvcd2*isite_cp];
// The offset is scaled by sizeof(double); presumably ptr() takes a byte
// offset - confirm against the Channel interface.
1223  double *w1
1224  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1225 
// Called by every task, whether or not it owns the boundary slice.
1226  m_fw_recv[idir]->wait_thread(itask);
1227 
1228  if (m_arg[itask].kz0 == 1) {
1229  int Nxy = m_Nx2 * m_Ny;
1230 
1231  int iz = 0;
1232  for (int it = 0; it < m_Mt; ++it) {
1233  for (int ixy = 0; ixy < Nxy; ++ixy) {
1234  int is = ixy + Nxy * (iz + m_Nz * it);
// is2 indexes the (ixy, it) plane of the receive buffer.
1235  int is2 = ixy + Nxy * it;
1236  int iv = Nvcd * is;
1237  int ix1 = Nvc2 * is2;
1238  int ix2 = ix1 + m_Nvc;
1239 
// Reading (even, odd) entries as (re, im), with a = w1[ix1..] and
// b = w1[ix2..]: id1 += bc2*a, id2 += bc2*b, id3 += +i*bc2*a,
// id4 += -i*bc2*b.
1240  for (int ic = 0; ic < m_Nc; ++ic) {
1241  int icr = 2 * ic;
1242  int ici = 2 * ic + 1;
1243  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1244  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1245  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1246  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1247  w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1248  w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1249  w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1250  w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
1251  }
1252  }
1253  }
1254  }
1255 
1256 }
1257 
1258 //====================================================================
1260  double *v2, double *v1, int ieo)
1261 {
// Local (no channel access) step for direction idir = 2: for each site of the
// task, combines the component blocks of the v1 site at is - Nxy, applies the
// link stored at is - Nxy through mult_udagv_r/_i, and accumulates into v2.
// When kz0 == 1 the first local z slice is left out of the loop.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_zmb_thread(int itask, ...) - confirm.
1262  int Nvcd = m_Nvc * m_Nd;
1263 
// Offsets of the four m_Nvc-sized component blocks inside one site.
1264  int id1 = 0;
1265  int id2 = m_Nvc;
1266  int id3 = m_Nvc * 2;
1267  int id4 = m_Nvc * 3;
1268 
1269  int idir = 2;
1270 
// NOTE(review): these are variable-length arrays unless m_Nvc is a
// compile-time constant - confirm.
1271  double vt1[m_Nvc], vt2[m_Nvc];
1272  double wt1r, wt1i, wt2r, wt2i;
1273 
1274  int isite = m_arg[itask].isite;
1275 
1276  double *w2 = &v2[Nvcd * isite];
1277  double *w1 = &v1[Nvcd * isite];
// The link is taken from the (1 - ieo) half of the gauge field.
1278  double *u = const_cast<Field_G *>(m_U)->ptr(
1279  m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1280 
1281  int kz0 = m_arg[itask].kz0;
1282  int Nxy = m_Nx2 * m_Ny;
1283 
1284  for (int it = 0; it < m_Mt; ++it) {
1285  for (int iz = kz0; iz < m_Mz; ++iz) {
1286  for (int ixy = 0; ixy < Nxy; ++ixy) {
1287  int is = ixy + Nxy * (iz + m_Nz * it);
1288  int iv = Nvcd * is;
// Both the source data and the link come from the site one z step back.
1289  int in = Nvcd * (is - Nxy);
1290  int ig = m_Ndf * (is - Nxy);
1291 
// Reading (even, odd) entries as (re, im): vt1 = id1 - i * id3,
// vt2 = id2 + i * id4.
1292  for (int ic = 0; ic < m_Nc; ++ic) {
1293  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1294  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1295  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1296  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1297  }
1298 
1299  for (int ic = 0; ic < m_Nc; ++ic) {
1300  int ic2 = 2 * ic;
1301  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1302  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1303  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1304  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1305 
// id1 += wt1, id2 += wt2, id3 += +i * wt1, id4 += -i * wt2.
1306  w2[ic2 + id1 + iv] += wt1r;
1307  w2[ic2 + 1 + id1 + iv] += wt1i;
1308  w2[ic2 + id2 + iv] += wt2r;
1309  w2[ic2 + 1 + id2 + iv] += wt2i;
1310  w2[ic2 + id3 + iv] += -wt1i;
1311  w2[ic2 + 1 + id3 + iv] += wt1r;
1312  w2[ic2 + id4 + iv] += wt2i;
1313  w2[ic2 + 1 + id4 + iv] += -wt2r;
1314  }
1315  }
1316  }
1317  }
1318 
1319 }
1320 
1321 //====================================================================
1323  double *vcp1, double *v1, int ieo)
1324 {
// Send-side step for direction idir = 3: for a task with kt0 == 1, copies the
// id3 and id4 component blocks of the first local t slice (it = 0) of v1,
// scaled by 2 * bc2 with bc2 = m_boundary2[idir], into the m_bw_send[idir]
// buffer. start_thread(itask) is called by every task.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_tp1_dirac_thread(int itask, ...) - confirm.
// NOTE(review): id1, id2 and ieo are not used in this body, and vcp1 is only
// referenced in the commented-out line below.
1325  int Nvc2 = 2 * m_Nvc;
1326  int Nvcd = m_Nvc * m_Nd;
1327  int Nvcd2 = Nvcd / 2;
1328 
1329  int id1 = 0;
1330  int id2 = m_Nvc;
1331  int id3 = m_Nvc * 2;
1332  int id4 = m_Nvc * 3;
1333 
1334  int idir = 3;
1335 
1336  int isite = m_arg[itask].isite;
1337  int isite_cp = m_arg[itask].isite_cpt;
1338 
1339  // double* w2 = &vcp1[Nvcd2*isite_cp];
// The offset is scaled by sizeof(double); presumably ptr() takes a byte
// offset - confirm against the Channel interface.
1340  double *w2
1341  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1342  double *w1 = &v1[Nvcd * isite];
1343 
1344  double bc2 = m_boundary2[idir];
1345 
1346  if (m_arg[itask].kt0 == 1) {
1347  int Nxy = m_Nx2 * m_Ny;
1348  int it = 0;
1349  for (int iz = 0; iz < m_Mz; ++iz) {
1350  for (int ixy = 0; ixy < Nxy; ++ixy) {
1351  int is = ixy + Nxy * (iz + m_Nz * it);
// is2 indexes the (ixy, iz) plane of the send buffer.
1352  int is2 = ixy + Nxy * iz;
1353 
1354  int in = Nvcd * is;
1355  int ix1 = Nvc2 * is2;
1356  int ix2 = ix1 + m_Nvc;
1357 
1358  for (int ic = 0; ic < m_Nc; ++ic) {
1359  w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1360  w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1361  w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1362  w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1363  }
1364  }
1365  }
1366  }
1367 
1368  m_bw_send[idir]->start_thread(itask);
1369 }
1370 
1371 //====================================================================
1373  double *v2, double *vcp2, int ieo)
1374 {
// Receive-side step for direction idir = 3: waits on m_bw_recv[idir] and, for
// a task with kt1 == 1, multiplies the link at each site of the last local t
// slice (it = m_Mt - 1) into the received data and accumulates the result
// into the id3 and id4 component blocks of v2.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_tp2_dirac_thread(int itask, ...) - confirm.
// NOTE(review): id1 and id2 are not used in this body, and vcp2 is only
// referenced in the commented-out line below.
1375  int Nvc2 = 2 * m_Nvc;
1376  int Nvcd = m_Nvc * m_Nd;
1377  int Nvcd2 = Nvcd / 2;
1378 
1379  int id1 = 0;
1380  int id2 = m_Nvc;
1381  int id3 = m_Nvc * 2;
1382  int id4 = m_Nvc * 3;
1383 
1384  int idir = 3;
1385 
1386  double wt1r, wt1i, wt2r, wt2i;
1387 
1388  int isite = m_arg[itask].isite;
1389  int isite_cp = m_arg[itask].isite_cpt;
1390 
1391  double *w2 = &v2[Nvcd * isite];
1392  // double* w1 = &vcp2[Nvcd2*isite_cp];
// The offset is scaled by sizeof(double); presumably ptr() takes a byte
// offset - confirm against the Channel interface.
1393  double *w1
1394  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1395  double *u = const_cast<Field_G *>(m_U)->ptr(
1396  m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1397 
// Called by every task, whether or not it owns the boundary slice.
1398  m_bw_recv[idir]->wait_thread(itask);
1399 
1400  if (m_arg[itask].kt1 == 1) {
1401  int Nxy = m_Nx2 * m_Ny;
1402  int it = m_Mt - 1;
1403  for (int iz = 0; iz < m_Mz; ++iz) {
1404  for (int ixy = 0; ixy < Nxy; ++ixy) {
1405  int is = ixy + Nxy * (iz + m_Nz * it);
// is2 indexes the (ixy, iz) plane of the receive buffer.
1406  int is2 = ixy + Nxy * iz;
1407  int iv = Nvcd * is;
1408  int ig = m_Ndf * is;
1409  int ix1 = Nvc2 * is2;
1410  int ix2 = ix1 + m_Nvc;
1411 
1412  for (int ic = 0; ic < m_Nc; ++ic) {
1413  int ic2 = ic * m_Nvc;
1414 
1415  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1416  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1417  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1418  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1419 
1420  w2[2 * ic + id3 + iv] += wt1r;
1421  w2[2 * ic + 1 + id3 + iv] += wt1i;
1422  w2[2 * ic + id4 + iv] += wt2r;
1423  w2[2 * ic + 1 + id4 + iv] += wt2i;
1424  }
1425  }
1426  }
1427  }
1428 
1429 }
1430 
1431 //====================================================================
1433  double *v2, double *v1, int ieo)
1434 {
// Local (no channel access) step for direction idir = 3: for each site of the
// task, takes twice the id3 and id4 component blocks of the v1 site at
// is + Nxyz, multiplies by the link at is, and accumulates into the id3 and
// id4 blocks of v2. When kt1 == 1 the last local t slice is left out.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_tpb_dirac_thread(int itask, ...) - confirm.
// NOTE(review): id1 and id2 are not used in this body.
1435  int Nvcd = m_Nvc * m_Nd;
1436 
1437  int id1 = 0;
1438  int id2 = m_Nvc;
1439  int id3 = m_Nvc * 2;
1440  int id4 = m_Nvc * 3;
1441 
1442  int idir = 3;
1443 
// NOTE(review): these are variable-length arrays unless m_Nvc is a
// compile-time constant - confirm.
1444  double vt1[m_Nvc], vt2[m_Nvc];
1445  double wt1r, wt1i, wt2r, wt2i;
1446 
1447  int isite = m_arg[itask].isite;
1448 
1449  double *w2 = &v2[Nvcd * isite];
1450  double *w1 = &v1[Nvcd * isite];
1451  double *u = const_cast<Field_G *>(m_U)->ptr(
1452  m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1453 
1454  int kt1 = m_arg[itask].kt1;
1455  int Nxy = m_Nx2 * m_Ny;
1456  int Nxyz = Nxy * m_Nz;
1457 
1458  for (int it = 0; it < m_Mt - kt1; ++it) {
1459  for (int iz = 0; iz < m_Mz; ++iz) {
1460  for (int ixy = 0; ixy < Nxy; ++ixy) {
1461  int is = ixy + Nxy * (iz + m_Nz * it);
1462  int iv = Nvcd * is;
// in addresses the site one t step forward (is + Nxyz).
1463  int in = Nvcd * (is + Nxyz);
1464  int ig = m_Ndf * is;
1465 
1466  for (int ic = 0; ic < m_Nc; ++ic) {
1467  vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1468  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1469  vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1470  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1471  }
1472 
1473  for (int ic = 0; ic < m_Nc; ++ic) {
1474  int ic2 = ic * m_Nvc;
1475 
1476  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1477  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1478  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1479  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1480 
1481  w2[2 * ic + id3 + iv] += wt1r;
1482  w2[2 * ic + 1 + id3 + iv] += wt1i;
1483  w2[2 * ic + id4 + iv] += wt2r;
1484  w2[2 * ic + 1 + id4 + iv] += wt2i;
1485  }
1486  }
1487  }
1488  }
1489 
1490 }
1491 
1492 //====================================================================
1494  double *vcp1, double *v1, int ieo)
1495 {
// Send-side step for direction idir = 3: for a task with kt1 == 1, takes twice
// the id1 and id2 component blocks of the last local t slice of v1, applies
// the link through mult_udagv_r/_i, and stores the result in the
// m_fw_send[idir] buffer. start_thread(itask) is called by every task.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_tm1_dirac_thread(int itask, ...) - confirm.
// NOTE(review): id3 and id4 are not used in this body, and vcp1 is only
// referenced in the commented-out line below.
1496  int Nvc2 = 2 * m_Nvc;
1497  int Nvcd = m_Nvc * m_Nd;
1498  int Nvcd2 = Nvcd / 2;
1499 
1500  int id1 = 0;
1501  int id2 = m_Nvc;
1502  int id3 = m_Nvc * 2;
1503  int id4 = m_Nvc * 3;
1504 
1505  int idir = 3;
1506 
1507  int isite = m_arg[itask].isite;
1508  int isite_cp = m_arg[itask].isite_cpt;
1509 
1510  // double* w2 = &vcp1[Nvcd2*isite_cp];
// The offset is scaled by sizeof(double); presumably ptr() takes a byte
// offset - confirm against the Channel interface.
1511  double *w2
1512  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1513  double *w1 = &v1[Nvcd * isite];
// The link is taken from the (1 - ieo) half of the gauge field.
1514  double *u = const_cast<Field_G *>(m_U)->ptr(
1515  m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1516 
// NOTE(review): these are variable-length arrays unless m_Nvc is a
// compile-time constant - confirm.
1517  double vt1[m_Nvc], vt2[m_Nvc];
1518 
1519  if (m_arg[itask].kt1 == 1) {
1520  int Nxy = m_Nx2 * m_Ny;
1521  int it = m_Mt - 1;
1522  for (int iz = 0; iz < m_Mz; ++iz) {
1523  for (int ixy = 0; ixy < Nxy; ++ixy) {
1524  int is = ixy + Nxy * (iz + m_Nz * it);
// is2 indexes the (ixy, iz) plane of the send buffer.
1525  int is2 = ixy + Nxy * iz;
1526  int in = Nvcd * is;
1527  int ig = m_Ndf * is;
1528  int ix1 = Nvc2 * is2;
1529  int ix2 = ix1 + m_Nvc;
1530 
1531  for (int ic = 0; ic < m_Nc; ++ic) {
1532  vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1533  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1534  vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1535  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1536  }
1537 
1538  for (int ic = 0; ic < m_Nc; ++ic) {
1539  int icr = 2 * ic;
1540  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1541  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1542  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1543  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1544  }
1545  }
1546  }
1547  }
1548 
1549  m_fw_send[idir]->start_thread(itask);
1550 }
1551 
1552 //====================================================================
1554  double *v2, double *vcp2, int ieo)
1555 {
// Receive-side step for direction idir = 3: waits on m_fw_recv[idir] and, for
// a task with kt0 == 1, accumulates the received data scaled by
// bc2 = m_boundary2[idir] into the id1 and id2 component blocks of the first
// local t slice (it = 0) of v2. No link multiplication happens here.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_tm2_dirac_thread(int itask, ...) - confirm.
// NOTE(review): id3, id4, wt1r/wt1i/wt2r/wt2i and ieo are not used in this
// body, and vcp2 is only referenced in the commented-out line below.
1556  int Nvc2 = 2 * m_Nvc;
1557  int Nvcd = m_Nvc * m_Nd;
1558  int Nvcd2 = Nvcd / 2;
1559 
1560  int id1 = 0;
1561  int id2 = m_Nvc;
1562  int id3 = m_Nvc * 2;
1563  int id4 = m_Nvc * 3;
1564 
1565  int idir = 3;
1566  double bc2 = m_boundary2[idir];
1567 
1568  double wt1r, wt1i, wt2r, wt2i;
1569 
1570  int isite = m_arg[itask].isite;
1571  int isite_cp = m_arg[itask].isite_cpt;
1572 
1573  double *w2 = &v2[Nvcd * isite];
1574  // double* w1 = &vcp2[Nvcd2*isite_cp];
// The offset is scaled by sizeof(double); presumably ptr() takes a byte
// offset - confirm against the Channel interface.
1575  double *w1
1576  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1577 
// Called by every task, whether or not it owns the boundary slice.
1578  m_fw_recv[idir]->wait_thread(itask);
1579 
1580  if (m_arg[itask].kt0 == 1) {
1581  int Nxy = m_Nx2 * m_Ny;
1582  int it = 0;
1583  for (int iz = 0; iz < m_Mz; ++iz) {
1584  for (int ixy = 0; ixy < Nxy; ++ixy) {
1585  int is = ixy + Nxy * (iz + m_Nz * it);
// is2 indexes the (ixy, iz) plane of the receive buffer.
1586  int is2 = ixy + Nxy * iz;
1587  int iv = Nvcd * is;
1588  int ix1 = Nvc2 * is2;
1589  int ix2 = ix1 + m_Nvc;
1590 
1591  for (int ic = 0; ic < m_Nc; ++ic) {
1592  int icr = 2 * ic;
1593  int ici = 2 * ic + 1;
1594  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1595  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1596  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1597  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1598  }
1599  }
1600  }
1601  }
1602 
1603 }
1604 
1605 //====================================================================
1607  double *v2, double *v1, int ieo)
1608 {
// Local (no channel access) step for direction idir = 3: for each site of the
// task, takes twice the id1 and id2 component blocks of the v1 site at
// is - Nxyz, applies the link stored at is - Nxyz through mult_udagv_r/_i,
// and accumulates into the id1 and id2 blocks of v2. When kt0 == 1 the first
// local t slice is left out of the loop.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_tmb_dirac_thread(int itask, ...) - confirm.
// NOTE(review): id3 and id4 are not used in this body.
1609  int Nvcd = m_Nvc * m_Nd;
1610 
1611  int id1 = 0;
1612  int id2 = m_Nvc;
1613  int id3 = m_Nvc * 2;
1614  int id4 = m_Nvc * 3;
1615 
1616  int idir = 3;
1617 
// NOTE(review): these are variable-length arrays unless m_Nvc is a
// compile-time constant - confirm.
1618  double vt1[m_Nvc], vt2[m_Nvc];
1619  double wt1r, wt1i, wt2r, wt2i;
1620 
1621  int isite = m_arg[itask].isite;
1622 
1623  double *w2 = &v2[Nvcd * isite];
1624  double *w1 = &v1[Nvcd * isite];
// The link is taken from the (1 - ieo) half of the gauge field.
1625  double *u = const_cast<Field_G *>(m_U)->ptr(
1626  m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1627 
1628  int kt0 = m_arg[itask].kt0;
1629  int Nxy = m_Nx2 * m_Ny;
1630  int Nxyz = Nxy * m_Nz;
1631 
1632  for (int it = kt0; it < m_Mt; ++it) {
1633  for (int iz = 0; iz < m_Mz; ++iz) {
1634  for (int ixy = 0; ixy < Nxy; ++ixy) {
1635  int is = ixy + Nxy * (iz + m_Nz * it);
1636  int iv = Nvcd * is;
// Both the source data and the link come from the site one t step back.
1637  int in = Nvcd * (is - Nxyz);
1638  int ig = m_Ndf * (is - Nxyz);
1639 
1640  for (int ic = 0; ic < m_Nc; ++ic) {
1641  vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1642  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1643  vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1644  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1645  }
1646 
1647  for (int ic = 0; ic < m_Nc; ++ic) {
1648  int ic2 = 2 * ic;
1649  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1650  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1651  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1652  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1653 
1654  w2[ic2 + id1 + iv] += wt1r;
1655  w2[ic2 + 1 + id1 + iv] += wt1i;
1656  w2[ic2 + id2 + iv] += wt2r;
1657  w2[ic2 + 1 + id2 + iv] += wt2i;
1658  }
1659  }
1660  }
1661  }
1662 
1663 }
1664 
1665 //====================================================================
1667  double *vcp1, double *v1, int ieo)
1668 {
// Send-side step for direction idir = 3: for a task with kt0 == 1, stores
// bc2 * (id1 + id3) and bc2 * (id2 + id4) of the first local t slice (it = 0)
// of v1 in the m_bw_send[idir] buffer, with bc2 = m_boundary2[idir].
// start_thread(itask) is called by every task.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_tp1_chiral_thread(int itask, ...) - confirm.
// NOTE(review): ieo is not used in this body, and vcp1 is only referenced in
// the commented-out line below.
1669  int Nvc2 = 2 * m_Nvc;
1670  int Nvcd = m_Nvc * m_Nd;
1671  int Nvcd2 = Nvcd / 2;
1672 
// Offsets of the four m_Nvc-sized component blocks inside one site.
1673  int id1 = 0;
1674  int id2 = m_Nvc;
1675  int id3 = m_Nvc * 2;
1676  int id4 = m_Nvc * 3;
1677 
1678  int idir = 3;
1679 
1680  int isite = m_arg[itask].isite;
1681  int isite_cp = m_arg[itask].isite_cpt;
1682 
1683  // double* w2 = &vcp1[Nvcd2*isite_cp];
// The offset is scaled by sizeof(double); presumably ptr() takes a byte
// offset - confirm against the Channel interface.
1684  double *w2
1685  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1686  double *w1 = &v1[Nvcd * isite];
1687 
1688  double bc2 = m_boundary2[idir];
1689 
1690  if (m_arg[itask].kt0 == 1) {
1691  int Nxy = m_Nx2 * m_Ny;
1692  int it = 0;
1693  for (int iz = 0; iz < m_Mz; ++iz) {
1694  for (int ixy = 0; ixy < Nxy; ++ixy) {
1695  int is = ixy + Nxy * (iz + m_Nz * it);
// is2 indexes the (ixy, iz) plane of the send buffer.
1696  int is2 = ixy + Nxy * iz;
1697 
1698  int in = Nvcd * is;
1699  int ix1 = Nvc2 * is2;
1700  int ix2 = ix1 + m_Nvc;
1701 
1702  for (int ic = 0; ic < m_Nc; ++ic) {
1703  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1704  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1705  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1706  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1707  }
1708  }
1709  }
1710  }
1711 
1712  m_bw_send[idir]->start_thread(itask);
1713 }
1714 
1715 //====================================================================
1717  double *v2, double *vcp2, int ieo)
1718 {
// Receive-side step for direction idir = 3: waits on m_bw_recv[idir] and, for
// a task with kt1 == 1, multiplies the link at each site of the last local t
// slice (it = m_Mt - 1) into the received data and adds the same result to
// both the (id1, id2) and the (id3, id4) component blocks of v2.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_tp2_chiral_thread(int itask, ...) - confirm.
// vcp2 is only referenced in the commented-out line below.
1719  int Nvc2 = 2 * m_Nvc;
1720  int Nvcd = m_Nvc * m_Nd;
1721  int Nvcd2 = Nvcd / 2;
1722 
// Offsets of the four m_Nvc-sized component blocks inside one site.
1723  int id1 = 0;
1724  int id2 = m_Nvc;
1725  int id3 = m_Nvc * 2;
1726  int id4 = m_Nvc * 3;
1727 
1728  int idir = 3;
1729 
1730  double wt1r, wt1i, wt2r, wt2i;
1731 
1732  int isite = m_arg[itask].isite;
1733  int isite_cp = m_arg[itask].isite_cpt;
1734 
1735  double *w2 = &v2[Nvcd * isite];
1736  // double* w1 = &vcp2[Nvcd2*isite_cp];
// The offset is scaled by sizeof(double); presumably ptr() takes a byte
// offset - confirm against the Channel interface.
1737  double *w1
1738  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1739  double *u = const_cast<Field_G *>(m_U)->ptr(
1740  m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1741 
// Called by every task, whether or not it owns the boundary slice.
1742  m_bw_recv[idir]->wait_thread(itask);
1743 
1744  if (m_arg[itask].kt1 == 1) {
1745  int Nxy = m_Nx2 * m_Ny;
1746  int it = m_Mt - 1;
1747  for (int iz = 0; iz < m_Mz; ++iz) {
1748  for (int ixy = 0; ixy < Nxy; ++ixy) {
1749  int is = ixy + Nxy * (iz + m_Nz * it);
// is2 indexes the (ixy, iz) plane of the receive buffer.
1750  int is2 = ixy + Nxy * iz;
1751  int iv = Nvcd * is;
1752  int ig = m_Ndf * is;
1753  int ix1 = Nvc2 * is2;
1754  int ix2 = ix1 + m_Nvc;
1755 
1756  for (int ic = 0; ic < m_Nc; ++ic) {
1757  int ic2 = ic * m_Nvc;
1758 
1759  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1760  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1761  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1762  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1763 
1764  w2[2 * ic + id1 + iv] += wt1r;
1765  w2[2 * ic + 1 + id1 + iv] += wt1i;
1766  w2[2 * ic + id2 + iv] += wt2r;
1767  w2[2 * ic + 1 + id2 + iv] += wt2i;
1768  w2[2 * ic + id3 + iv] += wt1r;
1769  w2[2 * ic + 1 + id3 + iv] += wt1i;
1770  w2[2 * ic + id4 + iv] += wt2r;
1771  w2[2 * ic + 1 + id4 + iv] += wt2i;
1772  }
1773  }
1774  }
1775  }
1776 
1777 }
1778 
1779 //====================================================================
1781  double *v2, double *v1, int ieo)
1782 {
// Local (no channel access) step for direction idir = 3: for each site of the
// task, forms id1 + id3 and id2 + id4 from the v1 site at is + Nxyz,
// multiplies by the link at is, and adds the same result to both the
// (id1, id2) and the (id3, id4) component blocks of v2. When kt1 == 1 the
// last local t slice is left out of the loop.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_tpb_chiral_thread(int itask, ...) - confirm.
1783  int Nvcd = m_Nvc * m_Nd;
1784 
// Offsets of the four m_Nvc-sized component blocks inside one site.
1785  int id1 = 0;
1786  int id2 = m_Nvc;
1787  int id3 = m_Nvc * 2;
1788  int id4 = m_Nvc * 3;
1789 
1790  int idir = 3;
1791 
// NOTE(review): these are variable-length arrays unless m_Nvc is a
// compile-time constant - confirm.
1792  double vt1[m_Nvc], vt2[m_Nvc];
1793  double wt1r, wt1i, wt2r, wt2i;
1794 
1795  int isite = m_arg[itask].isite;
1796 
1797  double *w2 = &v2[Nvcd * isite];
1798  double *w1 = &v1[Nvcd * isite];
1799  double *u = const_cast<Field_G *>(m_U)->ptr(
1800  m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1801 
1802  int kt1 = m_arg[itask].kt1;
1803  int Nxy = m_Nx2 * m_Ny;
1804  int Nxyz = Nxy * m_Nz;
1805 
1806  for (int it = 0; it < m_Mt - kt1; ++it) {
1807  for (int iz = 0; iz < m_Mz; ++iz) {
1808  for (int ixy = 0; ixy < Nxy; ++ixy) {
1809  int is = ixy + Nxy * (iz + m_Nz * it);
1810  int iv = Nvcd * is;
// in addresses the site one t step forward (is + Nxyz).
1811  int in = Nvcd * (is + Nxyz);
1812  int ig = m_Ndf * is;
1813 
1814  for (int ic = 0; ic < m_Nc; ++ic) {
1815  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1816  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1817  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1818  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
1819  }
1820 
1821  for (int ic = 0; ic < m_Nc; ++ic) {
1822  int ic2 = ic * m_Nvc;
1823 
1824  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1825  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1826  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1827  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1828 
1829  w2[2 * ic + id1 + iv] += wt1r;
1830  w2[2 * ic + 1 + id1 + iv] += wt1i;
1831  w2[2 * ic + id2 + iv] += wt2r;
1832  w2[2 * ic + 1 + id2 + iv] += wt2i;
1833  w2[2 * ic + id3 + iv] += wt1r;
1834  w2[2 * ic + 1 + id3 + iv] += wt1i;
1835  w2[2 * ic + id4 + iv] += wt2r;
1836  w2[2 * ic + 1 + id4 + iv] += wt2i;
1837  }
1838  }
1839  }
1840  }
1841 
1842 }
1843 
1844 //====================================================================
1846  double *vcp1, double *v1, int ieo)
1847 {
// Send-side step for direction idir = 3: for a task with kt1 == 1, forms
// id1 - id3 and id2 - id4 on the last local t slice of v1, applies the link
// through mult_udagv_r/_i, and stores the result in the m_fw_send[idir]
// buffer. start_thread(itask) is called by every task.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_tm1_chiral_thread(int itask, ...) - confirm.
// vcp1 is only referenced in the commented-out line below.
1848  int Nvc2 = 2 * m_Nvc;
1849  int Nvcd = m_Nvc * m_Nd;
1850  int Nvcd2 = Nvcd / 2;
1851 
// Offsets of the four m_Nvc-sized component blocks inside one site.
1852  int id1 = 0;
1853  int id2 = m_Nvc;
1854  int id3 = m_Nvc * 2;
1855  int id4 = m_Nvc * 3;
1856 
1857  int idir = 3;
1858 
1859  int isite = m_arg[itask].isite;
1860  int isite_cp = m_arg[itask].isite_cpt;
1861 
1862  // double* w2 = &vcp1[Nvcd2*isite_cp];
// The offset is scaled by sizeof(double); presumably ptr() takes a byte
// offset - confirm against the Channel interface.
1863  double *w2
1864  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1865  double *w1 = &v1[Nvcd * isite];
// The link is taken from the (1 - ieo) half of the gauge field.
1866  double *u = const_cast<Field_G *>(m_U)->ptr(
1867  m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1868 
// NOTE(review): these are variable-length arrays unless m_Nvc is a
// compile-time constant - confirm.
1869  double vt1[m_Nvc], vt2[m_Nvc];
1870 
1871  if (m_arg[itask].kt1 == 1) {
1872  int Nxy = m_Nx2 * m_Ny;
1873  int it = m_Mt - 1;
1874  for (int iz = 0; iz < m_Mz; ++iz) {
1875  for (int ixy = 0; ixy < Nxy; ++ixy) {
1876  int is = ixy + Nxy * (iz + m_Nz * it);
// is2 indexes the (ixy, iz) plane of the send buffer.
1877  int is2 = ixy + Nxy * iz;
1878  int in = Nvcd * is;
1879  int ig = m_Ndf * is;
1880  int ix1 = Nvc2 * is2;
1881  int ix2 = ix1 + m_Nvc;
1882 
1883  for (int ic = 0; ic < m_Nc; ++ic) {
1884  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1885  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1886  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1887  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1888  }
1889 
1890  for (int ic = 0; ic < m_Nc; ++ic) {
1891  int icr = 2 * ic;
1892  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1893  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1894  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1895  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1896  }
1897  }
1898  }
1899  }
1900 
1901  m_fw_send[idir]->start_thread(itask);
1902 }
1903 
1904 //====================================================================
1906  double *v2, double *vcp2, int ieo)
1907 {
// Receive-side step for direction idir = 3: waits on m_fw_recv[idir] and, for
// a task with kt0 == 1, adds the received data scaled by
// bc2 = m_boundary2[idir] to the (id1, id2) component blocks of the first
// local t slice (it = 0) of v2 and subtracts it from the (id3, id4) blocks.
// No link multiplication happens here.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_tm2_chiral_thread(int itask, ...) - confirm.
// NOTE(review): wt1r/wt1i/wt2r/wt2i and ieo are not used in this body, and
// vcp2 is only referenced in the commented-out line below.
1908  int Nvc2 = 2 * m_Nvc;
1909  int Nvcd = m_Nvc * m_Nd;
1910  int Nvcd2 = Nvcd / 2;
1911 
// Offsets of the four m_Nvc-sized component blocks inside one site.
1912  int id1 = 0;
1913  int id2 = m_Nvc;
1914  int id3 = m_Nvc * 2;
1915  int id4 = m_Nvc * 3;
1916 
1917  int idir = 3;
1918  double bc2 = m_boundary2[idir];
1919 
1920  double wt1r, wt1i, wt2r, wt2i;
1921 
1922  int isite = m_arg[itask].isite;
1923  int isite_cp = m_arg[itask].isite_cpt;
1924 
1925  double *w2 = &v2[Nvcd * isite];
1926  // double* w1 = &vcp2[Nvcd2*isite_cp];
// The offset is scaled by sizeof(double); presumably ptr() takes a byte
// offset - confirm against the Channel interface.
1927  double *w1
1928  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1929 
// Called by every task, whether or not it owns the boundary slice.
1930  m_fw_recv[idir]->wait_thread(itask);
1931 
1932  if (m_arg[itask].kt0 == 1) {
1933  int Nxy = m_Nx2 * m_Ny;
1934  int it = 0;
1935  for (int iz = 0; iz < m_Mz; ++iz) {
1936  for (int ixy = 0; ixy < Nxy; ++ixy) {
1937  int is = ixy + Nxy * (iz + m_Nz * it);
// is2 indexes the (ixy, iz) plane of the receive buffer.
1938  int is2 = ixy + Nxy * iz;
1939  int iv = Nvcd * is;
1940  int ix1 = Nvc2 * is2;
1941  int ix2 = ix1 + m_Nvc;
1942 
1943  for (int ic = 0; ic < m_Nc; ++ic) {
1944  int icr = 2 * ic;
1945  int ici = 2 * ic + 1;
1946  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1947  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1948  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1949  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1950  w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1951  w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1952  w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1953  w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
1954  }
1955  }
1956  }
1957  }
1958 
1959 }
1960 
1961 //====================================================================
1963  double *v2, double *v1, int ieo)
1964 {
// Local (no channel access) step for direction idir = 3: for each site of the
// task, forms id1 - id3 and id2 - id4 from the v1 site at is - Nxyz, applies
// the link stored at is - Nxyz through mult_udagv_r/_i, adds the result to
// the (id1, id2) component blocks of v2 and subtracts it from the (id3, id4)
// blocks. When kt0 == 1 the first local t slice is left out of the loop.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably mult_tmb_chiral_thread(int itask, ...) - confirm.
1965  int Nvcd = m_Nvc * m_Nd;
1966 
// Offsets of the four m_Nvc-sized component blocks inside one site.
1967  int id1 = 0;
1968  int id2 = m_Nvc;
1969  int id3 = m_Nvc * 2;
1970  int id4 = m_Nvc * 3;
1971 
1972  int idir = 3;
1973 
// NOTE(review): these are variable-length arrays unless m_Nvc is a
// compile-time constant - confirm.
1974  double vt1[m_Nvc], vt2[m_Nvc];
1975  double wt1r, wt1i, wt2r, wt2i;
1976 
1977  int isite = m_arg[itask].isite;
1978 
1979  double *w2 = &v2[Nvcd * isite];
1980  double *w1 = &v1[Nvcd * isite];
// The link is taken from the (1 - ieo) half of the gauge field.
1981  double *u = const_cast<Field_G *>(m_U)->ptr(
1982  m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1983 
1984  int kt0 = m_arg[itask].kt0;
1985  int Nxy = m_Nx2 * m_Ny;
1986  int Nxyz = Nxy * m_Nz;
1987 
1988  for (int it = kt0; it < m_Mt; ++it) {
1989  for (int iz = 0; iz < m_Mz; ++iz) {
1990  for (int ixy = 0; ixy < Nxy; ++ixy) {
1991  int is = ixy + Nxy * (iz + m_Nz * it);
1992  int iv = Nvcd * is;
// Both the source data and the link come from the site one t step back.
1993  int in = Nvcd * (is - Nxyz);
1994  int ig = m_Ndf * (is - Nxyz);
1995 
1996  for (int ic = 0; ic < m_Nc; ++ic) {
1997  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1998  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1999  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
2000  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
2001  }
2002 
2003  for (int ic = 0; ic < m_Nc; ++ic) {
2004  int ic2 = 2 * ic;
2005  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
2006  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
2007  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2008  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2009 
2010  w2[ic2 + id1 + iv] += wt1r;
2011  w2[ic2 + 1 + id1 + iv] += wt1i;
2012  w2[ic2 + id2 + iv] += wt2r;
2013  w2[ic2 + 1 + id2 + iv] += wt2i;
2014  w2[ic2 + id3 + iv] -= wt1r;
2015  w2[ic2 + 1 + id3 + iv] -= wt1i;
2016  w2[ic2 + id4 + iv] -= wt2r;
2017  w2[ic2 + 1 + id4 + iv] -= wt2i;
2018  }
2019  }
2020  }
2021  }
2022 
2023 }
2024 
2025 //====================================================================
2027  double *v2, double *v1)
2028 {
// Writes into v2, for every site of the task, the v1 site data with its
// m_Nvc-sized component blocks exchanged pairwise: id1 <-> id3 and
// id2 <-> id4. v1 is only read.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably gm5_dirac_thread(int itask, ...) - confirm.
2029  int Nvcd = m_Nvc * m_Nd;
2030  int Nxy = m_Nx2 * m_Ny;
2031 
// Offsets of the four m_Nvc-sized component blocks inside one site.
2032  int id1 = 0;
2033  int id2 = m_Nvc;
2034  int id3 = m_Nvc * 2;
2035  int id4 = m_Nvc * 3;
2036 
2037  int isite = m_arg[itask].isite;
2038  double *w2 = &v2[Nvcd * isite];
2039  double *w1 = &v1[Nvcd * isite];
2040 
2041  for (int it = 0; it < m_Mt; ++it) {
2042  for (int iz = 0; iz < m_Mz; ++iz) {
2043  for (int ixy = 0; ixy < Nxy; ++ixy) {
// The t stride uses the full m_Nz, not the per-task m_Mz.
2044  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2045  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2046  w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2047  w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2048  w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2049  w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
2050  }
2051  }
2052  }
2053  }
2054 
2055 }
2056 
2057 //====================================================================
2059  double *v2, double *v1)
2060 {
// Writes into v2, for every site of the task, the v1 site data with the id1
// and id2 component blocks copied unchanged and the id3 and id4 blocks
// negated. v1 is only read.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably gm5_chiral_thread(int itask, ...) - confirm.
2061  int Nvcd = m_Nvc * m_Nd;
2062  int Nxy = m_Nx2 * m_Ny;
2063 
// Offsets of the four m_Nvc-sized component blocks inside one site.
2064  int id1 = 0;
2065  int id2 = m_Nvc;
2066  int id3 = m_Nvc * 2;
2067  int id4 = m_Nvc * 3;
2068 
2069  int isite = m_arg[itask].isite;
2070  double *w2 = &v2[Nvcd * isite];
2071  double *w1 = &v1[Nvcd * isite];
2072 
2073  for (int it = 0; it < m_Mt; ++it) {
2074  for (int iz = 0; iz < m_Mz; ++iz) {
2075  for (int ixy = 0; ixy < Nxy; ++ixy) {
// The t stride uses the full m_Nz, not the per-task m_Mz.
2076  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2077  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2078  w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2079  w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2080  w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2081  w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2082  }
2083  }
2084  }
2085  }
2086 
2087 }
2088 
2089 //====================================================================
2091  double *v1)
2092 {
// In-place variant: for every site of the task, exchanges the m_Nvc-sized
// component blocks of v1 pairwise (id1 <-> id3 and id2 <-> id4).
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably the one-buffer overload of gm5_dirac_thread
// (int itask, ...) - confirm.
2093  int Nvcd = m_Nvc * m_Nd;
2094  int Nxy = m_Nx2 * m_Ny;
2095 
// Offsets of the four m_Nvc-sized component blocks inside one site.
2096  int id1 = 0;
2097  int id2 = m_Nvc;
2098  int id3 = m_Nvc * 2;
2099  int id4 = m_Nvc * 3;
2100 
2101  int isite = m_arg[itask].isite;
2102  double *w1 = &v1[Nvcd * isite];
2103 
2104  for (int it = 0; it < m_Mt; ++it) {
2105  for (int iz = 0; iz < m_Mz; ++iz) {
2106  for (int ixy = 0; ixy < Nxy; ++ixy) {
// The t stride uses the full m_Nz, not the per-task m_Mz.
2107  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2108  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
// Save the id1/id2 entries before they are overwritten.
2109  double wt1 = w1[ivc + id1 + iv];
2110  double wt2 = w1[ivc + id2 + iv];
2111  w1[ivc + id1 + iv] = w1[ivc + id3 + iv];
2112  w1[ivc + id2 + iv] = w1[ivc + id4 + iv];
2113  w1[ivc + id3 + iv] = wt1;
2114  w1[ivc + id4 + iv] = wt2;
2115  }
2116  }
2117  }
2118  }
2119 
2120 }
2121 
2122 //====================================================================
2124  double *v1)
2125 {
// In-place variant: for every site of the task, negates the id3 and id4
// component blocks of v1 and leaves the id1 and id2 blocks untouched.
// NOTE(review): the line with the function name and first parameter is missing
// from this listing; presumably the one-buffer overload of gm5_chiral_thread
// (int itask, ...) - confirm.
// NOTE(review): id1 and id2 are declared but not used in this body.
2126  int Nvcd = m_Nvc * m_Nd;
2127  int Nxy = m_Nx2 * m_Ny;
2128 
2129  int id1 = 0;
2130  int id2 = m_Nvc;
2131  int id3 = m_Nvc * 2;
2132  int id4 = m_Nvc * 3;
2133 
2134  int isite = m_arg[itask].isite;
2135  double *w1 = &v1[Nvcd * isite];
2136 
2137  for (int it = 0; it < m_Mt; ++it) {
2138  for (int iz = 0; iz < m_Mz; ++iz) {
2139  for (int ixy = 0; ixy < Nxy; ++ixy) {
// The t stride uses the full m_Nz, not the per-task m_Mz.
2140  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2141  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2142  w1[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2143  w1[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2144  }
2145  }
2146  }
2147  }
2148 
2149 }
2150 
2151 //====================================================================
2152 //============================================================END=====
void mult_tm2_dirac_thread(int, double *, double *, int)
const Field_F Meo(const Field_F &, const int ieo)
void mult_tp1_dirac_thread(int, double *, double *, int)
BridgeIO vout
Definition: bridgeIO.cpp:207
void mult_ym1_thread(int, double *, double *, int)
void mult_xp1_thread(int, double *, double *, int)
void general(const char *format,...)
Definition: bridgeIO.cpp:38
void mult_tm2_chiral_thread(int, double *, double *, int)
std::valarray< Channel * > m_bw_recv
void mult_ymb_thread(int, double *, double *, int)
std::valarray< Channel * > m_fw_recv
void mult_tmb_dirac_thread(int, double *, double *, int)
void mult_zp1_thread(int, double *, double *, int)
void mult_xp2_thread(int, double *, double *, int)
void mult_ypb_thread(int, double *, double *, int)
SU(N) gauge field.
Definition: field_G.h:36
void mult_tm1_dirac_thread(int, double *, double *, int)
void mult_zm2_thread(int, double *, double *, int)
std::valarray< Channel * > m_bw_send
void mult_tmb_chiral_thread(int, double *, double *, int)
void mult_xm2_thread(int, double *, double *, int)
void mult_ym2_thread(int, double *, double *, int)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_xm1_thread(int, double *, double *, int)
void mult_tp1_chiral_thread(int, double *, double *, int)
std::valarray< Channel * > m_fw_send
void mult_tp2_dirac_thread(int, double *, double *, int)
void mult_tp2_chiral_thread(int, double *, double *, int)
void mult_tpb_chiral_thread(int, double *, double *, int)
void mult_zp2_thread(int, double *, double *, int)
void mult_xmb_thread(int, double *, double *, int)
void mult_zmb_thread(int, double *, double *, int)
void mult_yp1_thread(int, double *, double *, int)
void mult_zm1_thread(int, double *, double *, int)
void mult_zpb_thread(int, double *, double *, int)
void mult_yp2_thread(int, double *, double *, int)
void mult_xpb_thread(int, double *, double *, int)
void mult_tm1_chiral_thread(int, double *, double *, int)
void mult_tpb_dirac_thread(int, double *, double *, int)