Bridge++  Version 1.4.4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fopr_Wilson_eo_impl_thread.cpp
Go to the documentation of this file.
1 
14 #include "fopr_Wilson_eo_impl.h"
15 
16 #include "IO/bridgeIO.h"
17 using Bridge::vout;
18 
20 
21 namespace Imp_BGQ {
22 #if defined USE_GROUP_SU3
23 #include "fopr_Wilson_impl_SU3.inc"
24 #elif defined USE_GROUP_SU2
25 #include "fopr_Wilson_impl_SU2.inc"
26 #elif defined USE_GROUP_SU_N
27 #include "fopr_Wilson_impl_SU_N.inc"
28 #endif
29 
30 //====================================================================
32  {
34 
35  // The following setup corresponds to uniform division of volume.
36  if (m_Nthread <= m_Nt) {
38  } else if (m_Nthread <= m_Nz * m_Nt) {
39  m_Ntask_t = m_Nt;
40  } else {
41  vout.crucial(m_vl, "Error at %s: Too large Nthread: %d\n",
42  class_name.c_str(), m_Nthread);
43  exit(EXIT_FAILURE);
44  }
46  if (m_Ntask_z * m_Ntask_t != m_Nthread) {
47  vout.crucial(m_vl, "Error at %s: Nz(%d) and Nt(%d) \neq Nthread(%d)\n",
48  class_name.c_str(), m_Nz, m_Nt, m_Nthread);
49  exit(EXIT_FAILURE);
50  }
52  m_Mz = m_Nz / m_Ntask_z;
53  m_Mt = m_Nt / m_Ntask_t;
54 
55  vout.general(m_vl, " Nthread = %d\n", m_Nthread);
56  vout.general(m_vl, " Ntask = %d\n", m_Ntask);
57  vout.general(m_vl, " Ntask_z = %d Ntask_t = %d\n", m_Ntask_z, m_Ntask_t);
58  vout.general(m_vl, " Mz = %d Mt = %d\n", m_Mz, m_Mt);
59 
60  // setup of arguments
61  int Nxy2 = m_Nx2 * m_Ny;
62  m_arg.resize(m_Ntask);
63  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
64  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
65  int itask = ith_z + m_Ntask_z * ith_t;
66 
67  m_arg[itask].isite = (ith_z * m_Mz + ith_t * (m_Nz * m_Mt)) * Nxy2;
68 
69  m_arg[itask].kt0 = 0;
70  m_arg[itask].kt1 = 0;
71  m_arg[itask].kz0 = 0;
72  m_arg[itask].kz1 = 0;
73  if (ith_t == 0) m_arg[itask].kt0 = 1;
74  if (ith_z == 0) m_arg[itask].kz0 = 1;
75  if (ith_t == m_Ntask_t - 1) m_arg[itask].kt1 = 1;
76  if (ith_z == m_Ntask_z - 1) m_arg[itask].kz1 = 1;
77 
78  m_arg[itask].isite_cpx = itask * m_Mz * m_Mt * (m_Ny / 2);
79  m_arg[itask].isite_cpy = itask * m_Mz * m_Mt * m_Nx2;
80  m_arg[itask].isite_cpz = ith_t * m_Mt * Nxy2;
81  m_arg[itask].isite_cpt = ith_z * m_Mz * Nxy2;
82  }
83  }
84 
85  // setup for async data transfer
86  int Nc = CommonParameters::Nc();
87  int Nd = CommonParameters::Nd();
88  int Nvcd2 = 2 * Nc * Nd / 2;
89 
90  std::vector<int> destid(m_Ntask);
91  std::vector<int> offset(m_Ntask);
92  std::vector<int> datasize(m_Ntask);
93  std::vector<int> offset_up(m_Ntask);
94  std::vector<int> offset_lw(m_Ntask);
95  std::vector<int> datasize_up(m_Ntask);
96  std::vector<int> datasize_lw(m_Ntask);
97 
98  int imu = 0;
99  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
100  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
101  int itask = ith_z + ith_t * m_Ntask_z;
102  int isite_cp = itask * m_Mz * m_Mt * (m_Ny / 2);
103  destid[itask] = itask;
104  offset[itask] = sizeof(double) * Nvcd2 * isite_cp;
105  datasize[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Mt * (m_Ny / 2);
106  }
107  }
108  m_bw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
109  m_fw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
110  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
111  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
112 
113  imu = 1;
114  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
115  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
116  int itask = ith_z + ith_t * m_Ntask_z;
117  int isite_cp = itask * m_Mz * m_Mt * m_Nx2;
118  destid[itask] = itask;
119  offset[itask] = sizeof(double) * Nvcd2 * isite_cp;
120  datasize[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Mt * m_Nx2;
121  }
122  }
123  m_bw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
124  m_fw_send[imu]->set_thread(m_Ntask, destid, offset, datasize);
125  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
126  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset, datasize);
127 
128  imu = 2;
129  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
130  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
131  int itask = ith_z + m_Ntask_z * ith_t;
132  destid[itask] = -1;
133  offset_up[itask] = 0;
134  offset_lw[itask] = 0;
135  datasize_up[itask] = 0;
136  datasize_lw[itask] = 0;
137  if (ith_z == 0) {
138  destid[itask] = (m_Ntask_z - 1) + ith_t * m_Ntask_z;
139  offset_lw[itask] = sizeof(double) * Nvcd2 * ith_t * m_Mt * m_Nx2 * m_Ny;
140  datasize_lw[itask] = sizeof(double) * Nvcd2 * m_Mt * m_Nx2 * m_Ny;
141  }
142  if (ith_z == m_Ntask_z - 1) {
143  destid[itask] = ith_t * m_Ntask_z;
144  offset_up[itask] = sizeof(double) * Nvcd2 * ith_t * m_Mt * m_Nx2 * m_Ny;
145  datasize_up[itask] = sizeof(double) * Nvcd2 * m_Mt * m_Nx2 * m_Ny;
146  }
147  }
148  }
149  m_bw_send[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
150  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
151  m_fw_send[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
152  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
153 
154  imu = 3;
155  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
156  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
157  int itask = ith_z + m_Ntask_z * ith_t;
158  destid[itask] = -1;
159  offset_up[itask] = 0;
160  offset_lw[itask] = 0;
161  datasize_up[itask] = 0;
162  datasize_lw[itask] = 0;
163  if (ith_t == 0) {
164  destid[itask] = ith_z + (m_Ntask_t - 1) * m_Ntask_z;
165  offset_lw[itask] = sizeof(double) * Nvcd2 * ith_z * m_Mz * m_Nx2 * m_Ny;
166  datasize_lw[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Nx2 * m_Ny;
167  }
168  if (ith_t == m_Ntask_t - 1) {
169  destid[itask] = ith_z;
170  offset_up[itask] = sizeof(double) * Nvcd2 * ith_z * m_Mz * m_Nx2 * m_Ny;
171  datasize_up[itask] = sizeof(double) * Nvcd2 * m_Mz * m_Nx2 * m_Ny;
172  }
173  }
174  }
175  m_bw_send[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
176  m_bw_recv[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
177  m_fw_send[imu]->set_thread(m_Ntask, destid, offset_up, datasize_up);
178  m_fw_recv[imu]->set_thread(m_Ntask, destid, offset_lw, datasize_lw);
179  }
180 
181 
182 //====================================================================
184  double *w, double fac)
185  {
 // Multiplies by fac, in place, the part of w that belongs to task itask:
 // m_Mt x m_Mz slabs of Nvxy doubles each, starting at site
 // m_arg[itask].isite.
 // NOTE(review): the first signature line (function name and the itask
 // parameter used below) is missing from this listing.
186  int Nvcd = m_Nvc * m_Nd;
187  int Nvxy = Nvcd * m_Nx2 * m_Ny;
188 
189  int isite = m_arg[itask].isite;
190  double *wp = &w[Nvcd * isite];
191 
192  for (int it = 0; it < m_Mt; ++it) {
193  for (int iz = 0; iz < m_Mz; ++iz) {
194  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
 // The t stride is the full extent m_Nz, not the per-task extent m_Mz.
195  int iv = ivxy + Nvxy * (iz + m_Nz * it);
196  wp[iv] = fac * wp[iv];
197  }
198  }
199  }
200  }
201 
202 
203 //====================================================================
205  double *v)
206  {
 // Sets to 0.0 the part of v that belongs to task itask: m_Mt x m_Mz slabs
 // of Nvxy doubles each, starting at site m_arg[itask].isite.
 // NOTE(review): the first signature line (function name and the itask
 // parameter used below) is missing from this listing.
207  int Nvcd = m_Nvc * m_Nd;
208  int Nvxy = Nvcd * m_Nx2 * m_Ny;
209 
210  int isite = m_arg[itask].isite;
211  double *wp = &v[Nvcd * isite];
212 
213  for (int it = 0; it < m_Mt; ++it) {
214  for (int iz = 0; iz < m_Mz; ++iz) {
215  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
 // The t stride is the full extent m_Nz, not the per-task extent m_Mz.
216  int iv = ivxy + Nvxy * (iz + m_Nz * it);
217  wp[iv] = 0.0;
218  }
219  }
220  }
221  }
222 
223 
224 //====================================================================
226  int itask, double *vcp1, const double *v1, int ieo)
227  {
 // Direction 0, backward send. For task itask, packs the ix = 0 sites of
 // v1 on rows whose Leo value is 1 into the buffer of m_bw_send[0], then
 // calls start_thread(itask) on that channel. Each packed site holds
 // 2 * m_Nvc doubles: two vectors combined from the four m_Nvc-sized blocks
 // of the source site (block 1 with block 4, block 2 with block 3), scaled
 // by m_boundary2[0].
 // vcp1 is referenced only by the commented-out line below.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
228  int Nvc2 = 2 * m_Nvc;
229  int Nvcd = m_Nvc * m_Nd;
230  int Nvcd2 = Nvcd / 2;
231 
 // Offsets of the four m_Nvc-sized blocks inside one site of v1.
232  int id1 = 0;
233  int id2 = m_Nvc;
234  int id3 = m_Nvc * 2;
235  int id4 = m_Nvc * 3;
236 
237  int idir = 0;
238 
239  int isite = m_arg[itask].isite;
240  int isite_cp = m_arg[itask].isite_cpx;
 // Index of the task's first (y, z, t) row, used to address m_Leo.
241  int iyzt0 = isite / m_Nx2;
242 
243  // double* w2 = &vcp1[Nvcd2*isite_cp];
244  double *w2
245  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
246  const double *w1 = &v1[Nvcd * isite];
247 
248  double bc2 = m_boundary2[idir];
249 
250  int ix = 0;
 // Running count of packed sites; selects the slot in the send buffer.
251  int ibf = 0;
252 
253  for (int it = 0; it < m_Mt; ++it) {
254  for (int iz = 0; iz < m_Mz; ++iz) {
255  for (int iy = 0; iy < m_Ny; ++iy) {
256  int iyzt = iy + m_Ny * (iz + m_Nz * it);
 // Equals the m_Leo entry for ieo == 0 and 1 minus it for ieo == 1
 // (assumes both are 0 or 1 — TODO confirm).
257  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
258  if (Leo == 1) {
259  int is = ix + m_Nx2 * iyzt;
260  int in = Nvcd * is;
261 
262  int ix1 = Nvc2 * ibf;
263  int ix2 = ix1 + m_Nvc;
264 
265  for (int ic = 0; ic < m_Nc; ++ic) {
266  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
267  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
268  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
269  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
270  }
271  ++ibf;
272  }
273  }
274  }
275  }
276 
277  m_bw_send[idir]->start_thread(itask);
278  }
279 
280 
281 //====================================================================
283  int itask, double *v2, const double *vcp2, int ieo)
284  {
 // Direction 0, backward receive. Waits on m_bw_recv[0] for task itask.
 // Then, for the ix = m_Nx2 - 1 sites on rows whose Leo value is 1,
 // multiplies the two received vectors of each site by the link data u
 // through mult_uv_r / mult_uv_i and accumulates the result into all four
 // m_Nvc-sized blocks of v2.
 // vcp2 is referenced only by the commented-out line below.
 // mult_uv_r / mult_uv_i are not defined in this listing (presumably they
 // come from the included fopr_Wilson_impl_*.inc — TODO confirm).
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
285  int Nvc2 = 2 * m_Nvc;
286  int Nvcd = m_Nvc * m_Nd;
287  int Nvcd2 = Nvcd / 2;
288 
 // Offsets of the four m_Nvc-sized blocks inside one site of v2.
289  int id1 = 0;
290  int id2 = m_Nvc;
291  int id3 = m_Nvc * 2;
292  int id4 = m_Nvc * 3;
293 
294  int idir = 0;
295 
296  double wt1r, wt1i, wt2r, wt2i;
297 
298  int isite = m_arg[itask].isite;
299  int isite_cp = m_arg[itask].isite_cpx;
300  int iyzt0 = isite / m_Nx2;
301 
302  double *w2 = &v2[Nvcd * isite];
303  // double* w1 = &vcp2[Nvcd2*isite_cp];
304  const double *w1
305  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
 // Link data: block ieo (m_Nvol / 2 sites) of direction idir in m_U.
306  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
307 
 // The receive buffer is read only after this wait returns.
308  m_bw_recv[idir]->wait_thread(itask);
309 
310  int ix = m_Nx2 - 1;
 // Running count of consumed buffer sites, in the same row order as the
 // loop below.
311  int ibf = 0;
312  for (int it = 0; it < m_Mt; ++it) {
313  for (int iz = 0; iz < m_Mz; ++iz) {
314  for (int iy = 0; iy < m_Ny; ++iy) {
315  int iyzt = iy + m_Ny * (iz + m_Nz * it);
316  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
317 
318  if (Leo == 1) {
319  int is = ix + m_Nx2 * iyzt;
320  int iv = Nvcd * is;
321  int ig = m_Ndf * is;
322  int ix1 = Nvc2 * ibf;
323  int ix2 = ix1 + m_Nvc;
324 
325  for (int ic = 0; ic < m_Nc; ++ic) {
326  int ic2 = ic * m_Nvc;
327  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
328  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
329  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
330  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
331  w2[2 * ic + id1 + iv] += wt1r;
332  w2[2 * ic + 1 + id1 + iv] += wt1i;
333  w2[2 * ic + id2 + iv] += wt2r;
334  w2[2 * ic + 1 + id2 + iv] += wt2i;
335  w2[2 * ic + id3 + iv] += wt2i;
336  w2[2 * ic + 1 + id3 + iv] += -wt2r;
337  w2[2 * ic + id4 + iv] += wt1i;
338  w2[2 * ic + 1 + id4 + iv] += -wt1r;
339  }
340  ++ibf;
341  }
342  }
343  }
344  }
345  }
346 
347 
348 //====================================================================
350  int itask, double *v2, const double *v1, int ieo)
351  {
 // Direction 0, bulk part without communication. For every site of task
 // itask with ix < m_Nx2 - Leo, combines the four blocks of the source
 // site is + Leo of v1 into two vectors, multiplies them by the link data u
 // at site is through mult_uv_r / mult_uv_i, and accumulates into v2 at
 // site is.
 // NOTE(review): vt1 / vt2 are sized by m_Nvc; unless m_Nvc is a
 // compile-time constant these are variable-length arrays, a compiler
 // extension in C++.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
352  int Nvcd = m_Nvc * m_Nd;
353 
 // Offsets of the four m_Nvc-sized blocks inside one site.
354  int id1 = 0;
355  int id2 = m_Nvc;
356  int id3 = m_Nvc * 2;
357  int id4 = m_Nvc * 3;
358 
359  int idir = 0;
360 
361  double vt1[m_Nvc], vt2[m_Nvc];
362  double wt1r, wt1i, wt2r, wt2i;
363 
364  int isite = m_arg[itask].isite;
365  int iyzt0 = isite / m_Nx2;
366 
367  double *w2 = &v2[Nvcd * isite];
368  const double *w1 = &v1[Nvcd * isite];
 // Link data: block ieo (m_Nvol / 2 sites) of direction idir in m_U.
369  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
370 
371  for (int it = 0; it < m_Mt; ++it) {
372  for (int iz = 0; iz < m_Mz; ++iz) {
373  for (int iy = 0; iy < m_Ny; ++iy) {
374  int iyzt = iy + m_Ny * (iz + m_Nz * it);
375  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
 // On rows with Leo == 1 the last ix is left out here; the receive
 // routine of this direction handles ix = m_Nx2 - 1 on those rows.
376  for (int ix = 0; ix < m_Nx2 - Leo; ++ix) {
377  int is = ix + m_Nx2 * iyzt;
378  int iv = Nvcd * is;
379  int in = Nvcd * (is + Leo);
380  int ig = m_Ndf * is;
381 
382  for (int ic = 0; ic < m_Nc; ++ic) {
383  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
384  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
385  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
386  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
387  }
388 
389  for (int ic = 0; ic < m_Nc; ++ic) {
390  int ic2 = ic * m_Nvc;
391 
392  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
393  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
394  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
395  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
396 
397  w2[2 * ic + id1 + iv] += wt1r;
398  w2[2 * ic + 1 + id1 + iv] += wt1i;
399  w2[2 * ic + id2 + iv] += wt2r;
400  w2[2 * ic + 1 + id2 + iv] += wt2i;
401  w2[2 * ic + id3 + iv] += wt2i;
402  w2[2 * ic + 1 + id3 + iv] += -wt2r;
403  w2[2 * ic + id4 + iv] += wt1i;
404  w2[2 * ic + 1 + id4 + iv] += -wt1r;
405  }
406  }
407  }
408  }
409  }
410  }
411 
412 
413 //====================================================================
415  int itask, double *vcp1, const double *v1, int ieo)
416  {
 // Direction 0, forward send. For task itask, takes the ix = m_Nx2 - 1
 // sites of v1 on rows whose Leo value is 0, combines the four blocks of
 // each site into two vectors, multiplies them by the link data u through
 // mult_udagv_r / mult_udagv_i, stores the result in the buffer of
 // m_fw_send[0], and then calls start_thread(itask) on that channel.
 // vcp1 is referenced only by the commented-out line below.
 // NOTE(review): vt1 / vt2 are sized by m_Nvc; unless m_Nvc is a
 // compile-time constant these are variable-length arrays, a compiler
 // extension in C++.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
417  int Nvc2 = 2 * m_Nvc;
418  int Nvcd = m_Nvc * m_Nd;
419  int Nvcd2 = Nvcd / 2;
420 
 // Offsets of the four m_Nvc-sized blocks inside one site of v1.
421  int id1 = 0;
422  int id2 = m_Nvc;
423  int id3 = m_Nvc * 2;
424  int id4 = m_Nvc * 3;
425 
426  int idir = 0;
427 
428  int isite = m_arg[itask].isite;
429  int isite_cp = m_arg[itask].isite_cpx;
430  int iyzt0 = isite / m_Nx2;
431 
432  // double* w2 = &vcp1[Nvcd2*isite_cp];
433  double *w2
434  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
435  const double *w1 = &v1[Nvcd * isite];
 // Link data: block (1 - ieo) (m_Nvol / 2 sites) of direction idir in m_U.
436  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
437 
438  double vt1[m_Nvc], vt2[m_Nvc];
439 
440  int ix = m_Nx2 - 1;
 // Running count of packed sites; selects the slot in the send buffer.
441  int ibf = 0;
442 
443  for (int it = 0; it < m_Mt; ++it) {
444  for (int iz = 0; iz < m_Mz; ++iz) {
445  for (int iy = 0; iy < m_Ny; ++iy) {
446  int iyzt = iy + m_Ny * (iz + m_Nz * it);
447  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
448  if (Leo == 0) {
449  int is = ix + m_Nx2 * iyzt;
450  int in = Nvcd * is;
451  int ig = m_Ndf * is;
452 
453  int ix1 = Nvc2 * ibf;
454  int ix2 = ix1 + m_Nvc;
455 
456  for (int ic = 0; ic < m_Nc; ++ic) {
457  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
458  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
459  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
460  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
461  }
462 
463  for (int ic = 0; ic < m_Nc; ++ic) {
 // The link offset here is 2 * ic, unlike ic * m_Nvc in the mult_uv_*
 // calls of the other routines.
464  int icr = 2 * ic;
465  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
466  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
467  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
468  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
469  }
470  ++ibf;
471  }
472  }
473  }
474  }
475 
476  m_fw_send[idir]->start_thread(itask);
477  }
478 
479 
480 //====================================================================
482  int itask, double *v2, const double *vcp2, int ieo)
483  {
 // Direction 0, forward receive. Waits on m_fw_recv[0] for task itask.
 // Then, for the ix = 0 sites on rows whose Leo value is 0, adds the
 // received vectors, scaled by m_boundary2[0], into the four blocks of v2.
 // No link multiplication is done here.
 // vcp2 is referenced only by the commented-out line below; wt1r, wt1i,
 // wt2r and wt2i are declared but never used.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
484  int Nvc2 = 2 * m_Nvc;
485  int Nvcd = m_Nvc * m_Nd;
486  int Nvcd2 = Nvcd / 2;
487 
 // Offsets of the four m_Nvc-sized blocks inside one site of v2.
488  int id1 = 0;
489  int id2 = m_Nvc;
490  int id3 = m_Nvc * 2;
491  int id4 = m_Nvc * 3;
492 
493  int idir = 0;
494  double bc2 = m_boundary2[idir];
495 
496  double wt1r, wt1i, wt2r, wt2i;
497 
498  int isite = m_arg[itask].isite;
499  int isite_cp = m_arg[itask].isite_cpx;
500  int iyzt0 = isite / m_Nx2;
501 
502  double *w2 = &v2[Nvcd * isite];
503  // double* w1 = &vcp2[Nvcd2*isite_cp];
504  const double *w1
505  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
506 
 // The receive buffer is read only after this wait returns.
507  m_fw_recv[idir]->wait_thread(itask);
508 
509  int ix = 0;
 // Running count of consumed buffer sites.
510  int ibf = 0;
511  for (int it = 0; it < m_Mt; ++it) {
512  for (int iz = 0; iz < m_Mz; ++iz) {
513  for (int iy = 0; iy < m_Ny; ++iy) {
514  int iyzt = iy + m_Ny * (iz + m_Nz * it);
515  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
516  if (Leo == 0) {
517  int is = ix + m_Nx2 * iyzt;
518  int iv = Nvcd * is;
519 
520  int ix1 = Nvc2 * ibf;
521  int ix2 = ix1 + m_Nvc;
522 
523  for (int ic = 0; ic < m_Nc; ++ic) {
524  int icr = 2 * ic;
525  int ici = 2 * ic + 1;
526  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
527  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
528  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
529  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
530  w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
531  w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
532  w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
533  w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
534  }
535  ++ibf;
536  }
537  }
538  }
539  }
540  }
541 
542 
543 //====================================================================
545  int itask, double *v2, const double *v1, int ieo)
546  {
 // Direction 0, bulk part without communication. With Meo = 1 - Leo, for
 // every site of task itask with ix >= Meo, combines the four blocks of the
 // source site is - Meo of v1 into two vectors, multiplies them by the link
 // data u at site is - Meo through mult_udagv_r / mult_udagv_i, and
 // accumulates into v2 at site is.
 // NOTE(review): vt1 / vt2 are sized by m_Nvc; unless m_Nvc is a
 // compile-time constant these are variable-length arrays, a compiler
 // extension in C++.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
547  int Nvcd = m_Nvc * m_Nd;
548 
 // Offsets of the four m_Nvc-sized blocks inside one site.
549  int id1 = 0;
550  int id2 = m_Nvc;
551  int id3 = m_Nvc * 2;
552  int id4 = m_Nvc * 3;
553 
554  int idir = 0;
555 
556  double vt1[m_Nvc], vt2[m_Nvc];
557  double wt1r, wt1i, wt2r, wt2i;
558 
559  int isite = m_arg[itask].isite;
560  int iyzt0 = isite / m_Nx2;
561 
562  double *w2 = &v2[Nvcd * isite];
563  const double *w1 = &v1[Nvcd * isite];
 // Link data: block (1 - ieo) (m_Nvol / 2 sites) of direction idir in m_U.
564  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
565 
566  for (int it = 0; it < m_Mt; ++it) {
567  for (int iz = 0; iz < m_Mz; ++iz) {
568  for (int iy = 0; iy < m_Ny; ++iy) {
569  int iyzt = iy + m_Ny * (iz + m_Nz * it);
570  int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
571  int Meo = 1 - Leo;
 // On rows with Leo == 0 the first ix is left out here; the receive
 // routine of this direction handles ix = 0 on those rows.
572  for (int ix = Meo; ix < m_Nx2; ++ix) {
573  int is = ix + m_Nx2 * iyzt;
574  int iv = Nvcd * is;
575  int in = Nvcd * (is - Meo);
576  int ig = m_Ndf * (is - Meo);
577 
578  for (int ic = 0; ic < m_Nc; ++ic) {
579  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
580  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
581  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
582  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
583  }
584 
585  for (int ic = 0; ic < m_Nc; ++ic) {
586  int ic2 = 2 * ic;
587 
588  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
589  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
590  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
591  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
592 
593  w2[2 * ic + id1 + iv] += wt1r;
594  w2[2 * ic + 1 + id1 + iv] += wt1i;
595  w2[2 * ic + id2 + iv] += wt2r;
596  w2[2 * ic + 1 + id2 + iv] += wt2i;
597  w2[2 * ic + id3 + iv] += -wt2i;
598  w2[2 * ic + 1 + id3 + iv] += +wt2r;
599  w2[2 * ic + id4 + iv] += -wt1i;
600  w2[2 * ic + 1 + id4 + iv] += +wt1r;
601  }
602  }
603  }
604  }
605  }
606  }
607 
608 
609 //====================================================================
611  int itask, double *vcp1, const double *v1, int ieo)
612  {
 // Direction 1, backward send. For task itask, packs all iy = 0 sites of
 // v1 into the buffer of m_bw_send[1], then calls start_thread(itask) on
 // that channel. Each packed site holds 2 * m_Nvc doubles: block 1 plus
 // block 4 and block 2 minus block 3 of the source site, scaled by
 // m_boundary2[1].
 // vcp1 is referenced only by the commented-out line below; ieo is not used.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
613  int Nvc2 = 2 * m_Nvc;
614  int Nvcd = m_Nvc * m_Nd;
615  int Nvcd2 = Nvcd / 2;
616 
 // Offsets of the four m_Nvc-sized blocks inside one site of v1.
617  int id1 = 0;
618  int id2 = m_Nvc;
619  int id3 = m_Nvc * 2;
620  int id4 = m_Nvc * 3;
621 
622  int idir = 1;
623 
624  int isite = m_arg[itask].isite;
625  int isite_cp = m_arg[itask].isite_cpy;
626 
627  // double* w2 = &vcp1[Nvcd2*isite_cp];
628  double *w2
629  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
630  const double *w1 = &v1[Nvcd * isite];
631 
632  double bc2 = m_boundary2[idir];
633 
634  int iy = 0;
635 
636  for (int it = 0; it < m_Mt; ++it) {
637  for (int iz = 0; iz < m_Mz; ++iz) {
638  for (int ix = 0; ix < m_Nx2; ++ix) {
 // is: field index with full z extent m_Nz; is2: index inside the task's
 // buffer slot, which uses the per-task extent m_Mz.
639  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
640  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
641  int in = Nvcd * is;
642  int ix1 = Nvc2 * is2;
643  int ix2 = ix1 + m_Nvc;
644 
645  for (int ic = 0; ic < m_Nc; ++ic) {
646  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
647  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
648  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
649  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
650  }
651  }
652  }
653  }
654 
655  m_bw_send[idir]->start_thread(itask);
656  }
657 
658 
659 //====================================================================
661  int itask, double *v2, const double *vcp2, int ieo)
662  {
 // Direction 1, backward receive. Waits on m_bw_recv[1] for task itask.
 // Then, for all iy = m_Ny - 1 sites, multiplies the two received vectors
 // of each site by the link data u through mult_uv_r / mult_uv_i and
 // accumulates the result into the four blocks of v2.
 // vcp2 is referenced only by the commented-out line below.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
663  int Nvc2 = 2 * m_Nvc;
664  int Nvcd = m_Nvc * m_Nd;
665  int Nvcd2 = Nvcd / 2;
666 
 // Offsets of the four m_Nvc-sized blocks inside one site of v2.
667  int id1 = 0;
668  int id2 = m_Nvc;
669  int id3 = m_Nvc * 2;
670  int id4 = m_Nvc * 3;
671 
672  int idir = 1;
673 
674  double wt1r, wt1i, wt2r, wt2i;
675 
676  int isite = m_arg[itask].isite;
677  int isite_cp = m_arg[itask].isite_cpy;
678 
679  double *w2 = &v2[Nvcd * isite];
680  // double* w1 = &vcp2[Nvcd2*isite_cp];
681  const double *w1
682  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
 // Link data: block ieo (m_Nvol / 2 sites) of direction idir in m_U.
683  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
684 
 // The receive buffer is read only after this wait returns.
685  m_bw_recv[idir]->wait_thread(itask);
686 
687  int iy = m_Ny - 1;
688  for (int it = 0; it < m_Mt; ++it) {
689  for (int iz = 0; iz < m_Mz; ++iz) {
690  for (int ix = 0; ix < m_Nx2; ++ix) {
 // is: field index with full z extent m_Nz; is2: index inside the task's
 // buffer slot, which uses the per-task extent m_Mz.
691  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
692  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
693  int iv = Nvcd * is;
694  int ig = m_Ndf * is;
695  int ix1 = Nvc2 * is2;
696  int ix2 = ix1 + m_Nvc;
697 
698  for (int ic = 0; ic < m_Nc; ++ic) {
699  int ic2 = ic * m_Nvc;
700 
701  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
702  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
703  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
704  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
705 
706  w2[2 * ic + id1 + iv] += wt1r;
707  w2[2 * ic + 1 + id1 + iv] += wt1i;
708  w2[2 * ic + id2 + iv] += wt2r;
709  w2[2 * ic + 1 + id2 + iv] += wt2i;
710  w2[2 * ic + id3 + iv] += -wt2r;
711  w2[2 * ic + 1 + id3 + iv] += -wt2i;
712  w2[2 * ic + id4 + iv] += wt1r;
713  w2[2 * ic + 1 + id4 + iv] += wt1i;
714  }
715  }
716  }
717  }
718  }
719 
720 
721 //====================================================================
723  int itask, double *v2, const double *v1, int ieo)
724  {
 // Direction 1, bulk part without communication. For every site of task
 // itask with iy < m_Ny - 1, combines the four blocks of the source site
 // is + m_Nx2 of v1 (the next y row) into two vectors, multiplies them by
 // the link data u at site is through mult_uv_r / mult_uv_i, and
 // accumulates into v2 at site is.
 // NOTE(review): vt1 / vt2 are sized by m_Nvc; unless m_Nvc is a
 // compile-time constant these are variable-length arrays, a compiler
 // extension in C++.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
725  int Nvcd = m_Nvc * m_Nd;
726 
 // Offsets of the four m_Nvc-sized blocks inside one site.
727  int id1 = 0;
728  int id2 = m_Nvc;
729  int id3 = m_Nvc * 2;
730  int id4 = m_Nvc * 3;
731 
732  int idir = 1;
733 
734  double vt1[m_Nvc], vt2[m_Nvc];
735  double wt1r, wt1i, wt2r, wt2i;
736 
737  int isite = m_arg[itask].isite;
738 
739  double *w2 = &v2[Nvcd * isite];
740  const double *w1 = &v1[Nvcd * isite];
 // Link data: block ieo (m_Nvol / 2 sites) of direction idir in m_U.
741  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
742 
743  for (int it = 0; it < m_Mt; ++it) {
744  for (int iz = 0; iz < m_Mz; ++iz) {
 // The row iy = m_Ny - 1 is left out here; the receive routine of this
 // direction handles it.
745  for (int iy = 0; iy < m_Ny - 1; ++iy) {
746  for (int ix = 0; ix < m_Nx2; ++ix) {
747  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
748  int iv = Nvcd * is;
749  int in = Nvcd * (is + m_Nx2);
750  int ig = m_Ndf * is;
751 
752  for (int ic = 0; ic < m_Nc; ++ic) {
753  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
754  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
755  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
756  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
757  }
758 
759  for (int ic = 0; ic < m_Nc; ++ic) {
760  int ic2 = ic * m_Nvc;
761 
762  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
763  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
764  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
765  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
766 
767  w2[2 * ic + id1 + iv] += wt1r;
768  w2[2 * ic + 1 + id1 + iv] += wt1i;
769  w2[2 * ic + id2 + iv] += wt2r;
770  w2[2 * ic + 1 + id2 + iv] += wt2i;
771  w2[2 * ic + id3 + iv] += -wt2r;
772  w2[2 * ic + 1 + id3 + iv] += -wt2i;
773  w2[2 * ic + id4 + iv] += wt1r;
774  w2[2 * ic + 1 + id4 + iv] += wt1i;
775  }
776  }
777  }
778  }
779  }
780  }
781 
782 
783 //====================================================================
785  int itask, double *vcp1, const double *v1, int ieo)
786  {
 // Direction 1, forward send. For task itask, takes all iy = m_Ny - 1 sites
 // of v1, combines the four blocks of each site into two vectors (block 1
 // minus block 4, block 2 plus block 3), multiplies them by the link data u
 // through mult_udagv_r / mult_udagv_i, stores the result in the buffer of
 // m_fw_send[1], and then calls start_thread(itask) on that channel.
 // vcp1 is referenced only by the commented-out line below.
 // NOTE(review): vt1 / vt2 are sized by m_Nvc; unless m_Nvc is a
 // compile-time constant these are variable-length arrays, a compiler
 // extension in C++.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
787  int Nvc2 = 2 * m_Nvc;
788  int Nvcd = m_Nvc * m_Nd;
789  int Nvcd2 = Nvcd / 2;
790 
 // Offsets of the four m_Nvc-sized blocks inside one site of v1.
791  int id1 = 0;
792  int id2 = m_Nvc;
793  int id3 = m_Nvc * 2;
794  int id4 = m_Nvc * 3;
795 
796  int idir = 1;
797 
798  int isite = m_arg[itask].isite;
799  int isite_cp = m_arg[itask].isite_cpy;
800 
801  // double* w2 = &vcp1[Nvcd2*isite_cp];
802  double *w2
803  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
804  const double *w1 = &v1[Nvcd * isite];
 // Link data: block (1 - ieo) (m_Nvol / 2 sites) of direction idir in m_U.
805  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
806 
807  double vt1[m_Nvc], vt2[m_Nvc];
808 
809  int iy = m_Ny - 1;
810 
811  for (int it = 0; it < m_Mt; ++it) {
812  for (int iz = 0; iz < m_Mz; ++iz) {
813  for (int ix = 0; ix < m_Nx2; ++ix) {
 // is: field index with full z extent m_Nz; is2: index inside the task's
 // buffer slot, which uses the per-task extent m_Mz.
814  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
815  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
816  int in = Nvcd * is;
817  int ig = m_Ndf * is;
818  int ix1 = Nvc2 * is2;
819  int ix2 = ix1 + m_Nvc;
820 
821  for (int ic = 0; ic < m_Nc; ++ic) {
822  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
823  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
824  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
825  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
826  }
827 
828  for (int ic = 0; ic < m_Nc; ++ic) {
829  int icr = 2 * ic;
830  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
831  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
832  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
833  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
834  }
835  }
836  }
837  }
838 
839  m_fw_send[idir]->start_thread(itask);
840  }
841 
842 
843 //====================================================================
845  int itask, double *v2, const double *vcp2, int ieo)
846  {
 // Direction 1, forward receive. Waits on m_fw_recv[1] for task itask.
 // Then, for all iy = 0 sites, adds the received vectors, scaled by
 // m_boundary2[1], into the four blocks of v2. No link multiplication is
 // done here.
 // vcp2 is referenced only by the commented-out line below; ieo, wt1r,
 // wt1i, wt2r and wt2i are not used.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
847  int Nvc2 = 2 * m_Nvc;
848  int Nvcd = m_Nvc * m_Nd;
849  int Nvcd2 = Nvcd / 2;
850 
 // Offsets of the four m_Nvc-sized blocks inside one site of v2.
851  int id1 = 0;
852  int id2 = m_Nvc;
853  int id3 = m_Nvc * 2;
854  int id4 = m_Nvc * 3;
855 
856  int idir = 1;
857  double bc2 = m_boundary2[idir];
858 
859  double wt1r, wt1i, wt2r, wt2i;
860 
861  int isite = m_arg[itask].isite;
862  int isite_cp = m_arg[itask].isite_cpy;
863 
864  double *w2 = &v2[Nvcd * isite];
865  // double* w1 = &vcp2[Nvcd2*isite_cp];
866  const double *w1
867  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
868 
 // The receive buffer is read only after this wait returns.
869  m_fw_recv[idir]->wait_thread(itask);
870 
871  int iy = 0;
872  for (int it = 0; it < m_Mt; ++it) {
873  for (int iz = 0; iz < m_Mz; ++iz) {
874  for (int ix = 0; ix < m_Nx2; ++ix) {
 // is: field index with full z extent m_Nz; is2: index inside the task's
 // buffer slot, which uses the per-task extent m_Mz.
875  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
876  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
877  int iv = Nvcd * is;
878  int ix1 = Nvc2 * is2;
879  int ix2 = ix1 + m_Nvc;
880 
881  for (int ic = 0; ic < m_Nc; ++ic) {
882  int icr = 2 * ic;
883  int ici = 2 * ic + 1;
884  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
885  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
886  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
887  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
888  w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
889  w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
890  w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
891  w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
892  }
893  }
894  }
895  }
896  }
897 
898 
899 //====================================================================
901  int itask, double *v2, const double *v1, int ieo)
902  {
 // Direction 1, bulk part without communication. For every site of task
 // itask with iy >= 1, combines the four blocks of the source site
 // is - m_Nx2 of v1 (the previous y row) into two vectors, multiplies them
 // by the link data u at site is - m_Nx2 through mult_udagv_r /
 // mult_udagv_i, and accumulates into v2 at site is.
 // NOTE(review): vt1 / vt2 are sized by m_Nvc; unless m_Nvc is a
 // compile-time constant these are variable-length arrays, a compiler
 // extension in C++.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
903  int Nvcd = m_Nvc * m_Nd;
904 
 // Offsets of the four m_Nvc-sized blocks inside one site.
905  int id1 = 0;
906  int id2 = m_Nvc;
907  int id3 = m_Nvc * 2;
908  int id4 = m_Nvc * 3;
909 
910  int idir = 1;
911 
912  double vt1[m_Nvc], vt2[m_Nvc];
913  double wt1r, wt1i, wt2r, wt2i;
914 
915  int isite = m_arg[itask].isite;
916 
917  double *w2 = &v2[Nvcd * isite];
918  const double *w1 = &v1[Nvcd * isite];
 // Link data: block (1 - ieo) (m_Nvol / 2 sites) of direction idir in m_U.
919  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
920 
921  for (int it = 0; it < m_Mt; ++it) {
922  for (int iz = 0; iz < m_Mz; ++iz) {
 // The row iy = 0 is left out here; the receive routine of this direction
 // handles it.
923  for (int iy = 1; iy < m_Ny; ++iy) {
924  for (int ix = 0; ix < m_Nx2; ++ix) {
925  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
926  int iv = Nvcd * is;
927  int in = Nvcd * (is - m_Nx2);
928  int ig = m_Ndf * (is - m_Nx2);
929 
930  for (int ic = 0; ic < m_Nc; ++ic) {
931  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
932  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
933  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
934  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
935  }
936 
937  for (int ic = 0; ic < m_Nc; ++ic) {
938  int ic2 = 2 * ic;
939  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
940  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
941  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
942  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
943 
944  w2[ic2 + id1 + iv] += wt1r;
945  w2[ic2 + 1 + id1 + iv] += wt1i;
946  w2[ic2 + id2 + iv] += wt2r;
947  w2[ic2 + 1 + id2 + iv] += wt2i;
948  w2[ic2 + id3 + iv] += wt2r;
949  w2[ic2 + 1 + id3 + iv] += wt2i;
950  w2[ic2 + id4 + iv] += -wt1r;
951  w2[ic2 + 1 + id4 + iv] += -wt1i;
952  }
953  }
954  }
955  }
956  }
957  }
958 
959 
960 //====================================================================
962  int itask, double *vcp1, const double *v1, int ieo)
963  {
 // Direction 2, backward send. Only tasks with m_arg[itask].kz0 == 1 pack
 // data: the iz = 0 plane of v1, combined into two vectors per site
 // (block 1 with block 3, block 2 with block 4) and scaled by
 // m_boundary2[2], is written to the buffer of m_bw_send[2].
 // start_thread(itask) is called for every task, whether or not it packed
 // anything.
 // vcp1 is referenced only by the commented-out line below; ieo is not used.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
964  int Nvc2 = 2 * m_Nvc;
965  int Nvcd = m_Nvc * m_Nd;
966  int Nvcd2 = Nvcd / 2;
967 
 // Offsets of the four m_Nvc-sized blocks inside one site of v1.
968  int id1 = 0;
969  int id2 = m_Nvc;
970  int id3 = m_Nvc * 2;
971  int id4 = m_Nvc * 3;
972 
973  int idir = 2;
974 
975  int isite = m_arg[itask].isite;
976  int isite_cp = m_arg[itask].isite_cpz;
977 
978  // double* w2 = &vcp1[Nvcd2*isite_cp];
979  double *w2
980  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
981  const double *w1 = &v1[Nvcd * isite];
982 
983  double bc2 = m_boundary2[idir];
984 
985  if (m_arg[itask].kz0 == 1) {
986  int Nxy = m_Nx2 * m_Ny;
987  int iz = 0;
988  for (int it = 0; it < m_Mt; ++it) {
989  for (int ixy = 0; ixy < Nxy; ++ixy) {
 // is: field index with full z extent m_Nz; is2: index inside the task's
 // buffer slot (one xy plane per it).
990  int is = ixy + Nxy * (iz + m_Nz * it);
991  int is2 = ixy + Nxy * it;
992 
993  int in = Nvcd * is;
994  int ix1 = Nvc2 * is2;
995  int ix2 = ix1 + m_Nvc;
996 
997  for (int ic = 0; ic < m_Nc; ++ic) {
998  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
999  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
1000  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1001  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
1002  }
1003  }
1004  }
1005  }
1006 
1007  m_bw_send[idir]->start_thread(itask);
1008  }
1009 
1010 
1011 //====================================================================
1013  int itask, double *v2, const double *vcp2, int ieo)
1014  {
 // Direction 2, backward receive. Every task waits on m_bw_recv[2]. Only
 // tasks with m_arg[itask].kz1 == 1 then use the data: for the
 // iz = m_Mz - 1 plane, the two received vectors of each site are
 // multiplied by the link data u through mult_uv_r / mult_uv_i and
 // accumulated into the four blocks of v2.
 // vcp2 is referenced only by the commented-out line below.
 // NOTE(review): the first signature line (function name) is missing from
 // this listing.
1015  int Nvc2 = 2 * m_Nvc;
1016  int Nvcd = m_Nvc * m_Nd;
1017  int Nvcd2 = Nvcd / 2;
1018 
 // Offsets of the four m_Nvc-sized blocks inside one site of v2.
1019  int id1 = 0;
1020  int id2 = m_Nvc;
1021  int id3 = m_Nvc * 2;
1022  int id4 = m_Nvc * 3;
1023 
1024  int idir = 2;
1025 
1026  double wt1r, wt1i, wt2r, wt2i;
1027 
1028  int isite = m_arg[itask].isite;
1029  int isite_cp = m_arg[itask].isite_cpz;
1030 
1031  double *w2 = &v2[Nvcd * isite];
1032  // double* w1 = &vcp2[Nvcd2*isite_cp];
1033  const double *w1
1034  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
 // Link data: block ieo (m_Nvol / 2 sites) of direction idir in m_U.
1035  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1036 
 // The receive buffer is read only after this wait returns.
1037  m_bw_recv[idir]->wait_thread(itask);
1038 
1039  if (m_arg[itask].kz1 == 1) {
1040  int Nxy = m_Nx2 * m_Ny;
1041  int iz = m_Mz - 1;
1042  for (int it = 0; it < m_Mt; ++it) {
1043  for (int ixy = 0; ixy < Nxy; ++ixy) {
 // is: field index with full z extent m_Nz; is2: index inside the task's
 // buffer slot (one xy plane per it).
1044  int is = ixy + Nxy * (iz + m_Nz * it);
1045  int is2 = ixy + Nxy * it;
1046  int iv = Nvcd * is;
1047  int ig = m_Ndf * is;
1048  int ix1 = Nvc2 * is2;
1049  int ix2 = ix1 + m_Nvc;
1050 
1051  for (int ic = 0; ic < m_Nc; ++ic) {
1052  int ic2 = ic * m_Nvc;
1053 
1054  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1055  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1056  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1057  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1058 
1059  w2[2 * ic + id1 + iv] += wt1r;
1060  w2[2 * ic + 1 + id1 + iv] += wt1i;
1061  w2[2 * ic + id2 + iv] += wt2r;
1062  w2[2 * ic + 1 + id2 + iv] += wt2i;
1063  w2[2 * ic + id3 + iv] += wt1i;
1064  w2[2 * ic + 1 + id3 + iv] += -wt1r;
1065  w2[2 * ic + id4 + iv] += -wt2i;
1066  w2[2 * ic + 1 + id4 + iv] += wt2r;
1067  }
1068  }
1069  }
1070  }
1071  }
1072 
1073 
1074 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_zpb_thread -- confirm
// against the class header.
//
// Interior ("bulk") step for direction index 2 using the neighbour one
// iz-slice ahead (site is + Nxy). For each site, two colour vectors are
// formed from the neighbour's four spin blocks, multiplied by the link at the
// current site (mult_uv_r / mult_uv_i) and accumulated into v2. Tasks with
// kz1 == 1 stop one slice early (iz < m_Mz - kz1), leaving the last slice
// untouched by this routine.
//
// itask : index into m_arg giving this task's site offset and kz1 flag.
// v2    : output field, accumulated into (+=).
// v1    : input field.
// ieo   : selects the link-field half through the ieo * m_Nvol / 2 offset.
1076  int itask, double *v2, const double *v1, int ieo)
1077  {
1078  int Nvcd = m_Nvc * m_Nd;
1079 
1080  int id1 = 0;
1081  int id2 = m_Nvc;
1082  int id3 = m_Nvc * 2;
1083  int id4 = m_Nvc * 3;
1084 
1085  int idir = 2;
1086 
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays, which are a compiler extension in C++ -- confirm.
1087  double vt1[m_Nvc], vt2[m_Nvc];
1088  double wt1r, wt1i, wt2r, wt2i;
1089 
1090  int isite = m_arg[itask].isite;
1091 
1092  double *w2 = &v2[Nvcd * isite];
1093  const double *w1 = &v1[Nvcd * isite];
1094  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1095 
1096  int kz1 = m_arg[itask].kz1;
1097  int Nxy = m_Nx2 * m_Ny;
1098 
1099  for (int it = 0; it < m_Mt; ++it) {
1100  for (int iz = 0; iz < m_Mz - kz1; ++iz) {
1101  for (int ixy = 0; ixy < Nxy; ++ixy) {
// iv / ig address the current site; in addresses the site one iz-slice ahead.
1102  int is = ixy + Nxy * (iz + m_Nz * it);
1103  int iv = Nvcd * is;
1104  int in = Nvcd * (is + Nxy);
1105  int ig = m_Ndf * is;
1106 
// vt1 mixes blocks 1 and 3, vt2 mixes blocks 2 and 4, pairing each real
// part with the other block's imaginary part (and vice versa).
1107  for (int ic = 0; ic < m_Nc; ++ic) {
1108  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
1109  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
1110  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
1111  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
1112  }
1113 
1114  for (int ic = 0; ic < m_Nc; ++ic) {
1115  int ic2 = ic * m_Nvc;
1116 
1117  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1118  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1119  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1120  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1121 
1122  w2[2 * ic + id1 + iv] += wt1r;
1123  w2[2 * ic + 1 + id1 + iv] += wt1i;
1124  w2[2 * ic + id2 + iv] += wt2r;
1125  w2[2 * ic + 1 + id2 + iv] += wt2i;
1126  w2[2 * ic + id3 + iv] += wt1i;
1127  w2[2 * ic + 1 + id3 + iv] += -wt1r;
1128  w2[2 * ic + id4 + iv] += -wt2i;
1129  w2[2 * ic + 1 + id4 + iv] += wt2r;
1130  }
1131  }
1132  }
1133  }
1134  }
1135 
1136 
1137 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_zm1_thread -- confirm
// against the class header.
//
// Send-side boundary step for direction index 2. Tasks flagged kz1 == 1 take
// the slice iz = m_Mz - 1, combine the four spin blocks of v1 into two colour
// vectors, multiply them with mult_udagv_r / mult_udagv_i using the link at
// the same site, and store the result in the m_fw_send[idir] buffer.
// start_thread(itask) is then called by every task, flagged or not.
//
// itask : index into m_arg giving this task's site offset and kz1 flag.
// vcp1  : not written here; the buffer comes from m_fw_send[idir] (see the
//         commented-out line below for the direct-pointer form).
// v1    : input field.
// ieo   : the link-field half is selected with (1 - ieo) * m_Nvol / 2.
1139  int itask, double *vcp1, const double *v1, int ieo)
1140  {
1141  int Nvc2 = 2 * m_Nvc;
1142  int Nvcd = m_Nvc * m_Nd;
1143  int Nvcd2 = Nvcd / 2;
1144 
1145  int id1 = 0;
1146  int id2 = m_Nvc;
1147  int id3 = m_Nvc * 2;
1148  int id4 = m_Nvc * 3;
1149 
1150  int idir = 2;
1151 
1152  int isite = m_arg[itask].isite;
1153  int isite_cp = m_arg[itask].isite_cpz;
1154 
1155  // double* w2 = &vcp1[Nvcd2*isite_cp];
// The argument passed to ptr() is a byte offset (sizeof(double) * ...).
1156  double *w2
1157  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1158  const double *w1 = &v1[Nvcd * isite];
1159  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1160 
1161  double vt1[m_Nvc], vt2[m_Nvc];
1162 
1163  if (m_arg[itask].kz1 == 1) {
1164  int Nxy = m_Nx2 * m_Ny;
1165  int iz = m_Mz - 1;
1166  for (int it = 0; it < m_Mt; ++it) {
1167  for (int ixy = 0; ixy < Nxy; ++ixy) {
// is  : site index within this task's sub-volume.
// is2 : index within the send buffer, which has no iz extent.
1168  int is = ixy + Nxy * (iz + m_Nz * it);
1169  int is2 = ixy + Nxy * it;
1170  int in = Nvcd * is;
1171  int ig = m_Ndf * is;
1172  int ix1 = Nvc2 * is2;
1173  int ix2 = ix1 + m_Nvc;
1174 
// Same block pairing as the forward kernels but with the opposite signs.
1175  for (int ic = 0; ic < m_Nc; ++ic) {
1176  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1177  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1178  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1179  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1180  }
1181 
// Note the link offset here is 2 * ic, not ic * m_Nvc as in the mult_uv calls.
1182  for (int ic = 0; ic < m_Nc; ++ic) {
1183  int icr = 2 * ic;
1184  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1185  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1186  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1187  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1188  }
1189  }
1190  }
1191  }
1192 
1193  m_fw_send[idir]->start_thread(itask);
1194  }
1195 
1196 
1197 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_zm2_thread -- confirm
// against the class header.
//
// Receive-side boundary step for direction index 2. Once wait_thread() has
// returned, tasks flagged kz0 == 1 add the received two-vector data, scaled by
// m_boundary2[idir], into the four spin blocks of v2 on the slice iz = 0.
// No link multiplication happens here.
//
// itask : index into m_arg giving this task's site offset and kz0 flag.
// v2    : output field, accumulated into (+=).
// vcp2  : not read here; the data comes from m_fw_recv[idir].
// ieo   : not used in this body.
1199  int itask, double *v2, const double *vcp2, int ieo)
1200  {
1201  int Nvc2 = 2 * m_Nvc;
1202  int Nvcd = m_Nvc * m_Nd;
1203  int Nvcd2 = Nvcd / 2;
1204 
1205  int id1 = 0;
1206  int id2 = m_Nvc;
1207  int id3 = m_Nvc * 2;
1208  int id4 = m_Nvc * 3;
1209 
1210  int idir = 2;
1211  double bc2 = m_boundary2[idir];
1212 
// Declared but never used in this body.
1213  double wt1r, wt1i, wt2r, wt2i;
1214 
1215  int isite = m_arg[itask].isite;
1216  int isite_cp = m_arg[itask].isite_cpz;
1217 
1218  double *w2 = &v2[Nvcd * isite];
1219  // double* w1 = &vcp2[Nvcd2*isite_cp];
1220  const double *w1
1221  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1222 
// Called by every task, before any element of w1 is read.
1223  m_fw_recv[idir]->wait_thread(itask);
1224 
1225  if (m_arg[itask].kz0 == 1) {
1226  int Nxy = m_Nx2 * m_Ny;
1227 
1228  int iz = 0;
1229  for (int it = 0; it < m_Mt; ++it) {
1230  for (int ixy = 0; ixy < Nxy; ++ixy) {
1231  int is = ixy + Nxy * (iz + m_Nz * it);
1232  int is2 = ixy + Nxy * it;
1233  int iv = Nvcd * is;
1234  int ix1 = Nvc2 * is2;
1235  int ix2 = ix1 + m_Nvc;
1236 
// Blocks 1 and 2 take the received vectors directly; blocks 3 and 4 take
// them with real and imaginary parts swapped and one sign flipped.
1237  for (int ic = 0; ic < m_Nc; ++ic) {
1238  int icr = 2 * ic;
1239  int ici = 2 * ic + 1;
1240  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1241  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1242  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1243  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1244  w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1245  w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1246  w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1247  w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
1248  }
1249  }
1250  }
1251  }
1252  }
1253 
1254 
1255 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_zmb_thread -- confirm
// against the class header.
//
// Interior ("bulk") step for direction index 2 using the neighbour one
// iz-slice behind (site is - Nxy). Both the field values and the link are
// taken from that neighbour; the link is applied with mult_udagv_r /
// mult_udagv_i and the result accumulated into v2. Tasks with kz0 == 1 start
// at iz = 1, leaving the first slice untouched by this routine.
//
// itask : index into m_arg giving this task's site offset and kz0 flag.
// v2    : output field, accumulated into (+=).
// v1    : input field.
// ieo   : the link-field half is selected with (1 - ieo) * m_Nvol / 2.
1257  int itask, double *v2, const double *v1, int ieo)
1258  {
1259  int Nvcd = m_Nvc * m_Nd;
1260 
1261  int id1 = 0;
1262  int id2 = m_Nvc;
1263  int id3 = m_Nvc * 2;
1264  int id4 = m_Nvc * 3;
1265 
1266  int idir = 2;
1267 
1268  double vt1[m_Nvc], vt2[m_Nvc];
1269  double wt1r, wt1i, wt2r, wt2i;
1270 
1271  int isite = m_arg[itask].isite;
1272 
1273  double *w2 = &v2[Nvcd * isite];
1274  const double *w1 = &v1[Nvcd * isite];
1275  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1276 
1277  int kz0 = m_arg[itask].kz0;
1278  int Nxy = m_Nx2 * m_Ny;
1279 
1280  for (int it = 0; it < m_Mt; ++it) {
1281  for (int iz = kz0; iz < m_Mz; ++iz) {
1282  for (int ixy = 0; ixy < Nxy; ++ixy) {
// iv addresses the current site; in and ig both address the site one
// iz-slice behind it.
1283  int is = ixy + Nxy * (iz + m_Nz * it);
1284  int iv = Nvcd * is;
1285  int in = Nvcd * (is - Nxy);
1286  int ig = m_Ndf * (is - Nxy);
1287 
1288  for (int ic = 0; ic < m_Nc; ++ic) {
1289  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1290  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1291  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1292  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1293  }
1294 
1295  for (int ic = 0; ic < m_Nc; ++ic) {
1296  int ic2 = 2 * ic;
1297  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1298  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1299  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1300  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1301 
// Blocks 3 and 4 receive (-wt1i, wt1r) and (wt2i, -wt2r): the opposite
// signs from the forward-direction kernels.
1302  w2[ic2 + id1 + iv] += wt1r;
1303  w2[ic2 + 1 + id1 + iv] += wt1i;
1304  w2[ic2 + id2 + iv] += wt2r;
1305  w2[ic2 + 1 + id2 + iv] += wt2i;
1306  w2[ic2 + id3 + iv] += -wt1i;
1307  w2[ic2 + 1 + id3 + iv] += wt1r;
1308  w2[ic2 + id4 + iv] += wt2i;
1309  w2[ic2 + 1 + id4 + iv] += -wt2r;
1310  }
1311  }
1312  }
1313  }
1314  }
1315 
1316 
1317 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_tp1_dirac_thread --
// confirm against the class header.
//
// Send-side boundary step for direction index 3. Tasks flagged kt0 == 1 copy
// spin blocks 3 and 4 of v1 on the slice it = 0 into the m_bw_send[idir]
// buffer, scaled by 2.0 * m_boundary2[idir]. start_thread(itask) is then
// called by every task, flagged or not.
//
// itask : index into m_arg giving this task's site offset and kt0 flag.
// vcp1  : not written here; the buffer comes from m_bw_send[idir].
// v1    : input field.
// ieo   : not used in this body.
1319  int itask, double *vcp1, const double *v1, int ieo)
1320  {
1321  int Nvc2 = 2 * m_Nvc;
1322  int Nvcd = m_Nvc * m_Nd;
1323  int Nvcd2 = Nvcd / 2;
1324 
// id1 and id2 are declared but not used in this body.
1325  int id1 = 0;
1326  int id2 = m_Nvc;
1327  int id3 = m_Nvc * 2;
1328  int id4 = m_Nvc * 3;
1329 
1330  int idir = 3;
1331 
1332  int isite = m_arg[itask].isite;
1333  int isite_cp = m_arg[itask].isite_cpt;
1334 
1335  // double* w2 = &vcp1[Nvcd2*isite_cp];
// The argument passed to ptr() is a byte offset (sizeof(double) * ...).
1336  double *w2
1337  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1338  const double *w1 = &v1[Nvcd * isite];
1339 
1340  double bc2 = m_boundary2[idir];
1341 
1342  if (m_arg[itask].kt0 == 1) {
1343  int Nxy = m_Nx2 * m_Ny;
1344  int it = 0;
1345  for (int iz = 0; iz < m_Mz; ++iz) {
1346  for (int ixy = 0; ixy < Nxy; ++ixy) {
// is  : site index within this task's sub-volume.
// is2 : index within the send buffer, which has no it extent.
1347  int is = ixy + Nxy * (iz + m_Nz * it);
1348  int is2 = ixy + Nxy * iz;
1349 
1350  int in = Nvcd * is;
1351  int ix1 = Nvc2 * is2;
1352  int ix2 = ix1 + m_Nvc;
1353 
1354  for (int ic = 0; ic < m_Nc; ++ic) {
1355  w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1356  w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1357  w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1358  w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1359  }
1360  }
1361  }
1362  }
1363 
1364  m_bw_send[idir]->start_thread(itask);
1365  }
1366 
1367 
1368 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_tp2_dirac_thread --
// confirm against the class header.
//
// Receive-side boundary step for direction index 3. Once wait_thread() has
// returned, tasks flagged kt1 == 1 process the slice it = m_Mt - 1: the link
// at each site is applied (mult_uv_r / mult_uv_i) to the two received colour
// vectors and the products are accumulated into spin blocks 3 and 4 of v2
// only.
//
// itask : index into m_arg giving this task's site offset and kt1 flag.
// v2    : output field, accumulated into (+=).
// vcp2  : not read here; the data comes from m_bw_recv[idir].
// ieo   : selects the link-field half through the ieo * m_Nvol / 2 offset.
1370  int itask, double *v2, const double *vcp2, int ieo)
1371  {
1372  int Nvc2 = 2 * m_Nvc;
1373  int Nvcd = m_Nvc * m_Nd;
1374  int Nvcd2 = Nvcd / 2;
1375 
// id1 and id2 are declared but not used in this body.
1376  int id1 = 0;
1377  int id2 = m_Nvc;
1378  int id3 = m_Nvc * 2;
1379  int id4 = m_Nvc * 3;
1380 
1381  int idir = 3;
1382 
1383  double wt1r, wt1i, wt2r, wt2i;
1384 
1385  int isite = m_arg[itask].isite;
1386  int isite_cp = m_arg[itask].isite_cpt;
1387 
1388  double *w2 = &v2[Nvcd * isite];
1389  // double* w1 = &vcp2[Nvcd2*isite_cp];
1390  const double *w1
1391  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1392  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1393 
// Called by every task, before any element of w1 is read.
1394  m_bw_recv[idir]->wait_thread(itask);
1395 
1396  if (m_arg[itask].kt1 == 1) {
1397  int Nxy = m_Nx2 * m_Ny;
1398  int it = m_Mt - 1;
1399  for (int iz = 0; iz < m_Mz; ++iz) {
1400  for (int ixy = 0; ixy < Nxy; ++ixy) {
1401  int is = ixy + Nxy * (iz + m_Nz * it);
1402  int is2 = ixy + Nxy * iz;
1403  int iv = Nvcd * is;
1404  int ig = m_Ndf * is;
1405  int ix1 = Nvc2 * is2;
1406  int ix2 = ix1 + m_Nvc;
1407 
1408  for (int ic = 0; ic < m_Nc; ++ic) {
1409  int ic2 = ic * m_Nvc;
1410 
1411  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1412  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1413  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1414  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1415 
1416  w2[2 * ic + id3 + iv] += wt1r;
1417  w2[2 * ic + 1 + id3 + iv] += wt1i;
1418  w2[2 * ic + id4 + iv] += wt2r;
1419  w2[2 * ic + 1 + id4 + iv] += wt2i;
1420  }
1421  }
1422  }
1423  }
1424  }
1425 
1426 
1427 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_tpb_dirac_thread --
// confirm against the class header.
//
// Interior ("bulk") step for direction index 3 using the neighbour one
// it-slice ahead (site is + Nxyz). Spin blocks 3 and 4 of the neighbour are
// doubled, multiplied by the link at the current site (mult_uv_r / mult_uv_i)
// and accumulated into blocks 3 and 4 of v2. Tasks with kt1 == 1 stop one
// slice early (it < m_Mt - kt1).
//
// itask : index into m_arg giving this task's site offset and kt1 flag.
// v2    : output field, accumulated into (+=).
// v1    : input field.
// ieo   : selects the link-field half through the ieo * m_Nvol / 2 offset.
1429  int itask, double *v2, const double *v1, int ieo)
1430  {
1431  int Nvcd = m_Nvc * m_Nd;
1432 
// id1 and id2 are declared but not used in this body.
1433  int id1 = 0;
1434  int id2 = m_Nvc;
1435  int id3 = m_Nvc * 2;
1436  int id4 = m_Nvc * 3;
1437 
1438  int idir = 3;
1439 
1440  double vt1[m_Nvc], vt2[m_Nvc];
1441  double wt1r, wt1i, wt2r, wt2i;
1442 
1443  int isite = m_arg[itask].isite;
1444 
1445  double *w2 = &v2[Nvcd * isite];
1446  const double *w1 = &v1[Nvcd * isite];
1447  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1448 
1449  int kt1 = m_arg[itask].kt1;
1450  int Nxy = m_Nx2 * m_Ny;
1451  int Nxyz = Nxy * m_Nz;
1452 
1453  for (int it = 0; it < m_Mt - kt1; ++it) {
1454  for (int iz = 0; iz < m_Mz; ++iz) {
1455  for (int ixy = 0; ixy < Nxy; ++ixy) {
// iv / ig address the current site; in addresses the site one it-slice ahead.
1456  int is = ixy + Nxy * (iz + m_Nz * it);
1457  int iv = Nvcd * is;
1458  int in = Nvcd * (is + Nxyz);
1459  int ig = m_Ndf * is;
1460 
1461  for (int ic = 0; ic < m_Nc; ++ic) {
1462  vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1463  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1464  vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1465  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1466  }
1467 
1468  for (int ic = 0; ic < m_Nc; ++ic) {
1469  int ic2 = ic * m_Nvc;
1470 
1471  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1472  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1473  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1474  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1475 
1476  w2[2 * ic + id3 + iv] += wt1r;
1477  w2[2 * ic + 1 + id3 + iv] += wt1i;
1478  w2[2 * ic + id4 + iv] += wt2r;
1479  w2[2 * ic + 1 + id4 + iv] += wt2i;
1480  }
1481  }
1482  }
1483  }
1484  }
1485 
1486 
1487 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_tm1_dirac_thread --
// confirm against the class header.
//
// Send-side boundary step for direction index 3. Tasks flagged kt1 == 1 take
// the slice it = m_Mt - 1, double spin blocks 1 and 2 of v1, multiply them
// with mult_udagv_r / mult_udagv_i using the link at the same site, and store
// the result in the m_fw_send[idir] buffer. start_thread(itask) is then
// called by every task, flagged or not.
//
// itask : index into m_arg giving this task's site offset and kt1 flag.
// vcp1  : not written here; the buffer comes from m_fw_send[idir].
// v1    : input field.
// ieo   : the link-field half is selected with (1 - ieo) * m_Nvol / 2.
1489  int itask, double *vcp1, const double *v1, int ieo)
1490  {
1491  int Nvc2 = 2 * m_Nvc;
1492  int Nvcd = m_Nvc * m_Nd;
1493  int Nvcd2 = Nvcd / 2;
1494 
// id3 and id4 are declared but not used in this body.
1495  int id1 = 0;
1496  int id2 = m_Nvc;
1497  int id3 = m_Nvc * 2;
1498  int id4 = m_Nvc * 3;
1499 
1500  int idir = 3;
1501 
1502  int isite = m_arg[itask].isite;
1503  int isite_cp = m_arg[itask].isite_cpt;
1504 
1505  // double* w2 = &vcp1[Nvcd2*isite_cp];
// The argument passed to ptr() is a byte offset (sizeof(double) * ...).
1506  double *w2
1507  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1508  const double *w1 = &v1[Nvcd * isite];
1509  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1510 
1511  double vt1[m_Nvc], vt2[m_Nvc];
1512 
1513  if (m_arg[itask].kt1 == 1) {
1514  int Nxy = m_Nx2 * m_Ny;
1515  int it = m_Mt - 1;
1516  for (int iz = 0; iz < m_Mz; ++iz) {
1517  for (int ixy = 0; ixy < Nxy; ++ixy) {
1518  int is = ixy + Nxy * (iz + m_Nz * it);
1519  int is2 = ixy + Nxy * iz;
1520  int in = Nvcd * is;
1521  int ig = m_Ndf * is;
1522  int ix1 = Nvc2 * is2;
1523  int ix2 = ix1 + m_Nvc;
1524 
1525  for (int ic = 0; ic < m_Nc; ++ic) {
1526  vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1527  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1528  vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1529  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1530  }
1531 
1532  for (int ic = 0; ic < m_Nc; ++ic) {
1533  int icr = 2 * ic;
1534  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1535  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1536  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1537  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1538  }
1539  }
1540  }
1541  }
1542 
1543  m_fw_send[idir]->start_thread(itask);
1544  }
1545 
1546 
1547 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_tm2_dirac_thread --
// confirm against the class header.
//
// Receive-side boundary step for direction index 3. Once wait_thread() has
// returned, tasks flagged kt0 == 1 add the received two-vector data, scaled by
// m_boundary2[idir], into spin blocks 1 and 2 of v2 on the slice it = 0.
// No link multiplication happens here.
//
// itask : index into m_arg giving this task's site offset and kt0 flag.
// v2    : output field, accumulated into (+=).
// vcp2  : not read here; the data comes from m_fw_recv[idir].
// ieo   : not used in this body.
1549  int itask, double *v2, const double *vcp2, int ieo)
1550  {
1551  int Nvc2 = 2 * m_Nvc;
1552  int Nvcd = m_Nvc * m_Nd;
1553  int Nvcd2 = Nvcd / 2;
1554 
// id3 and id4 are declared but not used in this body.
1555  int id1 = 0;
1556  int id2 = m_Nvc;
1557  int id3 = m_Nvc * 2;
1558  int id4 = m_Nvc * 3;
1559 
1560  int idir = 3;
1561  double bc2 = m_boundary2[idir];
1562 
// Declared but never used in this body.
1563  double wt1r, wt1i, wt2r, wt2i;
1564 
1565  int isite = m_arg[itask].isite;
1566  int isite_cp = m_arg[itask].isite_cpt;
1567 
1568  double *w2 = &v2[Nvcd * isite];
1569  // double* w1 = &vcp2[Nvcd2*isite_cp];
1570  const double *w1
1571  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1572 
// Called by every task, before any element of w1 is read.
1573  m_fw_recv[idir]->wait_thread(itask);
1574 
1575  if (m_arg[itask].kt0 == 1) {
1576  int Nxy = m_Nx2 * m_Ny;
1577  int it = 0;
1578  for (int iz = 0; iz < m_Mz; ++iz) {
1579  for (int ixy = 0; ixy < Nxy; ++ixy) {
1580  int is = ixy + Nxy * (iz + m_Nz * it);
1581  int is2 = ixy + Nxy * iz;
1582  int iv = Nvcd * is;
1583  int ix1 = Nvc2 * is2;
1584  int ix2 = ix1 + m_Nvc;
1585 
1586  for (int ic = 0; ic < m_Nc; ++ic) {
1587  int icr = 2 * ic;
1588  int ici = 2 * ic + 1;
1589  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1590  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1591  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1592  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1593  }
1594  }
1595  }
1596  }
1597  }
1598 
1599 
1600 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_tmb_dirac_thread --
// confirm against the class header.
//
// Interior ("bulk") step for direction index 3 using the neighbour one
// it-slice behind (site is - Nxyz). Spin blocks 1 and 2 of the neighbour are
// doubled, multiplied with mult_udagv_r / mult_udagv_i using the neighbour's
// link, and accumulated into blocks 1 and 2 of v2. Tasks with kt0 == 1 start
// at it = 1.
//
// itask : index into m_arg giving this task's site offset and kt0 flag.
// v2    : output field, accumulated into (+=).
// v1    : input field.
// ieo   : the link-field half is selected with (1 - ieo) * m_Nvol / 2.
1602  int itask, double *v2, const double *v1, int ieo)
1603  {
1604  int Nvcd = m_Nvc * m_Nd;
1605 
// id3 and id4 are declared but not used in this body.
1606  int id1 = 0;
1607  int id2 = m_Nvc;
1608  int id3 = m_Nvc * 2;
1609  int id4 = m_Nvc * 3;
1610 
1611  int idir = 3;
1612 
1613  double vt1[m_Nvc], vt2[m_Nvc];
1614  double wt1r, wt1i, wt2r, wt2i;
1615 
1616  int isite = m_arg[itask].isite;
1617 
1618  double *w2 = &v2[Nvcd * isite];
1619  const double *w1 = &v1[Nvcd * isite];
1620  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1621 
1622  int kt0 = m_arg[itask].kt0;
1623  int Nxy = m_Nx2 * m_Ny;
1624  int Nxyz = Nxy * m_Nz;
1625 
1626  for (int it = kt0; it < m_Mt; ++it) {
1627  for (int iz = 0; iz < m_Mz; ++iz) {
1628  for (int ixy = 0; ixy < Nxy; ++ixy) {
// iv addresses the current site; in and ig both address the site one
// it-slice behind it.
1629  int is = ixy + Nxy * (iz + m_Nz * it);
1630  int iv = Nvcd * is;
1631  int in = Nvcd * (is - Nxyz);
1632  int ig = m_Ndf * (is - Nxyz);
1633 
1634  for (int ic = 0; ic < m_Nc; ++ic) {
1635  vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1636  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1637  vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1638  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1639  }
1640 
1641  for (int ic = 0; ic < m_Nc; ++ic) {
1642  int ic2 = 2 * ic;
1643  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1644  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1645  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1646  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1647 
1648  w2[ic2 + id1 + iv] += wt1r;
1649  w2[ic2 + 1 + id1 + iv] += wt1i;
1650  w2[ic2 + id2 + iv] += wt2r;
1651  w2[ic2 + 1 + id2 + iv] += wt2i;
1652  }
1653  }
1654  }
1655  }
1656  }
1657 
1658 
1659 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_tp1_chiral_thread --
// confirm against the class header.
//
// Send-side boundary step for direction index 3. Tasks flagged kt0 == 1 take
// the slice it = 0 and store, in the m_bw_send[idir] buffer, the sums
// (block 1 + block 3) and (block 2 + block 4) of v1 scaled by
// m_boundary2[idir]. start_thread(itask) is then called by every task,
// flagged or not.
//
// itask : index into m_arg giving this task's site offset and kt0 flag.
// vcp1  : not written here; the buffer comes from m_bw_send[idir].
// v1    : input field.
// ieo   : not used in this body.
1661  int itask, double *vcp1, const double *v1, int ieo)
1662  {
1663  int Nvc2 = 2 * m_Nvc;
1664  int Nvcd = m_Nvc * m_Nd;
1665  int Nvcd2 = Nvcd / 2;
1666 
1667  int id1 = 0;
1668  int id2 = m_Nvc;
1669  int id3 = m_Nvc * 2;
1670  int id4 = m_Nvc * 3;
1671 
1672  int idir = 3;
1673 
1674  int isite = m_arg[itask].isite;
1675  int isite_cp = m_arg[itask].isite_cpt;
1676 
1677  // double* w2 = &vcp1[Nvcd2*isite_cp];
// The argument passed to ptr() is a byte offset (sizeof(double) * ...).
1678  double *w2
1679  = (double *)m_bw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1680  const double *w1 = &v1[Nvcd * isite];
1681 
1682  double bc2 = m_boundary2[idir];
1683 
1684  if (m_arg[itask].kt0 == 1) {
1685  int Nxy = m_Nx2 * m_Ny;
1686  int it = 0;
1687  for (int iz = 0; iz < m_Mz; ++iz) {
1688  for (int ixy = 0; ixy < Nxy; ++ixy) {
// is  : site index within this task's sub-volume.
// is2 : index within the send buffer, which has no it extent.
1689  int is = ixy + Nxy * (iz + m_Nz * it);
1690  int is2 = ixy + Nxy * iz;
1691 
1692  int in = Nvcd * is;
1693  int ix1 = Nvc2 * is2;
1694  int ix2 = ix1 + m_Nvc;
1695 
1696  for (int ic = 0; ic < m_Nc; ++ic) {
1697  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1698  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1699  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1700  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1701  }
1702  }
1703  }
1704  }
1705 
1706  m_bw_send[idir]->start_thread(itask);
1707  }
1708 
1709 
1710 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_tp2_chiral_thread --
// confirm against the class header.
//
// Receive-side boundary step for direction index 3. Once wait_thread() has
// returned, tasks flagged kt1 == 1 process the slice it = m_Mt - 1: the link
// at each site is applied (mult_uv_r / mult_uv_i) to the two received colour
// vectors; the first product is added to spin blocks 1 and 3 of v2 and the
// second to blocks 2 and 4.
//
// itask : index into m_arg giving this task's site offset and kt1 flag.
// v2    : output field, accumulated into (+=).
// vcp2  : not read here; the data comes from m_bw_recv[idir].
// ieo   : selects the link-field half through the ieo * m_Nvol / 2 offset.
1712  int itask, double *v2, const double *vcp2, int ieo)
1713  {
1714  int Nvc2 = 2 * m_Nvc;
1715  int Nvcd = m_Nvc * m_Nd;
1716  int Nvcd2 = Nvcd / 2;
1717 
1718  int id1 = 0;
1719  int id2 = m_Nvc;
1720  int id3 = m_Nvc * 2;
1721  int id4 = m_Nvc * 3;
1722 
1723  int idir = 3;
1724 
1725  double wt1r, wt1i, wt2r, wt2i;
1726 
1727  int isite = m_arg[itask].isite;
1728  int isite_cp = m_arg[itask].isite_cpt;
1729 
1730  double *w2 = &v2[Nvcd * isite];
1731  // double* w1 = &vcp2[Nvcd2*isite_cp];
1732  const double *w1
1733  = (double *)m_bw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1734  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1735 
// Called by every task, before any element of w1 is read.
1736  m_bw_recv[idir]->wait_thread(itask);
1737 
1738  if (m_arg[itask].kt1 == 1) {
1739  int Nxy = m_Nx2 * m_Ny;
1740  int it = m_Mt - 1;
1741  for (int iz = 0; iz < m_Mz; ++iz) {
1742  for (int ixy = 0; ixy < Nxy; ++ixy) {
1743  int is = ixy + Nxy * (iz + m_Nz * it);
1744  int is2 = ixy + Nxy * iz;
1745  int iv = Nvcd * is;
1746  int ig = m_Ndf * is;
1747  int ix1 = Nvc2 * is2;
1748  int ix2 = ix1 + m_Nvc;
1749 
1750  for (int ic = 0; ic < m_Nc; ++ic) {
1751  int ic2 = ic * m_Nvc;
1752 
1753  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1754  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1755  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1756  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1757 
1758  w2[2 * ic + id1 + iv] += wt1r;
1759  w2[2 * ic + 1 + id1 + iv] += wt1i;
1760  w2[2 * ic + id2 + iv] += wt2r;
1761  w2[2 * ic + 1 + id2 + iv] += wt2i;
1762  w2[2 * ic + id3 + iv] += wt1r;
1763  w2[2 * ic + 1 + id3 + iv] += wt1i;
1764  w2[2 * ic + id4 + iv] += wt2r;
1765  w2[2 * ic + 1 + id4 + iv] += wt2i;
1766  }
1767  }
1768  }
1769  }
1770  }
1771 
1772 
1773 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_tpb_chiral_thread --
// confirm against the class header.
//
// Interior ("bulk") step for direction index 3 using the neighbour one
// it-slice ahead (site is + Nxyz). The sums (block 1 + block 3) and
// (block 2 + block 4) of the neighbour are multiplied by the link at the
// current site (mult_uv_r / mult_uv_i); the first product is added to blocks
// 1 and 3 of v2 and the second to blocks 2 and 4. Tasks with kt1 == 1 stop
// one slice early (it < m_Mt - kt1).
//
// itask : index into m_arg giving this task's site offset and kt1 flag.
// v2    : output field, accumulated into (+=).
// v1    : input field.
// ieo   : selects the link-field half through the ieo * m_Nvol / 2 offset.
1775  int itask, double *v2, const double *v1, int ieo)
1776  {
1777  int Nvcd = m_Nvc * m_Nd;
1778 
1779  int id1 = 0;
1780  int id2 = m_Nvc;
1781  int id3 = m_Nvc * 2;
1782  int id4 = m_Nvc * 3;
1783 
1784  int idir = 3;
1785 
1786  double vt1[m_Nvc], vt2[m_Nvc];
1787  double wt1r, wt1i, wt2r, wt2i;
1788 
1789  int isite = m_arg[itask].isite;
1790 
1791  double *w2 = &v2[Nvcd * isite];
1792  const double *w1 = &v1[Nvcd * isite];
1793  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1794 
1795  int kt1 = m_arg[itask].kt1;
1796  int Nxy = m_Nx2 * m_Ny;
1797  int Nxyz = Nxy * m_Nz;
1798 
1799  for (int it = 0; it < m_Mt - kt1; ++it) {
1800  for (int iz = 0; iz < m_Mz; ++iz) {
1801  for (int ixy = 0; ixy < Nxy; ++ixy) {
// iv / ig address the current site; in addresses the site one it-slice ahead.
1802  int is = ixy + Nxy * (iz + m_Nz * it);
1803  int iv = Nvcd * is;
1804  int in = Nvcd * (is + Nxyz);
1805  int ig = m_Ndf * is;
1806 
1807  for (int ic = 0; ic < m_Nc; ++ic) {
1808  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1809  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1810  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1811  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
1812  }
1813 
1814  for (int ic = 0; ic < m_Nc; ++ic) {
1815  int ic2 = ic * m_Nvc;
1816 
1817  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1818  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1819  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1820  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1821 
1822  w2[2 * ic + id1 + iv] += wt1r;
1823  w2[2 * ic + 1 + id1 + iv] += wt1i;
1824  w2[2 * ic + id2 + iv] += wt2r;
1825  w2[2 * ic + 1 + id2 + iv] += wt2i;
1826  w2[2 * ic + id3 + iv] += wt1r;
1827  w2[2 * ic + 1 + id3 + iv] += wt1i;
1828  w2[2 * ic + id4 + iv] += wt2r;
1829  w2[2 * ic + 1 + id4 + iv] += wt2i;
1830  }
1831  }
1832  }
1833  }
1834  }
1835 
1836 
1837 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_tm1_chiral_thread --
// confirm against the class header.
//
// Send-side boundary step for direction index 3. Tasks flagged kt1 == 1 take
// the slice it = m_Mt - 1, form the differences (block 1 - block 3) and
// (block 2 - block 4) of v1, multiply them with mult_udagv_r / mult_udagv_i
// using the link at the same site, and store the result in the
// m_fw_send[idir] buffer. start_thread(itask) is then called by every task,
// flagged or not.
//
// itask : index into m_arg giving this task's site offset and kt1 flag.
// vcp1  : not written here; the buffer comes from m_fw_send[idir].
// v1    : input field.
// ieo   : the link-field half is selected with (1 - ieo) * m_Nvol / 2.
1839  int itask, double *vcp1, const double *v1, int ieo)
1840  {
1841  int Nvc2 = 2 * m_Nvc;
1842  int Nvcd = m_Nvc * m_Nd;
1843  int Nvcd2 = Nvcd / 2;
1844 
1845  int id1 = 0;
1846  int id2 = m_Nvc;
1847  int id3 = m_Nvc * 2;
1848  int id4 = m_Nvc * 3;
1849 
1850  int idir = 3;
1851 
1852  int isite = m_arg[itask].isite;
1853  int isite_cp = m_arg[itask].isite_cpt;
1854 
1855  // double* w2 = &vcp1[Nvcd2*isite_cp];
// The argument passed to ptr() is a byte offset (sizeof(double) * ...).
1856  double *w2
1857  = (double *)m_fw_send[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1858  const double *w1 = &v1[Nvcd * isite];
1859  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1860 
1861  double vt1[m_Nvc], vt2[m_Nvc];
1862 
1863  if (m_arg[itask].kt1 == 1) {
1864  int Nxy = m_Nx2 * m_Ny;
1865  int it = m_Mt - 1;
1866  for (int iz = 0; iz < m_Mz; ++iz) {
1867  for (int ixy = 0; ixy < Nxy; ++ixy) {
1868  int is = ixy + Nxy * (iz + m_Nz * it);
1869  int is2 = ixy + Nxy * iz;
1870  int in = Nvcd * is;
1871  int ig = m_Ndf * is;
1872  int ix1 = Nvc2 * is2;
1873  int ix2 = ix1 + m_Nvc;
1874 
1875  for (int ic = 0; ic < m_Nc; ++ic) {
1876  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1877  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1878  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1879  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1880  }
1881 
1882  for (int ic = 0; ic < m_Nc; ++ic) {
1883  int icr = 2 * ic;
1884  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1885  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1886  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1887  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1888  }
1889  }
1890  }
1891  }
1892 
1893  m_fw_send[idir]->start_thread(itask);
1894  }
1895 
1896 
1897 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_tm2_chiral_thread --
// confirm against the class header.
//
// Receive-side boundary step for direction index 3. Once wait_thread() has
// returned, tasks flagged kt0 == 1 take the received two-vector data scaled
// by m_boundary2[idir] and, on the slice it = 0, add it to spin blocks 1 and
// 2 of v2 and subtract it from blocks 3 and 4. No link multiplication
// happens here.
//
// itask : index into m_arg giving this task's site offset and kt0 flag.
// v2    : output field, updated in place (+= / -=).
// vcp2  : not read here; the data comes from m_fw_recv[idir].
// ieo   : not used in this body.
1899  int itask, double *v2, const double *vcp2, int ieo)
1900  {
1901  int Nvc2 = 2 * m_Nvc;
1902  int Nvcd = m_Nvc * m_Nd;
1903  int Nvcd2 = Nvcd / 2;
1904 
1905  int id1 = 0;
1906  int id2 = m_Nvc;
1907  int id3 = m_Nvc * 2;
1908  int id4 = m_Nvc * 3;
1909 
1910  int idir = 3;
1911  double bc2 = m_boundary2[idir];
1912 
// Declared but never used in this body.
1913  double wt1r, wt1i, wt2r, wt2i;
1914 
1915  int isite = m_arg[itask].isite;
1916  int isite_cp = m_arg[itask].isite_cpt;
1917 
1918  double *w2 = &v2[Nvcd * isite];
1919  // double* w1 = &vcp2[Nvcd2*isite_cp];
1920  const double *w1
1921  = (double *)m_fw_recv[idir]->ptr(sizeof(double) * Nvcd2 * isite_cp);
1922 
// Called by every task, before any element of w1 is read.
1923  m_fw_recv[idir]->wait_thread(itask);
1924 
1925  if (m_arg[itask].kt0 == 1) {
1926  int Nxy = m_Nx2 * m_Ny;
1927  int it = 0;
1928  for (int iz = 0; iz < m_Mz; ++iz) {
1929  for (int ixy = 0; ixy < Nxy; ++ixy) {
1930  int is = ixy + Nxy * (iz + m_Nz * it);
1931  int is2 = ixy + Nxy * iz;
1932  int iv = Nvcd * is;
1933  int ix1 = Nvc2 * is2;
1934  int ix2 = ix1 + m_Nvc;
1935 
1936  for (int ic = 0; ic < m_Nc; ++ic) {
1937  int icr = 2 * ic;
1938  int ici = 2 * ic + 1;
1939  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1940  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1941  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1942  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1943  w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1944  w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1945  w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1946  w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
1947  }
1948  }
1949  }
1950  }
1951  }
1952 
1953 
1954 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably mult_tmb_chiral_thread --
// confirm against the class header.
//
// Interior ("bulk") step for direction index 3 using the neighbour one
// it-slice behind (site is - Nxyz). The differences (block 1 - block 3) and
// (block 2 - block 4) of the neighbour are multiplied with mult_udagv_r /
// mult_udagv_i using the neighbour's link; the products are added to blocks
// 1 and 2 of v2 and subtracted from blocks 3 and 4. Tasks with kt0 == 1
// start at it = 1.
//
// itask : index into m_arg giving this task's site offset and kt0 flag.
// v2    : output field, updated in place (+= / -=).
// v1    : input field.
// ieo   : the link-field half is selected with (1 - ieo) * m_Nvol / 2.
1956  int itask, double *v2, const double *v1, int ieo)
1957  {
1958  int Nvcd = m_Nvc * m_Nd;
1959 
1960  int id1 = 0;
1961  int id2 = m_Nvc;
1962  int id3 = m_Nvc * 2;
1963  int id4 = m_Nvc * 3;
1964 
1965  int idir = 3;
1966 
1967  double vt1[m_Nvc], vt2[m_Nvc];
1968  double wt1r, wt1i, wt2r, wt2i;
1969 
1970  int isite = m_arg[itask].isite;
1971 
1972  double *w2 = &v2[Nvcd * isite];
1973  const double *w1 = &v1[Nvcd * isite];
1974  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1975 
1976  int kt0 = m_arg[itask].kt0;
1977  int Nxy = m_Nx2 * m_Ny;
1978  int Nxyz = Nxy * m_Nz;
1979 
1980  for (int it = kt0; it < m_Mt; ++it) {
1981  for (int iz = 0; iz < m_Mz; ++iz) {
1982  for (int ixy = 0; ixy < Nxy; ++ixy) {
// iv addresses the current site; in and ig both address the site one
// it-slice behind it.
1983  int is = ixy + Nxy * (iz + m_Nz * it);
1984  int iv = Nvcd * is;
1985  int in = Nvcd * (is - Nxyz);
1986  int ig = m_Ndf * (is - Nxyz);
1987 
1988  for (int ic = 0; ic < m_Nc; ++ic) {
1989  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1990  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1991  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1992  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1993  }
1994 
1995  for (int ic = 0; ic < m_Nc; ++ic) {
1996  int ic2 = 2 * ic;
1997  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1998  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1999  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2000  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2001 
2002  w2[ic2 + id1 + iv] += wt1r;
2003  w2[ic2 + 1 + id1 + iv] += wt1i;
2004  w2[ic2 + id2 + iv] += wt2r;
2005  w2[ic2 + 1 + id2 + iv] += wt2i;
2006  w2[ic2 + id3 + iv] -= wt1r;
2007  w2[ic2 + 1 + id3 + iv] -= wt1i;
2008  w2[ic2 + id4 + iv] -= wt2r;
2009  w2[ic2 + 1 + id4 + iv] -= wt2i;
2010  }
2011  }
2012  }
2013  }
2014  }
2015 
2016 
2017 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably the two-buffer
// gm5_dirac_thread -- confirm against the class header.
//
// Copies v1 into v2 over this task's sub-volume with the spin blocks
// exchanged pairwise: block 1 <- block 3, block 2 <- block 4,
// block 3 <- block 1, block 4 <- block 2. v2 is overwritten, not accumulated.
//
// itask : index into m_arg giving this task's site offset.
// v2    : output field.
// v1    : input field.
// NOTE(review): the element-wise copy assumes v1 and v2 do not alias --
// confirm with callers.
2019  int itask, double *v2, const double *v1)
2020  {
2021  int Nvcd = m_Nvc * m_Nd;
2022  int Nxy = m_Nx2 * m_Ny;
2023 
2024  int id1 = 0;
2025  int id2 = m_Nvc;
2026  int id3 = m_Nvc * 2;
2027  int id4 = m_Nvc * 3;
2028 
2029  int isite = m_arg[itask].isite;
2030  double *w2 = &v2[Nvcd * isite];
2031  const double *w1 = &v1[Nvcd * isite];
2032 
2033  for (int it = 0; it < m_Mt; ++it) {
2034  for (int iz = 0; iz < m_Mz; ++iz) {
2035  for (int ixy = 0; ixy < Nxy; ++ixy) {
// The it stride is m_Nz (not m_Mz) slices of Nxy sites.
2036  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2037  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2038  w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2039  w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2040  w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2041  w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
2042  }
2043  }
2044  }
2045  }
2046  }
2047 
2048 
2049 //====================================================================
// NOTE(review): the declarator line (return type and function name) is absent
// from this listing; by position this is presumably the two-buffer
// gm5_chiral_thread -- confirm against the class header.
//
// Copies v1 into v2 over this task's sub-volume, keeping spin blocks 1 and 2
// unchanged and negating blocks 3 and 4. v2 is overwritten, not accumulated.
//
// itask : index into m_arg giving this task's site offset.
// v2    : output field.
// v1    : input field.
2051  int itask, double *v2, const double *v1)
2052  {
2053  int Nvcd = m_Nvc * m_Nd;
2054  int Nxy = m_Nx2 * m_Ny;
2055 
2056  int id1 = 0;
2057  int id2 = m_Nvc;
2058  int id3 = m_Nvc * 2;
2059  int id4 = m_Nvc * 3;
2060 
2061  int isite = m_arg[itask].isite;
2062  double *w2 = &v2[Nvcd * isite];
2063  const double *w1 = &v1[Nvcd * isite];
2064 
2065  for (int it = 0; it < m_Mt; ++it) {
2066  for (int iz = 0; iz < m_Mz; ++iz) {
2067  for (int ixy = 0; ixy < Nxy; ++ixy) {
// The it stride is m_Nz (not m_Mz) slices of Nxy sites.
2068  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2069  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2070  w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2071  w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2072  w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2073  w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2074  }
2075  }
2076  }
2077  }
2078  }
2079 
2080 
2081 //====================================================================
// NOTE(review): the declarator line (return type, function name and the
// leading parameter) is absent from this listing. The body reads `itask`, so
// that is presumably the missing first parameter, and by position this looks
// like an in-place overload of gm5_dirac_thread -- confirm against the class
// header.
//
// In-place variant: over this task's sub-volume, swaps spin block 1 with
// block 3 and block 2 with block 4 of v1, using two temporaries per element.
//
// v1 : field modified in place.
2083  double *v1)
2084  {
2085  int Nvcd = m_Nvc * m_Nd;
2086  int Nxy = m_Nx2 * m_Ny;
2087 
2088  int id1 = 0;
2089  int id2 = m_Nvc;
2090  int id3 = m_Nvc * 2;
2091  int id4 = m_Nvc * 3;
2092 
2093  int isite = m_arg[itask].isite;
2094  double *w1 = &v1[Nvcd * isite];
2095 
2096  for (int it = 0; it < m_Mt; ++it) {
2097  for (int iz = 0; iz < m_Mz; ++iz) {
2098  for (int ixy = 0; ixy < Nxy; ++ixy) {
2099  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2100  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
// Save blocks 1 and 2 before they are overwritten by blocks 3 and 4.
2101  double wt1 = w1[ivc + id1 + iv];
2102  double wt2 = w1[ivc + id2 + iv];
2103  w1[ivc + id1 + iv] = w1[ivc + id3 + iv];
2104  w1[ivc + id2 + iv] = w1[ivc + id4 + iv];
2105  w1[ivc + id3 + iv] = wt1;
2106  w1[ivc + id4 + iv] = wt2;
2107  }
2108  }
2109  }
2110  }
2111  }
2112 
2113 
2114 //====================================================================
// NOTE(review): the declarator line (return type, function name and the
// leading parameter) is absent from this listing. The body reads `itask`, so
// that is presumably the missing first parameter, and by position this looks
// like an in-place overload of gm5_chiral_thread -- confirm against the class
// header.
//
// In-place variant: over this task's sub-volume, negates spin blocks 3 and 4
// of v1 and leaves blocks 1 and 2 untouched.
//
// v1 : field modified in place.
2116  double *v1)
2117  {
2118  int Nvcd = m_Nvc * m_Nd;
2119  int Nxy = m_Nx2 * m_Ny;
2120 
// id1 and id2 are declared but not used in this body.
2121  int id1 = 0;
2122  int id2 = m_Nvc;
2123  int id3 = m_Nvc * 2;
2124  int id4 = m_Nvc * 3;
2125 
2126  int isite = m_arg[itask].isite;
2127  double *w1 = &v1[Nvcd * isite];
2128 
2129  for (int it = 0; it < m_Mt; ++it) {
2130  for (int iz = 0; iz < m_Mz; ++iz) {
2131  for (int ixy = 0; ixy < Nxy; ++ixy) {
2132  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2133  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2134  w1[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2135  w1[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2136  }
2137  }
2138  }
2139  }
2140  }
2141 
2142 
2143 //====================================================================
2144 }
2145 //============================================================END=====
std::vector< Channel * > m_fw_send
BridgeIO vout
Definition: bridgeIO.cpp:495
void mult_tm1_chiral_thread(int, double *, const double *, int)
void mult_tm1_dirac_thread(int, double *, const double *, int)
std::vector< double > m_boundary2
boundary condition (b.c.) for each node.
const double * ptr(const int jin, const int site, const int jex) const
Definition: field.h:142
std::vector< int > m_Leo
void mult_yp2_thread(int, double *, const double *, int)
void general(const char *format,...)
Definition: bridgeIO.cpp:195
static const std::string class_name
void mult_tp2_dirac_thread(int, double *, const double *, int)
std::vector< mult_arg > m_arg
void mult_ypb_thread(int, double *, const double *, int)
void gm5_chiral_thread(int, double *, const double *)
void mult_yp1_thread(int, double *, const double *, int)
void Meo(Field &, const Field &, const int ieo)
void mult_tm2_chiral_thread(int, double *, const double *, int)
void mult_ym2_thread(int, double *, const double *, int)
void scal_thread(int, double *, double)
void mult_xm1_thread(int, double *, const double *, int)
void mult_zp2_thread(int, double *, const double *, int)
void mult_tp1_dirac_thread(int, double *, const double *, int)
void gm5_dirac_thread(int, double *, const double *)
void mult_xp2_thread(int, double *, const double *, int)
void mult_zp1_thread(int, double *, const double *, int)
Bridge::VerboseLevel m_vl
Definition: fopr.h:128
void mult_tpb_dirac_thread(int, double *, const double *, int)
void mult_zmb_thread(int, double *, const double *, int)
void mult_tp1_chiral_thread(int, double *, const double *, int)
void mult_zpb_thread(int, double *, const double *, int)
void mult_tm2_dirac_thread(int, double *, const double *, int)
void mult_xmb_thread(int, double *, const double *, int)
void mult_tpb_chiral_thread(int, double *, const double *, int)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
Field_G * m_U
dummy: points to m_Ueo.
void crucial(const char *format,...)
Definition: bridgeIO.cpp:178
std::vector< Channel * > m_bw_recv
void mult_tp2_chiral_thread(int, double *, const double *, int)
void mult_ymb_thread(int, double *, const double *, int)
void mult_tmb_dirac_thread(int, double *, const double *, int)
void mult_ym1_thread(int, double *, const double *, int)
void mult_xm2_thread(int, double *, const double *, int)
void mult_zm2_thread(int, double *, const double *, int)
void mult_xpb_thread(int, double *, const double *, int)
void mult_zm1_thread(int, double *, const double *, int)
std::vector< Channel * > m_fw_recv
void mult_tmb_chiral_thread(int, double *, const double *, int)
void mult_xp1_thread(int, double *, const double *, int)
std::vector< Channel * > m_bw_send