Bridge++  Version 1.4.4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fopr_Wilson_impl_thread.cpp
Go to the documentation of this file.
1 
14 #include <string>
15 using std::string;
16 
17 #include "fopr_Wilson_impl.h"
18 
20 
21 namespace Imp {
22 #if defined USE_GROUP_SU3
23 #include "fopr_Wilson_impl_SU3.inc"
24 #elif defined USE_GROUP_SU2
25 #include "fopr_Wilson_impl_SU2.inc"
26 #elif defined USE_GROUP_SU_N
27 #include "fopr_Wilson_impl_SU_N.inc"
28 #endif
29 
30 // const std::string Fopr_Wilson::class_name = "Fopr_Wilson";
31 
32 //====================================================================
34  {
36 
37  // The following setup corresponds to uniform division of volume.
38  if (m_Nthread <= m_Nt) {
40  } else if (m_Nthread <= m_Nz * m_Nt) {
41  m_Ntask_t = m_Nt;
42  } else {
43  vout.crucial(m_vl, "Error at %s: Too large Nthread = %d\n", class_name.c_str(), m_Nthread);
44  exit(EXIT_FAILURE);
45  }
46 
48 
49  if (m_Ntask_z * m_Ntask_t != m_Nthread) {
50  vout.crucial(m_vl, "Error at %s: Nz = %d and Nt = %d do not match Nthread = %d\n",
51  class_name.c_str(), m_Nz, m_Nt, m_Nthread);
52  exit(EXIT_FAILURE);
53  }
54 
56  m_Mz = m_Nz / m_Ntask_z;
57  m_Mt = m_Nt / m_Ntask_t;
58 
59  if (m_Mz * m_Ntask_z != m_Nz) {
60  vout.crucial(m_vl, "Error at %s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
61  class_name.c_str(), m_Mz, m_Ntask_z, m_Nz);
62  exit(EXIT_FAILURE);
63  }
64 
65  if (m_Mt * m_Ntask_t != m_Nt) {
66  vout.crucial(m_vl, "Error at %s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
67  class_name.c_str(), m_Mt, m_Ntask_t, m_Nt);
68  exit(EXIT_FAILURE);
69  }
70 
71  // The following setup is not monotonic division, and requires
72  // barrier at the beginning and end of mult (D and gamma5).
73  // [H.Matsufuru 22 Oct 2013]
74  // if(m_Nthread >= 64){
75  // m_Ntask_z = 8;
76  // }else if(m_Nthread >= 16){
77  // m_Ntask_z = 4;
78  // }else if(m_Nthread >= 4){
79  // m_Ntask_z = 2;
80  // }else{
81  // m_Ntask_z = 1;
82  // }
83  // m_Ntask_t = m_Nthread/m_Ntask_z;
84  // m_Ntask = m_Ntask_t * m_Ntask_z;
85  // m_Mz = m_Nz/m_Ntask_z;
86  // m_Mt = m_Nt/m_Ntask_t;
87 
88  vout.general(m_vl, " Nthread = %d\n", m_Nthread);
89  vout.general(m_vl, " Ntask = %d\n", m_Ntask);
90  vout.general(m_vl, " Ntask_z = %d Ntask_t = %d\n", m_Ntask_z, m_Ntask_t);
91  vout.general(m_vl, " Mz = %d Mt = %d\n", m_Mz, m_Mt);
92 
93  // setup of arguments
94  int Nxy = m_Nx * m_Ny;
95  m_arg.resize(m_Ntask);
96  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
97  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
98  int itask = ith_z + m_Ntask_z * ith_t;
99 
100  m_arg[itask].isite = (ith_z * m_Mz + ith_t * (m_Nz * m_Mt)) * Nxy;
101 
102  m_arg[itask].kt0 = 0;
103  m_arg[itask].kt1 = 0;
104  m_arg[itask].kz0 = 0;
105  m_arg[itask].kz1 = 0;
106  if (ith_t == 0) m_arg[itask].kt0 = 1;
107  if (ith_z == 0) m_arg[itask].kz0 = 1;
108  if (ith_t == m_Ntask_t - 1) m_arg[itask].kt1 = 1;
109  if (ith_z == m_Ntask_z - 1) m_arg[itask].kz1 = 1;
110 
111  m_arg[itask].isite_cpx = itask * m_Mz * m_Mt * m_Ny;
112  m_arg[itask].isite_cpy = itask * m_Mz * m_Mt * m_Nx;
113  m_arg[itask].isite_cpz = ith_t * m_Mt * Nxy;
114  m_arg[itask].isite_cpt = ith_z * m_Mz * Nxy;
115  }
116  }
117  }
118 
119 
120 //====================================================================
122  double *v2, double fac, const double *v1)
123  {
124  int Nvcd = m_Nvc * m_Nd;
125  int Nvxy = Nvcd * m_Nx * m_Ny;
126 
127  int isite = m_arg[itask].isite;
128  double *w2 = &v2[Nvcd * isite];
129  const double *w1 = &v1[Nvcd * isite];
130 
131  for (int it = 0; it < m_Mt; ++it) {
132  for (int iz = 0; iz < m_Mz; ++iz) {
133  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
134  int iv = ivxy + Nvxy * (iz + m_Nz * it);
135  w2[iv] = fac * w2[iv] + w1[iv];
136  }
137  }
138  }
139  }
140 
141 
142 //====================================================================
144  double *v2)
145  {
146  int Nvcd = m_Nvc * m_Nd;
147  int Nvxy = Nvcd * m_Nx * m_Ny;
148 
149  int isite = m_arg[itask].isite;
150  double *w2 = &v2[Nvcd * isite];
151 
152  for (int it = 0; it < m_Mt; ++it) {
153  for (int iz = 0; iz < m_Mz; ++iz) {
154  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
155  int iv = ivxy + Nvxy * (iz + m_Nz * it);
156  w2[iv] = 0.0;
157  }
158  }
159  }
160  }
161 
162 
163 //====================================================================
165  double *vcp1, const double *v1)
166  {
167  int Nvc2 = 2 * m_Nvc;
168  int Nvcd = m_Nvc * m_Nd;
169  int Nvcd2 = Nvcd / 2;
170 
171  int id1 = 0;
172  int id2 = m_Nvc;
173  int id3 = m_Nvc * 2;
174  int id4 = m_Nvc * 3;
175 
176  int isite = m_arg[itask].isite;
177  int isite_cp = m_arg[itask].isite_cpx;
178 
179  double *w2 = &vcp1[Nvcd2 * isite_cp];
180  const double *w1 = &v1[Nvcd * isite];
181 
182  int idir = 0;
183  double bc2 = m_boundary2[idir];
184 
185  int ix = 0;
186 
187  for (int it = 0; it < m_Mt; ++it) {
188  for (int iz = 0; iz < m_Mz; ++iz) {
189  for (int iy = 0; iy < m_Ny; ++iy) {
190  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
191  int is2 = iy + m_Ny * (iz + m_Mz * it);
192  int in = Nvcd * is;
193  int ix1 = Nvc2 * is2;
194  int ix2 = ix1 + m_Nvc;
195 
196  for (int ic = 0; ic < m_Nc; ++ic) {
197  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
198  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
199  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
200  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
201  }
202  }
203  }
204  }
205  }
206 
207 
208 //====================================================================
210  double *v2, const double *vcp2)
211  {
212  int Nvc2 = 2 * m_Nvc;
213  int Nvcd = m_Nvc * m_Nd;
214  int Nvcd2 = Nvcd / 2;
215 
216  int id1 = 0;
217  int id2 = m_Nvc;
218  int id3 = m_Nvc * 2;
219  int id4 = m_Nvc * 3;
220 
221  int idir = 0;
222 
223  double wt1r, wt1i, wt2r, wt2i;
224 
225  int isite = m_arg[itask].isite;
226  int isite_cp = m_arg[itask].isite_cpx;
227 
228  double *w2 = &v2[Nvcd * isite];
229  const double *w1 = &vcp2[Nvcd2 * isite_cp];
230  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
231 
232  int ix = m_Nx - 1;
233 
234  for (int it = 0; it < m_Mt; ++it) {
235  for (int iz = 0; iz < m_Mz; ++iz) {
236  for (int iy = 0; iy < m_Ny; ++iy) {
237  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
238  int is2 = iy + m_Ny * (iz + m_Mz * it);
239  int iv = Nvcd * is;
240  int ig = m_Ndf * is;
241  int ix1 = Nvc2 * is2;
242  int ix2 = ix1 + m_Nvc;
243 
244  for (int ic = 0; ic < m_Nc; ++ic) {
245  int ic2 = ic * m_Nvc;
246 
247  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
248  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
249  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
250  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
251 
252  w2[2 * ic + id1 + iv] += wt1r;
253  w2[2 * ic + 1 + id1 + iv] += wt1i;
254  w2[2 * ic + id2 + iv] += wt2r;
255  w2[2 * ic + 1 + id2 + iv] += wt2i;
256  w2[2 * ic + id3 + iv] += wt2i;
257  w2[2 * ic + 1 + id3 + iv] += -wt2r;
258  w2[2 * ic + id4 + iv] += wt1i;
259  w2[2 * ic + 1 + id4 + iv] += -wt1r;
260  }
261  }
262  }
263  }
264  }
265 
266 
267 //====================================================================
269  double *v2, const double *v1)
270  {
271  int Nvcd = m_Nvc * m_Nd;
272 
273  int id1 = 0;
274  int id2 = m_Nvc;
275  int id3 = m_Nvc * 2;
276  int id4 = m_Nvc * 3;
277 
278  int idir = 0;
279 
280  double vt1[m_Nvc], vt2[m_Nvc];
281  double wt1r, wt1i, wt2r, wt2i;
282 
283  int isite = m_arg[itask].isite;
284 
285  double *w2 = &v2[Nvcd * isite];
286  const double *w1 = &v1[Nvcd * isite];
287  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
288 
289  for (int it = 0; it < m_Mt; ++it) {
290  for (int iz = 0; iz < m_Mz; ++iz) {
291  for (int iy = 0; iy < m_Ny; ++iy) {
292  for (int ix = 0; ix < m_Nx - 1; ++ix) {
293  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
294  int iv = Nvcd * is;
295  int in = Nvcd * (is + 1);
296  int ig = m_Ndf * is;
297 
298  for (int ic = 0; ic < m_Nc; ++ic) {
299  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
300  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
301  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
302  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
303  }
304 
305  for (int ic = 0; ic < m_Nc; ++ic) {
306  int ic2 = ic * m_Nvc;
307 
308  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
309  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
310  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
311  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
312 
313  w2[2 * ic + id1 + iv] += wt1r;
314  w2[2 * ic + 1 + id1 + iv] += wt1i;
315  w2[2 * ic + id2 + iv] += wt2r;
316  w2[2 * ic + 1 + id2 + iv] += wt2i;
317  w2[2 * ic + id3 + iv] += wt2i;
318  w2[2 * ic + 1 + id3 + iv] += -wt2r;
319  w2[2 * ic + id4 + iv] += wt1i;
320  w2[2 * ic + 1 + id4 + iv] += -wt1r;
321  }
322  }
323  }
324  }
325  }
326  }
327 
328 
329 //====================================================================
331  double *vcp1, const double *v1)
332  {
333  int Nvc2 = 2 * m_Nvc;
334  int Nvcd = m_Nvc * m_Nd;
335  int Nvcd2 = Nvcd / 2;
336 
337  int id1 = 0;
338  int id2 = m_Nvc;
339  int id3 = m_Nvc * 2;
340  int id4 = m_Nvc * 3;
341 
342  int idir = 0;
343 
344  int isite = m_arg[itask].isite;
345  int isite_cp = m_arg[itask].isite_cpx;
346 
347  double *w2 = &vcp1[Nvcd2 * isite_cp];
348  const double *w1 = &v1[Nvcd * isite];
349  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
350 
351  double vt1[m_Nvc], vt2[m_Nvc];
352 
353  int ix = m_Nx - 1;
354 
355  for (int it = 0; it < m_Mt; ++it) {
356  for (int iz = 0; iz < m_Mz; ++iz) {
357  for (int iy = 0; iy < m_Ny; ++iy) {
358  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
359  int is2 = iy + m_Ny * (iz + m_Mz * it);
360  int in = Nvcd * is;
361  int ig = m_Ndf * is;
362  int ix1 = Nvc2 * is2;
363  int ix2 = ix1 + m_Nvc;
364 
365  for (int ic = 0; ic < m_Nc; ++ic) {
366  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
367  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
368  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
369  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
370  }
371 
372  for (int ic = 0; ic < m_Nc; ++ic) {
373  int icr = 2 * ic;
374  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
375  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
376  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
377  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
378  }
379  }
380  }
381  }
382  }
383 
384 
385 //====================================================================
387  double *v2, const double *vcp2)
388  {
389  int Nvc2 = 2 * m_Nvc;
390  int Nvcd = m_Nvc * m_Nd;
391  int Nvcd2 = Nvcd / 2;
392 
393  int id1 = 0;
394  int id2 = m_Nvc;
395  int id3 = m_Nvc * 2;
396  int id4 = m_Nvc * 3;
397 
398  int idir = 0;
399  double bc2 = m_boundary2[idir];
400 
401  // double wt1r, wt1i, wt2r, wt2i;
402 
403  int isite = m_arg[itask].isite;
404  int isite_cp = m_arg[itask].isite_cpx;
405 
406  double *w2 = &v2[Nvcd * isite];
407  const double *w1 = &vcp2[Nvcd2 * isite_cp];
408 
409  int ix = 0;
410 
411  for (int it = 0; it < m_Mt; ++it) {
412  for (int iz = 0; iz < m_Mz; ++iz) {
413  for (int iy = 0; iy < m_Ny; ++iy) {
414  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
415  int is2 = iy + m_Ny * (iz + m_Mz * it);
416  int iv = Nvcd * is;
417  int ix1 = Nvc2 * is2;
418  int ix2 = ix1 + m_Nvc;
419 
420  for (int ic = 0; ic < m_Nc; ++ic) {
421  int icr = 2 * ic;
422  int ici = 2 * ic + 1;
423  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
424  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
425  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
426  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
427  w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
428  w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
429  w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
430  w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
431  }
432  }
433  }
434  }
435  }
436 
437 
438 //====================================================================
440  double *v2, const double *v1)
441  {
442  int Nvcd = m_Nvc * m_Nd;
443 
444  int id1 = 0;
445  int id2 = m_Nvc;
446  int id3 = m_Nvc * 2;
447  int id4 = m_Nvc * 3;
448 
449  int idir = 0;
450 
451  double vt1[m_Nvc], vt2[m_Nvc];
452  double wt1r, wt1i, wt2r, wt2i;
453 
454  int isite = m_arg[itask].isite;
455 
456  double *w2 = &v2[Nvcd * isite];
457  const double *w1 = &v1[Nvcd * isite];
458  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
459 
460  for (int it = 0; it < m_Mt; ++it) {
461  for (int iz = 0; iz < m_Mz; ++iz) {
462  for (int iy = 0; iy < m_Ny; ++iy) {
463  for (int ix = 1; ix < m_Nx; ++ix) {
464  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
465  int iv = Nvcd * is;
466  int in = Nvcd * (is - 1);
467  int ig = m_Ndf * (is - 1);
468 
469  for (int ic = 0; ic < m_Nc; ++ic) {
470  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
471  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
472  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
473  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
474  }
475 
476  for (int ic = 0; ic < m_Nc; ++ic) {
477  int ic2 = 2 * ic;
478 
479  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
480  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
481  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
482  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
483 
484  w2[2 * ic + id1 + iv] += wt1r;
485  w2[2 * ic + 1 + id1 + iv] += wt1i;
486  w2[2 * ic + id2 + iv] += wt2r;
487  w2[2 * ic + 1 + id2 + iv] += wt2i;
488  w2[2 * ic + id3 + iv] += -wt2i;
489  w2[2 * ic + 1 + id3 + iv] += +wt2r;
490  w2[2 * ic + id4 + iv] += -wt1i;
491  w2[2 * ic + 1 + id4 + iv] += +wt1r;
492  }
493  }
494  }
495  }
496  }
497  }
498 
499 
500 //====================================================================
502  double *vcp1, const double *v1)
503  {
504  int Nvc2 = 2 * m_Nvc;
505  int Nvcd = m_Nvc * m_Nd;
506  int Nvcd2 = Nvcd / 2;
507 
508  int id1 = 0;
509  int id2 = m_Nvc;
510  int id3 = m_Nvc * 2;
511  int id4 = m_Nvc * 3;
512 
513  int isite = m_arg[itask].isite;
514  int isite_cp = m_arg[itask].isite_cpy;
515 
516  double *w2 = &vcp1[Nvcd2 * isite_cp];
517  const double *w1 = &v1[Nvcd * isite];
518 
519  int idir = 1;
520  double bc2 = m_boundary2[idir];
521 
522  int iy = 0;
523 
524  for (int it = 0; it < m_Mt; ++it) {
525  for (int iz = 0; iz < m_Mz; ++iz) {
526  for (int ix = 0; ix < m_Nx; ++ix) {
527  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
528  int is2 = ix + m_Nx * (iz + m_Mz * it);
529  int in = Nvcd * is;
530  int ix1 = Nvc2 * is2;
531  int ix2 = ix1 + m_Nvc;
532 
533  for (int ic = 0; ic < m_Nc; ++ic) {
534  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
535  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
536  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
537  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
538  }
539  }
540  }
541  }
542  }
543 
544 
545 //====================================================================
547  double *v2, const double *vcp2)
548  {
549  int Nvc2 = 2 * m_Nvc;
550  int Nvcd = m_Nvc * m_Nd;
551  int Nvcd2 = Nvcd / 2;
552 
553  int id1 = 0;
554  int id2 = m_Nvc;
555  int id3 = m_Nvc * 2;
556  int id4 = m_Nvc * 3;
557 
558  int idir = 1;
559 
560  double wt1r, wt1i, wt2r, wt2i;
561 
562  int isite = m_arg[itask].isite;
563  int isite_cp = m_arg[itask].isite_cpy;
564 
565  double *w2 = &v2[Nvcd * isite];
566  const double *w1 = &vcp2[Nvcd2 * isite_cp];
567  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
568 
569  int iy = m_Ny - 1;
570 
571  for (int it = 0; it < m_Mt; ++it) {
572  for (int iz = 0; iz < m_Mz; ++iz) {
573  for (int ix = 0; ix < m_Nx; ++ix) {
574  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
575  int is2 = ix + m_Nx * (iz + m_Mz * it);
576  int iv = Nvcd * is;
577  int ig = m_Ndf * is;
578  int ix1 = Nvc2 * is2;
579  int ix2 = ix1 + m_Nvc;
580 
581  for (int ic = 0; ic < m_Nc; ++ic) {
582  int ic2 = ic * m_Nvc;
583 
584  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
585  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
586  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
587  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
588 
589  w2[2 * ic + id1 + iv] += wt1r;
590  w2[2 * ic + 1 + id1 + iv] += wt1i;
591  w2[2 * ic + id2 + iv] += wt2r;
592  w2[2 * ic + 1 + id2 + iv] += wt2i;
593  w2[2 * ic + id3 + iv] += -wt2r;
594  w2[2 * ic + 1 + id3 + iv] += -wt2i;
595  w2[2 * ic + id4 + iv] += wt1r;
596  w2[2 * ic + 1 + id4 + iv] += wt1i;
597  }
598  }
599  }
600  }
601  }
602 
603 
604 //====================================================================
606  double *v2, const double *v1)
607  {
608  int Nvcd = m_Nvc * m_Nd;
609 
610  int id1 = 0;
611  int id2 = m_Nvc;
612  int id3 = m_Nvc * 2;
613  int id4 = m_Nvc * 3;
614 
615  int idir = 1;
616 
617  double vt1[m_Nvc], vt2[m_Nvc];
618  double wt1r, wt1i, wt2r, wt2i;
619 
620  int isite = m_arg[itask].isite;
621 
622  double *w2 = &v2[Nvcd * isite];
623  const double *w1 = &v1[Nvcd * isite];
624  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
625 
626  for (int it = 0; it < m_Mt; ++it) {
627  for (int iz = 0; iz < m_Mz; ++iz) {
628  for (int iy = 0; iy < m_Ny - 1; ++iy) {
629  for (int ix = 0; ix < m_Nx; ++ix) {
630  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
631  int iv = Nvcd * is;
632  int in = Nvcd * (is + m_Nx);
633  int ig = m_Ndf * is;
634 
635  for (int ic = 0; ic < m_Nc; ++ic) {
636  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
637  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
638  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
639  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
640  }
641 
642  for (int ic = 0; ic < m_Nc; ++ic) {
643  int ic2 = ic * m_Nvc;
644 
645  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
646  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
647  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
648  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
649 
650  w2[2 * ic + id1 + iv] += wt1r;
651  w2[2 * ic + 1 + id1 + iv] += wt1i;
652  w2[2 * ic + id2 + iv] += wt2r;
653  w2[2 * ic + 1 + id2 + iv] += wt2i;
654  w2[2 * ic + id3 + iv] += -wt2r;
655  w2[2 * ic + 1 + id3 + iv] += -wt2i;
656  w2[2 * ic + id4 + iv] += wt1r;
657  w2[2 * ic + 1 + id4 + iv] += wt1i;
658  }
659  }
660  }
661  }
662  }
663  }
664 
665 
666 //====================================================================
668  double *vcp1, const double *v1)
669  {
670  int Nvc2 = 2 * m_Nvc;
671  int Nvcd = m_Nvc * m_Nd;
672  int Nvcd2 = Nvcd / 2;
673 
674  int id1 = 0;
675  int id2 = m_Nvc;
676  int id3 = m_Nvc * 2;
677  int id4 = m_Nvc * 3;
678 
679  int idir = 1;
680 
681  int isite = m_arg[itask].isite;
682  int isite_cp = m_arg[itask].isite_cpy;
683 
684  double *w2 = &vcp1[Nvcd2 * isite_cp];
685  const double *w1 = &v1[Nvcd * isite];
686  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
687 
688  double vt1[m_Nvc], vt2[m_Nvc];
689 
690  int iy = m_Ny - 1;
691 
692  for (int it = 0; it < m_Mt; ++it) {
693  for (int iz = 0; iz < m_Mz; ++iz) {
694  for (int ix = 0; ix < m_Nx; ++ix) {
695  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
696  int is2 = ix + m_Nx * (iz + m_Mz * it);
697  int in = Nvcd * is;
698  int ig = m_Ndf * is;
699  int ix1 = Nvc2 * is2;
700  int ix2 = ix1 + m_Nvc;
701 
702  for (int ic = 0; ic < m_Nc; ++ic) {
703  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
704  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
705  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
706  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
707  }
708 
709  for (int ic = 0; ic < m_Nc; ++ic) {
710  int icr = 2 * ic;
711  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
712  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
713  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
714  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
715  }
716  }
717  }
718  }
719  }
720 
721 
722 //====================================================================
724  double *v2, const double *vcp2)
725  {
726  int Nvc2 = 2 * m_Nvc;
727  int Nvcd = m_Nvc * m_Nd;
728  int Nvcd2 = Nvcd / 2;
729 
730  int id1 = 0;
731  int id2 = m_Nvc;
732  int id3 = m_Nvc * 2;
733  int id4 = m_Nvc * 3;
734 
735  int idir = 1;
736  double bc2 = m_boundary2[idir];
737 
738  // double wt1r, wt1i, wt2r, wt2i;
739 
740  int isite = m_arg[itask].isite;
741  int isite_cp = m_arg[itask].isite_cpy;
742 
743  double *w2 = &v2[Nvcd * isite];
744  const double *w1 = &vcp2[Nvcd2 * isite_cp];
745 
746  int iy = 0;
747 
748  for (int it = 0; it < m_Mt; ++it) {
749  for (int iz = 0; iz < m_Mz; ++iz) {
750  for (int ix = 0; ix < m_Nx; ++ix) {
751  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
752  int is2 = ix + m_Nx * (iz + m_Mz * it);
753  int iv = Nvcd * is;
754  int ix1 = Nvc2 * is2;
755  int ix2 = ix1 + m_Nvc;
756 
757  for (int ic = 0; ic < m_Nc; ++ic) {
758  int icr = 2 * ic;
759  int ici = 2 * ic + 1;
760  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
761  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
762  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
763  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
764  w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
765  w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
766  w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
767  w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
768  }
769  }
770  }
771  }
772  }
773 
774 
775 //====================================================================
777  double *v2, const double *v1)
778  {
779  int Nvcd = m_Nvc * m_Nd;
780 
781  int id1 = 0;
782  int id2 = m_Nvc;
783  int id3 = m_Nvc * 2;
784  int id4 = m_Nvc * 3;
785 
786  int idir = 1;
787 
788  double vt1[m_Nvc], vt2[m_Nvc];
789  double wt1r, wt1i, wt2r, wt2i;
790 
791  int isite = m_arg[itask].isite;
792 
793  double *w2 = &v2[Nvcd * isite];
794  const double *w1 = &v1[Nvcd * isite];
795  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
796 
797  for (int it = 0; it < m_Mt; ++it) {
798  for (int iz = 0; iz < m_Mz; ++iz) {
799  for (int iy = 1; iy < m_Ny; ++iy) {
800  for (int ix = 0; ix < m_Nx; ++ix) {
801  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
802  int iv = Nvcd * is;
803  int in = Nvcd * (is - m_Nx);
804  int ig = m_Ndf * (is - m_Nx);
805 
806  for (int ic = 0; ic < m_Nc; ++ic) {
807  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
808  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
809  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
810  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
811  }
812 
813  for (int ic = 0; ic < m_Nc; ++ic) {
814  int ic2 = 2 * ic;
815  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
816  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
817  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
818  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
819 
820  w2[ic2 + id1 + iv] += wt1r;
821  w2[ic2 + 1 + id1 + iv] += wt1i;
822  w2[ic2 + id2 + iv] += wt2r;
823  w2[ic2 + 1 + id2 + iv] += wt2i;
824  w2[ic2 + id3 + iv] += wt2r;
825  w2[ic2 + 1 + id3 + iv] += wt2i;
826  w2[ic2 + id4 + iv] += -wt1r;
827  w2[ic2 + 1 + id4 + iv] += -wt1i;
828  }
829  }
830  }
831  }
832  }
833  }
834 
835 
836 //====================================================================
838  double *vcp1, const double *v1)
839  {
840  int Nvc2 = 2 * m_Nvc;
841  int Nvcd = m_Nvc * m_Nd;
842  int Nvcd2 = Nvcd / 2;
843 
844  int id1 = 0;
845  int id2 = m_Nvc;
846  int id3 = m_Nvc * 2;
847  int id4 = m_Nvc * 3;
848 
849  int isite = m_arg[itask].isite;
850  int isite_cp = m_arg[itask].isite_cpz;
851 
852  double *w2 = &vcp1[Nvcd2 * isite_cp];
853  const double *w1 = &v1[Nvcd * isite];
854 
855  int idir = 2;
856  double bc2 = m_boundary2[idir];
857 
858  if (m_arg[itask].kz0 == 1) {
859  int Nxy = m_Nx * m_Ny;
860  int iz = 0;
861  for (int it = 0; it < m_Mt; ++it) {
862  for (int ixy = 0; ixy < Nxy; ++ixy) {
863  int is = ixy + Nxy * (iz + m_Nz * it);
864  int is2 = ixy + Nxy * it;
865 
866  int in = Nvcd * is;
867  int ix1 = Nvc2 * is2;
868  int ix2 = ix1 + m_Nvc;
869 
870  for (int ic = 0; ic < m_Nc; ++ic) {
871  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
872  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
873  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
874  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
875  }
876  }
877  }
878  }
879  }
880 
881 
882 //====================================================================
884  double *v2, const double *vcp2)
885  {
886  int Nvc2 = 2 * m_Nvc;
887  int Nvcd = m_Nvc * m_Nd;
888  int Nvcd2 = Nvcd / 2;
889 
890  int id1 = 0;
891  int id2 = m_Nvc;
892  int id3 = m_Nvc * 2;
893  int id4 = m_Nvc * 3;
894 
895  int idir = 2;
896 
897  double wt1r, wt1i, wt2r, wt2i;
898 
899  int isite = m_arg[itask].isite;
900  int isite_cp = m_arg[itask].isite_cpz;
901 
902  double *w2 = &v2[Nvcd * isite];
903  const double *w1 = &vcp2[Nvcd2 * isite_cp];
904  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
905 
906  if (m_arg[itask].kz1 == 1) {
907  int Nxy = m_Nx * m_Ny;
908  int iz = m_Mz - 1;
909  for (int it = 0; it < m_Mt; ++it) {
910  for (int ixy = 0; ixy < Nxy; ++ixy) {
911  int is = ixy + Nxy * (iz + m_Nz * it);
912  int is2 = ixy + Nxy * it;
913  int iv = Nvcd * is;
914  int ig = m_Ndf * is;
915  int ix1 = Nvc2 * is2;
916  int ix2 = ix1 + m_Nvc;
917 
918  for (int ic = 0; ic < m_Nc; ++ic) {
919  int ic2 = ic * m_Nvc;
920 
921  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
922  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
923  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
924  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
925 
926  w2[2 * ic + id1 + iv] += wt1r;
927  w2[2 * ic + 1 + id1 + iv] += wt1i;
928  w2[2 * ic + id2 + iv] += wt2r;
929  w2[2 * ic + 1 + id2 + iv] += wt2i;
930  w2[2 * ic + id3 + iv] += wt1i;
931  w2[2 * ic + 1 + id3 + iv] += -wt1r;
932  w2[2 * ic + id4 + iv] += -wt2i;
933  w2[2 * ic + 1 + id4 + iv] += wt2r;
934  }
935  }
936  }
937  }
938  }
939 
940 
941 //====================================================================
943  double *v2, const double *v1)
944  {
945  int Nvcd = m_Nvc * m_Nd;
946 
947  int id1 = 0;
948  int id2 = m_Nvc;
949  int id3 = m_Nvc * 2;
950  int id4 = m_Nvc * 3;
951 
952  int idir = 2;
953 
954  double vt1[m_Nvc], vt2[m_Nvc];
955  double wt1r, wt1i, wt2r, wt2i;
956 
957  int isite = m_arg[itask].isite;
958 
959  double *w2 = &v2[Nvcd * isite];
960  const double *w1 = &v1[Nvcd * isite];
961  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
962 
963  int kz1 = m_arg[itask].kz1;
964  int Nxy = m_Nx * m_Ny;
965 
966  for (int it = 0; it < m_Mt; ++it) {
967  for (int iz = 0; iz < m_Mz - kz1; ++iz) {
968  for (int ixy = 0; ixy < Nxy; ++ixy) {
969  int is = ixy + Nxy * (iz + m_Nz * it);
970  int iv = Nvcd * is;
971  int in = Nvcd * (is + Nxy);
972  int ig = m_Ndf * is;
973 
974  for (int ic = 0; ic < m_Nc; ++ic) {
975  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
976  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
977  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
978  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
979  }
980 
981  for (int ic = 0; ic < m_Nc; ++ic) {
982  int ic2 = ic * m_Nvc;
983 
984  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
985  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
986  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
987  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
988 
989  w2[2 * ic + id1 + iv] += wt1r;
990  w2[2 * ic + 1 + id1 + iv] += wt1i;
991  w2[2 * ic + id2 + iv] += wt2r;
992  w2[2 * ic + 1 + id2 + iv] += wt2i;
993  w2[2 * ic + id3 + iv] += wt1i;
994  w2[2 * ic + 1 + id3 + iv] += -wt1r;
995  w2[2 * ic + id4 + iv] += -wt2i;
996  w2[2 * ic + 1 + id4 + iv] += wt2r;
997  }
998  }
999  }
1000  }
1001  }
1002 
1003 
1004 //====================================================================
1006  double *vcp1, const double *v1)
1007  {
1008  int Nvc2 = 2 * m_Nvc;
1009  int Nvcd = m_Nvc * m_Nd;
1010  int Nvcd2 = Nvcd / 2;
1011 
1012  int id1 = 0;
1013  int id2 = m_Nvc;
1014  int id3 = m_Nvc * 2;
1015  int id4 = m_Nvc * 3;
1016 
1017  int idir = 2;
1018 
1019  int isite = m_arg[itask].isite;
1020  int isite_cp = m_arg[itask].isite_cpz;
1021 
1022  double *w2 = &vcp1[Nvcd2 * isite_cp];
1023  const double *w1 = &v1[Nvcd * isite];
1024  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1025 
1026  double vt1[m_Nvc], vt2[m_Nvc];
1027 
1028  if (m_arg[itask].kz1 == 1) {
1029  int Nxy = m_Nx * m_Ny;
1030  int iz = m_Mz - 1;
1031  for (int it = 0; it < m_Mt; ++it) {
1032  for (int ixy = 0; ixy < Nxy; ++ixy) {
1033  int is = ixy + Nxy * (iz + m_Nz * it);
1034  int is2 = ixy + Nxy * it;
1035  int in = Nvcd * is;
1036  int ig = m_Ndf * is;
1037  int ix1 = Nvc2 * is2;
1038  int ix2 = ix1 + m_Nvc;
1039 
1040  for (int ic = 0; ic < m_Nc; ++ic) {
1041  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1042  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1043  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1044  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1045  }
1046 
1047  for (int ic = 0; ic < m_Nc; ++ic) {
1048  int icr = 2 * ic;
1049  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1050  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1051  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1052  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1053  }
1054  }
1055  }
1056  }
1057  }
1058 
1059 
1060 //====================================================================
1062  double *v2, const double *vcp2)
1063  {
1064  int Nvc2 = 2 * m_Nvc;
1065  int Nvcd = m_Nvc * m_Nd;
1066  int Nvcd2 = Nvcd / 2;
1067 
1068  int id1 = 0;
1069  int id2 = m_Nvc;
1070  int id3 = m_Nvc * 2;
1071  int id4 = m_Nvc * 3;
1072 
1073  int idir = 2;
1074  double bc2 = m_boundary2[idir];
1075 
1076  // double wt1r, wt1i, wt2r, wt2i;
1077 
1078  int isite = m_arg[itask].isite;
1079  int isite_cp = m_arg[itask].isite_cpz;
1080 
1081  double *w2 = &v2[Nvcd * isite];
1082  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1083 
1084  if (m_arg[itask].kz0 == 1) {
1085  int Nxy = m_Nx * m_Ny;
1086 
1087  int iz = 0;
1088  for (int it = 0; it < m_Mt; ++it) {
1089  for (int ixy = 0; ixy < Nxy; ++ixy) {
1090  int is = ixy + Nxy * (iz + m_Nz * it);
1091  int is2 = ixy + Nxy * it;
1092  int iv = Nvcd * is;
1093  int ix1 = Nvc2 * is2;
1094  int ix2 = ix1 + m_Nvc;
1095 
1096  for (int ic = 0; ic < m_Nc; ++ic) {
1097  int icr = 2 * ic;
1098  int ici = 2 * ic + 1;
1099  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1100  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1101  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1102  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1103  w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1104  w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1105  w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1106  w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
1107  }
1108  }
1109  }
1110  }
1111  }
1112 
1113 
1114 //====================================================================
1116  double *v2, const double *v1)
1117  {
1118  int Nvcd = m_Nvc * m_Nd;
1119 
1120  int id1 = 0;
1121  int id2 = m_Nvc;
1122  int id3 = m_Nvc * 2;
1123  int id4 = m_Nvc * 3;
1124 
1125  int idir = 2;
1126 
1127  double vt1[m_Nvc], vt2[m_Nvc];
1128  double wt1r, wt1i, wt2r, wt2i;
1129 
1130  int isite = m_arg[itask].isite;
1131 
1132  double *w2 = &v2[Nvcd * isite];
1133  const double *w1 = &v1[Nvcd * isite];
1134  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1135 
1136  int kz0 = m_arg[itask].kz0;
1137  int Nxy = m_Nx * m_Ny;
1138 
1139  for (int it = 0; it < m_Mt; ++it) {
1140  for (int iz = kz0; iz < m_Mz; ++iz) {
1141  for (int ixy = 0; ixy < Nxy; ++ixy) {
1142  int is = ixy + Nxy * (iz + m_Nz * it);
1143  int iv = Nvcd * is;
1144  int in = Nvcd * (is - Nxy);
1145  int ig = m_Ndf * (is - Nxy);
1146 
1147  for (int ic = 0; ic < m_Nc; ++ic) {
1148  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1149  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1150  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1151  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1152  }
1153 
1154  for (int ic = 0; ic < m_Nc; ++ic) {
1155  int ic2 = 2 * ic;
1156  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1157  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1158  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1159  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1160 
1161  w2[ic2 + id1 + iv] += wt1r;
1162  w2[ic2 + 1 + id1 + iv] += wt1i;
1163  w2[ic2 + id2 + iv] += wt2r;
1164  w2[ic2 + 1 + id2 + iv] += wt2i;
1165  w2[ic2 + id3 + iv] += -wt1i;
1166  w2[ic2 + 1 + id3 + iv] += wt1r;
1167  w2[ic2 + id4 + iv] += wt2i;
1168  w2[ic2 + 1 + id4 + iv] += -wt2r;
1169  }
1170  }
1171  }
1172  }
1173  }
1174 
1175 
1176 //====================================================================
1178  double *vcp1, const double *v1)
1179  {
1180  int Nvc2 = 2 * m_Nvc;
1181  int Nvcd = m_Nvc * m_Nd;
1182  int Nvcd2 = Nvcd / 2;
1183 
1184  // int id1 = 0;
1185  // int id2 = m_Nvc;
1186  int id3 = m_Nvc * 2;
1187  int id4 = m_Nvc * 3;
1188 
1189  int isite = m_arg[itask].isite;
1190  int isite_cp = m_arg[itask].isite_cpt;
1191 
1192  double *w2 = &vcp1[Nvcd2 * isite_cp];
1193  const double *w1 = &v1[Nvcd * isite];
1194 
1195  int idir = 3;
1196  double bc2 = m_boundary2[idir];
1197 
1198  if (m_arg[itask].kt0 == 1) {
1199  int Nxy = m_Nx * m_Ny;
1200  int it = 0;
1201  for (int iz = 0; iz < m_Mz; ++iz) {
1202  for (int ixy = 0; ixy < Nxy; ++ixy) {
1203  int is = ixy + Nxy * (iz + m_Nz * it);
1204  int is2 = ixy + Nxy * iz;
1205 
1206  int in = Nvcd * is;
1207  int ix1 = Nvc2 * is2;
1208  int ix2 = ix1 + m_Nvc;
1209 
1210  for (int ic = 0; ic < m_Nc; ++ic) {
1211  w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1212  w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1213  w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1214  w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1215  }
1216  }
1217  }
1218  }
1219  }
1220 
1221 
1222 //====================================================================
1224  double *v2, const double *vcp2)
1225  {
1226  int Nvc2 = 2 * m_Nvc;
1227  int Nvcd = m_Nvc * m_Nd;
1228  int Nvcd2 = Nvcd / 2;
1229 
1230  // int id1 = 0;
1231  // int id2 = m_Nvc;
1232  int id3 = m_Nvc * 2;
1233  int id4 = m_Nvc * 3;
1234 
1235  int idir = 3;
1236 
1237  double wt1r, wt1i, wt2r, wt2i;
1238 
1239  int isite = m_arg[itask].isite;
1240  int isite_cp = m_arg[itask].isite_cpt;
1241 
1242  double *w2 = &v2[Nvcd * isite];
1243  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1244  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1245 
1246  if (m_arg[itask].kt1 == 1) {
1247  int Nxy = m_Nx * m_Ny;
1248  int it = m_Mt - 1;
1249  for (int iz = 0; iz < m_Mz; ++iz) {
1250  for (int ixy = 0; ixy < Nxy; ++ixy) {
1251  int is = ixy + Nxy * (iz + m_Nz * it);
1252  int is2 = ixy + Nxy * iz;
1253  int iv = Nvcd * is;
1254  int ig = m_Ndf * is;
1255  int ix1 = Nvc2 * is2;
1256  int ix2 = ix1 + m_Nvc;
1257 
1258  for (int ic = 0; ic < m_Nc; ++ic) {
1259  int ic2 = ic * m_Nvc;
1260 
1261  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1262  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1263  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1264  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1265 
1266  w2[2 * ic + id3 + iv] += wt1r;
1267  w2[2 * ic + 1 + id3 + iv] += wt1i;
1268  w2[2 * ic + id4 + iv] += wt2r;
1269  w2[2 * ic + 1 + id4 + iv] += wt2i;
1270  }
1271  }
1272  }
1273  }
1274  }
1275 
1276 
1277 //====================================================================
1279  double *v2, const double *v1)
1280  {
1281  int Nvcd = m_Nvc * m_Nd;
1282 
1283  // int id1 = 0;
1284  // int id2 = m_Nvc;
1285  int id3 = m_Nvc * 2;
1286  int id4 = m_Nvc * 3;
1287 
1288  int idir = 3;
1289 
1290  double vt1[m_Nvc], vt2[m_Nvc];
1291  double wt1r, wt1i, wt2r, wt2i;
1292 
1293  int isite = m_arg[itask].isite;
1294 
1295  double *w2 = &v2[Nvcd * isite];
1296  const double *w1 = &v1[Nvcd * isite];
1297  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1298 
1299  int kt1 = m_arg[itask].kt1;
1300  int Nxy = m_Nx * m_Ny;
1301  int Nxyz = Nxy * m_Nz;
1302 
1303  for (int it = 0; it < m_Mt - kt1; ++it) {
1304  for (int iz = 0; iz < m_Mz; ++iz) {
1305  for (int ixy = 0; ixy < Nxy; ++ixy) {
1306  int is = ixy + Nxy * (iz + m_Nz * it);
1307  int iv = Nvcd * is;
1308  int in = Nvcd * (is + Nxyz);
1309  int ig = m_Ndf * is;
1310 
1311  for (int ic = 0; ic < m_Nc; ++ic) {
1312  vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1313  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1314  vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1315  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1316  }
1317 
1318  for (int ic = 0; ic < m_Nc; ++ic) {
1319  int ic2 = ic * m_Nvc;
1320 
1321  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1322  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1323  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1324  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1325 
1326  w2[2 * ic + id3 + iv] += wt1r;
1327  w2[2 * ic + 1 + id3 + iv] += wt1i;
1328  w2[2 * ic + id4 + iv] += wt2r;
1329  w2[2 * ic + 1 + id4 + iv] += wt2i;
1330  }
1331  }
1332  }
1333  }
1334  }
1335 
1336 
1337 //====================================================================
1339  double *vcp1, const double *v1)
1340  {
1341  int Nvc2 = 2 * m_Nvc;
1342  int Nvcd = m_Nvc * m_Nd;
1343  int Nvcd2 = Nvcd / 2;
1344 
1345  int id1 = 0;
1346  int id2 = m_Nvc;
1347  // int id3 = m_Nvc * 2;
1348  // int id4 = m_Nvc * 3;
1349 
1350  int idir = 3;
1351 
1352  int isite = m_arg[itask].isite;
1353  int isite_cp = m_arg[itask].isite_cpt;
1354 
1355  double *w2 = &vcp1[Nvcd2 * isite_cp];
1356  const double *w1 = &v1[Nvcd * isite];
1357  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1358 
1359  double vt1[m_Nvc], vt2[m_Nvc];
1360 
1361  if (m_arg[itask].kt1 == 1) {
1362  int Nxy = m_Nx * m_Ny;
1363  int it = m_Mt - 1;
1364  for (int iz = 0; iz < m_Mz; ++iz) {
1365  for (int ixy = 0; ixy < Nxy; ++ixy) {
1366  int is = ixy + Nxy * (iz + m_Nz * it);
1367  int is2 = ixy + Nxy * iz;
1368  int in = Nvcd * is;
1369  int ig = m_Ndf * is;
1370  int ix1 = Nvc2 * is2;
1371  int ix2 = ix1 + m_Nvc;
1372 
1373  for (int ic = 0; ic < m_Nc; ++ic) {
1374  vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1375  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1376  vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1377  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1378  }
1379 
1380  for (int ic = 0; ic < m_Nc; ++ic) {
1381  int icr = 2 * ic;
1382  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1383  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1384  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1385  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1386  }
1387  }
1388  }
1389  }
1390  }
1391 
1392 
1393 //====================================================================
1395  double *v2, const double *vcp2)
1396  {
1397  int Nvc2 = 2 * m_Nvc;
1398  int Nvcd = m_Nvc * m_Nd;
1399  int Nvcd2 = Nvcd / 2;
1400 
1401  int id1 = 0;
1402  int id2 = m_Nvc;
1403  // int id3 = m_Nvc * 2;
1404  // int id4 = m_Nvc * 3;
1405 
1406  int idir = 3;
1407  double bc2 = m_boundary2[idir];
1408 
1409  // double wt1r, wt1i, wt2r, wt2i;
1410 
1411  int isite = m_arg[itask].isite;
1412  int isite_cp = m_arg[itask].isite_cpt;
1413 
1414  double *w2 = &v2[Nvcd * isite];
1415  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1416 
1417  if (m_arg[itask].kt0 == 1) {
1418  int Nxy = m_Nx * m_Ny;
1419  int it = 0;
1420  for (int iz = 0; iz < m_Mz; ++iz) {
1421  for (int ixy = 0; ixy < Nxy; ++ixy) {
1422  int is = ixy + Nxy * (iz + m_Nz * it);
1423  int is2 = ixy + Nxy * iz;
1424  int iv = Nvcd * is;
1425  int ix1 = Nvc2 * is2;
1426  int ix2 = ix1 + m_Nvc;
1427 
1428  for (int ic = 0; ic < m_Nc; ++ic) {
1429  int icr = 2 * ic;
1430  int ici = 2 * ic + 1;
1431  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1432  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1433  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1434  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1435  }
1436  }
1437  }
1438  }
1439  }
1440 
1441 
1442 //====================================================================
1444  double *v2, const double *v1)
1445  {
1446  int Nvcd = m_Nvc * m_Nd;
1447 
1448  int id1 = 0;
1449  int id2 = m_Nvc;
1450  // int id3 = m_Nvc * 2;
1451  // int id4 = m_Nvc * 3;
1452 
1453  int idir = 3;
1454 
1455  double vt1[m_Nvc], vt2[m_Nvc];
1456  double wt1r, wt1i, wt2r, wt2i;
1457 
1458  int isite = m_arg[itask].isite;
1459 
1460  double *w2 = &v2[Nvcd * isite];
1461  const double *w1 = &v1[Nvcd * isite];
1462  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1463 
1464  int kt0 = m_arg[itask].kt0;
1465  int Nxy = m_Nx * m_Ny;
1466  int Nxyz = Nxy * m_Nz;
1467 
1468  for (int it = kt0; it < m_Mt; ++it) {
1469  for (int iz = 0; iz < m_Mz; ++iz) {
1470  for (int ixy = 0; ixy < Nxy; ++ixy) {
1471  int is = ixy + Nxy * (iz + m_Nz * it);
1472  int iv = Nvcd * is;
1473  int in = Nvcd * (is - Nxyz);
1474  int ig = m_Ndf * (is - Nxyz);
1475 
1476  for (int ic = 0; ic < m_Nc; ++ic) {
1477  vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1478  vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1479  vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1480  vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1481  }
1482 
1483  for (int ic = 0; ic < m_Nc; ++ic) {
1484  int ic2 = 2 * ic;
1485  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1486  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1487  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1488  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1489 
1490  w2[ic2 + id1 + iv] += wt1r;
1491  w2[ic2 + 1 + id1 + iv] += wt1i;
1492  w2[ic2 + id2 + iv] += wt2r;
1493  w2[ic2 + 1 + id2 + iv] += wt2i;
1494  }
1495  }
1496  }
1497  }
1498  }
1499 
1500 
1501 //====================================================================
1503  double *vcp1, const double *v1)
1504  {
1505  int Nvc2 = 2 * m_Nvc;
1506  int Nvcd = m_Nvc * m_Nd;
1507  int Nvcd2 = Nvcd / 2;
1508 
1509  int id1 = 0;
1510  int id2 = m_Nvc;
1511  int id3 = m_Nvc * 2;
1512  int id4 = m_Nvc * 3;
1513 
1514  int isite = m_arg[itask].isite;
1515  int isite_cp = m_arg[itask].isite_cpt;
1516 
1517  double *w2 = &vcp1[Nvcd2 * isite_cp];
1518  const double *w1 = &v1[Nvcd * isite];
1519 
1520  int idir = 3;
1521  double bc2 = m_boundary2[idir];
1522 
1523  if (m_arg[itask].kt0 == 1) {
1524  int Nxy = m_Nx * m_Ny;
1525  int it = 0;
1526  for (int iz = 0; iz < m_Mz; ++iz) {
1527  for (int ixy = 0; ixy < Nxy; ++ixy) {
1528  int is = ixy + Nxy * (iz + m_Nz * it);
1529  int is2 = ixy + Nxy * iz;
1530 
1531  int in = Nvcd * is;
1532  int ix1 = Nvc2 * is2;
1533  int ix2 = ix1 + m_Nvc;
1534 
1535  for (int ic = 0; ic < m_Nc; ++ic) {
1536  w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1537  w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1538  w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1539  w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1540  }
1541  }
1542  }
1543  }
1544  }
1545 
1546 
1547 //====================================================================
1549  double *v2, const double *vcp2)
1550  {
1551  int Nvc2 = 2 * m_Nvc;
1552  int Nvcd = m_Nvc * m_Nd;
1553  int Nvcd2 = Nvcd / 2;
1554 
1555  int id1 = 0;
1556  int id2 = m_Nvc;
1557  int id3 = m_Nvc * 2;
1558  int id4 = m_Nvc * 3;
1559 
1560  int idir = 3;
1561 
1562  double wt1r, wt1i, wt2r, wt2i;
1563 
1564  int isite = m_arg[itask].isite;
1565  int isite_cp = m_arg[itask].isite_cpt;
1566 
1567  double *w2 = &v2[Nvcd * isite];
1568  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1569  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1570 
1571  if (m_arg[itask].kt1 == 1) {
1572  int Nxy = m_Nx * m_Ny;
1573  int it = m_Mt - 1;
1574  for (int iz = 0; iz < m_Mz; ++iz) {
1575  for (int ixy = 0; ixy < Nxy; ++ixy) {
1576  int is = ixy + Nxy * (iz + m_Nz * it);
1577  int is2 = ixy + Nxy * iz;
1578  int iv = Nvcd * is;
1579  int ig = m_Ndf * is;
1580  int ix1 = Nvc2 * is2;
1581  int ix2 = ix1 + m_Nvc;
1582 
1583  for (int ic = 0; ic < m_Nc; ++ic) {
1584  int ic2 = ic * m_Nvc;
1585 
1586  wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1587  wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1588  wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1589  wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1590 
1591  w2[2 * ic + id1 + iv] += wt1r;
1592  w2[2 * ic + 1 + id1 + iv] += wt1i;
1593  w2[2 * ic + id2 + iv] += wt2r;
1594  w2[2 * ic + 1 + id2 + iv] += wt2i;
1595  w2[2 * ic + id3 + iv] += wt1r;
1596  w2[2 * ic + 1 + id3 + iv] += wt1i;
1597  w2[2 * ic + id4 + iv] += wt2r;
1598  w2[2 * ic + 1 + id4 + iv] += wt2i;
1599  }
1600  }
1601  }
1602  }
1603  }
1604 
1605 
1606 //====================================================================
1608  double *v2, const double *v1)
1609  {
1610  int Nvcd = m_Nvc * m_Nd;
1611 
1612  int id1 = 0;
1613  int id2 = m_Nvc;
1614  int id3 = m_Nvc * 2;
1615  int id4 = m_Nvc * 3;
1616 
1617  int idir = 3;
1618 
1619  double vt1[m_Nvc], vt2[m_Nvc];
1620  double wt1r, wt1i, wt2r, wt2i;
1621 
1622  int isite = m_arg[itask].isite;
1623 
1624  double *w2 = &v2[Nvcd * isite];
1625  const double *w1 = &v1[Nvcd * isite];
1626  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1627 
1628  int kt1 = m_arg[itask].kt1;
1629  int Nxy = m_Nx * m_Ny;
1630  int Nxyz = Nxy * m_Nz;
1631 
1632  for (int it = 0; it < m_Mt - kt1; ++it) {
1633  for (int iz = 0; iz < m_Mz; ++iz) {
1634  for (int ixy = 0; ixy < Nxy; ++ixy) {
1635  int is = ixy + Nxy * (iz + m_Nz * it);
1636  int iv = Nvcd * is;
1637  int in = Nvcd * (is + Nxyz);
1638  int ig = m_Ndf * is;
1639 
1640  for (int ic = 0; ic < m_Nc; ++ic) {
1641  vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1642  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1643  vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1644  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
1645  }
1646 
1647  for (int ic = 0; ic < m_Nc; ++ic) {
1648  int ic2 = ic * m_Nvc;
1649 
1650  wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1651  wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1652  wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1653  wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1654 
1655  w2[2 * ic + id1 + iv] += wt1r;
1656  w2[2 * ic + 1 + id1 + iv] += wt1i;
1657  w2[2 * ic + id2 + iv] += wt2r;
1658  w2[2 * ic + 1 + id2 + iv] += wt2i;
1659  w2[2 * ic + id3 + iv] += wt1r;
1660  w2[2 * ic + 1 + id3 + iv] += wt1i;
1661  w2[2 * ic + id4 + iv] += wt2r;
1662  w2[2 * ic + 1 + id4 + iv] += wt2i;
1663  }
1664  }
1665  }
1666  }
1667  }
1668 
1669 
1670 //====================================================================
1672  double *vcp1, const double *v1)
1673  {
1674  int Nvc2 = 2 * m_Nvc;
1675  int Nvcd = m_Nvc * m_Nd;
1676  int Nvcd2 = Nvcd / 2;
1677 
1678  int id1 = 0;
1679  int id2 = m_Nvc;
1680  int id3 = m_Nvc * 2;
1681  int id4 = m_Nvc * 3;
1682 
1683  int idir = 3;
1684 
1685  int isite = m_arg[itask].isite;
1686  int isite_cp = m_arg[itask].isite_cpt;
1687 
1688  double *w2 = &vcp1[Nvcd2 * isite_cp];
1689  const double *w1 = &v1[Nvcd * isite];
1690  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1691 
1692  double vt1[m_Nvc], vt2[m_Nvc];
1693 
1694  if (m_arg[itask].kt1 == 1) {
1695  int Nxy = m_Nx * m_Ny;
1696  int it = m_Mt - 1;
1697  for (int iz = 0; iz < m_Mz; ++iz) {
1698  for (int ixy = 0; ixy < Nxy; ++ixy) {
1699  int is = ixy + Nxy * (iz + m_Nz * it);
1700  int is2 = ixy + Nxy * iz;
1701  int in = Nvcd * is;
1702  int ig = m_Ndf * is;
1703  int ix1 = Nvc2 * is2;
1704  int ix2 = ix1 + m_Nvc;
1705 
1706  for (int ic = 0; ic < m_Nc; ++ic) {
1707  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1708  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1709  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1710  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1711  }
1712 
1713  for (int ic = 0; ic < m_Nc; ++ic) {
1714  int icr = 2 * ic;
1715  w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1716  w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1717  w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1718  w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1719  }
1720  }
1721  }
1722  }
1723  }
1724 
1725 
1726 //====================================================================
1728  double *v2, const double *vcp2)
1729  {
1730  int Nvc2 = 2 * m_Nvc;
1731  int Nvcd = m_Nvc * m_Nd;
1732  int Nvcd2 = Nvcd / 2;
1733 
1734  int id1 = 0;
1735  int id2 = m_Nvc;
1736  int id3 = m_Nvc * 2;
1737  int id4 = m_Nvc * 3;
1738 
1739  int idir = 3;
1740  double bc2 = m_boundary2[idir];
1741 
1742  // double wt1r, wt1i, wt2r, wt2i;
1743 
1744  int isite = m_arg[itask].isite;
1745  int isite_cp = m_arg[itask].isite_cpt;
1746 
1747  double *w2 = &v2[Nvcd * isite];
1748  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1749 
1750  if (m_arg[itask].kt0 == 1) {
1751  int Nxy = m_Nx * m_Ny;
1752  int it = 0;
1753  for (int iz = 0; iz < m_Mz; ++iz) {
1754  for (int ixy = 0; ixy < Nxy; ++ixy) {
1755  int is = ixy + Nxy * (iz + m_Nz * it);
1756  int is2 = ixy + Nxy * iz;
1757  int iv = Nvcd * is;
1758  int ix1 = Nvc2 * is2;
1759  int ix2 = ix1 + m_Nvc;
1760 
1761  for (int ic = 0; ic < m_Nc; ++ic) {
1762  int icr = 2 * ic;
1763  int ici = 2 * ic + 1;
1764  w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1765  w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1766  w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1767  w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1768  w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1769  w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1770  w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1771  w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
1772  }
1773  }
1774  }
1775  }
1776  }
1777 
1778 
1779 //====================================================================
1781  double *v2, const double *v1)
1782  {
1783  int Nvcd = m_Nvc * m_Nd;
1784 
1785  int id1 = 0;
1786  int id2 = m_Nvc;
1787  int id3 = m_Nvc * 2;
1788  int id4 = m_Nvc * 3;
1789 
1790  int idir = 3;
1791 
1792  double vt1[m_Nvc], vt2[m_Nvc];
1793  double wt1r, wt1i, wt2r, wt2i;
1794 
1795  int isite = m_arg[itask].isite;
1796 
1797  double *w2 = &v2[Nvcd * isite];
1798  const double *w1 = &v1[Nvcd * isite];
1799  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1800 
1801  int kt0 = m_arg[itask].kt0;
1802  int Nxy = m_Nx * m_Ny;
1803  int Nxyz = Nxy * m_Nz;
1804 
1805  for (int it = kt0; it < m_Mt; ++it) {
1806  for (int iz = 0; iz < m_Mz; ++iz) {
1807  for (int ixy = 0; ixy < Nxy; ++ixy) {
1808  int is = ixy + Nxy * (iz + m_Nz * it);
1809  int iv = Nvcd * is;
1810  int in = Nvcd * (is - Nxyz);
1811  int ig = m_Ndf * (is - Nxyz);
1812 
1813  for (int ic = 0; ic < m_Nc; ++ic) {
1814  vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1815  vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1816  vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1817  vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1818  }
1819 
1820  for (int ic = 0; ic < m_Nc; ++ic) {
1821  int ic2 = 2 * ic;
1822  wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1823  wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1824  wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1825  wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1826 
1827  w2[ic2 + id1 + iv] += wt1r;
1828  w2[ic2 + 1 + id1 + iv] += wt1i;
1829  w2[ic2 + id2 + iv] += wt2r;
1830  w2[ic2 + 1 + id2 + iv] += wt2i;
1831  w2[ic2 + id3 + iv] -= wt1r;
1832  w2[ic2 + 1 + id3 + iv] -= wt1i;
1833  w2[ic2 + id4 + iv] -= wt2r;
1834  w2[ic2 + 1 + id4 + iv] -= wt2i;
1835  }
1836  }
1837  }
1838  }
1839  }
1840 
1841 
1842 //====================================================================
1844  double *v2, const double *v1)
1845  {
1846  int Nvcd = m_Nvc * m_Nd;
1847  int Nxy = m_Nx * m_Ny;
1848 
1849  int id1 = 0;
1850  int id2 = m_Nvc;
1851  int id3 = m_Nvc * 2;
1852  int id4 = m_Nvc * 3;
1853 
1854  int isite = m_arg[itask].isite;
1855  double *w2 = &v2[Nvcd * isite];
1856  const double *w1 = &v1[Nvcd * isite];
1857 
1858  for (int it = 0; it < m_Mt; ++it) {
1859  for (int iz = 0; iz < m_Mz; ++iz) {
1860  for (int ixy = 0; ixy < Nxy; ++ixy) {
1861  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1862  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
1863  w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
1864  w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
1865  w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
1866  w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
1867  }
1868  }
1869  }
1870  }
1871  }
1872 
1873 
1874 //====================================================================
1876  double *v2, const double *v1)
1877  {
1878  int Nvcd = m_Nvc * m_Nd;
1879  int Nxy = m_Nx * m_Ny;
1880 
1881  int id1 = 0;
1882  int id2 = m_Nvc;
1883  int id3 = m_Nvc * 2;
1884  int id4 = m_Nvc * 3;
1885 
1886  int isite = m_arg[itask].isite;
1887  double *w2 = &v2[Nvcd * isite];
1888  const double *w1 = &v1[Nvcd * isite];
1889 
1890  for (int it = 0; it < m_Mt; ++it) {
1891  for (int iz = 0; iz < m_Mz; ++iz) {
1892  for (int ixy = 0; ixy < Nxy; ++ixy) {
1893  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1894  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
1895  w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
1896  w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
1897  w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
1898  w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
1899  }
1900  }
1901  }
1902  }
1903  }
1904 
1905 
1906 //====================================================================
1907 }
1908 //============================================================END=====
void mult_zm1_thread(int, double *, const double *)
void mult_zpb_thread(int, double *, const double *)
void mult_ymb_thread(int, double *, const double *)
BridgeIO vout
Definition: bridgeIO.cpp:495
void mult_tp2_chiral_thread(int, double *, const double *)
const double * ptr(const int jin, const int site, const int jex) const
Definition: field.h:142
void mult_ym2_thread(int, double *, const double *)
void mult_zmb_thread(int, double *, const double *)
void mult_xpb_thread(int, double *, const double *)
void general(const char *format,...)
Definition: bridgeIO.cpp:195
void mult_tm1_chiral_thread(int, double *, const double *)
Bridge::VerboseLevel m_vl
void gm5_chiral_thread(int, double *, const double *)
void mult_ypb_thread(int, double *, const double *)
const Field_G * m_U
gauge configuration.
void mult_tm1_dirac_thread(int, double *, const double *)
void mult_tp1_chiral_thread(int, double *, const double *)
void gm5_dirac_thread(int, double *, const double *)
void mult_tmb_chiral_thread(int, double *, const double *)
void mult_zp1_thread(int, double *, const double *)
void mult_ym1_thread(int, double *, const double *)
void daypx_thread(int, double *, double, const double *)
void mult_xp1_thread(int, double *, const double *)
void mult_tpb_chiral_thread(int, double *, const double *)
void mult_tm2_dirac_thread(int, double *, const double *)
std::vector< mult_arg > m_arg
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void crucial(const char *format,...)
Definition: bridgeIO.cpp:178
void mult_zp2_thread(int, double *, const double *)
std::vector< double > m_boundary2
b.c. for each node.
void mult_tm2_chiral_thread(int, double *, const double *)
void mult_tp2_dirac_thread(int, double *, const double *)
void mult_xm2_thread(int, double *, const double *)
void clear_thread(int, double *)
void mult_yp2_thread(int, double *, const double *)
void mult_xm1_thread(int, double *, const double *)
void mult_tpb_dirac_thread(int, double *, const double *)
void mult_xmb_thread(int, double *, const double *)
void mult_zm2_thread(int, double *, const double *)
void mult_tp1_dirac_thread(int, double *, const double *)
void mult_yp1_thread(int, double *, const double *)
void mult_tmb_dirac_thread(int, double *, const double *)
static const std::string class_name
void mult_xp2_thread(int, double *, const double *)