Bridge++  Version 1.4.4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fopr_WilsonGeneral_impl_thread.cpp
Go to the documentation of this file.
1 
15 
16 namespace Imp {
17 #if defined USE_GROUP_SU3
18 #include "fopr_Wilson_impl_SU3.inc"
19 #elif defined USE_GROUP_SU2
20 #include "fopr_Wilson_impl_SU2.inc"
21 #elif defined USE_GROUP_SU_N
22 #include "fopr_Wilson_impl_SU_N.inc"
23 #endif
24 
25 // const std::string Fopr_WilsonGeneral::class_name = "Imp::Fopr_WilsonGeneral";
26 
27 //====================================================================
29  {
31 
32  // The following setup corresponds to uniform division of volume.
33  if (m_Nthread <= m_Nt) {
35  } else if (m_Nthread <= m_Nz * m_Nt) {
36  m_Ntask_t = m_Nt;
37  } else {
38  vout.crucial(m_vl, "Error at %s: Too large Nthread = %d\n", class_name.c_str(), m_Nthread);
39  exit(EXIT_FAILURE);
40  }
41 
43 
44  if (m_Ntask_z * m_Ntask_t != m_Nthread) {
45  vout.crucial(m_vl, "Error at %s: Nz = %d and Nt = %d do not match Nthread = %d\n",
46  class_name.c_str(), m_Nz, m_Nt, m_Nthread);
47  exit(EXIT_FAILURE);
48  }
49 
51  m_Mz = m_Nz / m_Ntask_z;
52  m_Mt = m_Nt / m_Ntask_t;
53 
54  if (m_Mz * m_Ntask_z != m_Nz) {
55  vout.crucial(m_vl, "Error at %s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
56  class_name.c_str(), m_Mz, m_Ntask_z, m_Nz);
57  exit(EXIT_FAILURE);
58  }
59 
60  if (m_Mt * m_Ntask_t != m_Nt) {
61  vout.crucial(m_vl, "Error at %s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
62  class_name.c_str(), m_Mt, m_Ntask_t, m_Nt);
63  exit(EXIT_FAILURE);
64  }
65 
66  // The following setup is not monotonic division, and requires
67  // barrier at the beginning and end of mult (D and gamma5).
68  // [H.Matsufuru 22 Oct 2013]
69  // if(m_Nthread >= 64){
70  // m_Ntask_z = 8;
71  // }else if(m_Nthread >= 16){
72  // m_Ntask_z = 4;
73  // }else if(m_Nthread >= 4){
74  // m_Ntask_z = 2;
75  // }else{
76  // m_Ntask_z = 1;
77  // }
78  // m_Ntask_t = m_Nthread/m_Ntask_z;
79  // m_Ntask = m_Ntask_t * m_Ntask_z;
80  // m_Mz = m_Nz/m_Ntask_z;
81  // m_Mt = m_Nt/m_Ntask_t;
82 
83  vout.general(m_vl, " Nthread = %d\n", m_Nthread);
84  vout.general(m_vl, " Ntask = %d\n", m_Ntask);
85  vout.general(m_vl, " Ntask_z = %d Ntask_t = %d\n", m_Ntask_z, m_Ntask_t);
86  vout.general(m_vl, " Mz = %d Mt = %d\n", m_Mz, m_Mt);
87 
88  //- setup of arguments
89  int Nxy = m_Nx * m_Ny;
90  m_arg.resize(m_Ntask);
91  for (int ithread_t = 0; ithread_t < m_Ntask_t; ++ithread_t) {
92  for (int ithread_z = 0; ithread_z < m_Ntask_z; ++ithread_z) {
93  int itask = ithread_z + m_Ntask_z * ithread_t;
94 
95  m_arg[itask].isite = (ithread_z * m_Mz + ithread_t * (m_Nz * m_Mt)) * Nxy;
96 
97  m_arg[itask].kt0 = 0;
98  m_arg[itask].kt1 = 0;
99  m_arg[itask].kz0 = 0;
100  m_arg[itask].kz1 = 0;
101  if (ithread_t == 0) m_arg[itask].kt0 = 1;
102  if (ithread_z == 0) m_arg[itask].kz0 = 1;
103  if (ithread_t == m_Ntask_t - 1) m_arg[itask].kt1 = 1;
104  if (ithread_z == m_Ntask_z - 1) m_arg[itask].kz1 = 1;
105 
106  m_arg[itask].isite_cp_x = itask * m_Mz * m_Mt * m_Ny;
107  m_arg[itask].isite_cp_y = itask * m_Mz * m_Mt * m_Nx;
108  m_arg[itask].isite_cp_z = ithread_t * m_Mt * Nxy;
109  m_arg[itask].isite_cp_t = ithread_z * m_Mz * Nxy;
110  }
111  }
112  }
113 
114 
115 //====================================================================
117  int itask, double *v2, double fac, const double *v1)
118  {
119  int Nvcd = m_Nvc * m_Nd;
120  int Nvxy = Nvcd * m_Nx * m_Ny;
121 
122  int isite = m_arg[itask].isite;
123 
124  const double *w1 = &v1[Nvcd * isite];
125  double *w2 = &v2[Nvcd * isite];
126 
127  for (int it = 0; it < m_Mt; ++it) {
128  for (int iz = 0; iz < m_Mz; ++iz) {
129  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
130  int iv = ivxy + Nvxy * (iz + m_Nz * it);
131  w2[iv] += fac * w1[iv];
132  }
133  }
134  }
135  }
136 
137 
138 //====================================================================
140  int itask, double *v2, double fac, const double *v1)
141  {
142  int Nvcd = m_Nvc * m_Nd;
143  int Nvxy = Nvcd * m_Nx * m_Ny;
144 
145  int isite = m_arg[itask].isite;
146  const double *w1 = &v1[Nvcd * isite];
147  double *w2 = &v2[Nvcd * isite];
148 
149  for (int it = 0; it < m_Mt; ++it) {
150  for (int iz = 0; iz < m_Mz; ++iz) {
151  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
152  int iv = ivxy + Nvxy * (iz + m_Nz * it);
153  w2[iv] = fac * w2[iv] + w1[iv];
154  }
155  }
156  }
157  }
158 
159 
160 //====================================================================
162  double *v, double fac)
163  {
164  int Nvcd = m_Nvc * m_Nd;
165  int Nvxy = Nvcd * m_Nx * m_Ny;
166 
167  int isite = m_arg[itask].isite;
168  double *w = &v[Nvcd * isite];
169 
170  for (int it = 0; it < m_Mt; ++it) {
171  for (int iz = 0; iz < m_Mz; ++iz) {
172  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
173  int iv = ivxy + Nvxy * (iz + m_Nz * it);
174  w[iv] *= fac;
175  }
176  }
177  }
178  }
179 
180 
181 //====================================================================
183  double *v2)
184  {
185  int Nvcd = m_Nvc * m_Nd;
186  int Nvxy = Nvcd * m_Nx * m_Ny;
187 
188  int isite = m_arg[itask].isite;
189  double *w2 = &v2[Nvcd * isite];
190 
191  for (int it = 0; it < m_Mt; ++it) {
192  for (int iz = 0; iz < m_Mz; ++iz) {
193  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
194  int iv = ivxy + Nvxy * (iz + m_Nz * it);
195  w2[iv] = 0.0;
196  }
197  }
198  }
199  }
200 
201 
202 //====================================================================
204  int itask, double *vcp1, const double *v1)
205  {
206  int Nvcd = m_Nvc * m_Nd;
207 
208  int id1 = 0;
209  int id2 = m_Nvc;
210  int id3 = m_Nvc * 2;
211  int id4 = m_Nvc * 3;
212 
213  int isite = m_arg[itask].isite;
214  int isite_cp = m_arg[itask].isite_cp_x;
215 
216  const double *w1 = &v1[Nvcd * isite];
217  double *w2 = &vcp1[Nvcd * isite_cp];
218 
219  int idir = 0;
220  double bc2 = m_boundary2[idir];
221 
222  int ix = 0;
223 
224  for (int it = 0; it < m_Mt; ++it) {
225  for (int iz = 0; iz < m_Mz; ++iz) {
226  for (int iy = 0; iy < m_Ny; ++iy) {
227  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
228  int is2 = iy + m_Ny * (iz + m_Mz * it);
229  int in = Nvcd * is;
230  int ix1 = Nvcd * is2;
231  int ix2 = ix1 + m_Nvc;
232  int ix3 = ix2 + m_Nvc;
233  int ix4 = ix3 + m_Nvc;
234 
235  for (int ic = 0; ic < m_Nc; ++ic) {
236  int ic_r = 2 * ic;
237  int ic_i = 2 * ic + 1;
238 
239  w2[ic_r + ix1] = bc2 * (m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id4 + in]);
240  w2[ic_i + ix1] = bc2 * (m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id4 + in]);
241  w2[ic_r + ix2] = bc2 * (m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id3 + in]);
242  w2[ic_i + ix2] = bc2 * (m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id3 + in]);
243 
244  w2[ic_r + ix3] = bc2 * (m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id2 + in]);
245  w2[ic_i + ix3] = bc2 * (m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id2 + in]);
246  w2[ic_r + ix4] = bc2 * (m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id1 + in]);
247  w2[ic_i + ix4] = bc2 * (m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id1 + in]);
248  }
249  }
250  }
251  }
252  }
253 
254 
255 //====================================================================
257  int itask, double *v2, const double *vcp2)
258  {
259  int Nvcd = m_Nvc * m_Nd;
260 
261  int id1 = 0;
262  int id2 = m_Nvc;
263  int id3 = m_Nvc * 2;
264  int id4 = m_Nvc * 3;
265 
266  int idir = 0;
267 
268  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
269 
270  int isite = m_arg[itask].isite;
271  int isite_cp = m_arg[itask].isite_cp_x;
272 
273  const double *w1 = &vcp2[Nvcd * isite_cp];
274  double *w2 = &v2[Nvcd * isite];
275  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
276 
277  int ix = m_Nx - 1;
278 
279  for (int it = 0; it < m_Mt; ++it) {
280  for (int iz = 0; iz < m_Mz; ++iz) {
281  for (int iy = 0; iy < m_Ny; ++iy) {
282  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
283  int is2 = iy + m_Ny * (iz + m_Mz * it);
284  int iv = Nvcd * is;
285  int ig = m_Ndf * is;
286  int ix1 = Nvcd * is2;
287  int ix2 = ix1 + m_Nvc;
288  int ix3 = ix2 + m_Nvc;
289  int ix4 = ix3 + m_Nvc;
290 
291  for (int ic = 0; ic < m_Nc; ++ic) {
292  int ic2 = ic * m_Nvc;
293 
294  wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
295  wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
296  wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
297  wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
298 
299  wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
300  wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
301  wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
302  wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
303 
304  int ic_r = 2 * ic;
305  int ic_i = 2 * ic + 1;
306 
307  w2[ic_r + id1 + iv] += wt1_r;
308  w2[ic_i + id1 + iv] += wt1_i;
309  w2[ic_r + id2 + iv] += wt2_r;
310  w2[ic_i + id2 + iv] += wt2_i;
311 
312  w2[ic_r + id3 + iv] += wt3_r;
313  w2[ic_i + id3 + iv] += wt3_i;
314  w2[ic_r + id4 + iv] += wt4_r;
315  w2[ic_i + id4 + iv] += wt4_i;
316  }
317  }
318  }
319  }
320  }
321 
322 
323 //====================================================================
325  int itask, double *v2, const double *v1)
326  {
327  int Nvcd = m_Nvc * m_Nd;
328 
329  int id1 = 0;
330  int id2 = m_Nvc;
331  int id3 = m_Nvc * 2;
332  int id4 = m_Nvc * 3;
333 
334  int idir = 0;
335 
336  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
337  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
338 
339  int isite = m_arg[itask].isite;
340 
341  const double *w1 = &v1[Nvcd * isite];
342  double *w2 = &v2[Nvcd * isite];
343  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
344 
345  for (int it = 0; it < m_Mt; ++it) {
346  for (int iz = 0; iz < m_Mz; ++iz) {
347  for (int iy = 0; iy < m_Ny; ++iy) {
348  for (int ix = 0; ix < m_Nx - 1; ++ix) {
349  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
350  int iv = Nvcd * is;
351  int in = Nvcd * (is + 1);
352  int ig = m_Ndf * is;
353 
354  for (int ic = 0; ic < m_Nc; ++ic) {
355  int ic_r = 2 * ic;
356  int ic_i = 2 * ic + 1;
357 
358  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id4 + in];
359  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id4 + in];
360  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id3 + in];
361  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id3 + in];
362 
363  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id2 + in];
364  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id2 + in];
365  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id1 + in];
366  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id1 + in];
367  }
368 
369  for (int ic = 0; ic < m_Nc; ++ic) {
370  int ic2 = ic * m_Nvc;
371 
372  wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
373  wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
374  wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
375  wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
376 
377  wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
378  wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
379  wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
380  wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
381 
382  int ic_r = 2 * ic;
383  int ic_i = 2 * ic + 1;
384 
385  w2[ic_r + id1 + iv] += wt1_r;
386  w2[ic_i + id1 + iv] += wt1_i;
387  w2[ic_r + id2 + iv] += wt2_r;
388  w2[ic_i + id2 + iv] += wt2_i;
389 
390  w2[ic_r + id3 + iv] += wt3_r;
391  w2[ic_i + id3 + iv] += wt3_i;
392  w2[ic_r + id4 + iv] += wt4_r;
393  w2[ic_i + id4 + iv] += wt4_i;
394  }
395  }
396  }
397  }
398  }
399  }
400 
401 
402 //====================================================================
404  int itask, double *vcp1, const double *v1)
405  {
406  int Nvcd = m_Nvc * m_Nd;
407 
408  int id1 = 0;
409  int id2 = m_Nvc;
410  int id3 = m_Nvc * 2;
411  int id4 = m_Nvc * 3;
412 
413  int idir = 0;
414 
415  int isite = m_arg[itask].isite;
416  int isite_cp = m_arg[itask].isite_cp_x;
417 
418  const double *w1 = &v1[Nvcd * isite];
419  double *w2 = &vcp1[Nvcd * isite_cp];
420  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
421 
422  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
423 
424  int ix = m_Nx - 1;
425 
426  for (int it = 0; it < m_Mt; ++it) {
427  for (int iz = 0; iz < m_Mz; ++iz) {
428  for (int iy = 0; iy < m_Ny; ++iy) {
429  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
430  int is2 = iy + m_Ny * (iz + m_Mz * it);
431  int in = Nvcd * is;
432  int ig = m_Ndf * is;
433  int ix1 = Nvcd * is2;
434  int ix2 = ix1 + m_Nvc;
435  int ix3 = ix2 + m_Nvc;
436  int ix4 = ix3 + m_Nvc;
437 
438  for (int ic = 0; ic < m_Nc; ++ic) {
439  int ic_r = 2 * ic;
440  int ic_i = 2 * ic + 1;
441 
442  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id4 + in];
443  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
444  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
445  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id3 + in];
446 
447  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id2 + in];
448  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
449  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
450  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id1 + in];
451  }
452 
453  for (int ic = 0; ic < m_Nc; ++ic) {
454  int ic2 = 2 * ic;
455 
456  int ic_r = 2 * ic;
457  int ic_i = 2 * ic + 1;
458 
459  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
460  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
461  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
462  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
463 
464  w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
465  w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
466  w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
467  w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
468  }
469  }
470  }
471  }
472  }
473 
474 
475 //====================================================================
477  int itask, double *v2, const double *vcp2)
478  {
479  int Nvcd = m_Nvc * m_Nd;
480 
481  int id1 = 0;
482  int id2 = m_Nvc;
483  int id3 = m_Nvc * 2;
484  int id4 = m_Nvc * 3;
485 
486  int idir = 0;
487  double bc2 = m_boundary2[idir];
488 
489  // double wt1_r,wt1_i, wt2_r,wt2_i;
490 
491  int isite = m_arg[itask].isite;
492  int isite_cp = m_arg[itask].isite_cp_x;
493 
494  const double *w1 = &vcp2[Nvcd * isite_cp];
495  double *w2 = &v2[Nvcd * isite];
496 
497  int ix = 0;
498 
499  for (int it = 0; it < m_Mt; ++it) {
500  for (int iz = 0; iz < m_Mz; ++iz) {
501  for (int iy = 0; iy < m_Ny; ++iy) {
502  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
503  int is2 = iy + m_Ny * (iz + m_Mz * it);
504  int iv = Nvcd * is;
505  int ix1 = Nvcd * is2;
506  int ix2 = ix1 + m_Nvc;
507  int ix3 = ix2 + m_Nvc;
508  int ix4 = ix3 + m_Nvc;
509 
510  for (int ic = 0; ic < m_Nc; ++ic) {
511  int ic_r = 2 * ic;
512  int ic_i = 2 * ic + 1;
513 
514  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
515  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
516  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
517  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
518 
519  w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
520  w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
521  w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
522  w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
523  }
524  }
525  }
526  }
527  }
528 
529 
530 //====================================================================
532  int itask, double *v2, const double *v1)
533  {
534  int Nvcd = m_Nvc * m_Nd;
535 
536  int id1 = 0;
537  int id2 = m_Nvc;
538  int id3 = m_Nvc * 2;
539  int id4 = m_Nvc * 3;
540 
541  int idir = 0;
542 
543  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
544  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
545 
546  int isite = m_arg[itask].isite;
547 
548  const double *w1 = &v1[Nvcd * isite];
549  double *w2 = &v2[Nvcd * isite];
550  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
551 
552  for (int it = 0; it < m_Mt; ++it) {
553  for (int iz = 0; iz < m_Mz; ++iz) {
554  for (int iy = 0; iy < m_Ny; ++iy) {
555  for (int ix = 1; ix < m_Nx; ++ix) {
556  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
557  int iv = Nvcd * is;
558  int in = Nvcd * (is - 1);
559  int ig = m_Ndf * (is - 1);
560 
561  for (int ic = 0; ic < m_Nc; ++ic) {
562  int ic_r = 2 * ic;
563  int ic_i = 2 * ic + 1;
564 
565  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id4 + in];
566  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
567  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
568  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id3 + in];
569 
570  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id2 + in];
571  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
572  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
573  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id1 + in];
574  }
575 
576  for (int ic = 0; ic < m_Nc; ++ic) {
577  int ic2 = 2 * ic;
578 
579  wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
580  wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
581  wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
582  wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
583 
584  wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
585  wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
586  wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
587  wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
588 
589  int ic_r = 2 * ic;
590  int ic_i = 2 * ic + 1;
591 
592  w2[ic_r + id1 + iv] += wt1_r;
593  w2[ic_i + id1 + iv] += wt1_i;
594  w2[ic_r + id2 + iv] += wt2_r;
595  w2[ic_i + id2 + iv] += wt2_i;
596 
597  w2[ic_r + id3 + iv] += wt3_r;
598  w2[ic_i + id3 + iv] += wt3_i;
599  w2[ic_r + id4 + iv] += wt4_r;
600  w2[ic_i + id4 + iv] += wt4_i;
601  }
602  }
603  }
604  }
605  }
606  }
607 
608 
609 //====================================================================
611  int itask, double *vcp1, const double *v1)
612  {
613  int Nvcd = m_Nvc * m_Nd;
614 
615  int id1 = 0;
616  int id2 = m_Nvc;
617  int id3 = m_Nvc * 2;
618  int id4 = m_Nvc * 3;
619 
620  int isite = m_arg[itask].isite;
621  int isite_cp = m_arg[itask].isite_cp_y;
622 
623  const double *w1 = &v1[Nvcd * isite];
624  double *w2 = &vcp1[Nvcd * isite_cp];
625 
626  int idir = 1;
627  double bc2 = m_boundary2[idir];
628 
629  int iy = 0;
630 
631  for (int it = 0; it < m_Mt; ++it) {
632  for (int iz = 0; iz < m_Mz; ++iz) {
633  for (int ix = 0; ix < m_Nx; ++ix) {
634  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
635  int is2 = ix + m_Nx * (iz + m_Mz * it);
636  int in = Nvcd * is;
637  int ix1 = Nvcd * is2;
638  int ix2 = ix1 + m_Nvc;
639  int ix3 = ix2 + m_Nvc;
640  int ix4 = ix3 + m_Nvc;
641 
642  for (int ic = 0; ic < m_Nc; ++ic) {
643  int ic_r = 2 * ic;
644  int ic_i = 2 * ic + 1;
645 
646  w2[ic_r + ix1] = bc2 * (m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_r + id4 + in]);
647  w2[ic_i + ix1] = bc2 * (m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_i + id4 + in]);
648  w2[ic_r + ix2] = bc2 * (m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_r + id3 + in]);
649  w2[ic_i + ix2] = bc2 * (m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_i + id3 + in]);
650 
651  w2[ic_r + ix3] = bc2 * (m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_r + id2 + in]);
652  w2[ic_i + ix3] = bc2 * (m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_i + id2 + in]);
653  w2[ic_r + ix4] = bc2 * (m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_r + id1 + in]);
654  w2[ic_i + ix4] = bc2 * (m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_i + id1 + in]);
655  }
656  }
657  }
658  }
659  }
660 
661 
662 //====================================================================
664  int itask, double *v2, const double *vcp2)
665  {
666  int Nvcd = m_Nvc * m_Nd;
667 
668  int id1 = 0;
669  int id2 = m_Nvc;
670  int id3 = m_Nvc * 2;
671  int id4 = m_Nvc * 3;
672 
673  int idir = 1;
674 
675  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
676 
677  int isite = m_arg[itask].isite;
678  int isite_cp = m_arg[itask].isite_cp_y;
679 
680  const double *w1 = &vcp2[Nvcd * isite_cp];
681  double *w2 = &v2[Nvcd * isite];
682  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
683 
684  int iy = m_Ny - 1;
685 
686  for (int it = 0; it < m_Mt; ++it) {
687  for (int iz = 0; iz < m_Mz; ++iz) {
688  for (int ix = 0; ix < m_Nx; ++ix) {
689  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
690  int is2 = ix + m_Nx * (iz + m_Mz * it);
691  int iv = Nvcd * is;
692  int ig = m_Ndf * is;
693  int ix1 = Nvcd * is2;
694  int ix2 = ix1 + m_Nvc;
695  int ix3 = ix2 + m_Nvc;
696  int ix4 = ix3 + m_Nvc;
697 
698  for (int ic = 0; ic < m_Nc; ++ic) {
699  int ic2 = ic * m_Nvc;
700 
701  wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
702  wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
703  wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
704  wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
705 
706  wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
707  wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
708  wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
709  wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
710 
711  int ic_r = 2 * ic;
712  int ic_i = 2 * ic + 1;
713 
714  w2[ic_r + id1 + iv] += wt1_r;
715  w2[ic_i + id1 + iv] += wt1_i;
716  w2[ic_r + id2 + iv] += wt2_r;
717  w2[ic_i + id2 + iv] += wt2_i;
718 
719  w2[ic_r + id3 + iv] += wt3_r;
720  w2[ic_i + id3 + iv] += wt3_i;
721  w2[ic_r + id4 + iv] += wt4_r;
722  w2[ic_i + id4 + iv] += wt4_i;
723  }
724  }
725  }
726  }
727  }
728 
729 
730 //====================================================================
732  int itask, double *v2, const double *v1)
733  {
734  int Nvcd = m_Nvc * m_Nd;
735 
736  int id1 = 0;
737  int id2 = m_Nvc;
738  int id3 = m_Nvc * 2;
739  int id4 = m_Nvc * 3;
740 
741  int idir = 1;
742 
743  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
744  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
745 
746  int isite = m_arg[itask].isite;
747 
748  const double *w1 = &v1[Nvcd * isite];
749  double *w2 = &v2[Nvcd * isite];
750  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
751 
752  for (int it = 0; it < m_Mt; ++it) {
753  for (int iz = 0; iz < m_Mz; ++iz) {
754  for (int iy = 0; iy < m_Ny - 1; ++iy) {
755  for (int ix = 0; ix < m_Nx; ++ix) {
756  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
757  int iv = Nvcd * is;
758  int in = Nvcd * (is + m_Nx);
759  int ig = m_Ndf * is;
760 
761  for (int ic = 0; ic < m_Nc; ++ic) {
762  int ic_r = 2 * ic;
763  int ic_i = 2 * ic + 1;
764 
765  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_r + id4 + in];
766  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_i + id4 + in];
767  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_r + id3 + in];
768  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_i + id3 + in];
769 
770  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_r + id2 + in];
771  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_i + id2 + in];
772  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_r + id1 + in];
773  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_i + id1 + in];
774  }
775 
776  for (int ic = 0; ic < m_Nc; ++ic) {
777  int ic2 = ic * m_Nvc;
778 
779  wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
780  wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
781  wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
782  wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
783 
784  wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
785  wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
786  wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
787  wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
788 
789  int ic_r = 2 * ic;
790  int ic_i = 2 * ic + 1;
791 
792  w2[ic_r + id1 + iv] += wt1_r;
793  w2[ic_i + id1 + iv] += wt1_i;
794  w2[ic_r + id2 + iv] += wt2_r;
795  w2[ic_i + id2 + iv] += wt2_i;
796 
797  w2[ic_r + id3 + iv] += wt3_r;
798  w2[ic_i + id3 + iv] += wt3_i;
799  w2[ic_r + id4 + iv] += wt4_r;
800  w2[ic_i + id4 + iv] += wt4_i;
801  }
802  }
803  }
804  }
805  }
806  }
807 
808 
809 //====================================================================
811  int itask, double *vcp1, const double *v1)
812  {
813  int Nvcd = m_Nvc * m_Nd;
814 
815  int id1 = 0;
816  int id2 = m_Nvc;
817  int id3 = m_Nvc * 2;
818  int id4 = m_Nvc * 3;
819 
820  int idir = 1;
821 
822  int isite = m_arg[itask].isite;
823  int isite_cp = m_arg[itask].isite_cp_y;
824 
825  const double *w1 = &v1[Nvcd * isite];
826  double *w2 = &vcp1[Nvcd * isite_cp];
827  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
828 
829  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
830 
831  int iy = m_Ny - 1;
832 
833  for (int it = 0; it < m_Mt; ++it) {
834  for (int iz = 0; iz < m_Mz; ++iz) {
835  for (int ix = 0; ix < m_Nx; ++ix) {
836  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
837  int is2 = ix + m_Nx * (iz + m_Mz * it);
838  int in = Nvcd * is;
839  int ig = m_Ndf * is;
840  int ix1 = Nvcd * is2;
841  int ix2 = ix1 + m_Nvc;
842  int ix3 = ix2 + m_Nvc;
843  int ix4 = ix3 + m_Nvc;
844 
845  for (int ic = 0; ic < m_Nc; ++ic) {
846  int ic_r = 2 * ic;
847  int ic_i = 2 * ic + 1;
848 
849  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
850  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_i + id4 + in];
851  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_r + id3 + in];
852  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
853 
854  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
855  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_i + id2 + in];
856  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_r + id1 + in];
857  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
858  }
859 
860  for (int ic = 0; ic < m_Nc; ++ic) {
861  int ic2 = 2 * ic;
862 
863  int ic_r = 2 * ic;
864  int ic_i = 2 * ic + 1;
865 
866  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
867  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
868  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
869  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
870 
871  w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
872  w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
873  w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
874  w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
875  }
876  }
877  }
878  }
879  }
880 
881 
882 //====================================================================
884  int itask, double *v2, const double *vcp2)
885  {
886  int Nvcd = m_Nvc * m_Nd;
887 
888  int id1 = 0;
889  int id2 = m_Nvc;
890  int id3 = m_Nvc * 2;
891  int id4 = m_Nvc * 3;
892 
893  int idir = 1;
894  double bc2 = m_boundary2[idir];
895 
896  // double wt1_r,wt1_i, wt2_r,wt2_i;
897 
898  int isite = m_arg[itask].isite;
899  int isite_cp = m_arg[itask].isite_cp_y;
900 
901  const double *w1 = &vcp2[Nvcd * isite_cp];
902  double *w2 = &v2[Nvcd * isite];
903 
904  int iy = 0;
905 
906  for (int it = 0; it < m_Mt; ++it) {
907  for (int iz = 0; iz < m_Mz; ++iz) {
908  for (int ix = 0; ix < m_Nx; ++ix) {
909  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
910  int is2 = ix + m_Nx * (iz + m_Mz * it);
911  int iv = Nvcd * is;
912  int ix1 = Nvcd * is2;
913  int ix2 = ix1 + m_Nvc;
914  int ix3 = ix2 + m_Nvc;
915  int ix4 = ix3 + m_Nvc;
916 
917  for (int ic = 0; ic < m_Nc; ++ic) {
918  int ic_r = 2 * ic;
919  int ic_i = 2 * ic + 1;
920 
921  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
922  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
923  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
924  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
925 
926  w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
927  w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
928  w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
929  w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
930  }
931  }
932  }
933  }
934  }
935 
936 
937 //====================================================================
939  int itask, double *v2, const double *v1)
940  {
941  int Nvcd = m_Nvc * m_Nd;
942 
943  int id1 = 0;
944  int id2 = m_Nvc;
945  int id3 = m_Nvc * 2;
946  int id4 = m_Nvc * 3;
947 
948  int idir = 1;
949 
950  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
951  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
952 
953  int isite = m_arg[itask].isite;
954 
955  const double *w1 = &v1[Nvcd * isite];
956  double *w2 = &v2[Nvcd * isite];
957  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
958 
959  for (int it = 0; it < m_Mt; ++it) {
960  for (int iz = 0; iz < m_Mz; ++iz) {
961  for (int iy = 1; iy < m_Ny; ++iy) {
962  for (int ix = 0; ix < m_Nx; ++ix) {
963  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
964  int iv = Nvcd * is;
965  int in = Nvcd * (is - m_Nx);
966  int ig = m_Ndf * (is - m_Nx);
967 
968  for (int ic = 0; ic < m_Nc; ++ic) {
969  int ic_r = 2 * ic;
970  int ic_i = 2 * ic + 1;
971 
972  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
973  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_i + id4 + in];
974  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_r + id3 + in];
975  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
976 
977  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
978  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_i + id2 + in];
979  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_r + id1 + in];
980  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
981  }
982 
983  for (int ic = 0; ic < m_Nc; ++ic) {
984  int ic2 = 2 * ic;
985 
986  wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
987  wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
988  wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
989  wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
990 
991  wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
992  wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
993  wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
994  wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
995 
996  int ic_r = 2 * ic;
997  int ic_i = 2 * ic + 1;
998 
999  w2[ic_r + id1 + iv] += wt1_r;
1000  w2[ic_i + id1 + iv] += wt1_i;
1001  w2[ic_r + id2 + iv] += wt2_r;
1002  w2[ic_i + id2 + iv] += wt2_i;
1003 
1004  w2[ic_r + id3 + iv] += wt3_r;
1005  w2[ic_i + id3 + iv] += wt3_i;
1006  w2[ic_r + id4 + iv] += wt4_r;
1007  w2[ic_i + id4 + iv] += wt4_i;
1008  }
1009  }
1010  }
1011  }
1012  }
1013  }
1014 
1015 
1016 //====================================================================
1018  int itask, double *vcp1, const double *v1)
1019  {
1020  int Nvcd = m_Nvc * m_Nd;
1021 
1022  int id1 = 0;
1023  int id2 = m_Nvc;
1024  int id3 = m_Nvc * 2;
1025  int id4 = m_Nvc * 3;
1026 
1027  int isite = m_arg[itask].isite;
1028  int isite_cp = m_arg[itask].isite_cp_z;
1029 
1030  const double *w1 = &v1[Nvcd * isite];
1031  double *w2 = &vcp1[Nvcd * isite_cp];
1032 
1033  int idir = 2;
1034  double bc2 = m_boundary2[idir];
1035 
1036  if (m_arg[itask].kz0 == 1) {
1037  int Nxy = m_Nx * m_Ny;
1038  int iz = 0;
1039  for (int it = 0; it < m_Mt; ++it) {
1040  for (int ixy = 0; ixy < Nxy; ++ixy) {
1041  int is = ixy + Nxy * (iz + m_Nz * it);
1042  int is2 = ixy + Nxy * it;
1043 
1044  int in = Nvcd * is;
1045  int ix1 = Nvcd * is2;
1046  int ix2 = ix1 + m_Nvc;
1047  int ix3 = ix2 + m_Nvc;
1048  int ix4 = ix3 + m_Nvc;
1049 
1050  for (int ic = 0; ic < m_Nc; ++ic) {
1051  int ic_r = 2 * ic;
1052  int ic_i = 2 * ic + 1;
1053 
1054  w2[ic_r + ix1] = bc2 * (m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id3 + in]);
1055  w2[ic_i + ix1] = bc2 * (m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id3 + in]);
1056  w2[ic_r + ix2] = bc2 * (m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id4 + in]);
1057  w2[ic_i + ix2] = bc2 * (m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id4 + in]);
1058 
1059  w2[ic_r + ix3] = bc2 * (m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id1 + in]);
1060  w2[ic_i + ix3] = bc2 * (m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id1 + in]);
1061  w2[ic_r + ix4] = bc2 * (m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id2 + in]);
1062  w2[ic_i + ix4] = bc2 * (m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id2 + in]);
1063  }
1064  }
1065  }
1066  }
1067  }
1068 
1069 
1070 //====================================================================
1072  int itask, double *v2, const double *vcp2)
1073  {
1074  int Nvcd = m_Nvc * m_Nd;
1075 
1076  int id1 = 0;
1077  int id2 = m_Nvc;
1078  int id3 = m_Nvc * 2;
1079  int id4 = m_Nvc * 3;
1080 
1081  int idir = 2;
1082 
1083  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
1084 
1085  int isite = m_arg[itask].isite;
1086  int isite_cp = m_arg[itask].isite_cp_z;
1087 
1088  const double *w1 = &vcp2[Nvcd * isite_cp];
1089  double *w2 = &v2[Nvcd * isite];
1090  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1091 
1092  if (m_arg[itask].kz1 == 1) {
1093  int Nxy = m_Nx * m_Ny;
1094  int iz = m_Mz - 1;
1095  for (int it = 0; it < m_Mt; ++it) {
1096  for (int ixy = 0; ixy < Nxy; ++ixy) {
1097  int is = ixy + Nxy * (iz + m_Nz * it);
1098  int is2 = ixy + Nxy * it;
1099  int iv = Nvcd * is;
1100  int ig = m_Ndf * is;
1101  int ix1 = Nvcd * is2;
1102  int ix2 = ix1 + m_Nvc;
1103  int ix3 = ix2 + m_Nvc;
1104  int ix4 = ix3 + m_Nvc;
1105 
1106  for (int ic = 0; ic < m_Nc; ++ic) {
1107  int ic2 = ic * m_Nvc;
1108 
1109  wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1110  wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1111  wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1112  wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1113 
1114  wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
1115  wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
1116  wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
1117  wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
1118 
1119  int ic_r = 2 * ic;
1120  int ic_i = 2 * ic + 1;
1121 
1122  w2[ic_r + id1 + iv] += wt1_r;
1123  w2[ic_i + id1 + iv] += wt1_i;
1124  w2[ic_r + id2 + iv] += wt2_r;
1125  w2[ic_i + id2 + iv] += wt2_i;
1126 
1127  w2[ic_r + id3 + iv] += wt3_r;
1128  w2[ic_i + id3 + iv] += wt3_i;
1129  w2[ic_r + id4 + iv] += wt4_r;
1130  w2[ic_i + id4 + iv] += wt4_i;
1131  }
1132  }
1133  }
1134  }
1135  }
1136 
1137 
1138 //====================================================================
1140  int itask, double *v2, const double *v1)
1141  {
1142  int Nvcd = m_Nvc * m_Nd;
1143 
1144  int id1 = 0;
1145  int id2 = m_Nvc;
1146  int id3 = m_Nvc * 2;
1147  int id4 = m_Nvc * 3;
1148 
1149  int idir = 2;
1150 
1151  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
1152  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
1153 
1154  int isite = m_arg[itask].isite;
1155 
1156  const double *w1 = &v1[Nvcd * isite];
1157  double *w2 = &v2[Nvcd * isite];
1158  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1159 
1160  int kz1 = m_arg[itask].kz1;
1161  int Nxy = m_Nx * m_Ny;
1162 
1163  for (int it = 0; it < m_Mt; ++it) {
1164  for (int iz = 0; iz < m_Mz - kz1; ++iz) {
1165  for (int ixy = 0; ixy < Nxy; ++ixy) {
1166  int is = ixy + Nxy * (iz + m_Nz * it);
1167  int iv = Nvcd * is;
1168  int in = Nvcd * (is + Nxy);
1169  int ig = m_Ndf * is;
1170 
1171  for (int ic = 0; ic < m_Nc; ++ic) {
1172  int ic_r = 2 * ic;
1173  int ic_i = 2 * ic + 1;
1174 
1175  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id3 + in];
1176  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id3 + in];
1177  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id4 + in];
1178  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id4 + in];
1179 
1180  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id1 + in];
1181  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id1 + in];
1182  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id2 + in];
1183  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id2 + in];
1184  }
1185 
1186  for (int ic = 0; ic < m_Nc; ++ic) {
1187  int ic2 = ic * m_Nvc;
1188 
1189  wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1190  wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1191  wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1192  wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1193 
1194  wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
1195  wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
1196  wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
1197  wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
1198 
1199  int ic_r = 2 * ic;
1200  int ic_i = 2 * ic + 1;
1201 
1202  w2[ic_r + id1 + iv] += wt1_r;
1203  w2[ic_i + id1 + iv] += wt1_i;
1204  w2[ic_r + id2 + iv] += wt2_r;
1205  w2[ic_i + id2 + iv] += wt2_i;
1206 
1207  w2[ic_r + id3 + iv] += wt3_r;
1208  w2[ic_i + id3 + iv] += wt3_i;
1209  w2[ic_r + id4 + iv] += wt4_r;
1210  w2[ic_i + id4 + iv] += wt4_i;
1211  }
1212  }
1213  }
1214  }
1215  }
1216 
1217 
1218 //====================================================================
1220  int itask, double *vcp1, const double *v1)
1221  {
1222  int Nvcd = m_Nvc * m_Nd;
1223 
1224  int id1 = 0;
1225  int id2 = m_Nvc;
1226  int id3 = m_Nvc * 2;
1227  int id4 = m_Nvc * 3;
1228 
1229  int idir = 2;
1230 
1231  int isite = m_arg[itask].isite;
1232  int isite_cp = m_arg[itask].isite_cp_z;
1233 
1234  const double *w1 = &v1[Nvcd * isite];
1235  double *w2 = &vcp1[Nvcd * isite_cp];
1236  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1237 
1238  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
1239 
1240  if (m_arg[itask].kz1 == 1) {
1241  int Nxy = m_Nx * m_Ny;
1242  int iz = m_Mz - 1;
1243  for (int it = 0; it < m_Mt; ++it) {
1244  for (int ixy = 0; ixy < Nxy; ++ixy) {
1245  int is = ixy + Nxy * (iz + m_Nz * it);
1246  int is2 = ixy + Nxy * it;
1247  int in = Nvcd * is;
1248  int ig = m_Ndf * is;
1249  int ix1 = Nvcd * is2;
1250  int ix2 = ix1 + m_Nvc;
1251  int ix3 = ix2 + m_Nvc;
1252  int ix4 = ix3 + m_Nvc;
1253 
1254  for (int ic = 0; ic < m_Nc; ++ic) {
1255  int ic_r = 2 * ic;
1256  int ic_i = 2 * ic + 1;
1257 
1258  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id3 + in];
1259  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id3 + in];
1260  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id4 + in];
1261  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id4 + in];
1262 
1263  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id1 + in];
1264  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id1 + in];
1265  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id2 + in];
1266  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id2 + in];
1267  }
1268 
1269  for (int ic = 0; ic < m_Nc; ++ic) {
1270  int ic2 = 2 * ic;
1271 
1272  int ic_r = 2 * ic;
1273  int ic_i = 2 * ic + 1;
1274 
1275  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1276  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1277  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1278  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1279 
1280  w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
1281  w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
1282  w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
1283  w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
1284  }
1285  }
1286  }
1287  }
1288  }
1289 
1290 
1291 //====================================================================
1293  int itask, double *v2, const double *vcp2)
1294  {
1295  int Nvcd = m_Nvc * m_Nd;
1296 
1297  int id1 = 0;
1298  int id2 = m_Nvc;
1299  int id3 = m_Nvc * 2;
1300  int id4 = m_Nvc * 3;
1301 
1302  int idir = 2;
1303  double bc2 = m_boundary2[idir];
1304 
1305  // double wt1_r,wt1_i, wt2_r,wt2_i;
1306 
1307  int isite = m_arg[itask].isite;
1308  int isite_cp = m_arg[itask].isite_cp_z;
1309 
1310  const double *w1 = &vcp2[Nvcd * isite_cp];
1311  double *w2 = &v2[Nvcd * isite];
1312 
1313  if (m_arg[itask].kz0 == 1) {
1314  int Nxy = m_Nx * m_Ny;
1315 
1316  int iz = 0;
1317  for (int it = 0; it < m_Mt; ++it) {
1318  for (int ixy = 0; ixy < Nxy; ++ixy) {
1319  int is = ixy + Nxy * (iz + m_Nz * it);
1320  int is2 = ixy + Nxy * it;
1321  int iv = Nvcd * is;
1322  int ix1 = Nvcd * is2;
1323  int ix2 = ix1 + m_Nvc;
1324  int ix3 = ix2 + m_Nvc;
1325  int ix4 = ix3 + m_Nvc;
1326 
1327  for (int ic = 0; ic < m_Nc; ++ic) {
1328  int ic_r = 2 * ic;
1329  int ic_i = 2 * ic + 1;
1330 
1331  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1332  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1333  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1334  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1335 
1336  w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
1337  w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
1338  w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
1339  w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
1340  }
1341  }
1342  }
1343  }
1344  }
1345 
1346 
1347 //====================================================================
1349  int itask, double *v2, const double *v1)
1350  {
1351  int Nvcd = m_Nvc * m_Nd;
1352 
1353  int id1 = 0;
1354  int id2 = m_Nvc;
1355  int id3 = m_Nvc * 2;
1356  int id4 = m_Nvc * 3;
1357 
1358  int idir = 2;
1359 
1360  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
1361  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
1362 
1363  int isite = m_arg[itask].isite;
1364 
1365  const double *w1 = &v1[Nvcd * isite];
1366  double *w2 = &v2[Nvcd * isite];
1367  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1368 
1369  int kz0 = m_arg[itask].kz0;
1370  int Nxy = m_Nx * m_Ny;
1371 
1372  for (int it = 0; it < m_Mt; ++it) {
1373  for (int iz = kz0; iz < m_Mz; ++iz) {
1374  for (int ixy = 0; ixy < Nxy; ++ixy) {
1375  int is = ixy + Nxy * (iz + m_Nz * it);
1376  int iv = Nvcd * is;
1377  int in = Nvcd * (is - Nxy);
1378  int ig = m_Ndf * (is - Nxy);
1379 
1380  for (int ic = 0; ic < m_Nc; ++ic) {
1381  int ic_r = 2 * ic;
1382  int ic_i = 2 * ic + 1;
1383 
1384  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id3 + in];
1385  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id3 + in];
1386  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id4 + in];
1387  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id4 + in];
1388 
1389  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id1 + in];
1390  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id1 + in];
1391  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id2 + in];
1392  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id2 + in];
1393  }
1394 
1395  for (int ic = 0; ic < m_Nc; ++ic) {
1396  int ic2 = 2 * ic;
1397 
1398  wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1399  wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1400  wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1401  wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1402 
1403  wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
1404  wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
1405  wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
1406  wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
1407 
1408  int ic_r = 2 * ic;
1409  int ic_i = 2 * ic + 1;
1410 
1411  w2[ic_r + id1 + iv] += wt1_r;
1412  w2[ic_i + id1 + iv] += wt1_i;
1413  w2[ic_r + id2 + iv] += wt2_r;
1414  w2[ic_i + id2 + iv] += wt2_i;
1415 
1416  w2[ic_r + id3 + iv] += wt3_r;
1417  w2[ic_i + id3 + iv] += wt3_i;
1418  w2[ic_r + id4 + iv] += wt4_r;
1419  w2[ic_i + id4 + iv] += wt4_i;
1420  }
1421  }
1422  }
1423  }
1424  }
1425 
1426 
1427 //====================================================================
1429  int itask, double *vcp1, const double *v1)
1430  {
1431  int Nvc2 = 2 * m_Nvc;
1432  int Nvcd = m_Nvc * m_Nd;
1433  int Nvcd2 = Nvcd / 2;
1434 
1435  // int id1 = 0;
1436  // int id2 = m_Nvc;
1437  int id3 = m_Nvc * 2;
1438  int id4 = m_Nvc * 3;
1439 
1440  int isite = m_arg[itask].isite;
1441  int isite_cp = m_arg[itask].isite_cp_t;
1442 
1443  const double *w1 = &v1[Nvcd * isite];
1444  double *w2 = &vcp1[Nvcd2 * isite_cp];
1445 
1446  int idir = 3;
1447  double bc2 = m_boundary2[idir];
1448 
1449  if (m_arg[itask].kt0 == 1) {
1450  int Nxy = m_Nx * m_Ny;
1451  int it = 0;
1452  for (int iz = 0; iz < m_Mz; ++iz) {
1453  for (int ixy = 0; ixy < Nxy; ++ixy) {
1454  int is = ixy + Nxy * (iz + m_Nz * it);
1455  int is2 = ixy + Nxy * iz;
1456 
1457  int in = Nvcd * is;
1458  int ix1 = Nvc2 * is2;
1459  int ix2 = ix1 + m_Nvc;
1460 
1461  for (int ic = 0; ic < m_Nc; ++ic) {
1462  int ic_r = 2 * ic;
1463  int ic_i = 2 * ic + 1;
1464 
1465  w2[ic_r + ix1] = 2.0 * bc2 * w1[ic_r + id3 + in];
1466  w2[ic_i + ix1] = 2.0 * bc2 * w1[ic_i + id3 + in];
1467  w2[ic_r + ix2] = 2.0 * bc2 * w1[ic_r + id4 + in];
1468  w2[ic_i + ix2] = 2.0 * bc2 * w1[ic_i + id4 + in];
1469  }
1470  }
1471  }
1472  }
1473  }
1474 
1475 
1476 //====================================================================
1478  int itask, double *v2, const double *vcp2)
1479  {
1480  int Nvc2 = 2 * m_Nvc;
1481  int Nvcd = m_Nvc * m_Nd;
1482  int Nvcd2 = Nvcd / 2;
1483 
1484  // int id1 = 0;
1485  // int id2 = m_Nvc;
1486  int id3 = m_Nvc * 2;
1487  int id4 = m_Nvc * 3;
1488 
1489  int idir = 3;
1490 
1491  double wt1_r, wt1_i, wt2_r, wt2_i;
1492 
1493  int isite = m_arg[itask].isite;
1494  int isite_cp = m_arg[itask].isite_cp_t;
1495 
1496  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1497  double *w2 = &v2[Nvcd * isite];
1498  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1499 
1500  if (m_arg[itask].kt1 == 1) {
1501  int Nxy = m_Nx * m_Ny;
1502  int it = m_Mt - 1;
1503  for (int iz = 0; iz < m_Mz; ++iz) {
1504  for (int ixy = 0; ixy < Nxy; ++ixy) {
1505  int is = ixy + Nxy * (iz + m_Nz * it);
1506  int is2 = ixy + Nxy * iz;
1507  int iv = Nvcd * is;
1508  int ig = m_Ndf * is;
1509  int ix1 = Nvc2 * is2;
1510  int ix2 = ix1 + m_Nvc;
1511 
1512  for (int ic = 0; ic < m_Nc; ++ic) {
1513  int ic2 = ic * m_Nvc;
1514 
1515  wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1516  wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1517  wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1518  wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1519 
1520  int ic_r = 2 * ic;
1521  int ic_i = 2 * ic + 1;
1522 
1523  w2[ic_r + id3 + iv] += wt1_r;
1524  w2[ic_i + id3 + iv] += wt1_i;
1525  w2[ic_r + id4 + iv] += wt2_r;
1526  w2[ic_i + id4 + iv] += wt2_i;
1527  }
1528  }
1529  }
1530  }
1531  }
1532 
1533 
1534 //====================================================================
1536  int itask, double *v2, const double *v1)
1537  {
1538  int Nvcd = m_Nvc * m_Nd;
1539 
1540  // int id1 = 0;
1541  // int id2 = m_Nvc;
1542  int id3 = m_Nvc * 2;
1543  int id4 = m_Nvc * 3;
1544 
1545  int idir = 3;
1546 
1547  double vt1[m_Nvc], vt2[m_Nvc];
1548  double wt1_r, wt1_i, wt2_r, wt2_i;
1549 
1550  int isite = m_arg[itask].isite;
1551 
1552  const double *w1 = &v1[Nvcd * isite];
1553  double *w2 = &v2[Nvcd * isite];
1554  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1555 
1556  int kt1 = m_arg[itask].kt1;
1557  int Nxy = m_Nx * m_Ny;
1558  int Nxyz = Nxy * m_Nz;
1559 
1560  for (int it = 0; it < m_Mt - kt1; ++it) {
1561  for (int iz = 0; iz < m_Mz; ++iz) {
1562  for (int ixy = 0; ixy < Nxy; ++ixy) {
1563  int is = ixy + Nxy * (iz + m_Nz * it);
1564  int iv = Nvcd * is;
1565  int in = Nvcd * (is + Nxyz);
1566  int ig = m_Ndf * is;
1567 
1568  for (int ic = 0; ic < m_Nc; ++ic) {
1569  int ic_r = 2 * ic;
1570  int ic_i = 2 * ic + 1;
1571 
1572  vt1[ic_r] = 2.0 * w1[ic_r + id3 + in];
1573  vt1[ic_i] = 2.0 * w1[ic_i + id3 + in];
1574  vt2[ic_r] = 2.0 * w1[ic_r + id4 + in];
1575  vt2[ic_i] = 2.0 * w1[ic_i + id4 + in];
1576  }
1577 
1578  for (int ic = 0; ic < m_Nc; ++ic) {
1579  int ic2 = ic * m_Nvc;
1580 
1581  wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1582  wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1583  wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1584  wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1585 
1586  int ic_r = 2 * ic;
1587  int ic_i = 2 * ic + 1;
1588 
1589  w2[ic_r + id3 + iv] += wt1_r;
1590  w2[ic_i + id3 + iv] += wt1_i;
1591  w2[ic_r + id4 + iv] += wt2_r;
1592  w2[ic_i + id4 + iv] += wt2_i;
1593  }
1594  }
1595  }
1596  }
1597  }
1598 
1599 
1600 //====================================================================
1602  int itask, double *vcp1, const double *v1)
1603  {
1604  int Nvc2 = 2 * m_Nvc;
1605  int Nvcd = m_Nvc * m_Nd;
1606  int Nvcd2 = Nvcd / 2;
1607 
1608  int id1 = 0;
1609  int id2 = m_Nvc;
1610  // int id3 = m_Nvc * 2;
1611  // int id4 = m_Nvc * 3;
1612 
1613  int idir = 3;
1614 
1615  int isite = m_arg[itask].isite;
1616  int isite_cp = m_arg[itask].isite_cp_t;
1617 
1618  const double *w1 = &v1[Nvcd * isite];
1619  double *w2 = &vcp1[Nvcd2 * isite_cp];
1620  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1621 
1622  double vt1[m_Nvc], vt2[m_Nvc];
1623 
1624  if (m_arg[itask].kt1 == 1) {
1625  int Nxy = m_Nx * m_Ny;
1626  int it = m_Mt - 1;
1627  for (int iz = 0; iz < m_Mz; ++iz) {
1628  for (int ixy = 0; ixy < Nxy; ++ixy) {
1629  int is = ixy + Nxy * (iz + m_Nz * it);
1630  int is2 = ixy + Nxy * iz;
1631  int in = Nvcd * is;
1632  int ig = m_Ndf * is;
1633  int ix1 = Nvc2 * is2;
1634  int ix2 = ix1 + m_Nvc;
1635 
1636  for (int ic = 0; ic < m_Nc; ++ic) {
1637  int ic_r = 2 * ic;
1638  int ic_i = 2 * ic + 1;
1639 
1640  vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1641  vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1642  vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1643  vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1644  }
1645 
1646  for (int ic = 0; ic < m_Nc; ++ic) {
1647  int ic2 = 2 * ic;
1648 
1649  int ic_r = 2 * ic;
1650  int ic_i = 2 * ic + 1;
1651 
1652  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1653  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1654  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1655  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1656  }
1657  }
1658  }
1659  }
1660  }
1661 
1662 
1663 //====================================================================
1665  int itask, double *v2, const double *vcp2)
1666  {
1667  int Nvc2 = 2 * m_Nvc;
1668  int Nvcd = m_Nvc * m_Nd;
1669  int Nvcd2 = Nvcd / 2;
1670 
1671  int id1 = 0;
1672  int id2 = m_Nvc;
1673  // int id3 = m_Nvc * 2;
1674  // int id4 = m_Nvc * 3;
1675 
1676  int idir = 3;
1677  double bc2 = m_boundary2[idir];
1678 
1679  // double wt1_r,wt1_i, wt2_r,wt2_i;
1680 
1681  int isite = m_arg[itask].isite;
1682  int isite_cp = m_arg[itask].isite_cp_t;
1683 
1684  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1685  double *w2 = &v2[Nvcd * isite];
1686 
1687  if (m_arg[itask].kt0 == 1) {
1688  int Nxy = m_Nx * m_Ny;
1689  int it = 0;
1690  for (int iz = 0; iz < m_Mz; ++iz) {
1691  for (int ixy = 0; ixy < Nxy; ++ixy) {
1692  int is = ixy + Nxy * (iz + m_Nz * it);
1693  int is2 = ixy + Nxy * iz;
1694  int iv = Nvcd * is;
1695  int ix1 = Nvc2 * is2;
1696  int ix2 = ix1 + m_Nvc;
1697 
1698  for (int ic = 0; ic < m_Nc; ++ic) {
1699  int ic_r = 2 * ic;
1700  int ic_i = 2 * ic + 1;
1701 
1702  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1703  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1704  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1705  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1706  }
1707  }
1708  }
1709  }
1710  }
1711 
1712 
1713 //====================================================================
1715  int itask, double *v2, const double *v1)
1716  {
1717  int Nvcd = m_Nvc * m_Nd;
1718 
1719  int id1 = 0;
1720  int id2 = m_Nvc;
1721  // int id3 = m_Nvc * 2;
1722  // int id4 = m_Nvc * 3;
1723 
1724  int idir = 3;
1725 
1726  double vt1[m_Nvc], vt2[m_Nvc];
1727  double wt1_r, wt1_i, wt2_r, wt2_i;
1728 
1729  int isite = m_arg[itask].isite;
1730 
1731  const double *w1 = &v1[Nvcd * isite];
1732  double *w2 = &v2[Nvcd * isite];
1733  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1734 
1735  int kt0 = m_arg[itask].kt0;
1736  int Nxy = m_Nx * m_Ny;
1737  int Nxyz = Nxy * m_Nz;
1738 
1739  for (int it = kt0; it < m_Mt; ++it) {
1740  for (int iz = 0; iz < m_Mz; ++iz) {
1741  for (int ixy = 0; ixy < Nxy; ++ixy) {
1742  int is = ixy + Nxy * (iz + m_Nz * it);
1743  int iv = Nvcd * is;
1744  int in = Nvcd * (is - Nxyz);
1745  int ig = m_Ndf * (is - Nxyz);
1746 
1747  for (int ic = 0; ic < m_Nc; ++ic) {
1748  int ic_r = 2 * ic;
1749  int ic_i = 2 * ic + 1;
1750 
1751  vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1752  vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1753  vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1754  vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1755  }
1756 
1757  for (int ic = 0; ic < m_Nc; ++ic) {
1758  int ic2 = 2 * ic;
1759 
1760  wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1761  wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1762  wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1763  wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1764 
1765  int ic_r = 2 * ic;
1766  int ic_i = 2 * ic + 1;
1767 
1768  w2[ic_r + id1 + iv] += wt1_r;
1769  w2[ic_i + id1 + iv] += wt1_i;
1770  w2[ic_r + id2 + iv] += wt2_r;
1771  w2[ic_i + id2 + iv] += wt2_i;
1772  }
1773  }
1774  }
1775  }
1776  }
1777 
1778 
1779 //====================================================================
1781  int itask, double *vcp1, const double *v1)
1782  {
1783  int Nvc2 = 2 * m_Nvc;
1784  int Nvcd = m_Nvc * m_Nd;
1785  int Nvcd2 = Nvcd / 2;
1786 
1787  int id1 = 0;
1788  int id2 = m_Nvc;
1789  int id3 = m_Nvc * 2;
1790  int id4 = m_Nvc * 3;
1791 
1792  int isite = m_arg[itask].isite;
1793  int isite_cp = m_arg[itask].isite_cp_t;
1794 
1795  const double *w1 = &v1[Nvcd * isite];
1796  double *w2 = &vcp1[Nvcd2 * isite_cp];
1797 
1798  int idir = 3;
1799  double bc2 = m_boundary2[idir];
1800 
1801  if (m_arg[itask].kt0 == 1) {
1802  int Nxy = m_Nx * m_Ny;
1803  int it = 0;
1804  for (int iz = 0; iz < m_Mz; ++iz) {
1805  for (int ixy = 0; ixy < Nxy; ++ixy) {
1806  int is = ixy + Nxy * (iz + m_Nz * it);
1807  int is2 = ixy + Nxy * iz;
1808 
1809  int in = Nvcd * is;
1810  int ix1 = Nvc2 * is2;
1811  int ix2 = ix1 + m_Nvc;
1812 
1813  for (int ic = 0; ic < m_Nc; ++ic) {
1814  int ic_r = 2 * ic;
1815  int ic_i = 2 * ic + 1;
1816 
1817  w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id3 + in]);
1818  w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id3 + in]);
1819  w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_r + id4 + in]);
1820  w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_i + id4 + in]);
1821  }
1822  }
1823  }
1824  }
1825  }
1826 
1827 
1828 //====================================================================
1830  int itask, double *v2, const double *vcp2)
1831  {
1832  int Nvc2 = 2 * m_Nvc;
1833  int Nvcd = m_Nvc * m_Nd;
1834  int Nvcd2 = Nvcd / 2;
1835 
1836  int id1 = 0;
1837  int id2 = m_Nvc;
1838  int id3 = m_Nvc * 2;
1839  int id4 = m_Nvc * 3;
1840 
1841  int idir = 3;
1842 
1843  double wt1_r, wt1_i, wt2_r, wt2_i;
1844 
1845  int isite = m_arg[itask].isite;
1846  int isite_cp = m_arg[itask].isite_cp_t;
1847 
1848 
1849  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1850  double *w2 = &v2[Nvcd * isite];
1851  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1852 
1853  if (m_arg[itask].kt1 == 1) {
1854  int Nxy = m_Nx * m_Ny;
1855  int it = m_Mt - 1;
1856  for (int iz = 0; iz < m_Mz; ++iz) {
1857  for (int ixy = 0; ixy < Nxy; ++ixy) {
1858  int is = ixy + Nxy * (iz + m_Nz * it);
1859  int is2 = ixy + Nxy * iz;
1860  int iv = Nvcd * is;
1861  int ig = m_Ndf * is;
1862  int ix1 = Nvc2 * is2;
1863  int ix2 = ix1 + m_Nvc;
1864 
1865  for (int ic = 0; ic < m_Nc; ++ic) {
1866  int ic2 = ic * m_Nvc;
1867 
1868  wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1869  wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1870  wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1871  wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1872 
1873  int ic_r = 2 * ic;
1874  int ic_i = 2 * ic + 1;
1875 
1876  w2[ic_r + id1 + iv] += wt1_r;
1877  w2[ic_i + id1 + iv] += wt1_i;
1878  w2[ic_r + id2 + iv] += wt2_r;
1879  w2[ic_i + id2 + iv] += wt2_i;
1880 
1881  w2[ic_r + id3 + iv] += wt1_r;
1882  w2[ic_i + id3 + iv] += wt1_i;
1883  w2[ic_r + id4 + iv] += wt2_r;
1884  w2[ic_i + id4 + iv] += wt2_i;
1885  }
1886  }
1887  }
1888  }
1889  }
1890 
1891 
1892 //====================================================================
1894  int itask, double *v2, const double *v1)
1895  {
1896  int Nvcd = m_Nvc * m_Nd;
1897 
1898  int id1 = 0;
1899  int id2 = m_Nvc;
1900  int id3 = m_Nvc * 2;
1901  int id4 = m_Nvc * 3;
1902 
1903  int idir = 3;
1904 
1905  double vt1[m_Nvc], vt2[m_Nvc];
1906  double wt1_r, wt1_i, wt2_r, wt2_i;
1907 
1908  int isite = m_arg[itask].isite;
1909 
1910  const double *w1 = &v1[Nvcd * isite];
1911  double *w2 = &v2[Nvcd * isite];
1912  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1913 
1914  int kt1 = m_arg[itask].kt1;
1915  int Nxy = m_Nx * m_Ny;
1916  int Nxyz = Nxy * m_Nz;
1917 
1918  for (int it = 0; it < m_Mt - kt1; ++it) {
1919  for (int iz = 0; iz < m_Mz; ++iz) {
1920  for (int ixy = 0; ixy < Nxy; ++ixy) {
1921  int is = ixy + Nxy * (iz + m_Nz * it);
1922  int iv = Nvcd * is;
1923  int in = Nvcd * (is + Nxyz);
1924  int ig = m_Ndf * is;
1925 
1926  for (int ic = 0; ic < m_Nc; ++ic) {
1927  int ic_r = 2 * ic;
1928  int ic_i = 2 * ic + 1;
1929 
1930  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id3 + in];
1931  vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id3 + in];
1932  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id4 + in];
1933  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id4 + in];
1934  }
1935 
1936  for (int ic = 0; ic < m_Nc; ++ic) {
1937  int ic2 = ic * m_Nvc;
1938 
1939  wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1940  wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1941  wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1942  wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1943 
1944  int ic_r = 2 * ic;
1945  int ic_i = 2 * ic + 1;
1946 
1947  w2[ic_r + id1 + iv] += wt1_r;
1948  w2[ic_i + id1 + iv] += wt1_i;
1949  w2[ic_r + id2 + iv] += wt2_r;
1950  w2[ic_i + id2 + iv] += wt2_i;
1951 
1952  w2[ic_r + id3 + iv] += wt1_r;
1953  w2[ic_i + id3 + iv] += wt1_i;
1954  w2[ic_r + id4 + iv] += wt2_r;
1955  w2[ic_i + id4 + iv] += wt2_i;
1956  }
1957  }
1958  }
1959  }
1960  }
1961 
1962 
1963 //====================================================================
1965  int itask, double *vcp1, const double *v1)
1966  {
1967  int Nvc2 = 2 * m_Nvc;
1968  int Nvcd = m_Nvc * m_Nd;
1969  int Nvcd2 = Nvcd / 2;
1970 
1971  int id1 = 0;
1972  int id2 = m_Nvc;
1973  int id3 = m_Nvc * 2;
1974  int id4 = m_Nvc * 3;
1975 
1976  int idir = 3;
1977 
1978  int isite = m_arg[itask].isite;
1979  int isite_cp = m_arg[itask].isite_cp_t;
1980 
1981 
1982  const double *w1 = &v1[Nvcd * isite];
1983  double *w2 = &vcp1[Nvcd2 * isite_cp];
1984  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1985 
1986  double vt1[m_Nvc], vt2[m_Nvc];
1987 
1988  if (m_arg[itask].kt1 == 1) {
1989  int Nxy = m_Nx * m_Ny;
1990  int it = m_Mt - 1;
1991  for (int iz = 0; iz < m_Mz; ++iz) {
1992  for (int ixy = 0; ixy < Nxy; ++ixy) {
1993  int is = ixy + Nxy * (iz + m_Nz * it);
1994  int is2 = ixy + Nxy * iz;
1995  int in = Nvcd * is;
1996  int ig = m_Ndf * is;
1997  int ix1 = Nvc2 * is2;
1998  int ix2 = ix1 + m_Nvc;
1999 
2000  for (int ic = 0; ic < m_Nc; ++ic) {
2001  int ic_r = 2 * ic;
2002  int ic_i = 2 * ic + 1;
2003 
2004  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
2005  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
2006  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
2007  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
2008  }
2009 
2010  for (int ic = 0; ic < m_Nc; ++ic) {
2011  int ic2 = 2 * ic;
2012 
2013  int ic_r = 2 * ic;
2014  int ic_i = 2 * ic + 1;
2015 
2016  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
2017  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
2018  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2019  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2020  }
2021  }
2022  }
2023  }
2024  }
2025 
2026 
2027 //====================================================================
2029  int itask, double *v2, const double *vcp2)
2030  {
2031  int Nvc2 = 2 * m_Nvc;
2032  int Nvcd = m_Nvc * m_Nd;
2033  int Nvcd2 = Nvcd / 2;
2034 
2035  int id1 = 0;
2036  int id2 = m_Nvc;
2037  int id3 = m_Nvc * 2;
2038  int id4 = m_Nvc * 3;
2039 
2040  int idir = 3;
2041  double bc2 = m_boundary2[idir];
2042 
2043  // double wt1_r,wt1_i, wt2_r,wt2_i;
2044 
2045  int isite = m_arg[itask].isite;
2046  int isite_cp = m_arg[itask].isite_cp_t;
2047 
2048  const double *w1 = &vcp2[Nvcd2 * isite_cp];
2049  double *w2 = &v2[Nvcd * isite];
2050 
2051  if (m_arg[itask].kt0 == 1) {
2052  int Nxy = m_Nx * m_Ny;
2053  int it = 0;
2054  for (int iz = 0; iz < m_Mz; ++iz) {
2055  for (int ixy = 0; ixy < Nxy; ++ixy) {
2056  int is = ixy + Nxy * (iz + m_Nz * it);
2057  int is2 = ixy + Nxy * iz;
2058  int iv = Nvcd * is;
2059  int ix1 = Nvc2 * is2;
2060  int ix2 = ix1 + m_Nvc;
2061 
2062  for (int ic = 0; ic < m_Nc; ++ic) {
2063  int ic_r = 2 * ic;
2064  int ic_i = 2 * ic + 1;
2065 
2066  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
2067  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
2068  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
2069  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
2070 
2071  w2[ic_r + id3 + iv] -= bc2 * w1[ic_r + ix1];
2072  w2[ic_i + id3 + iv] -= bc2 * w1[ic_i + ix1];
2073  w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix2];
2074  w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix2];
2075  }
2076  }
2077  }
2078  }
2079  }
2080 
2081 
2082 //====================================================================
2084  int itask, double *v2, const double *v1)
2085  {
2086  int Nvcd = m_Nvc * m_Nd;
2087 
2088  int id1 = 0;
2089  int id2 = m_Nvc;
2090  int id3 = m_Nvc * 2;
2091  int id4 = m_Nvc * 3;
2092 
2093  int idir = 3;
2094 
2095  double vt1[m_Nvc], vt2[m_Nvc];
2096  double wt1_r, wt1_i, wt2_r, wt2_i;
2097 
2098  int isite = m_arg[itask].isite;
2099 
2100  const double *w1 = &v1[Nvcd * isite];
2101  double *w2 = &v2[Nvcd * isite];
2102  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
2103 
2104  int kt0 = m_arg[itask].kt0;
2105  int Nxy = m_Nx * m_Ny;
2106  int Nxyz = Nxy * m_Nz;
2107 
2108  for (int it = kt0; it < m_Mt; ++it) {
2109  for (int iz = 0; iz < m_Mz; ++iz) {
2110  for (int ixy = 0; ixy < Nxy; ++ixy) {
2111  int is = ixy + Nxy * (iz + m_Nz * it);
2112  int iv = Nvcd * is;
2113  int in = Nvcd * (is - Nxyz);
2114  int ig = m_Ndf * (is - Nxyz);
2115 
2116  for (int ic = 0; ic < m_Nc; ++ic) {
2117  int ic_r = 2 * ic;
2118  int ic_i = 2 * ic + 1;
2119 
2120  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
2121  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
2122  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
2123  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
2124  }
2125 
2126  for (int ic = 0; ic < m_Nc; ++ic) {
2127  int ic2 = 2 * ic;
2128 
2129  wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
2130  wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
2131  wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2132  wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2133 
2134  int ic_r = 2 * ic;
2135  int ic_i = 2 * ic + 1;
2136 
2137  w2[ic_r + id1 + iv] += wt1_r;
2138  w2[ic_i + id1 + iv] += wt1_i;
2139  w2[ic_r + id2 + iv] += wt2_r;
2140  w2[ic_i + id2 + iv] += wt2_i;
2141 
2142  w2[ic_r + id3 + iv] -= wt1_r;
2143  w2[ic_i + id3 + iv] -= wt1_i;
2144  w2[ic_r + id4 + iv] -= wt2_r;
2145  w2[ic_i + id4 + iv] -= wt2_i;
2146  }
2147  }
2148  }
2149  }
2150  }
2151 
2152 
2153 //====================================================================
2155  int itask, double *v2, const double *v1)
2156  {
2157  int Nvcd = m_Nvc * m_Nd;
2158  int Nxy = m_Nx * m_Ny;
2159 
2160  int id1 = 0;
2161  int id2 = m_Nvc;
2162  int id3 = m_Nvc * 2;
2163  int id4 = m_Nvc * 3;
2164 
2165  int isite = m_arg[itask].isite;
2166 
2167  const double *w1 = &v1[Nvcd * isite];
2168  double *w2 = &v2[Nvcd * isite];
2169 
2170  for (int it = 0; it < m_Mt; ++it) {
2171  for (int iz = 0; iz < m_Mz; ++iz) {
2172  for (int ixy = 0; ixy < Nxy; ++ixy) {
2173  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2174  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2175  w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2176  w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2177  w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2178  w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
2179  }
2180  }
2181  }
2182  }
2183  }
2184 
2185 
2186 //====================================================================
2188  int itask, double *v2, const double *v1)
2189  {
2190  int Nvcd = m_Nvc * m_Nd;
2191  int Nxy = m_Nx * m_Ny;
2192 
2193  int id1 = 0;
2194  int id2 = m_Nvc;
2195  int id3 = m_Nvc * 2;
2196  int id4 = m_Nvc * 3;
2197 
2198  int isite = m_arg[itask].isite;
2199 
2200  const double *w1 = &v1[Nvcd * isite];
2201  double *w2 = &v2[Nvcd * isite];
2202 
2203  for (int it = 0; it < m_Mt; ++it) {
2204  for (int iz = 0; iz < m_Mz; ++iz) {
2205  for (int ixy = 0; ixy < Nxy; ++ixy) {
2206  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2207  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2208  w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2209  w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2210  w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2211  w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2212  }
2213  }
2214  }
2215  }
2216  }
2217 
2218 
2219 //====================================================================
2220 }
2221 //============================================================END=====
void mult_t_plus1_chiral_thread(int, double *, const double *)
BridgeIO vout
Definition: bridgeIO.cpp:495
const double * ptr(const int jin, const int site, const int jex) const
Definition: field.h:142
void mult_y_plus2_thread(int, double *, const double *)
void mult_t_plus2_dirac_thread(int, double *, const double *)
void general(const char *format,...)
Definition: bridgeIO.cpp:195
void mult_x_minus_bulk_thread(int, double *, const double *)
void mult_x_plus2_thread(int, double *, const double *)
void mult_t_plus_bulk_dirac_thread(int, double *, const double *)
void mult_t_minus2_chiral_thread(int, double *, const double *)
void gm5_dirac_thread(int, double *, const double *)
void daxpy_thread(int, double *, double, const double *)
void gm5_chiral_thread(int, double *, const double *)
void mult_x_minus1_thread(int, double *, const double *)
void mult_y_plus_bulk_thread(int, double *, const double *)
void mult_x_minus2_thread(int, double *, const double *)
void mult_z_plus_bulk_thread(int, double *, const double *)
void mult_y_minus_bulk_thread(int, double *, const double *)
void mult_z_minus1_thread(int, double *, const double *)
void mult_t_minus1_dirac_thread(int, double *, const double *)
void mult_t_minus_bulk_chiral_thread(int, double *, const double *)
void mult_x_plus1_thread(int, double *, const double *)
const Field_G * m_U
gauge configuration.
void daypx_thread(int, double *, double, const double *)
std::vector< double > m_boundary2
b.c. for each node.
void mult_t_plus2_chiral_thread(int, double *, const double *)
void mult_x_plus_bulk_thread(int, double *, const double *)
void mult_t_minus2_dirac_thread(int, double *, const double *)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_y_minus1_thread(int, double *, const double *)
void crucial(const char *format,...)
Definition: bridgeIO.cpp:178
void mult_y_minus2_thread(int, double *, const double *)
void mult_z_plus2_thread(int, double *, const double *)
void scal_thread(int, double *, double)
static const std::string class_name
void mult_t_plus_bulk_chiral_thread(int, double *, const double *)
void mult_z_minus_bulk_thread(int, double *, const double *)
void mult_z_plus1_thread(int, double *, const double *)
std::vector< mult_arg > m_arg
void mult_y_plus1_thread(int, double *, const double *)
void mult_t_minus1_chiral_thread(int, double *, const double *)
void mult_t_plus1_dirac_thread(int, double *, const double *)
void mult_t_minus_bulk_dirac_thread(int, double *, const double *)
void mult_z_minus2_thread(int, double *, const double *)