Bridge++  Ver. 1.3.x
fopr_Wilson_General_impl_thread.cpp
Go to the documentation of this file.
1 
15 
16 #if defined USE_GROUP_SU3
17 #include "fopr_Wilson_impl_SU3.inc"
18 #elif defined USE_GROUP_SU2
19 #include "fopr_Wilson_impl_SU2.inc"
20 #elif defined USE_GROUP_SU_N
21 #include "fopr_Wilson_impl_SU_N.inc"
22 #endif
23 
24 // const std::string Fopr_Wilson_General::Fopr_Wilson_General_impl::class_name = "Fopr_Wilson_General";
25 
26 //====================================================================
28 {
30 
31  // The following setup corresponds to uniform division of volume.
32  if (m_Nthread <= m_Nt) {
34  } else if (m_Nthread <= m_Nz * m_Nt) {
35  m_Ntask_t = m_Nt;
36  } else {
37  vout.crucial(m_vl, "%s: Too large Nthread = %d\n", class_name.c_str(), m_Nthread);
38  exit(EXIT_FAILURE);
39  }
40 
42 
43  if (m_Ntask_z * m_Ntask_t != m_Nthread) {
44  vout.crucial(m_vl, "%s: Nz = %d and Nt = %d do not match Nthread = %d\n",
45  class_name.c_str(), m_Nz, m_Nt, m_Nthread);
46  exit(EXIT_FAILURE);
47  }
48 
50  m_Mz = m_Nz / m_Ntask_z;
51  m_Mt = m_Nt / m_Ntask_t;
52 
53  if (m_Mz * m_Ntask_z != m_Nz) {
54  vout.crucial(m_vl, "%s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
55  class_name.c_str(), m_Mz, m_Ntask_z, m_Nz);
56  exit(EXIT_FAILURE);
57  }
58 
59  if (m_Mt * m_Ntask_t != m_Nt) {
60  vout.crucial(m_vl, "%s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
61  class_name.c_str(), m_Mt, m_Ntask_t, m_Nt);
62  exit(EXIT_FAILURE);
63  }
64 
65  // The following setup is not monotonic division, and requires
66  // barrier at the beginning and end of mult (D and gamma5).
67  // [H.Matsufuru 22 Oct 2013]
68  // if(m_Nthread >= 64){
69  // m_Ntask_z = 8;
70  // }else if(m_Nthread >= 16){
71  // m_Ntask_z = 4;
72  // }else if(m_Nthread >= 4){
73  // m_Ntask_z = 2;
74  // }else{
75  // m_Ntask_z = 1;
76  // }
77  // m_Ntask_t = m_Nthread/m_Ntask_z;
78  // m_Ntask = m_Ntask_t * m_Ntask_z;
79  // m_Mz = m_Nz/m_Ntask_z;
80  // m_Mt = m_Nt/m_Ntask_t;
81 
82  vout.general(m_vl, " Nthread = %d\n", m_Nthread);
83  vout.general(m_vl, " Ntask = %d\n", m_Ntask);
84  vout.general(m_vl, " Ntask_z = %d Ntask_t = %d\n", m_Ntask_z, m_Ntask_t);
85  vout.general(m_vl, " Mz = %d Mt = %d\n", m_Mz, m_Mt);
86 
87  //- setup of arguments
88  int Nxy = m_Nx * m_Ny;
89  m_arg.resize(m_Ntask);
90  for (int ithread_t = 0; ithread_t < m_Ntask_t; ++ithread_t) {
91  for (int ithread_z = 0; ithread_z < m_Ntask_z; ++ithread_z) {
92  int itask = ithread_z + m_Ntask_z * ithread_t;
93 
94  m_arg[itask].isite = (ithread_z * m_Mz + ithread_t * (m_Nz * m_Mt)) * Nxy;
95 
96  m_arg[itask].kt0 = 0;
97  m_arg[itask].kt1 = 0;
98  m_arg[itask].kz0 = 0;
99  m_arg[itask].kz1 = 0;
100  if (ithread_t == 0) m_arg[itask].kt0 = 1;
101  if (ithread_z == 0) m_arg[itask].kz0 = 1;
102  if (ithread_t == m_Ntask_t - 1) m_arg[itask].kt1 = 1;
103  if (ithread_z == m_Ntask_z - 1) m_arg[itask].kz1 = 1;
104 
105  m_arg[itask].isite_cp_x = itask * m_Mz * m_Mt * m_Ny;
106  m_arg[itask].isite_cp_y = itask * m_Mz * m_Mt * m_Nx;
107  m_arg[itask].isite_cp_z = ithread_t * m_Mt * Nxy;
108  m_arg[itask].isite_cp_t = ithread_z * m_Mz * Nxy;
109  }
110  }
111 }
112 
113 
114 //====================================================================
116  int itask, double *v2, double fac, const double *v1)
117 {
118  int Nvcd = m_Nvc * m_Nd;
119  int Nvxy = Nvcd * m_Nx * m_Ny;
120 
121  int isite = m_arg[itask].isite;
122 
123  const double *w1 = &v1[Nvcd * isite];
124  double *w2 = &v2[Nvcd * isite];
125 
126  for (int it = 0; it < m_Mt; ++it) {
127  for (int iz = 0; iz < m_Mz; ++iz) {
128  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
129  int iv = ivxy + Nvxy * (iz + m_Nz * it);
130  w2[iv] += fac * w1[iv];
131  }
132  }
133  }
134 }
135 
136 
137 //====================================================================
139  int itask, double *v2, double fac, const double *v1)
140 {
141  int Nvcd = m_Nvc * m_Nd;
142  int Nvxy = Nvcd * m_Nx * m_Ny;
143 
144  int isite = m_arg[itask].isite;
145  const double *w1 = &v1[Nvcd * isite];
146  double *w2 = &v2[Nvcd * isite];
147 
148  for (int it = 0; it < m_Mt; ++it) {
149  for (int iz = 0; iz < m_Mz; ++iz) {
150  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
151  int iv = ivxy + Nvxy * (iz + m_Nz * it);
152  w2[iv] = fac * w2[iv] + w1[iv];
153  }
154  }
155  }
156 }
157 
158 
159 //====================================================================
161  double *v, double fac)
162 {
163  int Nvcd = m_Nvc * m_Nd;
164  int Nvxy = Nvcd * m_Nx * m_Ny;
165 
166  int isite = m_arg[itask].isite;
167  double *w = &v[Nvcd * isite];
168 
169  for (int it = 0; it < m_Mt; ++it) {
170  for (int iz = 0; iz < m_Mz; ++iz) {
171  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
172  int iv = ivxy + Nvxy * (iz + m_Nz * it);
173  w[iv] *= fac;
174  }
175  }
176  }
177 }
178 
179 
180 //====================================================================
182  double *v2)
183 {
184  int Nvcd = m_Nvc * m_Nd;
185  int Nvxy = Nvcd * m_Nx * m_Ny;
186 
187  int isite = m_arg[itask].isite;
188  double *w2 = &v2[Nvcd * isite];
189 
190  for (int it = 0; it < m_Mt; ++it) {
191  for (int iz = 0; iz < m_Mz; ++iz) {
192  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
193  int iv = ivxy + Nvxy * (iz + m_Nz * it);
194  w2[iv] = 0.0;
195  }
196  }
197  }
198 }
199 
200 
201 //====================================================================
203  int itask, double *vcp1, const double *v1)
204 {
205  int Nvcd = m_Nvc * m_Nd;
206 
207  int id1 = 0;
208  int id2 = m_Nvc;
209  int id3 = m_Nvc * 2;
210  int id4 = m_Nvc * 3;
211 
212  int isite = m_arg[itask].isite;
213  int isite_cp = m_arg[itask].isite_cp_x;
214 
215  const double *w1 = &v1[Nvcd * isite];
216  double *w2 = &vcp1[Nvcd * isite_cp];
217 
218  int idir = 0;
219  double bc2 = m_boundary2[idir];
220 
221  int ix = 0;
222 
223  for (int it = 0; it < m_Mt; ++it) {
224  for (int iz = 0; iz < m_Mz; ++iz) {
225  for (int iy = 0; iy < m_Ny; ++iy) {
226  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
227  int is2 = iy + m_Ny * (iz + m_Mz * it);
228  int in = Nvcd * is;
229  int ix1 = Nvcd * is2;
230  int ix2 = ix1 + m_Nvc;
231  int ix3 = ix2 + m_Nvc;
232  int ix4 = ix3 + m_Nvc;
233 
234  for (int ic = 0; ic < m_Nc; ++ic) {
235  int ic_r = 2 * ic;
236  int ic_i = 2 * ic + 1;
237 
238  w2[ic_r + ix1] = bc2 * (m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id4 + in]);
239  w2[ic_i + ix1] = bc2 * (m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id4 + in]);
240  w2[ic_r + ix2] = bc2 * (m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id3 + in]);
241  w2[ic_i + ix2] = bc2 * (m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id3 + in]);
242 
243  w2[ic_r + ix3] = bc2 * (m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id2 + in]);
244  w2[ic_i + ix3] = bc2 * (m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id2 + in]);
245  w2[ic_r + ix4] = bc2 * (m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id1 + in]);
246  w2[ic_i + ix4] = bc2 * (m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id1 + in]);
247  }
248  }
249  }
250  }
251 }
252 
253 
254 //====================================================================
256  int itask, double *v2, const double *vcp2)
257 {
258  int Nvcd = m_Nvc * m_Nd;
259 
260  int id1 = 0;
261  int id2 = m_Nvc;
262  int id3 = m_Nvc * 2;
263  int id4 = m_Nvc * 3;
264 
265  int idir = 0;
266 
267  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
268 
269  int isite = m_arg[itask].isite;
270  int isite_cp = m_arg[itask].isite_cp_x;
271 
272  const double *w1 = &vcp2[Nvcd * isite_cp];
273  double *w2 = &v2[Nvcd * isite];
274  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
275 
276  int ix = m_Nx - 1;
277 
278  for (int it = 0; it < m_Mt; ++it) {
279  for (int iz = 0; iz < m_Mz; ++iz) {
280  for (int iy = 0; iy < m_Ny; ++iy) {
281  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
282  int is2 = iy + m_Ny * (iz + m_Mz * it);
283  int iv = Nvcd * is;
284  int ig = m_Ndf * is;
285  int ix1 = Nvcd * is2;
286  int ix2 = ix1 + m_Nvc;
287  int ix3 = ix2 + m_Nvc;
288  int ix4 = ix3 + m_Nvc;
289 
290  for (int ic = 0; ic < m_Nc; ++ic) {
291  int ic2 = ic * m_Nvc;
292 
293  wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
294  wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
295  wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
296  wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
297 
298  wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
299  wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
300  wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
301  wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
302 
303  int ic_r = 2 * ic;
304  int ic_i = 2 * ic + 1;
305 
306  w2[ic_r + id1 + iv] += wt1_r;
307  w2[ic_i + id1 + iv] += wt1_i;
308  w2[ic_r + id2 + iv] += wt2_r;
309  w2[ic_i + id2 + iv] += wt2_i;
310 
311  w2[ic_r + id3 + iv] += wt3_r;
312  w2[ic_i + id3 + iv] += wt3_i;
313  w2[ic_r + id4 + iv] += wt4_r;
314  w2[ic_i + id4 + iv] += wt4_i;
315  }
316  }
317  }
318  }
319 }
320 
321 
322 //====================================================================
324  int itask, double *v2, const double *v1)
325 {
326  int Nvcd = m_Nvc * m_Nd;
327 
328  int id1 = 0;
329  int id2 = m_Nvc;
330  int id3 = m_Nvc * 2;
331  int id4 = m_Nvc * 3;
332 
333  int idir = 0;
334 
335  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
336  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
337 
338  int isite = m_arg[itask].isite;
339 
340  const double *w1 = &v1[Nvcd * isite];
341  double *w2 = &v2[Nvcd * isite];
342  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
343 
344  for (int it = 0; it < m_Mt; ++it) {
345  for (int iz = 0; iz < m_Mz; ++iz) {
346  for (int iy = 0; iy < m_Ny; ++iy) {
347  for (int ix = 0; ix < m_Nx - 1; ++ix) {
348  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
349  int iv = Nvcd * is;
350  int in = Nvcd * (is + 1);
351  int ig = m_Ndf * is;
352 
353  for (int ic = 0; ic < m_Nc; ++ic) {
354  int ic_r = 2 * ic;
355  int ic_i = 2 * ic + 1;
356 
357  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id4 + in];
358  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id4 + in];
359  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id3 + in];
360  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id3 + in];
361 
362  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id2 + in];
363  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id2 + in];
364  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id1 + in];
365  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id1 + in];
366  }
367 
368  for (int ic = 0; ic < m_Nc; ++ic) {
369  int ic2 = ic * m_Nvc;
370 
371  wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
372  wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
373  wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
374  wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
375 
376  wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
377  wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
378  wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
379  wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
380 
381  int ic_r = 2 * ic;
382  int ic_i = 2 * ic + 1;
383 
384  w2[ic_r + id1 + iv] += wt1_r;
385  w2[ic_i + id1 + iv] += wt1_i;
386  w2[ic_r + id2 + iv] += wt2_r;
387  w2[ic_i + id2 + iv] += wt2_i;
388 
389  w2[ic_r + id3 + iv] += wt3_r;
390  w2[ic_i + id3 + iv] += wt3_i;
391  w2[ic_r + id4 + iv] += wt4_r;
392  w2[ic_i + id4 + iv] += wt4_i;
393  }
394  }
395  }
396  }
397  }
398 }
399 
400 
401 //====================================================================
403  int itask, double *vcp1, const double *v1)
404 {
405  int Nvcd = m_Nvc * m_Nd;
406 
407  int id1 = 0;
408  int id2 = m_Nvc;
409  int id3 = m_Nvc * 2;
410  int id4 = m_Nvc * 3;
411 
412  int idir = 0;
413 
414  int isite = m_arg[itask].isite;
415  int isite_cp = m_arg[itask].isite_cp_x;
416 
417  const double *w1 = &v1[Nvcd * isite];
418  double *w2 = &vcp1[Nvcd * isite_cp];
419  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
420 
421  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
422 
423  int ix = m_Nx - 1;
424 
425  for (int it = 0; it < m_Mt; ++it) {
426  for (int iz = 0; iz < m_Mz; ++iz) {
427  for (int iy = 0; iy < m_Ny; ++iy) {
428  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
429  int is2 = iy + m_Ny * (iz + m_Mz * it);
430  int in = Nvcd * is;
431  int ig = m_Ndf * is;
432  int ix1 = Nvcd * is2;
433  int ix2 = ix1 + m_Nvc;
434  int ix3 = ix2 + m_Nvc;
435  int ix4 = ix3 + m_Nvc;
436 
437  for (int ic = 0; ic < m_Nc; ++ic) {
438  int ic_r = 2 * ic;
439  int ic_i = 2 * ic + 1;
440 
441  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id4 + in];
442  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
443  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
444  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id3 + in];
445 
446  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id2 + in];
447  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
448  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
449  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id1 + in];
450  }
451 
452  for (int ic = 0; ic < m_Nc; ++ic) {
453  int ic2 = 2 * ic;
454 
455  int ic_r = 2 * ic;
456  int ic_i = 2 * ic + 1;
457 
458  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
459  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
460  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
461  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
462 
463  w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
464  w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
465  w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
466  w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
467  }
468  }
469  }
470  }
471 }
472 
473 
474 //====================================================================
476  int itask, double *v2, const double *vcp2)
477 {
478  int Nvcd = m_Nvc * m_Nd;
479 
480  int id1 = 0;
481  int id2 = m_Nvc;
482  int id3 = m_Nvc * 2;
483  int id4 = m_Nvc * 3;
484 
485  int idir = 0;
486  double bc2 = m_boundary2[idir];
487 
488  // double wt1_r,wt1_i, wt2_r,wt2_i;
489 
490  int isite = m_arg[itask].isite;
491  int isite_cp = m_arg[itask].isite_cp_x;
492 
493  const double *w1 = &vcp2[Nvcd * isite_cp];
494  double *w2 = &v2[Nvcd * isite];
495 
496  int ix = 0;
497 
498  for (int it = 0; it < m_Mt; ++it) {
499  for (int iz = 0; iz < m_Mz; ++iz) {
500  for (int iy = 0; iy < m_Ny; ++iy) {
501  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
502  int is2 = iy + m_Ny * (iz + m_Mz * it);
503  int iv = Nvcd * is;
504  int ix1 = Nvcd * is2;
505  int ix2 = ix1 + m_Nvc;
506  int ix3 = ix2 + m_Nvc;
507  int ix4 = ix3 + m_Nvc;
508 
509  for (int ic = 0; ic < m_Nc; ++ic) {
510  int ic_r = 2 * ic;
511  int ic_i = 2 * ic + 1;
512 
513  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
514  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
515  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
516  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
517 
518  w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
519  w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
520  w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
521  w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
522  }
523  }
524  }
525  }
526 }
527 
528 
529 //====================================================================
531  int itask, double *v2, const double *v1)
532 {
533  int Nvcd = m_Nvc * m_Nd;
534 
535  int id1 = 0;
536  int id2 = m_Nvc;
537  int id3 = m_Nvc * 2;
538  int id4 = m_Nvc * 3;
539 
540  int idir = 0;
541 
542  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
543  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
544 
545  int isite = m_arg[itask].isite;
546 
547  const double *w1 = &v1[Nvcd * isite];
548  double *w2 = &v2[Nvcd * isite];
549  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
550 
551  for (int it = 0; it < m_Mt; ++it) {
552  for (int iz = 0; iz < m_Mz; ++iz) {
553  for (int iy = 0; iy < m_Ny; ++iy) {
554  for (int ix = 1; ix < m_Nx; ++ix) {
555  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
556  int iv = Nvcd * is;
557  int in = Nvcd * (is - 1);
558  int ig = m_Ndf * (is - 1);
559 
560  for (int ic = 0; ic < m_Nc; ++ic) {
561  int ic_r = 2 * ic;
562  int ic_i = 2 * ic + 1;
563 
564  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id4 + in];
565  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
566  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
567  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id3 + in];
568 
569  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id2 + in];
570  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
571  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
572  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id1 + in];
573  }
574 
575  for (int ic = 0; ic < m_Nc; ++ic) {
576  int ic2 = 2 * ic;
577 
578  wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
579  wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
580  wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
581  wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
582 
583  wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
584  wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
585  wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
586  wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
587 
588  int ic_r = 2 * ic;
589  int ic_i = 2 * ic + 1;
590 
591  w2[ic_r + id1 + iv] += wt1_r;
592  w2[ic_i + id1 + iv] += wt1_i;
593  w2[ic_r + id2 + iv] += wt2_r;
594  w2[ic_i + id2 + iv] += wt2_i;
595 
596  w2[ic_r + id3 + iv] += wt3_r;
597  w2[ic_i + id3 + iv] += wt3_i;
598  w2[ic_r + id4 + iv] += wt4_r;
599  w2[ic_i + id4 + iv] += wt4_i;
600  }
601  }
602  }
603  }
604  }
605 }
606 
607 
608 //====================================================================
610  int itask, double *vcp1, const double *v1)
611 {
612  int Nvcd = m_Nvc * m_Nd;
613 
614  int id1 = 0;
615  int id2 = m_Nvc;
616  int id3 = m_Nvc * 2;
617  int id4 = m_Nvc * 3;
618 
619  int isite = m_arg[itask].isite;
620  int isite_cp = m_arg[itask].isite_cp_y;
621 
622  const double *w1 = &v1[Nvcd * isite];
623  double *w2 = &vcp1[Nvcd * isite_cp];
624 
625  int idir = 1;
626  double bc2 = m_boundary2[idir];
627 
628  int iy = 0;
629 
630  for (int it = 0; it < m_Mt; ++it) {
631  for (int iz = 0; iz < m_Mz; ++iz) {
632  for (int ix = 0; ix < m_Nx; ++ix) {
633  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
634  int is2 = ix + m_Nx * (iz + m_Mz * it);
635  int in = Nvcd * is;
636  int ix1 = Nvcd * is2;
637  int ix2 = ix1 + m_Nvc;
638  int ix3 = ix2 + m_Nvc;
639  int ix4 = ix3 + m_Nvc;
640 
641  for (int ic = 0; ic < m_Nc; ++ic) {
642  int ic_r = 2 * ic;
643  int ic_i = 2 * ic + 1;
644 
645  w2[ic_r + ix1] = bc2 * (m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_r + id4 + in]);
646  w2[ic_i + ix1] = bc2 * (m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_i + id4 + in]);
647  w2[ic_r + ix2] = bc2 * (m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_r + id3 + in]);
648  w2[ic_i + ix2] = bc2 * (m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_i + id3 + in]);
649 
650  w2[ic_r + ix3] = bc2 * (m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_r + id2 + in]);
651  w2[ic_i + ix3] = bc2 * (m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_i + id2 + in]);
652  w2[ic_r + ix4] = bc2 * (m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_r + id1 + in]);
653  w2[ic_i + ix4] = bc2 * (m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_i + id1 + in]);
654  }
655  }
656  }
657  }
658 }
659 
660 
661 //====================================================================
663  int itask, double *v2, const double *vcp2)
664 {
665  int Nvcd = m_Nvc * m_Nd;
666 
667  int id1 = 0;
668  int id2 = m_Nvc;
669  int id3 = m_Nvc * 2;
670  int id4 = m_Nvc * 3;
671 
672  int idir = 1;
673 
674  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
675 
676  int isite = m_arg[itask].isite;
677  int isite_cp = m_arg[itask].isite_cp_y;
678 
679  const double *w1 = &vcp2[Nvcd * isite_cp];
680  double *w2 = &v2[Nvcd * isite];
681  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
682 
683  int iy = m_Ny - 1;
684 
685  for (int it = 0; it < m_Mt; ++it) {
686  for (int iz = 0; iz < m_Mz; ++iz) {
687  for (int ix = 0; ix < m_Nx; ++ix) {
688  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
689  int is2 = ix + m_Nx * (iz + m_Mz * it);
690  int iv = Nvcd * is;
691  int ig = m_Ndf * is;
692  int ix1 = Nvcd * is2;
693  int ix2 = ix1 + m_Nvc;
694  int ix3 = ix2 + m_Nvc;
695  int ix4 = ix3 + m_Nvc;
696 
697  for (int ic = 0; ic < m_Nc; ++ic) {
698  int ic2 = ic * m_Nvc;
699 
700  wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
701  wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
702  wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
703  wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
704 
705  wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
706  wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
707  wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
708  wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
709 
710  int ic_r = 2 * ic;
711  int ic_i = 2 * ic + 1;
712 
713  w2[ic_r + id1 + iv] += wt1_r;
714  w2[ic_i + id1 + iv] += wt1_i;
715  w2[ic_r + id2 + iv] += wt2_r;
716  w2[ic_i + id2 + iv] += wt2_i;
717 
718  w2[ic_r + id3 + iv] += wt3_r;
719  w2[ic_i + id3 + iv] += wt3_i;
720  w2[ic_r + id4 + iv] += wt4_r;
721  w2[ic_i + id4 + iv] += wt4_i;
722  }
723  }
724  }
725  }
726 }
727 
728 
729 //====================================================================
731  int itask, double *v2, const double *v1)
732 {
733  int Nvcd = m_Nvc * m_Nd;
734 
735  int id1 = 0;
736  int id2 = m_Nvc;
737  int id3 = m_Nvc * 2;
738  int id4 = m_Nvc * 3;
739 
740  int idir = 1;
741 
742  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
743  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
744 
745  int isite = m_arg[itask].isite;
746 
747  const double *w1 = &v1[Nvcd * isite];
748  double *w2 = &v2[Nvcd * isite];
749  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
750 
751  for (int it = 0; it < m_Mt; ++it) {
752  for (int iz = 0; iz < m_Mz; ++iz) {
753  for (int iy = 0; iy < m_Ny - 1; ++iy) {
754  for (int ix = 0; ix < m_Nx; ++ix) {
755  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
756  int iv = Nvcd * is;
757  int in = Nvcd * (is + m_Nx);
758  int ig = m_Ndf * is;
759 
760  for (int ic = 0; ic < m_Nc; ++ic) {
761  int ic_r = 2 * ic;
762  int ic_i = 2 * ic + 1;
763 
764  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_r + id4 + in];
765  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_i + id4 + in];
766  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_r + id3 + in];
767  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_i + id3 + in];
768 
769  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_r + id2 + in];
770  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_i + id2 + in];
771  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_r + id1 + in];
772  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_i + id1 + in];
773  }
774 
775  for (int ic = 0; ic < m_Nc; ++ic) {
776  int ic2 = ic * m_Nvc;
777 
778  wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
779  wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
780  wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
781  wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
782 
783  wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
784  wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
785  wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
786  wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
787 
788  int ic_r = 2 * ic;
789  int ic_i = 2 * ic + 1;
790 
791  w2[ic_r + id1 + iv] += wt1_r;
792  w2[ic_i + id1 + iv] += wt1_i;
793  w2[ic_r + id2 + iv] += wt2_r;
794  w2[ic_i + id2 + iv] += wt2_i;
795 
796  w2[ic_r + id3 + iv] += wt3_r;
797  w2[ic_i + id3 + iv] += wt3_i;
798  w2[ic_r + id4 + iv] += wt4_r;
799  w2[ic_i + id4 + iv] += wt4_i;
800  }
801  }
802  }
803  }
804  }
805 }
806 
807 
808 //====================================================================
810  int itask, double *vcp1, const double *v1)
811 {
812  int Nvcd = m_Nvc * m_Nd;
813 
814  int id1 = 0;
815  int id2 = m_Nvc;
816  int id3 = m_Nvc * 2;
817  int id4 = m_Nvc * 3;
818 
819  int idir = 1;
820 
821  int isite = m_arg[itask].isite;
822  int isite_cp = m_arg[itask].isite_cp_y;
823 
824  const double *w1 = &v1[Nvcd * isite];
825  double *w2 = &vcp1[Nvcd * isite_cp];
826  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
827 
828  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
829 
830  int iy = m_Ny - 1;
831 
832  for (int it = 0; it < m_Mt; ++it) {
833  for (int iz = 0; iz < m_Mz; ++iz) {
834  for (int ix = 0; ix < m_Nx; ++ix) {
835  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
836  int is2 = ix + m_Nx * (iz + m_Mz * it);
837  int in = Nvcd * is;
838  int ig = m_Ndf * is;
839  int ix1 = Nvcd * is2;
840  int ix2 = ix1 + m_Nvc;
841  int ix3 = ix2 + m_Nvc;
842  int ix4 = ix3 + m_Nvc;
843 
844  for (int ic = 0; ic < m_Nc; ++ic) {
845  int ic_r = 2 * ic;
846  int ic_i = 2 * ic + 1;
847 
848  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
849  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_i + id4 + in];
850  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_r + id3 + in];
851  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
852 
853  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
854  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_i + id2 + in];
855  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_r + id1 + in];
856  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
857  }
858 
859  for (int ic = 0; ic < m_Nc; ++ic) {
860  int ic2 = 2 * ic;
861 
862  int ic_r = 2 * ic;
863  int ic_i = 2 * ic + 1;
864 
865  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
866  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
867  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
868  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
869 
870  w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
871  w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
872  w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
873  w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
874  }
875  }
876  }
877  }
878 }
879 
880 
881 //====================================================================
883  int itask, double *v2, const double *vcp2)
884 {
885  int Nvcd = m_Nvc * m_Nd;
886 
887  int id1 = 0;
888  int id2 = m_Nvc;
889  int id3 = m_Nvc * 2;
890  int id4 = m_Nvc * 3;
891 
892  int idir = 1;
893  double bc2 = m_boundary2[idir];
894 
895  // double wt1_r,wt1_i, wt2_r,wt2_i;
896 
897  int isite = m_arg[itask].isite;
898  int isite_cp = m_arg[itask].isite_cp_y;
899 
900  const double *w1 = &vcp2[Nvcd * isite_cp];
901  double *w2 = &v2[Nvcd * isite];
902 
903  int iy = 0;
904 
905  for (int it = 0; it < m_Mt; ++it) {
906  for (int iz = 0; iz < m_Mz; ++iz) {
907  for (int ix = 0; ix < m_Nx; ++ix) {
908  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
909  int is2 = ix + m_Nx * (iz + m_Mz * it);
910  int iv = Nvcd * is;
911  int ix1 = Nvcd * is2;
912  int ix2 = ix1 + m_Nvc;
913  int ix3 = ix2 + m_Nvc;
914  int ix4 = ix3 + m_Nvc;
915 
916  for (int ic = 0; ic < m_Nc; ++ic) {
917  int ic_r = 2 * ic;
918  int ic_i = 2 * ic + 1;
919 
920  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
921  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
922  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
923  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
924 
925  w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
926  w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
927  w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
928  w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
929  }
930  }
931  }
932  }
933 }
934 
935 
936 //====================================================================
938  int itask, double *v2, const double *v1)
939 {
940  int Nvcd = m_Nvc * m_Nd;
941 
942  int id1 = 0;
943  int id2 = m_Nvc;
944  int id3 = m_Nvc * 2;
945  int id4 = m_Nvc * 3;
946 
947  int idir = 1;
948 
949  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
950  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
951 
952  int isite = m_arg[itask].isite;
953 
954  const double *w1 = &v1[Nvcd * isite];
955  double *w2 = &v2[Nvcd * isite];
956  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
957 
958  for (int it = 0; it < m_Mt; ++it) {
959  for (int iz = 0; iz < m_Mz; ++iz) {
960  for (int iy = 1; iy < m_Ny; ++iy) {
961  for (int ix = 0; ix < m_Nx; ++ix) {
962  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
963  int iv = Nvcd * is;
964  int in = Nvcd * (is - m_Nx);
965  int ig = m_Ndf * (is - m_Nx);
966 
967  for (int ic = 0; ic < m_Nc; ++ic) {
968  int ic_r = 2 * ic;
969  int ic_i = 2 * ic + 1;
970 
971  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
972  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_i + id4 + in];
973  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_r + id3 + in];
974  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
975 
976  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
977  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_i + id2 + in];
978  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_r + id1 + in];
979  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
980  }
981 
982  for (int ic = 0; ic < m_Nc; ++ic) {
983  int ic2 = 2 * ic;
984 
985  wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
986  wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
987  wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
988  wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
989 
990  wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
991  wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
992  wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
993  wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
994 
995  int ic_r = 2 * ic;
996  int ic_i = 2 * ic + 1;
997 
998  w2[ic_r + id1 + iv] += wt1_r;
999  w2[ic_i + id1 + iv] += wt1_i;
1000  w2[ic_r + id2 + iv] += wt2_r;
1001  w2[ic_i + id2 + iv] += wt2_i;
1002 
1003  w2[ic_r + id3 + iv] += wt3_r;
1004  w2[ic_i + id3 + iv] += wt3_i;
1005  w2[ic_r + id4 + iv] += wt4_r;
1006  w2[ic_i + id4 + iv] += wt4_i;
1007  }
1008  }
1009  }
1010  }
1011  }
1012 }
1013 
1014 
1015 //====================================================================
1017  int itask, double *vcp1, const double *v1)
1018 {
1019  int Nvcd = m_Nvc * m_Nd;
1020 
1021  int id1 = 0;
1022  int id2 = m_Nvc;
1023  int id3 = m_Nvc * 2;
1024  int id4 = m_Nvc * 3;
1025 
1026  int isite = m_arg[itask].isite;
1027  int isite_cp = m_arg[itask].isite_cp_z;
1028 
1029  const double *w1 = &v1[Nvcd * isite];
1030  double *w2 = &vcp1[Nvcd * isite_cp];
1031 
1032  int idir = 2;
1033  double bc2 = m_boundary2[idir];
1034 
1035  if (m_arg[itask].kz0 == 1) {
1036  int Nxy = m_Nx * m_Ny;
1037  int iz = 0;
1038  for (int it = 0; it < m_Mt; ++it) {
1039  for (int ixy = 0; ixy < Nxy; ++ixy) {
1040  int is = ixy + Nxy * (iz + m_Nz * it);
1041  int is2 = ixy + Nxy * it;
1042 
1043  int in = Nvcd * is;
1044  int ix1 = Nvcd * is2;
1045  int ix2 = ix1 + m_Nvc;
1046  int ix3 = ix2 + m_Nvc;
1047  int ix4 = ix3 + m_Nvc;
1048 
1049  for (int ic = 0; ic < m_Nc; ++ic) {
1050  int ic_r = 2 * ic;
1051  int ic_i = 2 * ic + 1;
1052 
1053  w2[ic_r + ix1] = bc2 * (m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id3 + in]);
1054  w2[ic_i + ix1] = bc2 * (m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id3 + in]);
1055  w2[ic_r + ix2] = bc2 * (m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id4 + in]);
1056  w2[ic_i + ix2] = bc2 * (m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id4 + in]);
1057 
1058  w2[ic_r + ix3] = bc2 * (m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id1 + in]);
1059  w2[ic_i + ix3] = bc2 * (m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id1 + in]);
1060  w2[ic_r + ix4] = bc2 * (m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id2 + in]);
1061  w2[ic_i + ix4] = bc2 * (m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id2 + in]);
1062  }
1063  }
1064  }
1065  }
1066 }
1067 
1068 
1069 //====================================================================
1071  int itask, double *v2, const double *vcp2)
1072 {
1073  int Nvcd = m_Nvc * m_Nd;
1074 
1075  int id1 = 0;
1076  int id2 = m_Nvc;
1077  int id3 = m_Nvc * 2;
1078  int id4 = m_Nvc * 3;
1079 
1080  int idir = 2;
1081 
1082  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
1083 
1084  int isite = m_arg[itask].isite;
1085  int isite_cp = m_arg[itask].isite_cp_z;
1086 
1087  const double *w1 = &vcp2[Nvcd * isite_cp];
1088  double *w2 = &v2[Nvcd * isite];
1089  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1090 
1091  if (m_arg[itask].kz1 == 1) {
1092  int Nxy = m_Nx * m_Ny;
1093  int iz = m_Mz - 1;
1094  for (int it = 0; it < m_Mt; ++it) {
1095  for (int ixy = 0; ixy < Nxy; ++ixy) {
1096  int is = ixy + Nxy * (iz + m_Nz * it);
1097  int is2 = ixy + Nxy * it;
1098  int iv = Nvcd * is;
1099  int ig = m_Ndf * is;
1100  int ix1 = Nvcd * is2;
1101  int ix2 = ix1 + m_Nvc;
1102  int ix3 = ix2 + m_Nvc;
1103  int ix4 = ix3 + m_Nvc;
1104 
1105  for (int ic = 0; ic < m_Nc; ++ic) {
1106  int ic2 = ic * m_Nvc;
1107 
1108  wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1109  wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1110  wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1111  wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1112 
1113  wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
1114  wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
1115  wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
1116  wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
1117 
1118  int ic_r = 2 * ic;
1119  int ic_i = 2 * ic + 1;
1120 
1121  w2[ic_r + id1 + iv] += wt1_r;
1122  w2[ic_i + id1 + iv] += wt1_i;
1123  w2[ic_r + id2 + iv] += wt2_r;
1124  w2[ic_i + id2 + iv] += wt2_i;
1125 
1126  w2[ic_r + id3 + iv] += wt3_r;
1127  w2[ic_i + id3 + iv] += wt3_i;
1128  w2[ic_r + id4 + iv] += wt4_r;
1129  w2[ic_i + id4 + iv] += wt4_i;
1130  }
1131  }
1132  }
1133  }
1134 }
1135 
1136 
1137 //====================================================================
1139  int itask, double *v2, const double *v1)
1140 {
1141  int Nvcd = m_Nvc * m_Nd;
1142 
1143  int id1 = 0;
1144  int id2 = m_Nvc;
1145  int id3 = m_Nvc * 2;
1146  int id4 = m_Nvc * 3;
1147 
1148  int idir = 2;
1149 
1150  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
1151  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
1152 
1153  int isite = m_arg[itask].isite;
1154 
1155  const double *w1 = &v1[Nvcd * isite];
1156  double *w2 = &v2[Nvcd * isite];
1157  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1158 
1159  int kz1 = m_arg[itask].kz1;
1160  int Nxy = m_Nx * m_Ny;
1161 
1162  for (int it = 0; it < m_Mt; ++it) {
1163  for (int iz = 0; iz < m_Mz - kz1; ++iz) {
1164  for (int ixy = 0; ixy < Nxy; ++ixy) {
1165  int is = ixy + Nxy * (iz + m_Nz * it);
1166  int iv = Nvcd * is;
1167  int in = Nvcd * (is + Nxy);
1168  int ig = m_Ndf * is;
1169 
1170  for (int ic = 0; ic < m_Nc; ++ic) {
1171  int ic_r = 2 * ic;
1172  int ic_i = 2 * ic + 1;
1173 
1174  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id3 + in];
1175  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id3 + in];
1176  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id4 + in];
1177  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id4 + in];
1178 
1179  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id1 + in];
1180  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id1 + in];
1181  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id2 + in];
1182  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id2 + in];
1183  }
1184 
1185  for (int ic = 0; ic < m_Nc; ++ic) {
1186  int ic2 = ic * m_Nvc;
1187 
1188  wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1189  wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1190  wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1191  wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1192 
1193  wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
1194  wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
1195  wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
1196  wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
1197 
1198  int ic_r = 2 * ic;
1199  int ic_i = 2 * ic + 1;
1200 
1201  w2[ic_r + id1 + iv] += wt1_r;
1202  w2[ic_i + id1 + iv] += wt1_i;
1203  w2[ic_r + id2 + iv] += wt2_r;
1204  w2[ic_i + id2 + iv] += wt2_i;
1205 
1206  w2[ic_r + id3 + iv] += wt3_r;
1207  w2[ic_i + id3 + iv] += wt3_i;
1208  w2[ic_r + id4 + iv] += wt4_r;
1209  w2[ic_i + id4 + iv] += wt4_i;
1210  }
1211  }
1212  }
1213  }
1214 }
1215 
1216 
1217 //====================================================================
1219  int itask, double *vcp1, const double *v1)
1220 {
1221  int Nvcd = m_Nvc * m_Nd;
1222 
1223  int id1 = 0;
1224  int id2 = m_Nvc;
1225  int id3 = m_Nvc * 2;
1226  int id4 = m_Nvc * 3;
1227 
1228  int idir = 2;
1229 
1230  int isite = m_arg[itask].isite;
1231  int isite_cp = m_arg[itask].isite_cp_z;
1232 
1233  const double *w1 = &v1[Nvcd * isite];
1234  double *w2 = &vcp1[Nvcd * isite_cp];
1235  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1236 
1237  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
1238 
1239  if (m_arg[itask].kz1 == 1) {
1240  int Nxy = m_Nx * m_Ny;
1241  int iz = m_Mz - 1;
1242  for (int it = 0; it < m_Mt; ++it) {
1243  for (int ixy = 0; ixy < Nxy; ++ixy) {
1244  int is = ixy + Nxy * (iz + m_Nz * it);
1245  int is2 = ixy + Nxy * it;
1246  int in = Nvcd * is;
1247  int ig = m_Ndf * is;
1248  int ix1 = Nvcd * is2;
1249  int ix2 = ix1 + m_Nvc;
1250  int ix3 = ix2 + m_Nvc;
1251  int ix4 = ix3 + m_Nvc;
1252 
1253  for (int ic = 0; ic < m_Nc; ++ic) {
1254  int ic_r = 2 * ic;
1255  int ic_i = 2 * ic + 1;
1256 
1257  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id3 + in];
1258  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id3 + in];
1259  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id4 + in];
1260  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id4 + in];
1261 
1262  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id1 + in];
1263  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id1 + in];
1264  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id2 + in];
1265  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id2 + in];
1266  }
1267 
1268  for (int ic = 0; ic < m_Nc; ++ic) {
1269  int ic2 = 2 * ic;
1270 
1271  int ic_r = 2 * ic;
1272  int ic_i = 2 * ic + 1;
1273 
1274  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1275  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1276  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1277  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1278 
1279  w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
1280  w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
1281  w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
1282  w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
1283  }
1284  }
1285  }
1286  }
1287 }
1288 
1289 
1290 //====================================================================
1292  int itask, double *v2, const double *vcp2)
1293 {
1294  int Nvcd = m_Nvc * m_Nd;
1295 
1296  int id1 = 0;
1297  int id2 = m_Nvc;
1298  int id3 = m_Nvc * 2;
1299  int id4 = m_Nvc * 3;
1300 
1301  int idir = 2;
1302  double bc2 = m_boundary2[idir];
1303 
1304  // double wt1_r,wt1_i, wt2_r,wt2_i;
1305 
1306  int isite = m_arg[itask].isite;
1307  int isite_cp = m_arg[itask].isite_cp_z;
1308 
1309  const double *w1 = &vcp2[Nvcd * isite_cp];
1310  double *w2 = &v2[Nvcd * isite];
1311 
1312  if (m_arg[itask].kz0 == 1) {
1313  int Nxy = m_Nx * m_Ny;
1314 
1315  int iz = 0;
1316  for (int it = 0; it < m_Mt; ++it) {
1317  for (int ixy = 0; ixy < Nxy; ++ixy) {
1318  int is = ixy + Nxy * (iz + m_Nz * it);
1319  int is2 = ixy + Nxy * it;
1320  int iv = Nvcd * is;
1321  int ix1 = Nvcd * is2;
1322  int ix2 = ix1 + m_Nvc;
1323  int ix3 = ix2 + m_Nvc;
1324  int ix4 = ix3 + m_Nvc;
1325 
1326  for (int ic = 0; ic < m_Nc; ++ic) {
1327  int ic_r = 2 * ic;
1328  int ic_i = 2 * ic + 1;
1329 
1330  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1331  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1332  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1333  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1334 
1335  w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
1336  w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
1337  w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
1338  w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
1339  }
1340  }
1341  }
1342  }
1343 }
1344 
1345 
1346 //====================================================================
1348  int itask, double *v2, const double *v1)
1349 {
1350  int Nvcd = m_Nvc * m_Nd;
1351 
1352  int id1 = 0;
1353  int id2 = m_Nvc;
1354  int id3 = m_Nvc * 2;
1355  int id4 = m_Nvc * 3;
1356 
1357  int idir = 2;
1358 
1359  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
1360  double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
1361 
1362  int isite = m_arg[itask].isite;
1363 
1364  const double *w1 = &v1[Nvcd * isite];
1365  double *w2 = &v2[Nvcd * isite];
1366  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1367 
1368  int kz0 = m_arg[itask].kz0;
1369  int Nxy = m_Nx * m_Ny;
1370 
1371  for (int it = 0; it < m_Mt; ++it) {
1372  for (int iz = kz0; iz < m_Mz; ++iz) {
1373  for (int ixy = 0; ixy < Nxy; ++ixy) {
1374  int is = ixy + Nxy * (iz + m_Nz * it);
1375  int iv = Nvcd * is;
1376  int in = Nvcd * (is - Nxy);
1377  int ig = m_Ndf * (is - Nxy);
1378 
1379  for (int ic = 0; ic < m_Nc; ++ic) {
1380  int ic_r = 2 * ic;
1381  int ic_i = 2 * ic + 1;
1382 
1383  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id3 + in];
1384  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id3 + in];
1385  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id4 + in];
1386  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id4 + in];
1387 
1388  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id1 + in];
1389  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id1 + in];
1390  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id2 + in];
1391  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id2 + in];
1392  }
1393 
1394  for (int ic = 0; ic < m_Nc; ++ic) {
1395  int ic2 = 2 * ic;
1396 
1397  wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1398  wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1399  wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1400  wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1401 
1402  wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
1403  wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
1404  wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
1405  wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
1406 
1407  int ic_r = 2 * ic;
1408  int ic_i = 2 * ic + 1;
1409 
1410  w2[ic_r + id1 + iv] += wt1_r;
1411  w2[ic_i + id1 + iv] += wt1_i;
1412  w2[ic_r + id2 + iv] += wt2_r;
1413  w2[ic_i + id2 + iv] += wt2_i;
1414 
1415  w2[ic_r + id3 + iv] += wt3_r;
1416  w2[ic_i + id3 + iv] += wt3_i;
1417  w2[ic_r + id4 + iv] += wt4_r;
1418  w2[ic_i + id4 + iv] += wt4_i;
1419  }
1420  }
1421  }
1422  }
1423 }
1424 
1425 
1426 //====================================================================
1428  int itask, double *vcp1, const double *v1)
1429 {
1430  int Nvc2 = 2 * m_Nvc;
1431  int Nvcd = m_Nvc * m_Nd;
1432  int Nvcd2 = Nvcd / 2;
1433 
1434  // int id1 = 0;
1435  // int id2 = m_Nvc;
1436  int id3 = m_Nvc * 2;
1437  int id4 = m_Nvc * 3;
1438 
1439  int isite = m_arg[itask].isite;
1440  int isite_cp = m_arg[itask].isite_cp_t;
1441 
1442  const double *w1 = &v1[Nvcd * isite];
1443  double *w2 = &vcp1[Nvcd2 * isite_cp];
1444 
1445  int idir = 3;
1446  double bc2 = m_boundary2[idir];
1447 
1448  if (m_arg[itask].kt0 == 1) {
1449  int Nxy = m_Nx * m_Ny;
1450  int it = 0;
1451  for (int iz = 0; iz < m_Mz; ++iz) {
1452  for (int ixy = 0; ixy < Nxy; ++ixy) {
1453  int is = ixy + Nxy * (iz + m_Nz * it);
1454  int is2 = ixy + Nxy * iz;
1455 
1456  int in = Nvcd * is;
1457  int ix1 = Nvc2 * is2;
1458  int ix2 = ix1 + m_Nvc;
1459 
1460  for (int ic = 0; ic < m_Nc; ++ic) {
1461  int ic_r = 2 * ic;
1462  int ic_i = 2 * ic + 1;
1463 
1464  w2[ic_r + ix1] = 2.0 * bc2 * w1[ic_r + id3 + in];
1465  w2[ic_i + ix1] = 2.0 * bc2 * w1[ic_i + id3 + in];
1466  w2[ic_r + ix2] = 2.0 * bc2 * w1[ic_r + id4 + in];
1467  w2[ic_i + ix2] = 2.0 * bc2 * w1[ic_i + id4 + in];
1468  }
1469  }
1470  }
1471  }
1472 }
1473 
1474 
1475 //====================================================================
1477  int itask, double *v2, const double *vcp2)
1478 {
1479  int Nvc2 = 2 * m_Nvc;
1480  int Nvcd = m_Nvc * m_Nd;
1481  int Nvcd2 = Nvcd / 2;
1482 
1483  // int id1 = 0;
1484  // int id2 = m_Nvc;
1485  int id3 = m_Nvc * 2;
1486  int id4 = m_Nvc * 3;
1487 
1488  int idir = 3;
1489 
1490  double wt1_r, wt1_i, wt2_r, wt2_i;
1491 
1492  int isite = m_arg[itask].isite;
1493  int isite_cp = m_arg[itask].isite_cp_t;
1494 
1495  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1496  double *w2 = &v2[Nvcd * isite];
1497  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1498 
1499  if (m_arg[itask].kt1 == 1) {
1500  int Nxy = m_Nx * m_Ny;
1501  int it = m_Mt - 1;
1502  for (int iz = 0; iz < m_Mz; ++iz) {
1503  for (int ixy = 0; ixy < Nxy; ++ixy) {
1504  int is = ixy + Nxy * (iz + m_Nz * it);
1505  int is2 = ixy + Nxy * iz;
1506  int iv = Nvcd * is;
1507  int ig = m_Ndf * is;
1508  int ix1 = Nvc2 * is2;
1509  int ix2 = ix1 + m_Nvc;
1510 
1511  for (int ic = 0; ic < m_Nc; ++ic) {
1512  int ic2 = ic * m_Nvc;
1513 
1514  wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1515  wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1516  wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1517  wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1518 
1519  int ic_r = 2 * ic;
1520  int ic_i = 2 * ic + 1;
1521 
1522  w2[ic_r + id3 + iv] += wt1_r;
1523  w2[ic_i + id3 + iv] += wt1_i;
1524  w2[ic_r + id4 + iv] += wt2_r;
1525  w2[ic_i + id4 + iv] += wt2_i;
1526  }
1527  }
1528  }
1529  }
1530 }
1531 
1532 
1533 //====================================================================
1535  int itask, double *v2, const double *v1)
1536 {
1537  int Nvcd = m_Nvc * m_Nd;
1538 
1539  // int id1 = 0;
1540  // int id2 = m_Nvc;
1541  int id3 = m_Nvc * 2;
1542  int id4 = m_Nvc * 3;
1543 
1544  int idir = 3;
1545 
1546  double vt1[m_Nvc], vt2[m_Nvc];
1547  double wt1_r, wt1_i, wt2_r, wt2_i;
1548 
1549  int isite = m_arg[itask].isite;
1550 
1551  const double *w1 = &v1[Nvcd * isite];
1552  double *w2 = &v2[Nvcd * isite];
1553  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1554 
1555  int kt1 = m_arg[itask].kt1;
1556  int Nxy = m_Nx * m_Ny;
1557  int Nxyz = Nxy * m_Nz;
1558 
1559  for (int it = 0; it < m_Mt - kt1; ++it) {
1560  for (int iz = 0; iz < m_Mz; ++iz) {
1561  for (int ixy = 0; ixy < Nxy; ++ixy) {
1562  int is = ixy + Nxy * (iz + m_Nz * it);
1563  int iv = Nvcd * is;
1564  int in = Nvcd * (is + Nxyz);
1565  int ig = m_Ndf * is;
1566 
1567  for (int ic = 0; ic < m_Nc; ++ic) {
1568  int ic_r = 2 * ic;
1569  int ic_i = 2 * ic + 1;
1570 
1571  vt1[ic_r] = 2.0 * w1[ic_r + id3 + in];
1572  vt1[ic_i] = 2.0 * w1[ic_i + id3 + in];
1573  vt2[ic_r] = 2.0 * w1[ic_r + id4 + in];
1574  vt2[ic_i] = 2.0 * w1[ic_i + id4 + in];
1575  }
1576 
1577  for (int ic = 0; ic < m_Nc; ++ic) {
1578  int ic2 = ic * m_Nvc;
1579 
1580  wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1581  wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1582  wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1583  wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1584 
1585  int ic_r = 2 * ic;
1586  int ic_i = 2 * ic + 1;
1587 
1588  w2[ic_r + id3 + iv] += wt1_r;
1589  w2[ic_i + id3 + iv] += wt1_i;
1590  w2[ic_r + id4 + iv] += wt2_r;
1591  w2[ic_i + id4 + iv] += wt2_i;
1592  }
1593  }
1594  }
1595  }
1596 }
1597 
1598 
1599 //====================================================================
1601  int itask, double *vcp1, const double *v1)
1602 {
1603  int Nvc2 = 2 * m_Nvc;
1604  int Nvcd = m_Nvc * m_Nd;
1605  int Nvcd2 = Nvcd / 2;
1606 
1607  int id1 = 0;
1608  int id2 = m_Nvc;
1609  // int id3 = m_Nvc * 2;
1610  // int id4 = m_Nvc * 3;
1611 
1612  int idir = 3;
1613 
1614  int isite = m_arg[itask].isite;
1615  int isite_cp = m_arg[itask].isite_cp_t;
1616 
1617  const double *w1 = &v1[Nvcd * isite];
1618  double *w2 = &vcp1[Nvcd2 * isite_cp];
1619  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1620 
1621  double vt1[m_Nvc], vt2[m_Nvc];
1622 
1623  if (m_arg[itask].kt1 == 1) {
1624  int Nxy = m_Nx * m_Ny;
1625  int it = m_Mt - 1;
1626  for (int iz = 0; iz < m_Mz; ++iz) {
1627  for (int ixy = 0; ixy < Nxy; ++ixy) {
1628  int is = ixy + Nxy * (iz + m_Nz * it);
1629  int is2 = ixy + Nxy * iz;
1630  int in = Nvcd * is;
1631  int ig = m_Ndf * is;
1632  int ix1 = Nvc2 * is2;
1633  int ix2 = ix1 + m_Nvc;
1634 
1635  for (int ic = 0; ic < m_Nc; ++ic) {
1636  int ic_r = 2 * ic;
1637  int ic_i = 2 * ic + 1;
1638 
1639  vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1640  vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1641  vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1642  vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1643  }
1644 
1645  for (int ic = 0; ic < m_Nc; ++ic) {
1646  int ic2 = 2 * ic;
1647 
1648  int ic_r = 2 * ic;
1649  int ic_i = 2 * ic + 1;
1650 
1651  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1652  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1653  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1654  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1655  }
1656  }
1657  }
1658  }
1659 }
1660 
1661 
1662 //====================================================================
1664  int itask, double *v2, const double *vcp2)
1665 {
1666  int Nvc2 = 2 * m_Nvc;
1667  int Nvcd = m_Nvc * m_Nd;
1668  int Nvcd2 = Nvcd / 2;
1669 
1670  int id1 = 0;
1671  int id2 = m_Nvc;
1672  // int id3 = m_Nvc * 2;
1673  // int id4 = m_Nvc * 3;
1674 
1675  int idir = 3;
1676  double bc2 = m_boundary2[idir];
1677 
1678  // double wt1_r,wt1_i, wt2_r,wt2_i;
1679 
1680  int isite = m_arg[itask].isite;
1681  int isite_cp = m_arg[itask].isite_cp_t;
1682 
1683  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1684  double *w2 = &v2[Nvcd * isite];
1685 
1686  if (m_arg[itask].kt0 == 1) {
1687  int Nxy = m_Nx * m_Ny;
1688  int it = 0;
1689  for (int iz = 0; iz < m_Mz; ++iz) {
1690  for (int ixy = 0; ixy < Nxy; ++ixy) {
1691  int is = ixy + Nxy * (iz + m_Nz * it);
1692  int is2 = ixy + Nxy * iz;
1693  int iv = Nvcd * is;
1694  int ix1 = Nvc2 * is2;
1695  int ix2 = ix1 + m_Nvc;
1696 
1697  for (int ic = 0; ic < m_Nc; ++ic) {
1698  int ic_r = 2 * ic;
1699  int ic_i = 2 * ic + 1;
1700 
1701  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1702  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1703  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1704  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1705  }
1706  }
1707  }
1708  }
1709 }
1710 
1711 
1712 //====================================================================
1714  int itask, double *v2, const double *v1)
1715 {
1716  int Nvcd = m_Nvc * m_Nd;
1717 
1718  int id1 = 0;
1719  int id2 = m_Nvc;
1720  // int id3 = m_Nvc * 2;
1721  // int id4 = m_Nvc * 3;
1722 
1723  int idir = 3;
1724 
1725  double vt1[m_Nvc], vt2[m_Nvc];
1726  double wt1_r, wt1_i, wt2_r, wt2_i;
1727 
1728  int isite = m_arg[itask].isite;
1729 
1730  const double *w1 = &v1[Nvcd * isite];
1731  double *w2 = &v2[Nvcd * isite];
1732  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1733 
1734  int kt0 = m_arg[itask].kt0;
1735  int Nxy = m_Nx * m_Ny;
1736  int Nxyz = Nxy * m_Nz;
1737 
1738  for (int it = kt0; it < m_Mt; ++it) {
1739  for (int iz = 0; iz < m_Mz; ++iz) {
1740  for (int ixy = 0; ixy < Nxy; ++ixy) {
1741  int is = ixy + Nxy * (iz + m_Nz * it);
1742  int iv = Nvcd * is;
1743  int in = Nvcd * (is - Nxyz);
1744  int ig = m_Ndf * (is - Nxyz);
1745 
1746  for (int ic = 0; ic < m_Nc; ++ic) {
1747  int ic_r = 2 * ic;
1748  int ic_i = 2 * ic + 1;
1749 
1750  vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1751  vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1752  vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1753  vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1754  }
1755 
1756  for (int ic = 0; ic < m_Nc; ++ic) {
1757  int ic2 = 2 * ic;
1758 
1759  wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1760  wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1761  wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1762  wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1763 
1764  int ic_r = 2 * ic;
1765  int ic_i = 2 * ic + 1;
1766 
1767  w2[ic_r + id1 + iv] += wt1_r;
1768  w2[ic_i + id1 + iv] += wt1_i;
1769  w2[ic_r + id2 + iv] += wt2_r;
1770  w2[ic_i + id2 + iv] += wt2_i;
1771  }
1772  }
1773  }
1774  }
1775 }
1776 
1777 
1778 //====================================================================
1780  int itask, double *vcp1, const double *v1)
1781 {
1782  int Nvc2 = 2 * m_Nvc;
1783  int Nvcd = m_Nvc * m_Nd;
1784  int Nvcd2 = Nvcd / 2;
1785 
1786  int id1 = 0;
1787  int id2 = m_Nvc;
1788  int id3 = m_Nvc * 2;
1789  int id4 = m_Nvc * 3;
1790 
1791  int isite = m_arg[itask].isite;
1792  int isite_cp = m_arg[itask].isite_cp_t;
1793 
1794  const double *w1 = &v1[Nvcd * isite];
1795  double *w2 = &vcp1[Nvcd2 * isite_cp];
1796 
1797  int idir = 3;
1798  double bc2 = m_boundary2[idir];
1799 
1800  if (m_arg[itask].kt0 == 1) {
1801  int Nxy = m_Nx * m_Ny;
1802  int it = 0;
1803  for (int iz = 0; iz < m_Mz; ++iz) {
1804  for (int ixy = 0; ixy < Nxy; ++ixy) {
1805  int is = ixy + Nxy * (iz + m_Nz * it);
1806  int is2 = ixy + Nxy * iz;
1807 
1808  int in = Nvcd * is;
1809  int ix1 = Nvc2 * is2;
1810  int ix2 = ix1 + m_Nvc;
1811 
1812  for (int ic = 0; ic < m_Nc; ++ic) {
1813  int ic_r = 2 * ic;
1814  int ic_i = 2 * ic + 1;
1815 
1816  w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id3 + in]);
1817  w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id3 + in]);
1818  w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_r + id4 + in]);
1819  w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_i + id4 + in]);
1820  }
1821  }
1822  }
1823  }
1824 }
1825 
1826 
1827 //====================================================================
1829  int itask, double *v2, const double *vcp2)
1830 {
1831  int Nvc2 = 2 * m_Nvc;
1832  int Nvcd = m_Nvc * m_Nd;
1833  int Nvcd2 = Nvcd / 2;
1834 
1835  int id1 = 0;
1836  int id2 = m_Nvc;
1837  int id3 = m_Nvc * 2;
1838  int id4 = m_Nvc * 3;
1839 
1840  int idir = 3;
1841 
1842  double wt1_r, wt1_i, wt2_r, wt2_i;
1843 
1844  int isite = m_arg[itask].isite;
1845  int isite_cp = m_arg[itask].isite_cp_t;
1846 
1847 
1848  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1849  double *w2 = &v2[Nvcd * isite];
1850  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1851 
1852  if (m_arg[itask].kt1 == 1) {
1853  int Nxy = m_Nx * m_Ny;
1854  int it = m_Mt - 1;
1855  for (int iz = 0; iz < m_Mz; ++iz) {
1856  for (int ixy = 0; ixy < Nxy; ++ixy) {
1857  int is = ixy + Nxy * (iz + m_Nz * it);
1858  int is2 = ixy + Nxy * iz;
1859  int iv = Nvcd * is;
1860  int ig = m_Ndf * is;
1861  int ix1 = Nvc2 * is2;
1862  int ix2 = ix1 + m_Nvc;
1863 
1864  for (int ic = 0; ic < m_Nc; ++ic) {
1865  int ic2 = ic * m_Nvc;
1866 
1867  wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1868  wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1869  wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1870  wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1871 
1872  int ic_r = 2 * ic;
1873  int ic_i = 2 * ic + 1;
1874 
1875  w2[ic_r + id1 + iv] += wt1_r;
1876  w2[ic_i + id1 + iv] += wt1_i;
1877  w2[ic_r + id2 + iv] += wt2_r;
1878  w2[ic_i + id2 + iv] += wt2_i;
1879 
1880  w2[ic_r + id3 + iv] += wt1_r;
1881  w2[ic_i + id3 + iv] += wt1_i;
1882  w2[ic_r + id4 + iv] += wt2_r;
1883  w2[ic_i + id4 + iv] += wt2_i;
1884  }
1885  }
1886  }
1887  }
1888 }
1889 
1890 
1891 //====================================================================
1893  int itask, double *v2, const double *v1)
1894 {
1895  int Nvcd = m_Nvc * m_Nd;
1896 
1897  int id1 = 0;
1898  int id2 = m_Nvc;
1899  int id3 = m_Nvc * 2;
1900  int id4 = m_Nvc * 3;
1901 
1902  int idir = 3;
1903 
1904  double vt1[m_Nvc], vt2[m_Nvc];
1905  double wt1_r, wt1_i, wt2_r, wt2_i;
1906 
1907  int isite = m_arg[itask].isite;
1908 
1909  const double *w1 = &v1[Nvcd * isite];
1910  double *w2 = &v2[Nvcd * isite];
1911  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1912 
1913  int kt1 = m_arg[itask].kt1;
1914  int Nxy = m_Nx * m_Ny;
1915  int Nxyz = Nxy * m_Nz;
1916 
1917  for (int it = 0; it < m_Mt - kt1; ++it) {
1918  for (int iz = 0; iz < m_Mz; ++iz) {
1919  for (int ixy = 0; ixy < Nxy; ++ixy) {
1920  int is = ixy + Nxy * (iz + m_Nz * it);
1921  int iv = Nvcd * is;
1922  int in = Nvcd * (is + Nxyz);
1923  int ig = m_Ndf * is;
1924 
1925  for (int ic = 0; ic < m_Nc; ++ic) {
1926  int ic_r = 2 * ic;
1927  int ic_i = 2 * ic + 1;
1928 
1929  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id3 + in];
1930  vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id3 + in];
1931  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id4 + in];
1932  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id4 + in];
1933  }
1934 
1935  for (int ic = 0; ic < m_Nc; ++ic) {
1936  int ic2 = ic * m_Nvc;
1937 
1938  wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1939  wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1940  wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1941  wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1942 
1943  int ic_r = 2 * ic;
1944  int ic_i = 2 * ic + 1;
1945 
1946  w2[ic_r + id1 + iv] += wt1_r;
1947  w2[ic_i + id1 + iv] += wt1_i;
1948  w2[ic_r + id2 + iv] += wt2_r;
1949  w2[ic_i + id2 + iv] += wt2_i;
1950 
1951  w2[ic_r + id3 + iv] += wt1_r;
1952  w2[ic_i + id3 + iv] += wt1_i;
1953  w2[ic_r + id4 + iv] += wt2_r;
1954  w2[ic_i + id4 + iv] += wt2_i;
1955  }
1956  }
1957  }
1958  }
1959 }
1960 
1961 
1962 //====================================================================
1964  int itask, double *vcp1, const double *v1)
1965 {
1966  int Nvc2 = 2 * m_Nvc;
1967  int Nvcd = m_Nvc * m_Nd;
1968  int Nvcd2 = Nvcd / 2;
1969 
1970  int id1 = 0;
1971  int id2 = m_Nvc;
1972  int id3 = m_Nvc * 2;
1973  int id4 = m_Nvc * 3;
1974 
1975  int idir = 3;
1976 
1977  int isite = m_arg[itask].isite;
1978  int isite_cp = m_arg[itask].isite_cp_t;
1979 
1980 
1981  const double *w1 = &v1[Nvcd * isite];
1982  double *w2 = &vcp1[Nvcd2 * isite_cp];
1983  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1984 
1985  double vt1[m_Nvc], vt2[m_Nvc];
1986 
1987  if (m_arg[itask].kt1 == 1) {
1988  int Nxy = m_Nx * m_Ny;
1989  int it = m_Mt - 1;
1990  for (int iz = 0; iz < m_Mz; ++iz) {
1991  for (int ixy = 0; ixy < Nxy; ++ixy) {
1992  int is = ixy + Nxy * (iz + m_Nz * it);
1993  int is2 = ixy + Nxy * iz;
1994  int in = Nvcd * is;
1995  int ig = m_Ndf * is;
1996  int ix1 = Nvc2 * is2;
1997  int ix2 = ix1 + m_Nvc;
1998 
1999  for (int ic = 0; ic < m_Nc; ++ic) {
2000  int ic_r = 2 * ic;
2001  int ic_i = 2 * ic + 1;
2002 
2003  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
2004  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
2005  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
2006  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
2007  }
2008 
2009  for (int ic = 0; ic < m_Nc; ++ic) {
2010  int ic2 = 2 * ic;
2011 
2012  int ic_r = 2 * ic;
2013  int ic_i = 2 * ic + 1;
2014 
2015  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
2016  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
2017  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2018  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2019  }
2020  }
2021  }
2022  }
2023 }
2024 
2025 
2026 //====================================================================
2028  int itask, double *v2, const double *vcp2)
2029 {
2030  int Nvc2 = 2 * m_Nvc;
2031  int Nvcd = m_Nvc * m_Nd;
2032  int Nvcd2 = Nvcd / 2;
2033 
2034  int id1 = 0;
2035  int id2 = m_Nvc;
2036  int id3 = m_Nvc * 2;
2037  int id4 = m_Nvc * 3;
2038 
2039  int idir = 3;
2040  double bc2 = m_boundary2[idir];
2041 
2042  // double wt1_r,wt1_i, wt2_r,wt2_i;
2043 
2044  int isite = m_arg[itask].isite;
2045  int isite_cp = m_arg[itask].isite_cp_t;
2046 
2047  const double *w1 = &vcp2[Nvcd2 * isite_cp];
2048  double *w2 = &v2[Nvcd * isite];
2049 
2050  if (m_arg[itask].kt0 == 1) {
2051  int Nxy = m_Nx * m_Ny;
2052  int it = 0;
2053  for (int iz = 0; iz < m_Mz; ++iz) {
2054  for (int ixy = 0; ixy < Nxy; ++ixy) {
2055  int is = ixy + Nxy * (iz + m_Nz * it);
2056  int is2 = ixy + Nxy * iz;
2057  int iv = Nvcd * is;
2058  int ix1 = Nvc2 * is2;
2059  int ix2 = ix1 + m_Nvc;
2060 
2061  for (int ic = 0; ic < m_Nc; ++ic) {
2062  int ic_r = 2 * ic;
2063  int ic_i = 2 * ic + 1;
2064 
2065  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
2066  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
2067  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
2068  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
2069 
2070  w2[ic_r + id3 + iv] -= bc2 * w1[ic_r + ix1];
2071  w2[ic_i + id3 + iv] -= bc2 * w1[ic_i + ix1];
2072  w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix2];
2073  w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix2];
2074  }
2075  }
2076  }
2077  }
2078 }
2079 
2080 
2081 //====================================================================
2083  int itask, double *v2, const double *v1)
2084 {
2085  int Nvcd = m_Nvc * m_Nd;
2086 
2087  int id1 = 0;
2088  int id2 = m_Nvc;
2089  int id3 = m_Nvc * 2;
2090  int id4 = m_Nvc * 3;
2091 
2092  int idir = 3;
2093 
2094  double vt1[m_Nvc], vt2[m_Nvc];
2095  double wt1_r, wt1_i, wt2_r, wt2_i;
2096 
2097  int isite = m_arg[itask].isite;
2098 
2099  const double *w1 = &v1[Nvcd * isite];
2100  double *w2 = &v2[Nvcd * isite];
2101  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
2102 
2103  int kt0 = m_arg[itask].kt0;
2104  int Nxy = m_Nx * m_Ny;
2105  int Nxyz = Nxy * m_Nz;
2106 
2107  for (int it = kt0; it < m_Mt; ++it) {
2108  for (int iz = 0; iz < m_Mz; ++iz) {
2109  for (int ixy = 0; ixy < Nxy; ++ixy) {
2110  int is = ixy + Nxy * (iz + m_Nz * it);
2111  int iv = Nvcd * is;
2112  int in = Nvcd * (is - Nxyz);
2113  int ig = m_Ndf * (is - Nxyz);
2114 
2115  for (int ic = 0; ic < m_Nc; ++ic) {
2116  int ic_r = 2 * ic;
2117  int ic_i = 2 * ic + 1;
2118 
2119  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
2120  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
2121  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
2122  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
2123  }
2124 
2125  for (int ic = 0; ic < m_Nc; ++ic) {
2126  int ic2 = 2 * ic;
2127 
2128  wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
2129  wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
2130  wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2131  wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2132 
2133  int ic_r = 2 * ic;
2134  int ic_i = 2 * ic + 1;
2135 
2136  w2[ic_r + id1 + iv] += wt1_r;
2137  w2[ic_i + id1 + iv] += wt1_i;
2138  w2[ic_r + id2 + iv] += wt2_r;
2139  w2[ic_i + id2 + iv] += wt2_i;
2140 
2141  w2[ic_r + id3 + iv] -= wt1_r;
2142  w2[ic_i + id3 + iv] -= wt1_i;
2143  w2[ic_r + id4 + iv] -= wt2_r;
2144  w2[ic_i + id4 + iv] -= wt2_i;
2145  }
2146  }
2147  }
2148  }
2149 }
2150 
2151 
2152 //====================================================================
2154  int itask, double *v2, const double *v1)
2155 {
2156  int Nvcd = m_Nvc * m_Nd;
2157  int Nxy = m_Nx * m_Ny;
2158 
2159  int id1 = 0;
2160  int id2 = m_Nvc;
2161  int id3 = m_Nvc * 2;
2162  int id4 = m_Nvc * 3;
2163 
2164  int isite = m_arg[itask].isite;
2165 
2166  const double *w1 = &v1[Nvcd * isite];
2167  double *w2 = &v2[Nvcd * isite];
2168 
2169  for (int it = 0; it < m_Mt; ++it) {
2170  for (int iz = 0; iz < m_Mz; ++iz) {
2171  for (int ixy = 0; ixy < Nxy; ++ixy) {
2172  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2173  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2174  w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2175  w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2176  w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2177  w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
2178  }
2179  }
2180  }
2181  }
2182 }
2183 
2184 
2185 //====================================================================
2187  int itask, double *v2, const double *v1)
2188 {
2189  int Nvcd = m_Nvc * m_Nd;
2190  int Nxy = m_Nx * m_Ny;
2191 
2192  int id1 = 0;
2193  int id2 = m_Nvc;
2194  int id3 = m_Nvc * 2;
2195  int id4 = m_Nvc * 3;
2196 
2197  int isite = m_arg[itask].isite;
2198 
2199  const double *w1 = &v1[Nvcd * isite];
2200  double *w2 = &v2[Nvcd * isite];
2201 
2202  for (int it = 0; it < m_Mt; ++it) {
2203  for (int iz = 0; iz < m_Mz; ++iz) {
2204  for (int ixy = 0; ixy < Nxy; ++ixy) {
2205  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2206  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2207  w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2208  w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2209  w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2210  w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2211  }
2212  }
2213  }
2214  }
2215 }
2216 
2217 
2218 //====================================================================
2219 //============================================================END=====
void mult_t_plus_bulk_chiral_thread(int, double *, const double *)
BridgeIO vout
Definition: bridgeIO.cpp:278
void general(const char *format,...)
Definition: bridgeIO.cpp:65
void daypx_thread(int, double *, double, const double *)
void daxpy_thread(int, double *, double, const double *)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_t_minus_bulk_dirac_thread(int, double *, const double *)
void crucial(const char *format,...)
Definition: bridgeIO.cpp:48
void mult_t_minus_bulk_chiral_thread(int, double *, const double *)