Bridge++  Version 1.5.4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fopr_WilsonGeneral_impl_thread.cpp
Go to the documentation of this file.
1 
15 
16 namespace Imp {
17 #if defined USE_GROUP_SU3
19 #elif defined USE_GROUP_SU2
21 #elif defined USE_GROUP_SU_N
23 #endif
24 
25 // const std::string Fopr_WilsonGeneral::class_name = "Imp::Fopr_WilsonGeneral";
26 
27 //====================================================================
29  {
31 
32  // The following setup corresponds to uniform division of volume.
33  if (m_Nthread <= m_Nt) {
35  } else if (m_Nthread <= m_Nz * m_Nt) {
36  m_Ntask_t = m_Nt;
37  } else {
38  vout.crucial(m_vl, "Error at %s: Too large Nthread = %d\n", class_name.c_str(), m_Nthread);
39  exit(EXIT_FAILURE);
40  }
41 
43 
44  if (m_Ntask_z * m_Ntask_t != m_Nthread) {
45  vout.crucial(m_vl, "Error at %s: Nz = %d and Nt = %d do not match Nthread = %d\n",
46  class_name.c_str(), m_Nz, m_Nt, m_Nthread);
47  exit(EXIT_FAILURE);
48  }
49 
51  m_Mz = m_Nz / m_Ntask_z;
52  m_Mt = m_Nt / m_Ntask_t;
53 
54  if (m_Mz * m_Ntask_z != m_Nz) {
55  vout.crucial(m_vl, "Error at %s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
56  class_name.c_str(), m_Mz, m_Ntask_z, m_Nz);
57  exit(EXIT_FAILURE);
58  }
59 
60  if (m_Mt * m_Ntask_t != m_Nt) {
61  vout.crucial(m_vl, "Error at %s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
62  class_name.c_str(), m_Mt, m_Ntask_t, m_Nt);
63  exit(EXIT_FAILURE);
64  }
65 
66  // The following setup is not a monotonic division, and requires a
67  // barrier at the beginning and end of mult (D and gamma5).
68  // [H.Matsufuru 22 Oct 2013]
69  // if(m_Nthread >= 64){
70  // m_Ntask_z = 8;
71  // }else if(m_Nthread >= 16){
72  // m_Ntask_z = 4;
73  // }else if(m_Nthread >= 4){
74  // m_Ntask_z = 2;
75  // }else{
76  // m_Ntask_z = 1;
77  // }
78  // m_Ntask_t = m_Nthread/m_Ntask_z;
79  // m_Ntask = m_Ntask_t * m_Ntask_z;
80  // m_Mz = m_Nz/m_Ntask_z;
81  // m_Mt = m_Nt/m_Ntask_t;
82 
83  vout.general(m_vl, " Nthread = %d\n", m_Nthread);
84  vout.general(m_vl, " Ntask = %d\n", m_Ntask);
85  vout.general(m_vl, " Ntask_z = %d Ntask_t = %d\n", m_Ntask_z, m_Ntask_t);
86  vout.general(m_vl, " Mz = %d Mt = %d\n", m_Mz, m_Mt);
87 
88  //- setup of arguments
89  const int Nxy = m_Nx * m_Ny;
90  m_arg.resize(m_Ntask);
91  for (int ithread_t = 0; ithread_t < m_Ntask_t; ++ithread_t) {
92  for (int ithread_z = 0; ithread_z < m_Ntask_z; ++ithread_z) {
93  int itask = ithread_z + m_Ntask_z * ithread_t;
94 
95  m_arg[itask].isite = (ithread_z * m_Mz + ithread_t * (m_Nz * m_Mt)) * Nxy;
96 
97  m_arg[itask].kt0 = 0;
98  m_arg[itask].kt1 = 0;
99  m_arg[itask].kz0 = 0;
100  m_arg[itask].kz1 = 0;
101  if (ithread_t == 0) m_arg[itask].kt0 = 1;
102  if (ithread_z == 0) m_arg[itask].kz0 = 1;
103  if (ithread_t == m_Ntask_t - 1) m_arg[itask].kt1 = 1;
104  if (ithread_z == m_Ntask_z - 1) m_arg[itask].kz1 = 1;
105 
106  m_arg[itask].isite_cp_x = itask * m_Mz * m_Mt * m_Ny;
107  m_arg[itask].isite_cp_y = itask * m_Mz * m_Mt * m_Nx;
108  m_arg[itask].isite_cp_z = ithread_t * m_Mt * Nxy;
109  m_arg[itask].isite_cp_t = ithread_z * m_Mz * Nxy;
110  }
111  }
112  }
113 
114 
115 //====================================================================
116  void Fopr_WilsonGeneral::daxpy_thread(const int itask,
117  double *v2, const double fac, const double *v1)
118  {
119  const int Nvcd = m_Nvc * m_Nd;
120  const int Nvxy = Nvcd * m_Nx * m_Ny;
121 
122  const int isite = m_arg[itask].isite;
123 
124  const double *w1 = &v1[Nvcd * isite];
125  double *w2 = &v2[Nvcd * isite];
126 
127  for (int it = 0; it < m_Mt; ++it) {
128  for (int iz = 0; iz < m_Mz; ++iz) {
129  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
130  int iv = ivxy + Nvxy * (iz + m_Nz * it);
131  w2[iv] += fac * w1[iv];
132  }
133  }
134  }
135  }
136 
137 
138 //====================================================================
139  void Fopr_WilsonGeneral::daypx_thread(const int itask,
140  double *v2, const double fac, const double *v1)
141  {
142  const int Nvcd = m_Nvc * m_Nd;
143  const int Nvxy = Nvcd * m_Nx * m_Ny;
144 
145  const int isite = m_arg[itask].isite;
146  const double *w1 = &v1[Nvcd * isite];
147  double *w2 = &v2[Nvcd * isite];
148 
149  for (int it = 0; it < m_Mt; ++it) {
150  for (int iz = 0; iz < m_Mz; ++iz) {
151  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
152  int iv = ivxy + Nvxy * (iz + m_Nz * it);
153  w2[iv] = fac * w2[iv] + w1[iv];
154  }
155  }
156  }
157  }
158 
159 
160 //====================================================================
161  void Fopr_WilsonGeneral::scal_thread(const int itask,
162  double *v, const double fac)
163  {
164  const int Nvcd = m_Nvc * m_Nd;
165  const int Nvxy = Nvcd * m_Nx * m_Ny;
166 
167  const int isite = m_arg[itask].isite;
168  double *w = &v[Nvcd * isite];
169 
170  for (int it = 0; it < m_Mt; ++it) {
171  for (int iz = 0; iz < m_Mz; ++iz) {
172  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
173  int iv = ivxy + Nvxy * (iz + m_Nz * it);
174  w[iv] *= fac;
175  }
176  }
177  }
178  }
179 
180 
181 //====================================================================
182  void Fopr_WilsonGeneral::clear_thread(const int itask,
183  double *v2)
184  {
185  const int Nvcd = m_Nvc * m_Nd;
186  const int Nvxy = Nvcd * m_Nx * m_Ny;
187 
188  const int isite = m_arg[itask].isite;
189  double *w2 = &v2[Nvcd * isite];
190 
191  for (int it = 0; it < m_Mt; ++it) {
192  for (int iz = 0; iz < m_Mz; ++iz) {
193  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
194  int iv = ivxy + Nvxy * (iz + m_Nz * it);
195  w2[iv] = 0.0;
196  }
197  }
198  }
199  }
200 
201 
202 //====================================================================
204  double *vcp1, const double *v1)
205  {
206  const int Nvcd = m_Nvc * m_Nd;
207 
208  const int id1 = 0;
209  const int id2 = m_Nvc;
210  const int id3 = m_Nvc * 2;
211  const int id4 = m_Nvc * 3;
212 
213  const int idir = 0;
214  const int ix = 0;
215  const double bc2 = m_boundary_each_node[idir];
216 
217  const int isite = m_arg[itask].isite;
218  const int isite_cp = m_arg[itask].isite_cp_x;
219 
220  const double *w1 = &v1[Nvcd * isite];
221  double *w2 = &vcp1[Nvcd * isite_cp];
222 
223 
224  for (int it = 0; it < m_Mt; ++it) {
225  for (int iz = 0; iz < m_Mz; ++iz) {
226  for (int iy = 0; iy < m_Ny; ++iy) {
227  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
228  int is2 = iy + m_Ny * (iz + m_Mz * it);
229  int in = Nvcd * is;
230  int ix1 = Nvcd * is2;
231  int ix2 = ix1 + m_Nvc;
232  int ix3 = ix2 + m_Nvc;
233  int ix4 = ix3 + m_Nvc;
234 
235  for (int ic = 0; ic < m_Nc; ++ic) {
236  int ic_r = 2 * ic;
237  int ic_i = 2 * ic + 1;
238 
239  w2[ic_r + ix1] = bc2 * (m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id4 + in]);
240  w2[ic_i + ix1] = bc2 * (m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id4 + in]);
241  w2[ic_r + ix2] = bc2 * (m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id3 + in]);
242  w2[ic_i + ix2] = bc2 * (m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id3 + in]);
243 
244  w2[ic_r + ix3] = bc2 * (m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id2 + in]);
245  w2[ic_i + ix3] = bc2 * (m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id2 + in]);
246  w2[ic_r + ix4] = bc2 * (m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id1 + in]);
247  w2[ic_i + ix4] = bc2 * (m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id1 + in]);
248  }
249  }
250  }
251  }
252  }
253 
254 
255 //====================================================================
257  double *v2, const double *vcp2)
258  {
259  const int Nvcd = m_Nvc * m_Nd;
260 
261  const int id1 = 0;
262  const int id2 = m_Nvc;
263  const int id3 = m_Nvc * 2;
264  const int id4 = m_Nvc * 3;
265 
266  const int idir = 0;
267  const int ix = m_Nx - 1;
268 
269  const int isite = m_arg[itask].isite;
270  const int isite_cp = m_arg[itask].isite_cp_x;
271 
272  const double *w1 = &vcp2[Nvcd * isite_cp];
273  double *w2 = &v2[Nvcd * isite];
274  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
275 
276 
277  for (int it = 0; it < m_Mt; ++it) {
278  for (int iz = 0; iz < m_Mz; ++iz) {
279  for (int iy = 0; iy < m_Ny; ++iy) {
280  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
281  int is2 = iy + m_Ny * (iz + m_Mz * it);
282  int iv = Nvcd * is;
283  int ig = m_Ndf * is;
284  int ix1 = Nvcd * is2;
285  int ix2 = ix1 + m_Nvc;
286  int ix3 = ix2 + m_Nvc;
287  int ix4 = ix3 + m_Nvc;
288 
289  for (int ic = 0; ic < m_Nc; ++ic) {
290  int ic2 = ic * m_Nvc;
291 
292  double wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
293  double wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
294  double wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
295  double wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
296 
297  double wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
298  double wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
299  double wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
300  double wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
301 
302  int ic_r = 2 * ic;
303  int ic_i = 2 * ic + 1;
304 
305  w2[ic_r + id1 + iv] += wt1_r;
306  w2[ic_i + id1 + iv] += wt1_i;
307  w2[ic_r + id2 + iv] += wt2_r;
308  w2[ic_i + id2 + iv] += wt2_i;
309 
310  w2[ic_r + id3 + iv] += wt3_r;
311  w2[ic_i + id3 + iv] += wt3_i;
312  w2[ic_r + id4 + iv] += wt4_r;
313  w2[ic_i + id4 + iv] += wt4_i;
314  }
315  }
316  }
317  }
318  }
319 
320 
321 //====================================================================
323  double *v2, const double *v1)
324  {
325  const int Nvcd = m_Nvc * m_Nd;
326 
327  const int id1 = 0;
328  const int id2 = m_Nvc;
329  const int id3 = m_Nvc * 2;
330  const int id4 = m_Nvc * 3;
331 
332  const int idir = 0;
333 
334  const int isite = m_arg[itask].isite;
335 
336  const double *w1 = &v1[Nvcd * isite];
337  double *w2 = &v2[Nvcd * isite];
338  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
339 
340 
341  for (int it = 0; it < m_Mt; ++it) {
342  for (int iz = 0; iz < m_Mz; ++iz) {
343  for (int iy = 0; iy < m_Ny; ++iy) {
344  for (int ix = 0; ix < m_Nx - 1; ++ix) {
345  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
346  int iv = Nvcd * is;
347  int in = Nvcd * (is + 1);
348  int ig = m_Ndf * is;
349 
350  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
351 
352  for (int ic = 0; ic < m_Nc; ++ic) {
353  int ic_r = 2 * ic;
354  int ic_i = 2 * ic + 1;
355 
356  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id4 + in];
357  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id4 + in];
358  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id3 + in];
359  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id3 + in];
360 
361  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id2 + in];
362  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id2 + in];
363  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id1 + in];
364  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id1 + in];
365  }
366 
367  for (int ic = 0; ic < m_Nc; ++ic) {
368  int ic2 = ic * m_Nvc;
369 
370  double wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
371  double wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
372  double wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
373  double wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
374 
375  double wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
376  double wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
377  double wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
378  double wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
379 
380  int ic_r = 2 * ic;
381  int ic_i = 2 * ic + 1;
382 
383  w2[ic_r + id1 + iv] += wt1_r;
384  w2[ic_i + id1 + iv] += wt1_i;
385  w2[ic_r + id2 + iv] += wt2_r;
386  w2[ic_i + id2 + iv] += wt2_i;
387 
388  w2[ic_r + id3 + iv] += wt3_r;
389  w2[ic_i + id3 + iv] += wt3_i;
390  w2[ic_r + id4 + iv] += wt4_r;
391  w2[ic_i + id4 + iv] += wt4_i;
392  }
393  }
394  }
395  }
396  }
397  }
398 
399 
400 //====================================================================
402  double *vcp1, const double *v1)
403  {
404  const int Nvcd = m_Nvc * m_Nd;
405 
406  const int id1 = 0;
407  const int id2 = m_Nvc;
408  const int id3 = m_Nvc * 2;
409  const int id4 = m_Nvc * 3;
410 
411  const int idir = 0;
412  const int ix = m_Nx - 1;
413 
414  const int isite = m_arg[itask].isite;
415  const int isite_cp = m_arg[itask].isite_cp_x;
416 
417  const double *w1 = &v1[Nvcd * isite];
418  double *w2 = &vcp1[Nvcd * isite_cp];
419  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
420 
421 
422  for (int it = 0; it < m_Mt; ++it) {
423  for (int iz = 0; iz < m_Mz; ++iz) {
424  for (int iy = 0; iy < m_Ny; ++iy) {
425  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
426  int is2 = iy + m_Ny * (iz + m_Mz * it);
427  int in = Nvcd * is;
428  int ig = m_Ndf * is;
429  int ix1 = Nvcd * is2;
430  int ix2 = ix1 + m_Nvc;
431  int ix3 = ix2 + m_Nvc;
432  int ix4 = ix3 + m_Nvc;
433 
434  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
435 
436  for (int ic = 0; ic < m_Nc; ++ic) {
437  int ic_r = 2 * ic;
438  int ic_i = 2 * ic + 1;
439 
440  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id4 + in];
441  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
442  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
443  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id3 + in];
444 
445  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id2 + in];
446  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
447  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
448  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id1 + in];
449  }
450 
451  for (int ic = 0; ic < m_Nc; ++ic) {
452  int ic2 = 2 * ic;
453 
454  int ic_r = 2 * ic;
455  int ic_i = 2 * ic + 1;
456 
457  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
458  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
459  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
460  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
461 
462  w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
463  w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
464  w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
465  w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
466  }
467  }
468  }
469  }
470  }
471 
472 
473 //====================================================================
475  double *v2, const double *vcp2)
476  {
477  const int Nvcd = m_Nvc * m_Nd;
478 
479  const int id1 = 0;
480  const int id2 = m_Nvc;
481  const int id3 = m_Nvc * 2;
482  const int id4 = m_Nvc * 3;
483 
484  const int idir = 0;
485  const int ix = 0;
486  const double bc2 = m_boundary_each_node[idir];
487 
488  const int isite = m_arg[itask].isite;
489  const int isite_cp = m_arg[itask].isite_cp_x;
490 
491  const double *w1 = &vcp2[Nvcd * isite_cp];
492  double *w2 = &v2[Nvcd * isite];
493 
494 
495  for (int it = 0; it < m_Mt; ++it) {
496  for (int iz = 0; iz < m_Mz; ++iz) {
497  for (int iy = 0; iy < m_Ny; ++iy) {
498  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
499  int is2 = iy + m_Ny * (iz + m_Mz * it);
500  int iv = Nvcd * is;
501  int ix1 = Nvcd * is2;
502  int ix2 = ix1 + m_Nvc;
503  int ix3 = ix2 + m_Nvc;
504  int ix4 = ix3 + m_Nvc;
505 
506  for (int ic = 0; ic < m_Nc; ++ic) {
507  int ic_r = 2 * ic;
508  int ic_i = 2 * ic + 1;
509 
510  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
511  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
512  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
513  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
514 
515  w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
516  w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
517  w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
518  w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
519  }
520  }
521  }
522  }
523  }
524 
525 
526 //====================================================================
528  double *v2, const double *v1)
529  {
530  const int Nvcd = m_Nvc * m_Nd;
531 
532  const int id1 = 0;
533  const int id2 = m_Nvc;
534  const int id3 = m_Nvc * 2;
535  const int id4 = m_Nvc * 3;
536 
537  const int idir = 0;
538 
539  const int isite = m_arg[itask].isite;
540 
541  const double *w1 = &v1[Nvcd * isite];
542  double *w2 = &v2[Nvcd * isite];
543  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
544 
545 
546  for (int it = 0; it < m_Mt; ++it) {
547  for (int iz = 0; iz < m_Mz; ++iz) {
548  for (int iy = 0; iy < m_Ny; ++iy) {
549  for (int ix = 1; ix < m_Nx; ++ix) {
550  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
551  int iv = Nvcd * is;
552  int in = Nvcd * (is - 1);
553  int ig = m_Ndf * (is - 1);
554 
555  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
556 
557  for (int ic = 0; ic < m_Nc; ++ic) {
558  int ic_r = 2 * ic;
559  int ic_i = 2 * ic + 1;
560 
561  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id4 + in];
562  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
563  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
564  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id3 + in];
565 
566  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id2 + in];
567  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
568  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
569  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id1 + in];
570  }
571 
572  for (int ic = 0; ic < m_Nc; ++ic) {
573  int ic2 = 2 * ic;
574 
575  double wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
576  double wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
577  double wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
578  double wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
579 
580  double wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
581  double wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
582  double wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
583  double wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
584 
585  int ic_r = 2 * ic;
586  int ic_i = 2 * ic + 1;
587 
588  w2[ic_r + id1 + iv] += wt1_r;
589  w2[ic_i + id1 + iv] += wt1_i;
590  w2[ic_r + id2 + iv] += wt2_r;
591  w2[ic_i + id2 + iv] += wt2_i;
592 
593  w2[ic_r + id3 + iv] += wt3_r;
594  w2[ic_i + id3 + iv] += wt3_i;
595  w2[ic_r + id4 + iv] += wt4_r;
596  w2[ic_i + id4 + iv] += wt4_i;
597  }
598  }
599  }
600  }
601  }
602  }
603 
604 
605 //====================================================================
607  double *vcp1, const double *v1)
608  {
609  const int Nvcd = m_Nvc * m_Nd;
610 
611  const int id1 = 0;
612  const int id2 = m_Nvc;
613  const int id3 = m_Nvc * 2;
614  const int id4 = m_Nvc * 3;
615 
616  const int idir = 1;
617  const int iy = 0;
618  const double bc2 = m_boundary_each_node[idir];
619 
620  const int isite = m_arg[itask].isite;
621  const int isite_cp = m_arg[itask].isite_cp_y;
622 
623  const double *w1 = &v1[Nvcd * isite];
624  double *w2 = &vcp1[Nvcd * isite_cp];
625 
626 
627  for (int it = 0; it < m_Mt; ++it) {
628  for (int iz = 0; iz < m_Mz; ++iz) {
629  for (int ix = 0; ix < m_Nx; ++ix) {
630  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
631  int is2 = ix + m_Nx * (iz + m_Mz * it);
632  int in = Nvcd * is;
633  int ix1 = Nvcd * is2;
634  int ix2 = ix1 + m_Nvc;
635  int ix3 = ix2 + m_Nvc;
636  int ix4 = ix3 + m_Nvc;
637 
638  for (int ic = 0; ic < m_Nc; ++ic) {
639  int ic_r = 2 * ic;
640  int ic_i = 2 * ic + 1;
641 
642  w2[ic_r + ix1] = bc2 * (m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_r + id4 + in]);
643  w2[ic_i + ix1] = bc2 * (m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_i + id4 + in]);
644  w2[ic_r + ix2] = bc2 * (m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_r + id3 + in]);
645  w2[ic_i + ix2] = bc2 * (m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_i + id3 + in]);
646 
647  w2[ic_r + ix3] = bc2 * (m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_r + id2 + in]);
648  w2[ic_i + ix3] = bc2 * (m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_i + id2 + in]);
649  w2[ic_r + ix4] = bc2 * (m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_r + id1 + in]);
650  w2[ic_i + ix4] = bc2 * (m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_i + id1 + in]);
651  }
652  }
653  }
654  }
655  }
656 
657 
658 //====================================================================
660  const int itask, double *v2, const double *vcp2)
661  {
662  const int Nvcd = m_Nvc * m_Nd;
663 
664  const int id1 = 0;
665  const int id2 = m_Nvc;
666  const int id3 = m_Nvc * 2;
667  const int id4 = m_Nvc * 3;
668 
669  const int idir = 1;
670  const int iy = m_Ny - 1;
671 
672  const int isite = m_arg[itask].isite;
673  const int isite_cp = m_arg[itask].isite_cp_y;
674 
675  const double *w1 = &vcp2[Nvcd * isite_cp];
676  double *w2 = &v2[Nvcd * isite];
677  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
678 
679 
680  for (int it = 0; it < m_Mt; ++it) {
681  for (int iz = 0; iz < m_Mz; ++iz) {
682  for (int ix = 0; ix < m_Nx; ++ix) {
683  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
684  int is2 = ix + m_Nx * (iz + m_Mz * it);
685  int iv = Nvcd * is;
686  int ig = m_Ndf * is;
687  int ix1 = Nvcd * is2;
688  int ix2 = ix1 + m_Nvc;
689  int ix3 = ix2 + m_Nvc;
690  int ix4 = ix3 + m_Nvc;
691 
692  for (int ic = 0; ic < m_Nc; ++ic) {
693  int ic2 = ic * m_Nvc;
694 
695  double wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
696  double wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
697  double wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
698  double wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
699 
700  double wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
701  double wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
702  double wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
703  double wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
704 
705  int ic_r = 2 * ic;
706  int ic_i = 2 * ic + 1;
707 
708  w2[ic_r + id1 + iv] += wt1_r;
709  w2[ic_i + id1 + iv] += wt1_i;
710  w2[ic_r + id2 + iv] += wt2_r;
711  w2[ic_i + id2 + iv] += wt2_i;
712 
713  w2[ic_r + id3 + iv] += wt3_r;
714  w2[ic_i + id3 + iv] += wt3_i;
715  w2[ic_r + id4 + iv] += wt4_r;
716  w2[ic_i + id4 + iv] += wt4_i;
717  }
718  }
719  }
720  }
721  }
722 
723 
724 //====================================================================
726  double *v2, const double *v1)
727  {
728  const int Nvcd = m_Nvc * m_Nd;
729 
730  const int id1 = 0;
731  const int id2 = m_Nvc;
732  const int id3 = m_Nvc * 2;
733  const int id4 = m_Nvc * 3;
734 
735  const int idir = 1;
736 
737  const int isite = m_arg[itask].isite;
738 
739  const double *w1 = &v1[Nvcd * isite];
740  double *w2 = &v2[Nvcd * isite];
741  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
742 
743 
744  for (int it = 0; it < m_Mt; ++it) {
745  for (int iz = 0; iz < m_Mz; ++iz) {
746  for (int iy = 0; iy < m_Ny - 1; ++iy) {
747  for (int ix = 0; ix < m_Nx; ++ix) {
748  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
749  int iv = Nvcd * is;
750  int in = Nvcd * (is + m_Nx);
751  int ig = m_Ndf * is;
752 
753  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
754 
755  for (int ic = 0; ic < m_Nc; ++ic) {
756  int ic_r = 2 * ic;
757  int ic_i = 2 * ic + 1;
758 
759  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_r + id4 + in];
760  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_i + id4 + in];
761  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_r + id3 + in];
762  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_i + id3 + in];
763 
764  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_r + id2 + in];
765  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_i + id2 + in];
766  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_r + id1 + in];
767  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_i + id1 + in];
768  }
769 
770  for (int ic = 0; ic < m_Nc; ++ic) {
771  int ic2 = ic * m_Nvc;
772 
773  double wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
774  double wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
775  double wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
776  double wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
777 
778  double wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
779  double wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
780  double wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
781  double wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
782 
783  int ic_r = 2 * ic;
784  int ic_i = 2 * ic + 1;
785 
786  w2[ic_r + id1 + iv] += wt1_r;
787  w2[ic_i + id1 + iv] += wt1_i;
788  w2[ic_r + id2 + iv] += wt2_r;
789  w2[ic_i + id2 + iv] += wt2_i;
790 
791  w2[ic_r + id3 + iv] += wt3_r;
792  w2[ic_i + id3 + iv] += wt3_i;
793  w2[ic_r + id4 + iv] += wt4_r;
794  w2[ic_i + id4 + iv] += wt4_i;
795  }
796  }
797  }
798  }
799  }
800  }
801 
802 
803 //====================================================================
805  double *vcp1, const double *v1)
806  {
807  const int Nvcd = m_Nvc * m_Nd;
808 
809  const int id1 = 0;
810  const int id2 = m_Nvc;
811  const int id3 = m_Nvc * 2;
812  const int id4 = m_Nvc * 3;
813 
814  const int idir = 1;
815  const int iy = m_Ny - 1;
816 
817  const int isite = m_arg[itask].isite;
818  const int isite_cp = m_arg[itask].isite_cp_y;
819 
820  const double *w1 = &v1[Nvcd * isite];
821  double *w2 = &vcp1[Nvcd * isite_cp];
822  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
823 
824 
825  for (int it = 0; it < m_Mt; ++it) {
826  for (int iz = 0; iz < m_Mz; ++iz) {
827  for (int ix = 0; ix < m_Nx; ++ix) {
828  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
829  int is2 = ix + m_Nx * (iz + m_Mz * it);
830  int in = Nvcd * is;
831  int ig = m_Ndf * is;
832  int ix1 = Nvcd * is2;
833  int ix2 = ix1 + m_Nvc;
834  int ix3 = ix2 + m_Nvc;
835  int ix4 = ix3 + m_Nvc;
836 
837  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
838 
839  for (int ic = 0; ic < m_Nc; ++ic) {
840  int ic_r = 2 * ic;
841  int ic_i = 2 * ic + 1;
842 
843  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
844  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_i + id4 + in];
845  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_r + id3 + in];
846  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
847 
848  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
849  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_i + id2 + in];
850  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_r + id1 + in];
851  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
852  }
853 
854  for (int ic = 0; ic < m_Nc; ++ic) {
855  int ic2 = 2 * ic;
856 
857  int ic_r = 2 * ic;
858  int ic_i = 2 * ic + 1;
859 
860  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
861  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
862  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
863  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
864 
865  w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
866  w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
867  w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
868  w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
869  }
870  }
871  }
872  }
873  }
874 
875 
876 //====================================================================
878  double *v2, const double *vcp2)
879  {
880  const int Nvcd = m_Nvc * m_Nd;
881 
882  const int id1 = 0;
883  const int id2 = m_Nvc;
884  const int id3 = m_Nvc * 2;
885  const int id4 = m_Nvc * 3;
886 
887  const int idir = 1;
888  const int iy = 0;
889  const double bc2 = m_boundary_each_node[idir];
890 
891  const int isite = m_arg[itask].isite;
892  const int isite_cp = m_arg[itask].isite_cp_y;
893 
894  const double *w1 = &vcp2[Nvcd * isite_cp];
895  double *w2 = &v2[Nvcd * isite];
896 
897 
898  for (int it = 0; it < m_Mt; ++it) {
899  for (int iz = 0; iz < m_Mz; ++iz) {
900  for (int ix = 0; ix < m_Nx; ++ix) {
901  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
902  int is2 = ix + m_Nx * (iz + m_Mz * it);
903  int iv = Nvcd * is;
904  int ix1 = Nvcd * is2;
905  int ix2 = ix1 + m_Nvc;
906  int ix3 = ix2 + m_Nvc;
907  int ix4 = ix3 + m_Nvc;
908 
909  for (int ic = 0; ic < m_Nc; ++ic) {
910  int ic_r = 2 * ic;
911  int ic_i = 2 * ic + 1;
912 
913  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
914  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
915  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
916  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
917 
918  w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
919  w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
920  w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
921  w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
922  }
923  }
924  }
925  }
926  }
927 
928 
929 //====================================================================
931  double *v2, const double *v1)
932  {
933  const int Nvcd = m_Nvc * m_Nd;
934 
935  const int id1 = 0;
936  const int id2 = m_Nvc;
937  const int id3 = m_Nvc * 2;
938  const int id4 = m_Nvc * 3;
939 
940  const int idir = 1;
941 
942  const int isite = m_arg[itask].isite;
943 
944  const double *w1 = &v1[Nvcd * isite];
945  double *w2 = &v2[Nvcd * isite];
946  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
947 
948 
949  for (int it = 0; it < m_Mt; ++it) {
950  for (int iz = 0; iz < m_Mz; ++iz) {
951  for (int iy = 1; iy < m_Ny; ++iy) {
952  for (int ix = 0; ix < m_Nx; ++ix) {
953  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
954  int iv = Nvcd * is;
955  int in = Nvcd * (is - m_Nx);
956  int ig = m_Ndf * (is - m_Nx);
957 
958  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
959 
960  for (int ic = 0; ic < m_Nc; ++ic) {
961  int ic_r = 2 * ic;
962  int ic_i = 2 * ic + 1;
963 
964  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
965  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_i + id4 + in];
966  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_r + id3 + in];
967  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
968 
969  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
970  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_i + id2 + in];
971  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_r + id1 + in];
972  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
973  }
974 
975  for (int ic = 0; ic < m_Nc; ++ic) {
976  int ic2 = 2 * ic;
977 
978  double wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
979  double wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
980  double wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
981  double wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
982 
983  double wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
984  double wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
985  double wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
986  double wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
987 
988  int ic_r = 2 * ic;
989  int ic_i = 2 * ic + 1;
990 
991  w2[ic_r + id1 + iv] += wt1_r;
992  w2[ic_i + id1 + iv] += wt1_i;
993  w2[ic_r + id2 + iv] += wt2_r;
994  w2[ic_i + id2 + iv] += wt2_i;
995 
996  w2[ic_r + id3 + iv] += wt3_r;
997  w2[ic_i + id3 + iv] += wt3_i;
998  w2[ic_r + id4 + iv] += wt4_r;
999  w2[ic_i + id4 + iv] += wt4_i;
1000  }
1001  }
1002  }
1003  }
1004  }
1005  }
1006 
1007 
1008 //====================================================================
1010  double *vcp1, const double *v1)
1011  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_z_plus1_thread -- confirm.
  //
  // Packs the iz = 0 plane of task `itask` from v1 into the buffer vcp1.
  // Work is done only when m_arg[itask].kz0 == 1; otherwise nothing is written.
  // Each packed slot holds Nvcd doubles (four blocks of m_Nvc); every entry is a
  // combination of two spinor blocks of v1 weighted by m_r_s and m_nu_s, then scaled by
  // the direction-2 factor bc2 taken from m_boundary_each_node.
1012  const int Nvcd = m_Nvc * m_Nd;
1013 
1014  const int id1 = 0;
1015  const int id2 = m_Nvc;
1016  const int id3 = m_Nvc * 2;
1017  const int id4 = m_Nvc * 3;
1018 
1019  const int idir = 2;
1020  const double bc2 = m_boundary_each_node[idir];
1021 
1022  const int isite = m_arg[itask].isite;
1023  const int isite_cp = m_arg[itask].isite_cp_z;
1024 
1025  const double *w1 = &v1[Nvcd * isite];
1026  double *w2 = &vcp1[Nvcd * isite_cp];
1027 
1028 
1029  if (m_arg[itask].kz0 == 1) {
1030  const int Nxy = m_Nx * m_Ny;
1031  const int iz = 0;
1032 
1033  for (int it = 0; it < m_Mt; ++it) {
1034  for (int ixy = 0; ixy < Nxy; ++ixy) {
1035  int is = ixy + Nxy * (iz + m_Nz * it); // site index relative to the task origin
1036  int is2 = ixy + Nxy * it; // slot index inside the packed plane
1037 
1038  int in = Nvcd * is;
1039  int ix1 = Nvcd * is2;
1040  int ix2 = ix1 + m_Nvc;
1041  int ix3 = ix2 + m_Nvc;
1042  int ix4 = ix3 + m_Nvc;
1043 
1044  for (int ic = 0; ic < m_Nc; ++ic) {
  // ic_r / ic_i address the even / odd entry of the colour pair
  // (real / imaginary part, going by the names).
1045  int ic_r = 2 * ic;
1046  int ic_i = 2 * ic + 1;
1047 
1048  w2[ic_r + ix1] = bc2 * (m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id3 + in]);
1049  w2[ic_i + ix1] = bc2 * (m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id3 + in]);
1050  w2[ic_r + ix2] = bc2 * (m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id4 + in]);
1051  w2[ic_i + ix2] = bc2 * (m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id4 + in]);
1052 
1053  w2[ic_r + ix3] = bc2 * (m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id1 + in]);
1054  w2[ic_i + ix3] = bc2 * (m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id1 + in]);
1055  w2[ic_r + ix4] = bc2 * (m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id2 + in]);
1056  w2[ic_i + ix4] = bc2 * (m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id2 + in]);
1057  }
1058  }
1059  }
1060  }
1061  }
1062 
1063 
1064 //====================================================================
1066  double *v2, const double *vcp2)
1067  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_z_plus2_thread -- confirm.
  //
  // Consumes the buffer vcp2 for the iz = m_Mz - 1 plane of task `itask`.
  // Work is done only when m_arg[itask].kz1 == 1. For every site of that plane the four
  // m_Nvc-sized blocks of the matching buffer slot are passed, together with the
  // direction-2 link data of the same site, to mult_uv_r / mult_uv_i (helpers defined
  // elsewhere; presumably real and imaginary part of one row of U times v), and the
  // results are accumulated (+=) into v2.
1068  const int Nvcd = m_Nvc * m_Nd;
1069 
1070  const int id1 = 0;
1071  const int id2 = m_Nvc;
1072  const int id3 = m_Nvc * 2;
1073  const int id4 = m_Nvc * 3;
1074 
1075  const int idir = 2;
1076 
1077  const int isite = m_arg[itask].isite;
1078  const int isite_cp = m_arg[itask].isite_cp_z;
1079 
1080  const double *w1 = &vcp2[Nvcd * isite_cp];
1081  double *w2 = &v2[Nvcd * isite];
1082  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1083 
1084 
1085  if (m_arg[itask].kz1 == 1) {
1086  const int Nxy = m_Nx * m_Ny;
1087  const int iz = m_Mz - 1;
1088 
1089  for (int it = 0; it < m_Mt; ++it) {
1090  for (int ixy = 0; ixy < Nxy; ++ixy) {
1091  int is = ixy + Nxy * (iz + m_Nz * it); // site index relative to the task origin
1092  int is2 = ixy + Nxy * it; // slot index inside the buffer plane
1093  int iv = Nvcd * is;
1094  int ig = m_Ndf * is;
1095  int ix1 = Nvcd * is2;
1096  int ix2 = ix1 + m_Nvc;
1097  int ix3 = ix2 + m_Nvc;
1098  int ix4 = ix3 + m_Nvc;
1099 
1100  for (int ic = 0; ic < m_Nc; ++ic) {
1101  int ic2 = ic * m_Nvc; // offset handed to mult_uv_* for output colour ic
1102 
1103  double wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1104  double wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1105  double wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1106  double wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1107 
1108  double wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
1109  double wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
1110  double wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
1111  double wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
1112 
1113  int ic_r = 2 * ic;
1114  int ic_i = 2 * ic + 1;
1115 
1116  w2[ic_r + id1 + iv] += wt1_r;
1117  w2[ic_i + id1 + iv] += wt1_i;
1118  w2[ic_r + id2 + iv] += wt2_r;
1119  w2[ic_i + id2 + iv] += wt2_i;
1120 
1121  w2[ic_r + id3 + iv] += wt3_r;
1122  w2[ic_i + id3 + iv] += wt3_i;
1123  w2[ic_r + id4 + iv] += wt4_r;
1124  w2[ic_i + id4 + iv] += wt4_i;
1125  }
1126  }
1127  }
1128  }
1129  }
1130 
1131 
1132 //====================================================================
1134  double *v2, const double *v1)
1135  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_z_plus_bulk_thread -- confirm.
  //
  // Interior part of the +z term for task `itask`: for every site with
  // iz < m_Mz - kz1 it reads the spinor one z-plane up (is + Nxy) from v1, mixes the
  // spinor blocks with m_r_s / m_nu_s into vt1..vt4, passes them with the direction-2
  // link data of the current site to mult_uv_r / mult_uv_i (helpers defined elsewhere)
  // and accumulates (+=) into v2.
  // When kz1 == 1 the plane iz = m_Mz - 1 is left out of this loop (presumably covered
  // by the buffer-based variant -- confirm).
1136  const int Nvcd = m_Nvc * m_Nd;
1137 
1138  const int id1 = 0;
1139  const int id2 = m_Nvc;
1140  const int id3 = m_Nvc * 2;
1141  const int id4 = m_Nvc * 3;
1142 
1143  const int idir = 2;
1144  const int Nxy = m_Nx * m_Ny;
1145 
1146  const int isite = m_arg[itask].isite;
1147  const int kz1 = m_arg[itask].kz1;
1148 
1149  const double *w1 = &v1[Nvcd * isite];
1150  double *w2 = &v2[Nvcd * isite];
1151  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1152 
1153 
1154  for (int it = 0; it < m_Mt; ++it) {
1155  for (int iz = 0; iz < m_Mz - kz1; ++iz) {
1156  for (int ixy = 0; ixy < Nxy; ++ixy) {
1157  int is = ixy + Nxy * (iz + m_Nz * it);
1158  int iv = Nvcd * is; // output offset (current site)
1159  int in = Nvcd * (is + Nxy); // input offset (site one z-plane up)
1160  int ig = m_Ndf * is; // link offset (current site)
1161 
  // NOTE(review): if m_Nvc is not a compile-time constant these are
  // variable-length arrays, a compiler extension in C++ -- confirm.
1162  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
1163 
1164  for (int ic = 0; ic < m_Nc; ++ic) {
1165  int ic_r = 2 * ic;
1166  int ic_i = 2 * ic + 1;
1167 
1168  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id3 + in];
1169  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id3 + in];
1170  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id4 + in];
1171  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id4 + in];
1172 
1173  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id1 + in];
1174  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id1 + in];
1175  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id2 + in];
1176  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id2 + in];
1177  }
1178 
1179  for (int ic = 0; ic < m_Nc; ++ic) {
1180  int ic2 = ic * m_Nvc; // offset handed to mult_uv_* for output colour ic
1181 
1182  double wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1183  double wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1184  double wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1185  double wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1186 
1187  double wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
1188  double wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
1189  double wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
1190  double wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
1191 
1192  int ic_r = 2 * ic;
1193  int ic_i = 2 * ic + 1;
1194 
1195  w2[ic_r + id1 + iv] += wt1_r;
1196  w2[ic_i + id1 + iv] += wt1_i;
1197  w2[ic_r + id2 + iv] += wt2_r;
1198  w2[ic_i + id2 + iv] += wt2_i;
1199 
1200  w2[ic_r + id3 + iv] += wt3_r;
1201  w2[ic_i + id3 + iv] += wt3_i;
1202  w2[ic_r + id4 + iv] += wt4_r;
1203  w2[ic_i + id4 + iv] += wt4_i;
1204  }
1205  }
1206  }
1207  }
1208  }
1209 
1210 
1211 //====================================================================
1213  double *vcp1, const double *v1)
1214  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_z_minus1_thread -- confirm.
  //
  // Packs the iz = m_Mz - 1 plane of task `itask` from v1 into the buffer vcp1.
  // Work is done only when m_arg[itask].kz1 == 1. For each site the spinor blocks are
  // mixed with m_r_s / m_nu_s into vt1..vt4, then passed with the direction-2 link data
  // of the same site to mult_udagv_r / mult_udagv_i (helpers defined elsewhere;
  // presumably one row of U^dagger times v). The results overwrite (=) the buffer slot.
1215  const int Nvcd = m_Nvc * m_Nd;
1216 
1217  const int id1 = 0;
1218  const int id2 = m_Nvc;
1219  const int id3 = m_Nvc * 2;
1220  const int id4 = m_Nvc * 3;
1221 
1222  const int idir = 2;
1223 
1224  const int isite = m_arg[itask].isite;
1225  const int isite_cp = m_arg[itask].isite_cp_z;
1226 
1227  const double *w1 = &v1[Nvcd * isite];
1228  double *w2 = &vcp1[Nvcd * isite_cp];
1229  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1230 
1231 
1232  if (m_arg[itask].kz1 == 1) {
1233  const int Nxy = m_Nx * m_Ny;
1234  const int iz = m_Mz - 1;
1235 
1236  for (int it = 0; it < m_Mt; ++it) {
1237  for (int ixy = 0; ixy < Nxy; ++ixy) {
1238  int is = ixy + Nxy * (iz + m_Nz * it); // site index relative to the task origin
1239  int is2 = ixy + Nxy * it; // slot index inside the packed plane
1240  int in = Nvcd * is;
1241  int ig = m_Ndf * is;
1242  int ix1 = Nvcd * is2;
1243  int ix2 = ix1 + m_Nvc;
1244  int ix3 = ix2 + m_Nvc;
1245  int ix4 = ix3 + m_Nvc;
1246 
  // NOTE(review): if m_Nvc is not a compile-time constant these are
  // variable-length arrays, a compiler extension in C++ -- confirm.
1247  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
1248 
1249  for (int ic = 0; ic < m_Nc; ++ic) {
1250  int ic_r = 2 * ic;
1251  int ic_i = 2 * ic + 1;
1252 
1253  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id3 + in];
1254  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id3 + in];
1255  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id4 + in];
1256  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id4 + in];
1257 
1258  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id1 + in];
1259  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id1 + in];
1260  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id2 + in];
1261  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id2 + in];
1262  }
1263 
1264  for (int ic = 0; ic < m_Nc; ++ic) {
1265  int ic2 = 2 * ic; // offset handed to mult_udagv_* for output colour ic
1266 
1267  int ic_r = 2 * ic;
1268  int ic_i = 2 * ic + 1;
1269 
1270  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1271  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1272  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1273  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1274 
1275  w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
1276  w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
1277  w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
1278  w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
1279  }
1280  }
1281  }
1282  }
1283  }
1284 
1285 
1286 //====================================================================
1288  double *v2, const double *vcp2)
1289  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_z_minus2_thread -- confirm.
  //
  // Consumes the buffer vcp2 for the iz = 0 plane of task `itask`.
  // Work is done only when m_arg[itask].kz0 == 1. Each of the four m_Nvc-sized blocks of
  // a buffer slot is scaled by the direction-2 factor bc2 (from m_boundary_each_node) and
  // accumulated (+=) into the matching spinor block of v2. No link data is read here.
1290  const int Nvcd = m_Nvc * m_Nd;
1291 
1292  const int id1 = 0;
1293  const int id2 = m_Nvc;
1294  const int id3 = m_Nvc * 2;
1295  const int id4 = m_Nvc * 3;
1296 
1297  const int idir = 2;
1298  const double bc2 = m_boundary_each_node[idir];
1299 
1300  const int isite = m_arg[itask].isite;
1301  const int isite_cp = m_arg[itask].isite_cp_z;
1302 
1303  const double *w1 = &vcp2[Nvcd * isite_cp];
1304  double *w2 = &v2[Nvcd * isite];
1305 
1306 
1307  if (m_arg[itask].kz0 == 1) {
1308  const int Nxy = m_Nx * m_Ny;
1309  const int iz = 0;
1310 
1311  for (int it = 0; it < m_Mt; ++it) {
1312  for (int ixy = 0; ixy < Nxy; ++ixy) {
1313  int is = ixy + Nxy * (iz + m_Nz * it); // site index relative to the task origin
1314  int is2 = ixy + Nxy * it; // slot index inside the buffer plane
1315  int iv = Nvcd * is;
1316  int ix1 = Nvcd * is2;
1317  int ix2 = ix1 + m_Nvc;
1318  int ix3 = ix2 + m_Nvc;
1319  int ix4 = ix3 + m_Nvc;
1320 
1321  for (int ic = 0; ic < m_Nc; ++ic) {
1322  int ic_r = 2 * ic;
1323  int ic_i = 2 * ic + 1;
1324 
1325  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1326  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1327  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1328  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1329 
1330  w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
1331  w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
1332  w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
1333  w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
1334  }
1335  }
1336  }
1337  }
1338  }
1339 
1340 
1341 //====================================================================
1343  double *v2, const double *v1)
1344  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_z_minus_bulk_thread -- confirm.
  //
  // Interior part of the -z term for task `itask`: for every site with iz >= kz0 it
  // reads the spinor one z-plane down (is - Nxy) from v1, mixes the spinor blocks with
  // m_r_s / m_nu_s into vt1..vt4, passes them with the direction-2 link data of that
  // lower neighbour site to mult_udagv_r / mult_udagv_i (helpers defined elsewhere) and
  // accumulates (+=) into v2.
  // When kz0 == 1 the plane iz = 0 is left out of this loop (presumably covered by the
  // buffer-based variant -- confirm).
1345  const int Nvcd = m_Nvc * m_Nd;
1346 
1347  const int id1 = 0;
1348  const int id2 = m_Nvc;
1349  const int id3 = m_Nvc * 2;
1350  const int id4 = m_Nvc * 3;
1351 
1352  const int idir = 2;
1353  const int Nxy = m_Nx * m_Ny;
1354 
1355  const int isite = m_arg[itask].isite;
1356  const int kz0 = m_arg[itask].kz0;
1357 
1358  const double *w1 = &v1[Nvcd * isite];
1359  double *w2 = &v2[Nvcd * isite];
1360  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1361 
1362 
1363  for (int it = 0; it < m_Mt; ++it) {
1364  for (int iz = kz0; iz < m_Mz; ++iz) {
1365  for (int ixy = 0; ixy < Nxy; ++ixy) {
1366  int is = ixy + Nxy * (iz + m_Nz * it);
1367  int iv = Nvcd * is; // output offset (current site)
1368  int in = Nvcd * (is - Nxy); // input offset (site one z-plane down)
1369  int ig = m_Ndf * (is - Nxy); // link offset (same lower neighbour site)
1370 
  // NOTE(review): if m_Nvc is not a compile-time constant these are
  // variable-length arrays, a compiler extension in C++ -- confirm.
1371  double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
1372 
1373  for (int ic = 0; ic < m_Nc; ++ic) {
1374  int ic_r = 2 * ic;
1375  int ic_i = 2 * ic + 1;
1376 
1377  vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id3 + in];
1378  vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id3 + in];
1379  vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id4 + in];
1380  vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id4 + in];
1381 
1382  vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id1 + in];
1383  vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id1 + in];
1384  vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id2 + in];
1385  vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id2 + in];
1386  }
1387 
1388  for (int ic = 0; ic < m_Nc; ++ic) {
1389  int ic2 = 2 * ic; // offset handed to mult_udagv_* for output colour ic
1390 
1391  double wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1392  double wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1393  double wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1394  double wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1395 
1396  double wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
1397  double wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
1398  double wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
1399  double wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
1400 
1401  int ic_r = 2 * ic;
1402  int ic_i = 2 * ic + 1;
1403 
1404  w2[ic_r + id1 + iv] += wt1_r;
1405  w2[ic_i + id1 + iv] += wt1_i;
1406  w2[ic_r + id2 + iv] += wt2_r;
1407  w2[ic_i + id2 + iv] += wt2_i;
1408 
1409  w2[ic_r + id3 + iv] += wt3_r;
1410  w2[ic_i + id3 + iv] += wt3_i;
1411  w2[ic_r + id4 + iv] += wt4_r;
1412  w2[ic_i + id4 + iv] += wt4_i;
1413  }
1414  }
1415  }
1416  }
1417  }
1418 
1419 
1420 //====================================================================
1422  double *vcp1, const double *v1)
1423  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_t_plus1_dirac_thread -- confirm.
  //
  // Packs the it = 0 plane of task `itask` from v1 into the buffer vcp1.
  // Work is done only when m_arg[itask].kt0 == 1. Only the third and fourth spinor blocks
  // (offsets id3, id4) are read; each entry is multiplied by 2.0 and by the direction-3
  // factor bc2 from m_boundary_each_node. A buffer slot therefore holds only
  // Nvc2 = 2 * m_Nvc doubles, and the task's buffer base is Nvcd2 * isite_cp.
1424  const int Nvc2 = 2 * m_Nvc;
1425  const int Nvcd = m_Nvc * m_Nd;
1426  const int Nvcd2 = Nvcd / 2;
1427 
1428  const int id3 = m_Nvc * 2;
1429  const int id4 = m_Nvc * 3;
1430 
1431  const int idir = 3;
1432  const double bc2 = m_boundary_each_node[idir];
1433 
1434  const int isite = m_arg[itask].isite;
1435  const int isite_cp = m_arg[itask].isite_cp_t;
1436 
1437  const double *w1 = &v1[Nvcd * isite];
1438  double *w2 = &vcp1[Nvcd2 * isite_cp];
1439 
1440 
1441  if (m_arg[itask].kt0 == 1) {
1442  const int Nxy = m_Nx * m_Ny;
1443  const int it = 0;
1444 
1445  for (int iz = 0; iz < m_Mz; ++iz) {
1446  for (int ixy = 0; ixy < Nxy; ++ixy) {
1447  int is = ixy + Nxy * (iz + m_Nz * it); // site index relative to the task origin
1448  int is2 = ixy + Nxy * iz; // slot index inside the packed plane
1449 
1450  int in = Nvcd * is;
1451  int ix1 = Nvc2 * is2;
1452  int ix2 = ix1 + m_Nvc;
1453 
1454  for (int ic = 0; ic < m_Nc; ++ic) {
1455  int ic_r = 2 * ic;
1456  int ic_i = 2 * ic + 1;
1457 
1458  w2[ic_r + ix1] = 2.0 * bc2 * w1[ic_r + id3 + in];
1459  w2[ic_i + ix1] = 2.0 * bc2 * w1[ic_i + id3 + in];
1460  w2[ic_r + ix2] = 2.0 * bc2 * w1[ic_r + id4 + in];
1461  w2[ic_i + ix2] = 2.0 * bc2 * w1[ic_i + id4 + in];
1462  }
1463  }
1464  }
1465  }
1466  }
1467 
1468 
1469 //====================================================================
1471  double *v2, const double *vcp2)
1472  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_t_plus2_dirac_thread -- confirm.
  //
  // Consumes the buffer vcp2 for the it = m_Mt - 1 plane of task `itask`.
  // Work is done only when m_arg[itask].kt1 == 1. The two m_Nvc-sized blocks of each
  // buffer slot are passed with the direction-3 link data of the same site to
  // mult_uv_r / mult_uv_i (helpers defined elsewhere) and accumulated (+=) into the
  // third and fourth spinor blocks (id3, id4) of v2; the first two blocks are untouched.
1473  const int Nvc2 = 2 * m_Nvc;
1474  const int Nvcd = m_Nvc * m_Nd;
1475  const int Nvcd2 = Nvcd / 2;
1476 
1477  const int id3 = m_Nvc * 2;
1478  const int id4 = m_Nvc * 3;
1479 
1480  const int idir = 3;
1481 
1482  const int isite = m_arg[itask].isite;
1483  const int isite_cp = m_arg[itask].isite_cp_t;
1484 
1485  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1486  double *w2 = &v2[Nvcd * isite];
1487  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1488 
1489 
1490  if (m_arg[itask].kt1 == 1) {
1491  const int Nxy = m_Nx * m_Ny;
1492  const int it = m_Mt - 1;
1493 
1494  for (int iz = 0; iz < m_Mz; ++iz) {
1495  for (int ixy = 0; ixy < Nxy; ++ixy) {
1496  int is = ixy + Nxy * (iz + m_Nz * it); // site index relative to the task origin
1497  int is2 = ixy + Nxy * iz; // slot index inside the buffer plane
1498  int iv = Nvcd * is;
1499  int ig = m_Ndf * is;
1500  int ix1 = Nvc2 * is2;
1501  int ix2 = ix1 + m_Nvc;
1502 
1503  for (int ic = 0; ic < m_Nc; ++ic) {
1504  int ic2 = ic * m_Nvc; // offset handed to mult_uv_* for output colour ic
1505 
1506  int ic_r = 2 * ic;
1507  int ic_i = 2 * ic + 1;
1508 
1509  w2[ic_r + id3 + iv] += mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1510  w2[ic_i + id3 + iv] += mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1511  w2[ic_r + id4 + iv] += mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1512  w2[ic_i + id4 + iv] += mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1513  }
1514  }
1515  }
1516  }
1517  }
1518 
1519 
1520 //====================================================================
1522  double *v2, const double *v1)
1523  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_t_plus_bulk_dirac_thread
  // -- confirm.
  //
  // Interior part of the +t term for task `itask`: for every site with
  // it < m_Mt - kt1 it reads the third and fourth spinor blocks (id3, id4) one time
  // slice up (is + Nxyz) from v1, doubles them into vt1 / vt2, passes them with the
  // direction-3 link data of the current site to mult_uv_r / mult_uv_i (helpers defined
  // elsewhere) and accumulates (+=) into blocks id3 / id4 of v2.
  // When kt1 == 1 the slice it = m_Mt - 1 is left out of this loop (presumably covered
  // by the buffer-based variant -- confirm).
1524  const int Nvcd = m_Nvc * m_Nd;
1525  const int Nxy = m_Nx * m_Ny;
1526  const int Nxyz = Nxy * m_Nz;
1527 
1528  const int id3 = m_Nvc * 2;
1529  const int id4 = m_Nvc * 3;
1530 
1531  const int idir = 3;
1532 
1533  const int isite = m_arg[itask].isite;
1534  const int kt1 = m_arg[itask].kt1;
1535 
1536  const double *w1 = &v1[Nvcd * isite];
1537  double *w2 = &v2[Nvcd * isite];
1538  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1539 
1540 
1541  for (int it = 0; it < m_Mt - kt1; ++it) {
1542  for (int iz = 0; iz < m_Mz; ++iz) {
1543  for (int ixy = 0; ixy < Nxy; ++ixy) {
1544  int is = ixy + Nxy * (iz + m_Nz * it);
1545  int iv = Nvcd * is; // output offset (current site)
1546  int in = Nvcd * (is + Nxyz); // input offset (site one time slice up)
1547  int ig = m_Ndf * is; // link offset (current site)
1548 
  // NOTE(review): if m_Nvc is not a compile-time constant these are
  // variable-length arrays, a compiler extension in C++ -- confirm.
1549  double vt1[m_Nvc], vt2[m_Nvc];
1550 
1551  for (int ic = 0; ic < m_Nc; ++ic) {
1552  int ic_r = 2 * ic;
1553  int ic_i = 2 * ic + 1;
1554 
1555  vt1[ic_r] = 2.0 * w1[ic_r + id3 + in];
1556  vt1[ic_i] = 2.0 * w1[ic_i + id3 + in];
1557  vt2[ic_r] = 2.0 * w1[ic_r + id4 + in];
1558  vt2[ic_i] = 2.0 * w1[ic_i + id4 + in];
1559  }
1560 
1561  for (int ic = 0; ic < m_Nc; ++ic) {
1562  int ic2 = ic * m_Nvc; // offset handed to mult_uv_* for output colour ic
1563 
1564  int ic_r = 2 * ic;
1565  int ic_i = 2 * ic + 1;
1566 
1567  w2[ic_r + id3 + iv] += mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1568  w2[ic_i + id3 + iv] += mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1569  w2[ic_r + id4 + iv] += mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1570  w2[ic_i + id4 + iv] += mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1571  }
1572  }
1573  }
1574  }
1575  }
1576 
1577 
1578 //====================================================================
1580  double *vcp1, const double *v1)
1581  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_t_minus1_dirac_thread -- confirm.
  //
  // Packs the it = m_Mt - 1 plane of task `itask` from v1 into the buffer vcp1.
  // Work is done only when m_arg[itask].kt1 == 1. The first and second spinor blocks
  // (id1, id2) are doubled into vt1 / vt2, passed with the direction-3 link data of the
  // same site to mult_udagv_r / mult_udagv_i (helpers defined elsewhere; presumably one
  // row of U^dagger times v), and the results overwrite (=) the buffer slot of
  // Nvc2 = 2 * m_Nvc doubles.
1582  const int Nvc2 = 2 * m_Nvc;
1583  const int Nvcd = m_Nvc * m_Nd;
1584  const int Nvcd2 = Nvcd / 2;
1585 
1586  const int id1 = 0;
1587  const int id2 = m_Nvc;
1588 
1589  const int idir = 3;
1590 
1591  const int isite = m_arg[itask].isite;
1592  const int isite_cp = m_arg[itask].isite_cp_t;
1593 
1594  const double *w1 = &v1[Nvcd * isite];
1595  double *w2 = &vcp1[Nvcd2 * isite_cp];
1596  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1597 
1598 
1599  if (m_arg[itask].kt1 == 1) {
1600  const int Nxy = m_Nx * m_Ny;
1601  const int it = m_Mt - 1;
1602 
1603  for (int iz = 0; iz < m_Mz; ++iz) {
1604  for (int ixy = 0; ixy < Nxy; ++ixy) {
1605  int is = ixy + Nxy * (iz + m_Nz * it); // site index relative to the task origin
1606  int is2 = ixy + Nxy * iz; // slot index inside the packed plane
1607  int in = Nvcd * is;
1608  int ig = m_Ndf * is;
1609  int ix1 = Nvc2 * is2;
1610  int ix2 = ix1 + m_Nvc;
1611 
  // NOTE(review): if m_Nvc is not a compile-time constant these are
  // variable-length arrays, a compiler extension in C++ -- confirm.
1612  double vt1[m_Nvc], vt2[m_Nvc];
1613 
1614  for (int ic = 0; ic < m_Nc; ++ic) {
1615  int ic_r = 2 * ic;
1616  int ic_i = 2 * ic + 1;
1617 
1618  vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1619  vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1620  vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1621  vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1622  }
1623 
1624  for (int ic = 0; ic < m_Nc; ++ic) {
1625  int ic2 = 2 * ic; // offset handed to mult_udagv_* for output colour ic
1626 
1627  int ic_r = 2 * ic;
1628  int ic_i = 2 * ic + 1;
1629 
1630  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1631  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1632  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1633  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1634  }
1635  }
1636  }
1637  }
1638  }
1639 
1640 
1641 //====================================================================
1643  double *v2, const double *vcp2)
1644  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_t_minus2_dirac_thread -- confirm.
  //
  // Consumes the buffer vcp2 for the it = 0 plane of task `itask`.
  // Work is done only when m_arg[itask].kt0 == 1. The two m_Nvc-sized blocks of each
  // buffer slot are scaled by the direction-3 factor bc2 (from m_boundary_each_node) and
  // accumulated (+=) into the first and second spinor blocks (id1, id2) of v2.
  // No link data is read here.
1645  const int Nvc2 = 2 * m_Nvc;
1646  const int Nvcd = m_Nvc * m_Nd;
1647  const int Nvcd2 = Nvcd / 2;
1648 
1649  const int id1 = 0;
1650  const int id2 = m_Nvc;
1651 
1652  const int idir = 3;
1653  const double bc2 = m_boundary_each_node[idir];
1654 
1655  const int isite = m_arg[itask].isite;
1656  const int isite_cp = m_arg[itask].isite_cp_t;
1657 
1658  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1659  double *w2 = &v2[Nvcd * isite];
1660 
1661 
1662  if (m_arg[itask].kt0 == 1) {
1663  const int Nxy = m_Nx * m_Ny;
1664  const int it = 0;
1665 
1666  for (int iz = 0; iz < m_Mz; ++iz) {
1667  for (int ixy = 0; ixy < Nxy; ++ixy) {
1668  int is = ixy + Nxy * (iz + m_Nz * it); // site index relative to the task origin
1669  int is2 = ixy + Nxy * iz; // slot index inside the buffer plane
1670  int iv = Nvcd * is;
1671  int ix1 = Nvc2 * is2;
1672  int ix2 = ix1 + m_Nvc;
1673 
1674  for (int ic = 0; ic < m_Nc; ++ic) {
1675  int ic_r = 2 * ic;
1676  int ic_i = 2 * ic + 1;
1677 
1678  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1679  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1680  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1681  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1682  }
1683  }
1684  }
1685  }
1686  }
1687 
1688 
1689 //====================================================================
1691  double *v2, const double *v1)
1692  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_t_minus_bulk_dirac_thread
  // -- confirm.
  //
  // Interior part of the -t term for task `itask`: for every site with it >= kt0 it
  // reads the first and second spinor blocks (id1, id2) one time slice down (is - Nxyz)
  // from v1, doubles them into vt1 / vt2, passes them with the direction-3 link data of
  // that lower neighbour site to mult_udagv_r / mult_udagv_i (helpers defined elsewhere)
  // and accumulates (+=) into blocks id1 / id2 of v2.
  // When kt0 == 1 the slice it = 0 is left out of this loop (presumably covered by the
  // buffer-based variant -- confirm).
1693  const int Nvcd = m_Nvc * m_Nd;
1694  const int Nxy = m_Nx * m_Ny;
1695  const int Nxyz = Nxy * m_Nz;
1696 
1697  const int id1 = 0;
1698  const int id2 = m_Nvc;
1699 
1700  const int idir = 3;
1701 
1702  const int isite = m_arg[itask].isite;
1703  const int kt0 = m_arg[itask].kt0;
1704 
1705  const double *w1 = &v1[Nvcd * isite];
1706  double *w2 = &v2[Nvcd * isite];
1707  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1708 
1709 
1710  for (int it = kt0; it < m_Mt; ++it) {
1711  for (int iz = 0; iz < m_Mz; ++iz) {
1712  for (int ixy = 0; ixy < Nxy; ++ixy) {
1713  int is = ixy + Nxy * (iz + m_Nz * it);
1714  int iv = Nvcd * is; // output offset (current site)
1715  int in = Nvcd * (is - Nxyz); // input offset (site one time slice down)
1716  int ig = m_Ndf * (is - Nxyz); // link offset (same lower neighbour site)
1717 
  // NOTE(review): if m_Nvc is not a compile-time constant these are
  // variable-length arrays, a compiler extension in C++ -- confirm.
1718  double vt1[m_Nvc], vt2[m_Nvc];
1719 
1720  for (int ic = 0; ic < m_Nc; ++ic) {
1721  int ic_r = 2 * ic;
1722  int ic_i = 2 * ic + 1;
1723 
1724  vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1725  vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1726  vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1727  vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1728  }
1729 
1730  for (int ic = 0; ic < m_Nc; ++ic) {
1731  int ic2 = 2 * ic; // offset handed to mult_udagv_* for output colour ic
1732 
1733  int ic_r = 2 * ic;
1734  int ic_i = 2 * ic + 1;
1735 
1736  w2[ic_r + id1 + iv] += mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1737  w2[ic_i + id1 + iv] += mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1738  w2[ic_r + id2 + iv] += mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1739  w2[ic_i + id2 + iv] += mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1740  }
1741  }
1742  }
1743  }
1744  }
1745 
1746 
1747 //====================================================================
1749  double *vcp1, const double *v1)
1750  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_t_plus1_chiral_thread -- confirm.
  //
  // Packs the it = 0 plane of task `itask` from v1 into the buffer vcp1.
  // Work is done only when m_arg[itask].kt0 == 1. Each buffer slot holds
  // Nvc2 = 2 * m_Nvc doubles: the sums (block id1 + block id3) and (block id2 + block id4)
  // of the source spinor, scaled by the direction-3 factor bc2 from m_boundary_each_node.
1751  const int Nvc2 = 2 * m_Nvc;
1752  const int Nvcd = m_Nvc * m_Nd;
1753  const int Nvcd2 = Nvcd / 2;
1754 
1755  const int id1 = 0;
1756  const int id2 = m_Nvc;
1757  const int id3 = m_Nvc * 2;
1758  const int id4 = m_Nvc * 3;
1759 
1760  const int idir = 3;
1761  const double bc2 = m_boundary_each_node[idir];
1762 
1763  const int isite = m_arg[itask].isite;
1764  const int isite_cp = m_arg[itask].isite_cp_t;
1765 
1766  const double *w1 = &v1[Nvcd * isite];
1767  double *w2 = &vcp1[Nvcd2 * isite_cp];
1768 
1769 
1770  if (m_arg[itask].kt0 == 1) {
1771  const int Nxy = m_Nx * m_Ny;
1772  const int it = 0;
1773 
1774  for (int iz = 0; iz < m_Mz; ++iz) {
1775  for (int ixy = 0; ixy < Nxy; ++ixy) {
1776  int is = ixy + Nxy * (iz + m_Nz * it); // site index relative to the task origin
1777  int is2 = ixy + Nxy * iz; // slot index inside the packed plane
1778 
1779  int in = Nvcd * is;
1780  int ix1 = Nvc2 * is2;
1781  int ix2 = ix1 + m_Nvc;
1782 
1783  for (int ic = 0; ic < m_Nc; ++ic) {
1784  int ic_r = 2 * ic;
1785  int ic_i = 2 * ic + 1;
1786 
1787  w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id3 + in]);
1788  w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id3 + in]);
1789  w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_r + id4 + in]);
1790  w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_i + id4 + in]);
1791  }
1792  }
1793  }
1794  }
1795  }
1796 
1797 
1798 //====================================================================
1800  double *v2, const double *vcp2)
1801  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_t_plus2_chiral_thread -- confirm.
  //
  // Consumes the buffer vcp2 for the it = m_Mt - 1 plane of task `itask`.
  // Work is done only when m_arg[itask].kt1 == 1. The two m_Nvc-sized blocks of each
  // buffer slot are passed with the direction-3 link data of the same site to
  // mult_uv_r / mult_uv_i (helpers defined elsewhere). The first result is added to
  // spinor blocks id1 and id3 of v2, the second to blocks id2 and id4 (same sign in both).
1802  const int Nvc2 = 2 * m_Nvc;
1803  const int Nvcd = m_Nvc * m_Nd;
1804  const int Nvcd2 = Nvcd / 2;
1805 
1806  const int id1 = 0;
1807  const int id2 = m_Nvc;
1808  const int id3 = m_Nvc * 2;
1809  const int id4 = m_Nvc * 3;
1810 
1811  const int idir = 3;
1812 
1813  const int isite = m_arg[itask].isite;
1814  const int isite_cp = m_arg[itask].isite_cp_t;
1815 
1816  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1817  double *w2 = &v2[Nvcd * isite];
1818  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1819 
1820 
1821  if (m_arg[itask].kt1 == 1) {
1822  const int Nxy = m_Nx * m_Ny;
1823  const int it = m_Mt - 1;
1824 
1825  for (int iz = 0; iz < m_Mz; ++iz) {
1826  for (int ixy = 0; ixy < Nxy; ++ixy) {
1827  int is = ixy + Nxy * (iz + m_Nz * it); // site index relative to the task origin
1828  int is2 = ixy + Nxy * iz; // slot index inside the buffer plane
1829  int iv = Nvcd * is;
1830  int ig = m_Ndf * is;
1831  int ix1 = Nvc2 * is2;
1832  int ix2 = ix1 + m_Nvc;
1833 
1834  for (int ic = 0; ic < m_Nc; ++ic) {
1835  int ic2 = ic * m_Nvc; // offset handed to mult_uv_* for output colour ic
1836 
1837  double wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1838  double wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1839  double wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1840  double wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1841 
1842  int ic_r = 2 * ic;
1843  int ic_i = 2 * ic + 1;
1844 
1845  w2[ic_r + id1 + iv] += wt1_r;
1846  w2[ic_i + id1 + iv] += wt1_i;
1847  w2[ic_r + id2 + iv] += wt2_r;
1848  w2[ic_i + id2 + iv] += wt2_i;
1849 
1850  w2[ic_r + id3 + iv] += wt1_r;
1851  w2[ic_i + id3 + iv] += wt1_i;
1852  w2[ic_r + id4 + iv] += wt2_r;
1853  w2[ic_i + id4 + iv] += wt2_i;
1854  }
1855  }
1856  }
1857  }
1858  }
1859 
1860 
1861 //====================================================================
1863  double *v2, const double *v1)
1864  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_t_plus_bulk_chiral_thread
  // -- confirm.
  //
  // Interior part of the +t term for task `itask`: for every site with
  // it < m_Mt - kt1 it reads the spinor one time slice up (is + Nxyz) from v1, forms
  // vt1 = block id1 + block id3 and vt2 = block id2 + block id4, passes them with the
  // direction-3 link data of the current site to mult_uv_r / mult_uv_i (helpers defined
  // elsewhere), and adds the first result to blocks id1 and id3 of v2 and the second to
  // blocks id2 and id4.
  // When kt1 == 1 the slice it = m_Mt - 1 is left out of this loop (presumably covered
  // by the buffer-based variant -- confirm).
1865  const int Nvcd = m_Nvc * m_Nd;
1866  const int Nxy = m_Nx * m_Ny;
1867  const int Nxyz = m_Nx * m_Ny * m_Nz;
1868 
1869  const int id1 = 0;
1870  const int id2 = m_Nvc;
1871  const int id3 = m_Nvc * 2;
1872  const int id4 = m_Nvc * 3;
1873 
1874  const int idir = 3;
1875 
1876  const int isite = m_arg[itask].isite;
1877  const int kt1 = m_arg[itask].kt1;
1878 
1879  const double *w1 = &v1[Nvcd * isite];
1880  double *w2 = &v2[Nvcd * isite];
1881  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1882 
1883 
1884  for (int it = 0; it < m_Mt - kt1; ++it) {
1885  for (int iz = 0; iz < m_Mz; ++iz) {
1886  for (int ixy = 0; ixy < Nxy; ++ixy) {
1887  int is = ixy + Nxy * (iz + m_Nz * it);
1888  int iv = Nvcd * is; // output offset (current site)
1889  int in = Nvcd * (is + Nxyz); // input offset (site one time slice up)
1890  int ig = m_Ndf * is; // link offset (current site)
1891 
  // NOTE(review): if m_Nvc is not a compile-time constant these are
  // variable-length arrays, a compiler extension in C++ -- confirm.
1892  double vt1[m_Nvc], vt2[m_Nvc];
1893 
1894  for (int ic = 0; ic < m_Nc; ++ic) {
1895  int ic_r = 2 * ic;
1896  int ic_i = 2 * ic + 1;
1897 
1898  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id3 + in];
1899  vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id3 + in];
1900  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id4 + in];
1901  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id4 + in];
1902  }
1903 
1904  for (int ic = 0; ic < m_Nc; ++ic) {
1905  int ic2 = ic * m_Nvc; // offset handed to mult_uv_* for output colour ic
1906 
1907  double wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1908  double wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1909  double wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1910  double wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1911 
1912  int ic_r = 2 * ic;
1913  int ic_i = 2 * ic + 1;
1914 
1915  w2[ic_r + id1 + iv] += wt1_r;
1916  w2[ic_i + id1 + iv] += wt1_i;
1917  w2[ic_r + id2 + iv] += wt2_r;
1918  w2[ic_i + id2 + iv] += wt2_i;
1919 
1920  w2[ic_r + id3 + iv] += wt1_r;
1921  w2[ic_i + id3 + iv] += wt1_i;
1922  w2[ic_r + id4 + iv] += wt2_r;
1923  w2[ic_i + id4 + iv] += wt2_i;
1924  }
1925  }
1926  }
1927  }
1928  }
1929 
1930 
1931 //====================================================================
1933  double *vcp1, const double *v1)
1934  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_t_minus1_chiral_thread -- confirm.
  //
  // Packs the it = m_Mt - 1 plane of task `itask` from v1 into the buffer vcp1.
  // Work is done only when m_arg[itask].kt1 == 1. For each site it forms
  // vt1 = block id1 - block id3 and vt2 = block id2 - block id4, passes them with the
  // direction-3 link data of the same site to mult_udagv_r / mult_udagv_i (helpers
  // defined elsewhere; presumably one row of U^dagger times v), and the results
  // overwrite (=) the buffer slot of Nvc2 = 2 * m_Nvc doubles.
1935  const int Nvc2 = 2 * m_Nvc;
1936  const int Nvcd = m_Nvc * m_Nd;
1937  const int Nvcd2 = Nvcd / 2;
1938 
1939  const int id1 = 0;
1940  const int id2 = m_Nvc;
1941  const int id3 = m_Nvc * 2;
1942  const int id4 = m_Nvc * 3;
1943 
1944  const int idir = 3;
1945 
1946  const int isite = m_arg[itask].isite;
1947  const int isite_cp = m_arg[itask].isite_cp_t;
1948 
1949  const double *w1 = &v1[Nvcd * isite];
1950  double *w2 = &vcp1[Nvcd2 * isite_cp];
1951  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1952 
1953 
1954  if (m_arg[itask].kt1 == 1) {
1955  const int Nxy = m_Nx * m_Ny;
1956  const int it = m_Mt - 1;
1957 
1958  for (int iz = 0; iz < m_Mz; ++iz) {
1959  for (int ixy = 0; ixy < Nxy; ++ixy) {
1960  int is = ixy + Nxy * (iz + m_Nz * it); // site index relative to the task origin
1961  int is2 = ixy + Nxy * iz; // slot index inside the packed plane
1962  int in = Nvcd * is;
1963  int ig = m_Ndf * is;
1964  int ix1 = Nvc2 * is2;
1965  int ix2 = ix1 + m_Nvc;
1966 
  // NOTE(review): if m_Nvc is not a compile-time constant these are
  // variable-length arrays, a compiler extension in C++ -- confirm.
1967  double vt1[m_Nvc], vt2[m_Nvc];
1968 
1969  for (int ic = 0; ic < m_Nc; ++ic) {
1970  int ic_r = 2 * ic;
1971  int ic_i = 2 * ic + 1;
1972 
1973  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
1974  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
1975  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
1976  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
1977  }
1978 
1979  for (int ic = 0; ic < m_Nc; ++ic) {
1980  int ic2 = 2 * ic; // offset handed to mult_udagv_* for output colour ic
1981 
1982  int ic_r = 2 * ic;
1983  int ic_i = 2 * ic + 1;
1984 
1985  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1986  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1987  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1988  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1989  }
1990  }
1991  }
1992  }
1993  }
1994 
1995 
1996 //====================================================================
1998  double *v2, const double *vcp2)
1999  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_t_minus2_chiral_thread -- confirm.
  //
  // Consumes the buffer vcp2 for the it = 0 plane of task `itask`.
  // Work is done only when m_arg[itask].kt0 == 1. The two m_Nvc-sized blocks of each
  // buffer slot are scaled by the direction-3 factor bc2 (from m_boundary_each_node),
  // then added to spinor blocks id1 / id2 of v2 and subtracted from blocks id3 / id4.
  // No link data is read here.
2000  const int Nvc2 = 2 * m_Nvc;
2001  const int Nvcd = m_Nvc * m_Nd;
2002  const int Nvcd2 = Nvcd / 2;
2003 
2004  const int id1 = 0;
2005  const int id2 = m_Nvc;
2006  const int id3 = m_Nvc * 2;
2007  const int id4 = m_Nvc * 3;
2008 
2009  const int idir = 3;
2010  const double bc2 = m_boundary_each_node[idir];
2011 
2012  const int isite = m_arg[itask].isite;
2013  const int isite_cp = m_arg[itask].isite_cp_t;
2014 
2015  const double *w1 = &vcp2[Nvcd2 * isite_cp];
2016  double *w2 = &v2[Nvcd * isite];
2017 
2018 
2019  if (m_arg[itask].kt0 == 1) {
2020  const int Nxy = m_Nx * m_Ny;
2021  const int it = 0;
2022 
2023  for (int iz = 0; iz < m_Mz; ++iz) {
2024  for (int ixy = 0; ixy < Nxy; ++ixy) {
2025  int is = ixy + Nxy * (iz + m_Nz * it); // site index relative to the task origin
2026  int is2 = ixy + Nxy * iz; // slot index inside the buffer plane
2027  int iv = Nvcd * is;
2028  int ix1 = Nvc2 * is2;
2029  int ix2 = ix1 + m_Nvc;
2030 
2031  for (int ic = 0; ic < m_Nc; ++ic) {
2032  int ic_r = 2 * ic;
2033  int ic_i = 2 * ic + 1;
2034 
2035  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
2036  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
2037  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
2038  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
2039 
2040  w2[ic_r + id3 + iv] -= bc2 * w1[ic_r + ix1];
2041  w2[ic_i + id3 + iv] -= bc2 * w1[ic_i + ix1];
2042  w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix2];
2043  w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix2];
2044  }
2045  }
2046  }
2047  }
2048  }
2049 
2050 
2051 //====================================================================
2053  double *v2, const double *v1)
2054  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably mult_t_minus_bulk_chiral_thread
  // -- confirm.
  //
  // Interior part of the -t term for task `itask`: for every site with it >= kt0 it
  // reads the spinor one time slice down (is - Nxyz) from v1, forms
  // vt1 = block id1 - block id3 and vt2 = block id2 - block id4, passes them with the
  // direction-3 link data of that lower neighbour site to mult_udagv_r / mult_udagv_i
  // (helpers defined elsewhere), then adds the results to blocks id1 / id2 of v2 and
  // subtracts them from blocks id3 / id4.
  // When kt0 == 1 the slice it = 0 is left out of this loop (presumably covered by the
  // buffer-based variant -- confirm).
2055  const int Nvcd = m_Nvc * m_Nd;
2056  const int Nxy = m_Nx * m_Ny;
2057  const int Nxyz = m_Nx * m_Ny * m_Nz;
2058 
2059  const int id1 = 0;
2060  const int id2 = m_Nvc;
2061  const int id3 = m_Nvc * 2;
2062  const int id4 = m_Nvc * 3;
2063 
2064  const int idir = 3;
2065 
2066  const int isite = m_arg[itask].isite;
2067  const int kt0 = m_arg[itask].kt0;
2068 
2069  const double *w1 = &v1[Nvcd * isite];
2070  double *w2 = &v2[Nvcd * isite];
2071  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
2072 
2073 
2074  for (int it = kt0; it < m_Mt; ++it) {
2075  for (int iz = 0; iz < m_Mz; ++iz) {
2076  for (int ixy = 0; ixy < Nxy; ++ixy) {
2077  int is = ixy + Nxy * (iz + m_Nz * it);
2078  int iv = Nvcd * is; // output offset (current site)
2079  int in = Nvcd * (is - Nxyz); // input offset (site one time slice down)
2080  int ig = m_Ndf * (is - Nxyz); // link offset (same lower neighbour site)
2081 
  // NOTE(review): if m_Nvc is not a compile-time constant these are
  // variable-length arrays, a compiler extension in C++ -- confirm.
2082  double vt1[m_Nvc], vt2[m_Nvc];
2083 
2084  for (int ic = 0; ic < m_Nc; ++ic) {
2085  int ic_r = 2 * ic;
2086  int ic_i = 2 * ic + 1;
2087 
2088  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
2089  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
2090  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
2091  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
2092  }
2093 
2094  for (int ic = 0; ic < m_Nc; ++ic) {
2095  int ic2 = 2 * ic; // offset handed to mult_udagv_* for output colour ic
2096 
2097  double wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
2098  double wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
2099  double wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2100  double wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2101 
2102  int ic_r = 2 * ic;
2103  int ic_i = 2 * ic + 1;
2104 
2105  w2[ic_r + id1 + iv] += wt1_r;
2106  w2[ic_i + id1 + iv] += wt1_i;
2107  w2[ic_r + id2 + iv] += wt2_r;
2108  w2[ic_i + id2 + iv] += wt2_i;
2109 
2110  w2[ic_r + id3 + iv] -= wt1_r;
2111  w2[ic_i + id3 + iv] -= wt1_i;
2112  w2[ic_r + id4 + iv] -= wt2_r;
2113  w2[ic_i + id4 + iv] -= wt2_i;
2114  }
2115  }
2116  }
2117  }
2118  }
2119 
2120 
2121 //====================================================================
2123  double *v2, const double *v1)
2124  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably gm5_dirac_thread -- confirm.
  //
  // For every site of task `itask` (m_Mt x m_Mz x Nxy sites), copies v1 into v2 with the
  // spinor blocks exchanged pairwise: block id3 -> id1, id4 -> id2, id1 -> id3,
  // id2 -> id4. All m_Nvc entries of each block are copied unchanged.
  // NOTE(review): v2 is written while v1 is still being read; if both point to the same
  // storage the id3 / id4 assignments read already-overwritten values -- confirm callers
  // never pass aliased buffers.
2125  const int Nvcd = m_Nvc * m_Nd;
2126  const int Nxy = m_Nx * m_Ny;
2127 
2128  const int id1 = 0;
2129  const int id2 = m_Nvc;
2130  const int id3 = m_Nvc * 2;
2131  const int id4 = m_Nvc * 3;
2132 
2133  const int isite = m_arg[itask].isite;
2134 
2135  const double *w1 = &v1[Nvcd * isite];
2136  double *w2 = &v2[Nvcd * isite];
2137 
2138 
2139  for (int it = 0; it < m_Mt; ++it) {
2140  for (int iz = 0; iz < m_Mz; ++iz) {
2141  for (int ixy = 0; ixy < Nxy; ++ixy) {
2142  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it)); // offset of this site's spinor
2143 
2144  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2145  w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2146  w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2147  w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2148  w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
2149  }
2150  }
2151  }
2152  }
2153  }
2154 
2155 
2156 //====================================================================
2158  double *v2, const double *v1)
2159  {
  // NOTE(review): the signature line carrying this function's name is missing from this
  // listing; by position and data flow it is presumably gm5_chiral_thread -- confirm.
  //
  // For every site of task `itask` (m_Mt x m_Mz x Nxy sites), copies v1 into v2 keeping
  // spinor blocks id1 and id2 unchanged and negating blocks id3 and id4.
  // Each output entry depends only on the input entry at the same index.
2160  const int Nvcd = m_Nvc * m_Nd;
2161  const int Nxy = m_Nx * m_Ny;
2162 
2163  const int id1 = 0;
2164  const int id2 = m_Nvc;
2165  const int id3 = m_Nvc * 2;
2166  const int id4 = m_Nvc * 3;
2167 
2168  const int isite = m_arg[itask].isite;
2169 
2170  const double *w1 = &v1[Nvcd * isite];
2171  double *w2 = &v2[Nvcd * isite];
2172 
2173 
2174  for (int it = 0; it < m_Mt; ++it) {
2175  for (int iz = 0; iz < m_Mz; ++iz) {
2176  for (int ixy = 0; ixy < Nxy; ++ixy) {
2177  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it)); // offset of this site's spinor
2178 
2179  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2180  w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2181  w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2182  w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2183  w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2184  }
2185  }
2186  }
2187  }
2188  }
2189 
2190 
2191 //====================================================================
2192 }
2193 //============================================================END=====
BridgeIO vout
Definition: bridgeIO.cpp:503
void mult_x_minus2_thread(const int, double *, const double *)
void mult_z_minus_bulk_thread(const int, double *, const double *)
const double * ptr(const int jin, const int site, const int jex) const
Definition: field.h:153
void mult_x_plus_bulk_thread(const int, double *, const double *)
void clear_thread(const int, double *)
void mult_x_plus2_thread(const int, double *, const double *)
void mult_z_minus1_thread(const int, double *, const double *)
void general(const char *format,...)
Definition: bridgeIO.cpp:197
void mult_t_minus2_chiral_thread(const int, double *, const double *)
void mult_z_minus2_thread(const int, double *, const double *)
void mult_t_plus1_dirac_thread(const int, double *, const double *)
void mult_t_plus2_dirac_thread(const int, double *, const double *)
void mult_y_plus2_thread(const int, double *, const double *)
void mult_x_minus_bulk_thread(const int, double *, const double *)
void mult_x_plus1_thread(const int, double *, const double *)
void mult_t_minus2_dirac_thread(const int, double *, const double *)
void gm5_dirac_thread(const int, double *, const double *)
void mult_y_minus2_thread(const int, double *, const double *)
void mult_t_minus1_dirac_thread(const int, double *, const double *)
void mult_z_plus2_thread(const int, double *, const double *)
void daypx_thread(const int, double *, const double, const double *)
void daxpy_thread(const int, double *, const double, const double *)
void scal_thread(const int, double *, const double)
void gm5_chiral_thread(const int, double *, const double *)
void mult_z_plus_bulk_thread(const int, double *, const double *)
const Field_G * m_U
gauge configuration.
void mult_y_minus1_thread(const int, double *, const double *)
void mult_z_plus1_thread(const int, double *, const double *)
void mult_t_minus1_chiral_thread(const int, double *, const double *)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_x_minus1_thread(const int, double *, const double *)
void mult_t_plus_bulk_chiral_thread(const int, double *, const double *)
void crucial(const char *format,...)
Definition: bridgeIO.cpp:178
std::vector< double > m_boundary_each_node
b.c. for each node.
void mult_y_plus_bulk_thread(const int, double *, const double *)
void mult_t_minus_bulk_dirac_thread(const int, double *, const double *)
void mult_t_minus_bulk_chiral_thread(const int, double *, const double *)
static const std::string class_name
void mult_t_plus1_chiral_thread(const int, double *, const double *)
void mult_t_plus2_chiral_thread(const int, double *, const double *)
std::vector< mult_arg > m_arg
void mult_t_plus_bulk_dirac_thread(const int, double *, const double *)
void mult_y_minus_bulk_thread(const int, double *, const double *)
void mult_y_plus1_thread(const int, double *, const double *)