Bridge++  Version 1.5.4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fopr_Wilson_impl_thread.cpp
Go to the documentation of this file.
1 
14 #include "fopr_Wilson_impl.h"
15 
17 
18 namespace Imp {
19 #if defined USE_GROUP_SU3
21 #elif defined USE_GROUP_SU2
23 #elif defined USE_GROUP_SU_N
25 #endif
26 
27 // const std::string Fopr_Wilson::class_name = "Fopr_Wilson";
28 
29 //====================================================================
31  {
33 
34  // The following setup corresponds to uniform division of volume.
35  if (m_Nthread <= m_Nt) {
37  } else if (m_Nthread <= m_Nz * m_Nt) {
38  m_Ntask_t = m_Nt;
39  } else {
40  vout.crucial(m_vl, "Error at %s: Too large Nthread = %d\n", class_name.c_str(), m_Nthread);
41  exit(EXIT_FAILURE);
42  }
43 
45 
46  if (m_Ntask_z * m_Ntask_t != m_Nthread) {
47  vout.crucial(m_vl, "Error at %s: Nz = %d and Nt = %d do not match Nthread = %d\n",
48  class_name.c_str(), m_Nz, m_Nt, m_Nthread);
49  exit(EXIT_FAILURE);
50  }
51 
53  m_Mz = m_Nz / m_Ntask_z;
54  m_Mt = m_Nt / m_Ntask_t;
55 
56  if (m_Mz * m_Ntask_z != m_Nz) {
57  vout.crucial(m_vl, "Error at %s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
58  class_name.c_str(), m_Mz, m_Ntask_z, m_Nz);
59  exit(EXIT_FAILURE);
60  }
61 
62  if (m_Mt * m_Ntask_t != m_Nt) {
63  vout.crucial(m_vl, "Error at %s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
64  class_name.c_str(), m_Mt, m_Ntask_t, m_Nt);
65  exit(EXIT_FAILURE);
66  }
67 
68  // The following setup is not monotonic division, and requires
69  // barrier at the beginning and end of mult (D and gamma5).
70  // [H.Matsufuru 22 Oct 2013]
71  // if(m_Nthread >= 64){
72  // m_Ntask_z = 8;
73  // }else if(m_Nthread >= 16){
74  // m_Ntask_z = 4;
75  // }else if(m_Nthread >= 4){
76  // m_Ntask_z = 2;
77  // }else{
78  // m_Ntask_z = 1;
79  // }
80  // m_Ntask_t = m_Nthread/m_Ntask_z;
81  // m_Ntask = m_Ntask_t * m_Ntask_z;
82  // m_Mz = m_Nz/m_Ntask_z;
83  // m_Mt = m_Nt/m_Ntask_t;
84 
85  vout.general(m_vl, " Nthread = %d\n", m_Nthread);
86  vout.general(m_vl, " Ntask = %d\n", m_Ntask);
87  vout.general(m_vl, " Ntask_z = %d Ntask_t = %d\n", m_Ntask_z, m_Ntask_t);
88  vout.general(m_vl, " Mz = %d Mt = %d\n", m_Mz, m_Mt);
89 
90  // setup of arguments
91  const int Nxy = m_Nx * m_Ny;
92 
93  m_arg.resize(m_Ntask);
94  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
95  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
96  int itask = ith_z + m_Ntask_z * ith_t;
97 
98  m_arg[itask].isite = (ith_z * m_Mz + ith_t * (m_Nz * m_Mt)) * Nxy;
99 
100  m_arg[itask].kt0 = 0;
101  m_arg[itask].kt1 = 0;
102  m_arg[itask].kz0 = 0;
103  m_arg[itask].kz1 = 0;
104  if (ith_t == 0) m_arg[itask].kt0 = 1;
105  if (ith_z == 0) m_arg[itask].kz0 = 1;
106  if (ith_t == m_Ntask_t - 1) m_arg[itask].kt1 = 1;
107  if (ith_z == m_Ntask_z - 1) m_arg[itask].kz1 = 1;
108 
109  m_arg[itask].isite_cp_x = itask * m_Mz * m_Mt * m_Ny;
110  m_arg[itask].isite_cp_y = itask * m_Mz * m_Mt * m_Nx;
111  m_arg[itask].isite_cp_z = ith_t * m_Mt * Nxy;
112  m_arg[itask].isite_cp_t = ith_z * m_Mz * Nxy;
113  }
114  }
115  }
116 
117 
118 //====================================================================
119  void Fopr_Wilson::daypx_thread(const int itask,
120  double *v2, const double fac, const double *v1)
121  {
122  const int Nvcd = m_Nvc * m_Nd;
123  const int Nvxy = Nvcd * m_Nx * m_Ny;
124 
125  const int isite = m_arg[itask].isite;
126 
127  double *w2 = &v2[Nvcd * isite];
128  const double *w1 = &v1[Nvcd * isite];
129 
130  for (int it = 0; it < m_Mt; ++it) {
131  for (int iz = 0; iz < m_Mz; ++iz) {
132  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
133  int iv = ivxy + Nvxy * (iz + m_Nz * it);
134 
135  w2[iv] = fac * w2[iv] + w1[iv];
136  }
137  }
138  }
139  }
140 
141 
142 //====================================================================
143  void Fopr_Wilson::clear_thread(const int itask,
144  double *v2)
145  {
146  const int Nvcd = m_Nvc * m_Nd;
147  const int Nvxy = Nvcd * m_Nx * m_Ny;
148 
149  const int isite = m_arg[itask].isite;
150 
151  double *w2 = &v2[Nvcd * isite];
152 
153  for (int it = 0; it < m_Mt; ++it) {
154  for (int iz = 0; iz < m_Mz; ++iz) {
155  for (int ivxy = 0; ivxy < Nvxy; ++ivxy) {
156  int iv = ivxy + Nvxy * (iz + m_Nz * it);
157 
158  w2[iv] = 0.0;
159  }
160  }
161  }
162  }
163 
164 
165 //====================================================================
166  void Fopr_Wilson::mult_xp1_thread(const int itask,
167  double *vcp1, const double *v1)
168  {
169  const int Nvc2 = 2 * m_Nvc;
170  const int Nvcd = m_Nvc * m_Nd;
171  const int Nvcd2 = Nvcd / 2;
172 
173  const int id1 = 0;
174  const int id2 = m_Nvc;
175  const int id3 = m_Nvc * 2;
176  const int id4 = m_Nvc * 3;
177 
178  const int idir = 0;
179  const int ix = 0;
180  const double bc2 = m_boundary_each_node[idir];
181 
182  const int isite = m_arg[itask].isite;
183  const int isite_cp = m_arg[itask].isite_cp_x;
184 
185  double *w2 = &vcp1[Nvcd2 * isite_cp];
186  const double *w1 = &v1[Nvcd * isite];
187 
188 
189  for (int it = 0; it < m_Mt; ++it) {
190  for (int iz = 0; iz < m_Mz; ++iz) {
191  for (int iy = 0; iy < m_Ny; ++iy) {
192  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
193  int is2 = iy + m_Ny * (iz + m_Mz * it);
194  int in = Nvcd * is;
195  int ix1 = Nvc2 * is2;
196  int ix2 = ix1 + m_Nvc;
197 
198  for (int ic = 0; ic < m_Nc; ++ic) {
199  int ic_r = 2 * ic;
200  int ic_i = 2 * ic + 1;
201 
202  w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] - w1[ic_i + id4 + in]);
203  w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_r + id4 + in]);
204  w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] - w1[ic_i + id3 + in]);
205  w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_r + id3 + in]);
206  }
207  }
208  }
209  }
210  }
211 
212 
213 //====================================================================
214  void Fopr_Wilson::mult_xp2_thread(const int itask,
215  double *v2, const double *vcp2)
216  {
217  const int Nvc2 = 2 * m_Nvc;
218  const int Nvcd = m_Nvc * m_Nd;
219  const int Nvcd2 = Nvcd / 2;
220 
221  const int id1 = 0;
222  const int id2 = m_Nvc;
223  const int id3 = m_Nvc * 2;
224  const int id4 = m_Nvc * 3;
225 
226  const int idir = 0;
227  const int ix = m_Nx - 1;
228 
229  const int isite = m_arg[itask].isite;
230  const int isite_cp = m_arg[itask].isite_cp_x;
231 
232  double *w2 = &v2[Nvcd * isite];
233  const double *w1 = &vcp2[Nvcd2 * isite_cp];
234  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
235 
236 
237  for (int it = 0; it < m_Mt; ++it) {
238  for (int iz = 0; iz < m_Mz; ++iz) {
239  for (int iy = 0; iy < m_Ny; ++iy) {
240  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
241  int is2 = iy + m_Ny * (iz + m_Mz * it);
242  int iv = Nvcd * is;
243  int ig = m_Ndf * is;
244  int ix1 = Nvc2 * is2;
245  int ix2 = ix1 + m_Nvc;
246 
247  for (int ic = 0; ic < m_Nc; ++ic) {
248  int ic2 = ic * m_Nvc;
249 
250  double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
251  double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
252  double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
253  double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
254 
255  int ic_r = 2 * ic;
256  int ic_i = 2 * ic + 1;
257 
258  w2[ic_r + id1 + iv] += wt1r;
259  w2[ic_i + id1 + iv] += wt1i;
260  w2[ic_r + id2 + iv] += wt2r;
261  w2[ic_i + id2 + iv] += wt2i;
262 
263  w2[ic_r + id3 + iv] += wt2i;
264  w2[ic_i + id3 + iv] -= wt2r;
265  w2[ic_r + id4 + iv] += wt1i;
266  w2[ic_i + id4 + iv] -= wt1r;
267  }
268  }
269  }
270  }
271  }
272 
273 
274 //====================================================================
275  void Fopr_Wilson::mult_xpb_thread(const int itask,
276  double *v2, const double *v1)
277  {
278  const int Nvcd = m_Nvc * m_Nd;
279 
280  const int id1 = 0;
281  const int id2 = m_Nvc;
282  const int id3 = m_Nvc * 2;
283  const int id4 = m_Nvc * 3;
284 
285  const int idir = 0;
286 
287  const int isite = m_arg[itask].isite;
288 
289  double *w2 = &v2[Nvcd * isite];
290  const double *w1 = &v1[Nvcd * isite];
291  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
292 
293 
294  for (int it = 0; it < m_Mt; ++it) {
295  for (int iz = 0; iz < m_Mz; ++iz) {
296  for (int iy = 0; iy < m_Ny; ++iy) {
297  for (int ix = 0; ix < m_Nx - 1; ++ix) {
298  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
299  int iv = Nvcd * is;
300  int in = Nvcd * (is + 1);
301  int ig = m_Ndf * is;
302 
303  double vt1[m_Nvc], vt2[m_Nvc];
304 
305  for (int ic = 0; ic < m_Nc; ++ic) {
306  int ic_r = 2 * ic;
307  int ic_i = 2 * ic + 1;
308 
309  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_i + id4 + in];
310  vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_r + id4 + in];
311  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_i + id3 + in];
312  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_r + id3 + in];
313  }
314 
315  for (int ic = 0; ic < m_Nc; ++ic) {
316  int ic2 = ic * m_Nvc;
317 
318  double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
319  double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
320  double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
321  double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
322 
323  int ic_r = 2 * ic;
324  int ic_i = 2 * ic + 1;
325 
326  w2[ic_r + id1 + iv] += wt1r;
327  w2[ic_i + id1 + iv] += wt1i;
328  w2[ic_r + id2 + iv] += wt2r;
329  w2[ic_i + id2 + iv] += wt2i;
330 
331  w2[ic_r + id3 + iv] += wt2i;
332  w2[ic_i + id3 + iv] -= wt2r;
333  w2[ic_r + id4 + iv] += wt1i;
334  w2[ic_i + id4 + iv] -= wt1r;
335  }
336  }
337  }
338  }
339  }
340  }
341 
342 
343 //====================================================================
344  void Fopr_Wilson::mult_xm1_thread(const int itask,
345  double *vcp1, const double *v1)
346  {
347  const int Nvc2 = 2 * m_Nvc;
348  const int Nvcd = m_Nvc * m_Nd;
349  const int Nvcd2 = Nvcd / 2;
350 
351  const int id1 = 0;
352  const int id2 = m_Nvc;
353  const int id3 = m_Nvc * 2;
354  const int id4 = m_Nvc * 3;
355 
356  const int idir = 0;
357  const int ix = m_Nx - 1;
358 
359  const int isite = m_arg[itask].isite;
360  const int isite_cp = m_arg[itask].isite_cp_x;
361 
362  double *w2 = &vcp1[Nvcd2 * isite_cp];
363  const double *w1 = &v1[Nvcd * isite];
364  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
365 
366 
367  for (int it = 0; it < m_Mt; ++it) {
368  for (int iz = 0; iz < m_Mz; ++iz) {
369  for (int iy = 0; iy < m_Ny; ++iy) {
370  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
371  int is2 = iy + m_Ny * (iz + m_Mz * it);
372  int in = Nvcd * is;
373  int ig = m_Ndf * is;
374  int ix1 = Nvc2 * is2;
375  int ix2 = ix1 + m_Nvc;
376 
377  double vt1[m_Nvc], vt2[m_Nvc];
378 
379  for (int ic = 0; ic < m_Nc; ++ic) {
380  int ic_r = 2 * ic;
381  int ic_i = 2 * ic + 1;
382 
383  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id4 + in];
384  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id4 + in];
385  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_i + id3 + in];
386  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_r + id3 + in];
387  }
388 
389  for (int ic = 0; ic < m_Nc; ++ic) {
390  int ic2 = 2 * ic;
391 
392  int ic_r = 2 * ic;
393  int ic_i = 2 * ic + 1;
394 
395  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
396  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
397  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
398  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
399  }
400  }
401  }
402  }
403  }
404 
405 
406 //====================================================================
407  void Fopr_Wilson::mult_xm2_thread(const int itask,
408  double *v2, const double *vcp2)
409  {
410  const int Nvc2 = 2 * m_Nvc;
411  const int Nvcd = m_Nvc * m_Nd;
412  const int Nvcd2 = Nvcd / 2;
413 
414  const int id1 = 0;
415  const int id2 = m_Nvc;
416  const int id3 = m_Nvc * 2;
417  const int id4 = m_Nvc * 3;
418 
419  const int idir = 0;
420  const int ix = 0;
421  const double bc2 = m_boundary_each_node[idir];
422 
423  const int isite = m_arg[itask].isite;
424  const int isite_cp = m_arg[itask].isite_cp_x;
425 
426  double *w2 = &v2[Nvcd * isite];
427  const double *w1 = &vcp2[Nvcd2 * isite_cp];
428 
429 
430  for (int it = 0; it < m_Mt; ++it) {
431  for (int iz = 0; iz < m_Mz; ++iz) {
432  for (int iy = 0; iy < m_Ny; ++iy) {
433  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
434  int is2 = iy + m_Ny * (iz + m_Mz * it);
435  int iv = Nvcd * is;
436  int ix1 = Nvc2 * is2;
437  int ix2 = ix1 + m_Nvc;
438 
439  for (int ic = 0; ic < m_Nc; ++ic) {
440  int ic_r = 2 * ic;
441  int ic_i = 2 * ic + 1;
442 
443  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
444  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
445  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
446  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
447 
448  w2[ic_r + id3 + iv] -= bc2 * w1[ic_i + ix2];
449  w2[ic_i + id3 + iv] += bc2 * w1[ic_r + ix2];
450  w2[ic_r + id4 + iv] -= bc2 * w1[ic_i + ix1];
451  w2[ic_i + id4 + iv] += bc2 * w1[ic_r + ix1];
452  }
453  }
454  }
455  }
456  }
457 
458 
459 //====================================================================
460  void Fopr_Wilson::mult_xmb_thread(const int itask,
461  double *v2, const double *v1)
462  {
463  const int Nvcd = m_Nvc * m_Nd;
464 
465  const int id1 = 0;
466  const int id2 = m_Nvc;
467  const int id3 = m_Nvc * 2;
468  const int id4 = m_Nvc * 3;
469 
470  const int idir = 0;
471 
472  const int isite = m_arg[itask].isite;
473 
474  double *w2 = &v2[Nvcd * isite];
475  const double *w1 = &v1[Nvcd * isite];
476  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
477 
478 
479  for (int it = 0; it < m_Mt; ++it) {
480  for (int iz = 0; iz < m_Mz; ++iz) {
481  for (int iy = 0; iy < m_Ny; ++iy) {
482  for (int ix = 1; ix < m_Nx; ++ix) {
483  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
484  int iv = Nvcd * is;
485  int in = Nvcd * (is - 1);
486  int ig = m_Ndf * (is - 1);
487 
488  double vt1[m_Nvc], vt2[m_Nvc];
489 
490  for (int ic = 0; ic < m_Nc; ++ic) {
491  int ic_r = 2 * ic;
492  int ic_i = 2 * ic + 1;
493 
494  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id4 + in];
495  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id4 + in];
496  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_i + id3 + in];
497  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_r + id3 + in];
498  }
499 
500  for (int ic = 0; ic < m_Nc; ++ic) {
501  int ic2 = 2 * ic;
502 
503  double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
504  double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
505  double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
506  double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
507 
508  int ic_r = 2 * ic;
509  int ic_i = 2 * ic + 1;
510 
511  w2[ic_r + id1 + iv] += wt1r;
512  w2[ic_i + id1 + iv] += wt1i;
513  w2[ic_r + id2 + iv] += wt2r;
514  w2[ic_i + id2 + iv] += wt2i;
515 
516  w2[ic_r + id3 + iv] -= wt2i;
517  w2[ic_i + id3 + iv] += wt2r;
518  w2[ic_r + id4 + iv] -= wt1i;
519  w2[ic_i + id4 + iv] += wt1r;
520  }
521  }
522  }
523  }
524  }
525  }
526 
527 
528 //====================================================================
529  void Fopr_Wilson::mult_yp1_thread(const int itask,
530  double *vcp1, const double *v1)
531  {
532  const int Nvc2 = 2 * m_Nvc;
533  const int Nvcd = m_Nvc * m_Nd;
534  const int Nvcd2 = Nvcd / 2;
535 
536  const int id1 = 0;
537  const int id2 = m_Nvc;
538  const int id3 = m_Nvc * 2;
539  const int id4 = m_Nvc * 3;
540 
541  const int idir = 1;
542  const int iy = 0;
543  const double bc2 = m_boundary_each_node[idir];
544 
545  const int isite = m_arg[itask].isite;
546  const int isite_cp = m_arg[itask].isite_cp_y;
547 
548  double *w2 = &vcp1[Nvcd2 * isite_cp];
549  const double *w1 = &v1[Nvcd * isite];
550 
551 
552  for (int it = 0; it < m_Mt; ++it) {
553  for (int iz = 0; iz < m_Mz; ++iz) {
554  for (int ix = 0; ix < m_Nx; ++ix) {
555  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
556  int is2 = ix + m_Nx * (iz + m_Mz * it);
557  int in = Nvcd * is;
558  int ix1 = Nvc2 * is2;
559  int ix2 = ix1 + m_Nvc;
560 
561  for (int ic = 0; ic < m_Nc; ++ic) {
562  int ic_r = 2 * ic;
563  int ic_i = 2 * ic + 1;
564 
565  w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id4 + in]);
566  w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id4 + in]);
567  w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] - w1[ic_r + id3 + in]);
568  w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] - w1[ic_i + id3 + in]);
569  }
570  }
571  }
572  }
573  }
574 
575 
576 //====================================================================
577  void Fopr_Wilson::mult_yp2_thread(const int itask,
578  double *v2, const double *vcp2)
579  {
580  const int Nvc2 = 2 * m_Nvc;
581  const int Nvcd = m_Nvc * m_Nd;
582  const int Nvcd2 = Nvcd / 2;
583 
584  const int id1 = 0;
585  const int id2 = m_Nvc;
586  const int id3 = m_Nvc * 2;
587  const int id4 = m_Nvc * 3;
588 
589  const int idir = 1;
590  const int iy = m_Ny - 1;
591 
592  const int isite = m_arg[itask].isite;
593  const int isite_cp = m_arg[itask].isite_cp_y;
594 
595  double *w2 = &v2[Nvcd * isite];
596  const double *w1 = &vcp2[Nvcd2 * isite_cp];
597  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
598 
599 
600  for (int it = 0; it < m_Mt; ++it) {
601  for (int iz = 0; iz < m_Mz; ++iz) {
602  for (int ix = 0; ix < m_Nx; ++ix) {
603  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
604  int is2 = ix + m_Nx * (iz + m_Mz * it);
605  int iv = Nvcd * is;
606  int ig = m_Ndf * is;
607  int ix1 = Nvc2 * is2;
608  int ix2 = ix1 + m_Nvc;
609 
610  for (int ic = 0; ic < m_Nc; ++ic) {
611  int ic2 = ic * m_Nvc;
612 
613  double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
614  double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
615  double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
616  double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
617 
618  int ic_r = 2 * ic;
619  int ic_i = 2 * ic + 1;
620 
621  w2[ic_r + id1 + iv] += wt1r;
622  w2[ic_i + id1 + iv] += wt1i;
623  w2[ic_r + id2 + iv] += wt2r;
624  w2[ic_i + id2 + iv] += wt2i;
625 
626  w2[ic_r + id3 + iv] -= wt2r;
627  w2[ic_i + id3 + iv] -= wt2i;
628  w2[ic_r + id4 + iv] += wt1r;
629  w2[ic_i + id4 + iv] += wt1i;
630  }
631  }
632  }
633  }
634  }
635 
636 
637 //====================================================================
638  void Fopr_Wilson::mult_ypb_thread(const int itask,
639  double *v2, const double *v1)
640  {
641  const int Nvcd = m_Nvc * m_Nd;
642 
643  const int id1 = 0;
644  const int id2 = m_Nvc;
645  const int id3 = m_Nvc * 2;
646  const int id4 = m_Nvc * 3;
647 
648  const int idir = 1;
649 
650  const int isite = m_arg[itask].isite;
651 
652  double *w2 = &v2[Nvcd * isite];
653  const double *w1 = &v1[Nvcd * isite];
654  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
655 
656 
657  for (int it = 0; it < m_Mt; ++it) {
658  for (int iz = 0; iz < m_Mz; ++iz) {
659  for (int iy = 0; iy < m_Ny - 1; ++iy) {
660  for (int ix = 0; ix < m_Nx; ++ix) {
661  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
662  int iv = Nvcd * is;
663  int in = Nvcd * (is + m_Nx);
664  int ig = m_Ndf * is;
665 
666  double vt1[m_Nvc], vt2[m_Nvc];
667 
668  for (int ic = 0; ic < m_Nc; ++ic) {
669  int ic_r = 2 * ic;
670  int ic_i = 2 * ic + 1;
671 
672  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id4 + in];
673  vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id4 + in];
674  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id3 + in];
675  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id3 + in];
676  }
677 
678  for (int ic = 0; ic < m_Nc; ++ic) {
679  int ic2 = ic * m_Nvc;
680 
681  double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
682  double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
683  double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
684  double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
685 
686  int ic_r = 2 * ic;
687  int ic_i = 2 * ic + 1;
688 
689  w2[ic_r + id1 + iv] += wt1r;
690  w2[ic_i + id1 + iv] += wt1i;
691  w2[ic_r + id2 + iv] += wt2r;
692  w2[ic_i + id2 + iv] += wt2i;
693 
694  w2[ic_r + id3 + iv] -= wt2r;
695  w2[ic_i + id3 + iv] -= wt2i;
696  w2[ic_r + id4 + iv] += wt1r;
697  w2[ic_i + id4 + iv] += wt1i;
698  }
699  }
700  }
701  }
702  }
703  }
704 
705 
706 //====================================================================
707  void Fopr_Wilson::mult_ym1_thread(const int itask,
708  double *vcp1, const double *v1)
709  {
710  const int Nvc2 = 2 * m_Nvc;
711  const int Nvcd = m_Nvc * m_Nd;
712  const int Nvcd2 = Nvcd / 2;
713 
714  const int id1 = 0;
715  const int id2 = m_Nvc;
716  const int id3 = m_Nvc * 2;
717  const int id4 = m_Nvc * 3;
718 
719  const int idir = 1;
720  const int iy = m_Ny - 1;
721 
722  const int isite = m_arg[itask].isite;
723  const int isite_cp = m_arg[itask].isite_cp_y;
724 
725  double *w2 = &vcp1[Nvcd2 * isite_cp];
726  const double *w1 = &v1[Nvcd * isite];
727  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
728 
729 
730  for (int it = 0; it < m_Mt; ++it) {
731  for (int iz = 0; iz < m_Mz; ++iz) {
732  for (int ix = 0; ix < m_Nx; ++ix) {
733  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
734  int is2 = ix + m_Nx * (iz + m_Mz * it);
735  int in = Nvcd * is;
736  int ig = m_Ndf * is;
737  int ix1 = Nvc2 * is2;
738  int ix2 = ix1 + m_Nvc;
739 
740  double vt1[m_Nvc], vt2[m_Nvc];
741 
742  for (int ic = 0; ic < m_Nc; ++ic) {
743  int ic_r = 2 * ic;
744  int ic_i = 2 * ic + 1;
745 
746  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id4 + in];
747  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id4 + in];
748  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id3 + in];
749  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id3 + in];
750  }
751 
752  for (int ic = 0; ic < m_Nc; ++ic) {
753  int ic2 = 2 * ic;
754 
755  int ic_r = 2 * ic;
756  int ic_i = 2 * ic + 1;
757 
758  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
759  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
760  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
761  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
762  }
763  }
764  }
765  }
766  }
767 
768 
769 //====================================================================
770  void Fopr_Wilson::mult_ym2_thread(const int itask,
771  double *v2, const double *vcp2)
772  {
773  const int Nvc2 = 2 * m_Nvc;
774  const int Nvcd = m_Nvc * m_Nd;
775  const int Nvcd2 = Nvcd / 2;
776 
777  const int id1 = 0;
778  const int id2 = m_Nvc;
779  const int id3 = m_Nvc * 2;
780  const int id4 = m_Nvc * 3;
781 
782  const int idir = 1;
783  const int iy = 0;
784  const double bc2 = m_boundary_each_node[idir];
785 
786  const int isite = m_arg[itask].isite;
787  const int isite_cp = m_arg[itask].isite_cp_y;
788 
789  double *w2 = &v2[Nvcd * isite];
790  const double *w1 = &vcp2[Nvcd2 * isite_cp];
791 
792 
793  for (int it = 0; it < m_Mt; ++it) {
794  for (int iz = 0; iz < m_Mz; ++iz) {
795  for (int ix = 0; ix < m_Nx; ++ix) {
796  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
797  int is2 = ix + m_Nx * (iz + m_Mz * it);
798  int iv = Nvcd * is;
799  int ix1 = Nvc2 * is2;
800  int ix2 = ix1 + m_Nvc;
801 
802  for (int ic = 0; ic < m_Nc; ++ic) {
803  int ic_r = 2 * ic;
804  int ic_i = 2 * ic + 1;
805 
806  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
807  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
808  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
809  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
810 
811  w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix2];
812  w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix2];
813  w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix1];
814  w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix1];
815  }
816  }
817  }
818  }
819  }
820 
821 
822 //====================================================================
823  void Fopr_Wilson::mult_ymb_thread(const int itask,
824  double *v2, const double *v1)
825  {
826  const int Nvcd = m_Nvc * m_Nd;
827 
828  const int id1 = 0;
829  const int id2 = m_Nvc;
830  const int id3 = m_Nvc * 2;
831  const int id4 = m_Nvc * 3;
832 
833  const int idir = 1;
834 
835  const int isite = m_arg[itask].isite;
836 
837  double *w2 = &v2[Nvcd * isite];
838  const double *w1 = &v1[Nvcd * isite];
839  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
840 
841 
842  for (int it = 0; it < m_Mt; ++it) {
843  for (int iz = 0; iz < m_Mz; ++iz) {
844  for (int iy = 1; iy < m_Ny; ++iy) {
845  for (int ix = 0; ix < m_Nx; ++ix) {
846  int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
847  int iv = Nvcd * is;
848  int in = Nvcd * (is - m_Nx);
849  int ig = m_Ndf * (is - m_Nx);
850 
851  double vt1[m_Nvc], vt2[m_Nvc];
852 
853  for (int ic = 0; ic < m_Nc; ++ic) {
854  int ic_r = 2 * ic;
855  int ic_i = 2 * ic + 1;
856 
857  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id4 + in];
858  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id4 + in];
859  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id3 + in];
860  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id3 + in];
861  }
862 
863  for (int ic = 0; ic < m_Nc; ++ic) {
864  int ic2 = 2 * ic;
865 
866  double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
867  double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
868  double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
869  double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
870 
871  int ic_r = 2 * ic;
872  int ic_i = 2 * ic + 1;
873 
874  w2[ic_r + id1 + iv] += wt1r;
875  w2[ic_i + id1 + iv] += wt1i;
876  w2[ic_r + id2 + iv] += wt2r;
877  w2[ic_i + id2 + iv] += wt2i;
878 
879  w2[ic_r + id3 + iv] += wt2r;
880  w2[ic_i + id3 + iv] += wt2i;
881  w2[ic_r + id4 + iv] -= wt1r;
882  w2[ic_i + id4 + iv] -= wt1i;
883  }
884  }
885  }
886  }
887  }
888  }
889 
890 
891 //====================================================================
892  void Fopr_Wilson::mult_zp1_thread(const int itask,
893  double *vcp1, const double *v1)
894  {
895  const int Nvc2 = 2 * m_Nvc;
896  const int Nvcd = m_Nvc * m_Nd;
897  const int Nvcd2 = Nvcd / 2;
898 
899  const int id1 = 0;
900  const int id2 = m_Nvc;
901  const int id3 = m_Nvc * 2;
902  const int id4 = m_Nvc * 3;
903 
904  const int idir = 2;
905  const double bc2 = m_boundary_each_node[idir];
906 
907  const int isite = m_arg[itask].isite;
908  const int isite_cp = m_arg[itask].isite_cp_z;
909 
910  double *w2 = &vcp1[Nvcd2 * isite_cp];
911  const double *w1 = &v1[Nvcd * isite];
912 
913 
914  if (m_arg[itask].kz0 == 1) {
915  const int Nxy = m_Nx * m_Ny;
916  const int iz = 0;
917 
918  for (int it = 0; it < m_Mt; ++it) {
919  for (int ixy = 0; ixy < Nxy; ++ixy) {
920  int is = ixy + Nxy * (iz + m_Nz * it);
921  int is2 = ixy + Nxy * it;
922  int in = Nvcd * is;
923  int ix1 = Nvc2 * is2;
924  int ix2 = ix1 + m_Nvc;
925 
926  for (int ic = 0; ic < m_Nc; ++ic) {
927  int ic_r = 2 * ic;
928  int ic_i = 2 * ic + 1;
929 
930  w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] - w1[ic_i + id3 + in]);
931  w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_r + id3 + in]);
932  w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_i + id4 + in]);
933  w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] - w1[ic_r + id4 + in]);
934  }
935  }
936  }
937  }
938  }
939 
940 
941 //====================================================================
942  void Fopr_Wilson::mult_zp2_thread(const int itask,
943  double *v2, const double *vcp2)
944  {
945  const int Nvc2 = 2 * m_Nvc;
946  const int Nvcd = m_Nvc * m_Nd;
947  const int Nvcd2 = Nvcd / 2;
948 
949  const int id1 = 0;
950  const int id2 = m_Nvc;
951  const int id3 = m_Nvc * 2;
952  const int id4 = m_Nvc * 3;
953 
954  const int idir = 2;
955 
956  const int isite = m_arg[itask].isite;
957  const int isite_cp = m_arg[itask].isite_cp_z;
958 
959  double *w2 = &v2[Nvcd * isite];
960  const double *w1 = &vcp2[Nvcd2 * isite_cp];
961  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
962 
963 
964  if (m_arg[itask].kz1 == 1) {
965  const int Nxy = m_Nx * m_Ny;
966  const int iz = m_Mz - 1;
967 
968  for (int it = 0; it < m_Mt; ++it) {
969  for (int ixy = 0; ixy < Nxy; ++ixy) {
970  int is = ixy + Nxy * (iz + m_Nz * it);
971  int is2 = ixy + Nxy * it;
972  int iv = Nvcd * is;
973  int ig = m_Ndf * is;
974  int ix1 = Nvc2 * is2;
975  int ix2 = ix1 + m_Nvc;
976 
977  for (int ic = 0; ic < m_Nc; ++ic) {
978  int ic2 = ic * m_Nvc;
979 
980  double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
981  double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
982  double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
983  double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
984 
985  int ic_r = 2 * ic;
986  int ic_i = 2 * ic + 1;
987 
988  w2[ic_r + id1 + iv] += wt1r;
989  w2[ic_i + id1 + iv] += wt1i;
990  w2[ic_r + id2 + iv] += wt2r;
991  w2[ic_i + id2 + iv] += wt2i;
992 
993  w2[ic_r + id3 + iv] += wt1i;
994  w2[ic_i + id3 + iv] -= wt1r;
995  w2[ic_r + id4 + iv] -= wt2i;
996  w2[ic_i + id4 + iv] += wt2r;
997  }
998  }
999  }
1000  }
1001  }
1002 
1003 
1004 //====================================================================
1005  void Fopr_Wilson::mult_zpb_thread(const int itask,
1006  double *v2, const double *v1)
1007  {
1008  const int Nvcd = m_Nvc * m_Nd;
1009  const int Nxy = m_Nx * m_Ny;
1010 
1011  const int id1 = 0;
1012  const int id2 = m_Nvc;
1013  const int id3 = m_Nvc * 2;
1014  const int id4 = m_Nvc * 3;
1015 
1016  const int idir = 2;
1017 
1018  const int isite = m_arg[itask].isite;
1019  const int kz1 = m_arg[itask].kz1;
1020 
1021  double *w2 = &v2[Nvcd * isite];
1022  const double *w1 = &v1[Nvcd * isite];
1023  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1024 
1025 
1026  for (int it = 0; it < m_Mt; ++it) {
1027  for (int iz = 0; iz < m_Mz - kz1; ++iz) {
1028  for (int ixy = 0; ixy < Nxy; ++ixy) {
1029  int is = ixy + Nxy * (iz + m_Nz * it);
1030  int iv = Nvcd * is;
1031  int in = Nvcd * (is + Nxy);
1032  int ig = m_Ndf * is;
1033 
1034  double vt1[m_Nvc], vt2[m_Nvc];
1035 
1036  for (int ic = 0; ic < m_Nc; ++ic) {
1037  int ic_r = 2 * ic;
1038  int ic_i = 2 * ic + 1;
1039 
1040  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_i + id3 + in];
1041  vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_r + id3 + in];
1042  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_i + id4 + in];
1043  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_r + id4 + in];
1044  }
1045 
1046  for (int ic = 0; ic < m_Nc; ++ic) {
1047  int ic2 = ic * m_Nvc;
1048 
1049  double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1050  double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1051  double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1052  double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1053 
1054  int ic_r = 2 * ic;
1055  int ic_i = 2 * ic + 1;
1056 
1057  w2[ic_r + id1 + iv] += wt1r;
1058  w2[ic_i + id1 + iv] += wt1i;
1059  w2[ic_r + id2 + iv] += wt2r;
1060  w2[ic_i + id2 + iv] += wt2i;
1061 
1062  w2[ic_r + id3 + iv] += wt1i;
1063  w2[ic_i + id3 + iv] -= wt1r;
1064  w2[ic_r + id4 + iv] -= wt2i;
1065  w2[ic_i + id4 + iv] += wt2r;
1066  }
1067  }
1068  }
1069  }
1070  }
1071 
1072 
1073 //====================================================================
1074  void Fopr_Wilson::mult_zm1_thread(const int itask,
1075  double *vcp1, const double *v1)
1076  {
1077  const int Nvc2 = 2 * m_Nvc;
1078  const int Nvcd = m_Nvc * m_Nd;
1079  const int Nvcd2 = Nvcd / 2;
1080 
1081  const int id1 = 0;
1082  const int id2 = m_Nvc;
1083  const int id3 = m_Nvc * 2;
1084  const int id4 = m_Nvc * 3;
1085 
1086  const int idir = 2;
1087 
1088  const int isite = m_arg[itask].isite;
1089  const int isite_cp = m_arg[itask].isite_cp_z;
1090 
1091  double *w2 = &vcp1[Nvcd2 * isite_cp];
1092  const double *w1 = &v1[Nvcd * isite];
1093  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1094 
1095 
1096  if (m_arg[itask].kz1 == 1) {
1097  const int Nxy = m_Nx * m_Ny;
1098  const int iz = m_Mz - 1;
1099 
1100  for (int it = 0; it < m_Mt; ++it) {
1101  for (int ixy = 0; ixy < Nxy; ++ixy) {
1102  int is = ixy + Nxy * (iz + m_Nz * it);
1103  int is2 = ixy + Nxy * it;
1104  int in = Nvcd * is;
1105  int ig = m_Ndf * is;
1106  int ix1 = Nvc2 * is2;
1107  int ix2 = ix1 + m_Nvc;
1108 
1109  double vt1[m_Nvc], vt2[m_Nvc];
1110 
1111  for (int ic = 0; ic < m_Nc; ++ic) {
1112  int ic_r = 2 * ic;
1113  int ic_i = 2 * ic + 1;
1114 
1115  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id3 + in];
1116  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id3 + in];
1117  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_i + id4 + in];
1118  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_r + id4 + in];
1119  }
1120 
1121  for (int ic = 0; ic < m_Nc; ++ic) {
1122  int ic2 = 2 * ic;
1123 
1124  int ic_r = 2 * ic;
1125  int ic_i = 2 * ic + 1;
1126 
1127  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1128  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1129  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1130  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1131  }
1132  }
1133  }
1134  }
1135  }
1136 
1137 
1138 //====================================================================
1139  void Fopr_Wilson::mult_zm2_thread(const int itask,
1140  double *v2, const double *vcp2)
1141  {
1142  const int Nvc2 = 2 * m_Nvc;
1143  const int Nvcd = m_Nvc * m_Nd;
1144  const int Nvcd2 = Nvcd / 2;
1145 
1146  const int id1 = 0;
1147  const int id2 = m_Nvc;
1148  const int id3 = m_Nvc * 2;
1149  const int id4 = m_Nvc * 3;
1150 
1151  const int idir = 2;
1152  const double bc2 = m_boundary_each_node[idir];
1153 
1154  const int isite = m_arg[itask].isite;
1155  const int isite_cp = m_arg[itask].isite_cp_z;
1156 
1157  double *w2 = &v2[Nvcd * isite];
1158  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1159 
1160 
1161  if (m_arg[itask].kz0 == 1) {
1162  const int Nxy = m_Nx * m_Ny;
1163  const int iz = 0;
1164 
1165  for (int it = 0; it < m_Mt; ++it) {
1166  for (int ixy = 0; ixy < Nxy; ++ixy) {
1167  int is = ixy + Nxy * (iz + m_Nz * it);
1168  int is2 = ixy + Nxy * it;
1169  int iv = Nvcd * is;
1170  int ix1 = Nvc2 * is2;
1171  int ix2 = ix1 + m_Nvc;
1172 
1173  for (int ic = 0; ic < m_Nc; ++ic) {
1174  int ic_r = 2 * ic;
1175  int ic_i = 2 * ic + 1;
1176 
1177  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1178  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1179  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1180  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1181 
1182  w2[ic_r + id3 + iv] -= bc2 * w1[ic_i + ix1];
1183  w2[ic_i + id3 + iv] += bc2 * w1[ic_r + ix1];
1184  w2[ic_r + id4 + iv] += bc2 * w1[ic_i + ix2];
1185  w2[ic_i + id4 + iv] -= bc2 * w1[ic_r + ix2];
1186  }
1187  }
1188  }
1189  }
1190  }
1191 
1192 
1193 //====================================================================
1194  void Fopr_Wilson::mult_zmb_thread(const int itask,
1195  double *v2, const double *v1)
1196  {
1197  const int Nvcd = m_Nvc * m_Nd;
1198  const int Nxy = m_Nx * m_Ny;
1199 
1200  const int id1 = 0;
1201  const int id2 = m_Nvc;
1202  const int id3 = m_Nvc * 2;
1203  const int id4 = m_Nvc * 3;
1204 
1205  const int idir = 2;
1206 
1207  const int isite = m_arg[itask].isite;
1208  const int kz0 = m_arg[itask].kz0;
1209 
1210  double *w2 = &v2[Nvcd * isite];
1211  const double *w1 = &v1[Nvcd * isite];
1212  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1213 
1214 
1215  for (int it = 0; it < m_Mt; ++it) {
1216  for (int iz = kz0; iz < m_Mz; ++iz) {
1217  for (int ixy = 0; ixy < Nxy; ++ixy) {
1218  int is = ixy + Nxy * (iz + m_Nz * it);
1219  int iv = Nvcd * is;
1220  int in = Nvcd * (is - Nxy);
1221  int ig = m_Ndf * (is - Nxy);
1222 
1223  double vt1[m_Nvc], vt2[m_Nvc];
1224 
1225  for (int ic = 0; ic < m_Nc; ++ic) {
1226  int ic_r = 2 * ic;
1227  int ic_i = 2 * ic + 1;
1228 
1229  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id3 + in];
1230  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id3 + in];
1231  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_i + id4 + in];
1232  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_r + id4 + in];
1233  }
1234 
1235  for (int ic = 0; ic < m_Nc; ++ic) {
1236  int ic2 = 2 * ic;
1237 
1238  double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1239  double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1240  double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1241  double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1242 
1243  int ic_r = 2 * ic;
1244  int ic_i = 2 * ic + 1;
1245 
1246  w2[ic_r + id1 + iv] += wt1r;
1247  w2[ic_i + id1 + iv] += wt1i;
1248  w2[ic_r + id2 + iv] += wt2r;
1249  w2[ic_i + id2 + iv] += wt2i;
1250 
1251  w2[ic_r + id3 + iv] -= wt1i;
1252  w2[ic_i + id3 + iv] += wt1r;
1253  w2[ic_r + id4 + iv] += wt2i;
1254  w2[ic_i + id4 + iv] -= wt2r;
1255  }
1256  }
1257  }
1258  }
1259  }
1260 
1261 
1262 //====================================================================
1264  double *vcp1, const double *v1)
1265  {
1266  const int Nvc2 = 2 * m_Nvc;
1267  const int Nvcd = m_Nvc * m_Nd;
1268  const int Nvcd2 = Nvcd / 2;
1269 
1270  const int id3 = m_Nvc * 2;
1271  const int id4 = m_Nvc * 3;
1272 
1273  const int idir = 3;
1274  const double bc2 = m_boundary_each_node[idir];
1275 
1276  const int isite = m_arg[itask].isite;
1277  const int isite_cp = m_arg[itask].isite_cp_t;
1278 
1279  double *w2 = &vcp1[Nvcd2 * isite_cp];
1280  const double *w1 = &v1[Nvcd * isite];
1281 
1282 
1283  if (m_arg[itask].kt0 == 1) {
1284  const int Nxy = m_Nx * m_Ny;
1285  const int it = 0;
1286 
1287  for (int iz = 0; iz < m_Mz; ++iz) {
1288  for (int ixy = 0; ixy < Nxy; ++ixy) {
1289  int is = ixy + Nxy * (iz + m_Nz * it);
1290  int is2 = ixy + Nxy * iz;
1291  int in = Nvcd * is;
1292  int ix1 = Nvc2 * is2;
1293  int ix2 = ix1 + m_Nvc;
1294 
1295  for (int ic = 0; ic < m_Nc; ++ic) {
1296  int ic_r = 2 * ic;
1297  int ic_i = 2 * ic + 1;
1298 
1299  w2[ic_r + ix1] = 2.0 * bc2 * w1[ic_r + id3 + in];
1300  w2[ic_i + ix1] = 2.0 * bc2 * w1[ic_i + id3 + in];
1301  w2[ic_r + ix2] = 2.0 * bc2 * w1[ic_r + id4 + in];
1302  w2[ic_i + ix2] = 2.0 * bc2 * w1[ic_i + id4 + in];
1303  }
1304  }
1305  }
1306  }
1307  }
1308 
1309 
1310 //====================================================================
1312  double *v2, const double *vcp2)
1313  {
1314  const int Nvc2 = 2 * m_Nvc;
1315  const int Nvcd = m_Nvc * m_Nd;
1316  const int Nvcd2 = Nvcd / 2;
1317 
1318  const int id3 = m_Nvc * 2;
1319  const int id4 = m_Nvc * 3;
1320 
1321  const int idir = 3;
1322 
1323  const int isite = m_arg[itask].isite;
1324  const int isite_cp = m_arg[itask].isite_cp_t;
1325 
1326  double *w2 = &v2[Nvcd * isite];
1327  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1328  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1329 
1330 
1331  if (m_arg[itask].kt1 == 1) {
1332  const int Nxy = m_Nx * m_Ny;
1333  const int it = m_Mt - 1;
1334 
1335  for (int iz = 0; iz < m_Mz; ++iz) {
1336  for (int ixy = 0; ixy < Nxy; ++ixy) {
1337  int is = ixy + Nxy * (iz + m_Nz * it);
1338  int is2 = ixy + Nxy * iz;
1339  int iv = Nvcd * is;
1340  int ig = m_Ndf * is;
1341  int ix1 = Nvc2 * is2;
1342  int ix2 = ix1 + m_Nvc;
1343 
1344  for (int ic = 0; ic < m_Nc; ++ic) {
1345  int ic2 = ic * m_Nvc;
1346 
1347  int ic_r = 2 * ic;
1348  int ic_i = 2 * ic + 1;
1349 
1350  w2[ic_r + id3 + iv] += mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1351  w2[ic_i + id3 + iv] += mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1352  w2[ic_r + id4 + iv] += mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1353  w2[ic_i + id4 + iv] += mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1354  }
1355  }
1356  }
1357  }
1358  }
1359 
1360 
1361 //====================================================================
1363  double *v2, const double *v1)
1364  {
1365  const int Nvcd = m_Nvc * m_Nd;
1366  const int Nxy = m_Nx * m_Ny;
1367  const int Nxyz = m_Nx * m_Ny * m_Nz;
1368 
1369  const int id3 = m_Nvc * 2;
1370  const int id4 = m_Nvc * 3;
1371 
1372  const int idir = 3;
1373 
1374  const int isite = m_arg[itask].isite;
1375  const int kt1 = m_arg[itask].kt1;
1376 
1377  double *w2 = &v2[Nvcd * isite];
1378  const double *w1 = &v1[Nvcd * isite];
1379  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1380 
1381  double vt1[m_Nvc], vt2[m_Nvc];
1382 
1383 
1384  for (int it = 0; it < m_Mt - kt1; ++it) {
1385  for (int iz = 0; iz < m_Mz; ++iz) {
1386  for (int ixy = 0; ixy < Nxy; ++ixy) {
1387  int is = ixy + Nxy * (iz + m_Nz * it);
1388  int iv = Nvcd * is;
1389  int in = Nvcd * (is + Nxyz);
1390  int ig = m_Ndf * is;
1391 
1392  for (int ic = 0; ic < m_Nc; ++ic) {
1393  int ic_r = 2 * ic;
1394  int ic_i = 2 * ic + 1;
1395 
1396  vt1[ic_r] = 2.0 * w1[ic_r + id3 + in];
1397  vt1[ic_i] = 2.0 * w1[ic_i + id3 + in];
1398  vt2[ic_r] = 2.0 * w1[ic_r + id4 + in];
1399  vt2[ic_i] = 2.0 * w1[ic_i + id4 + in];
1400  }
1401 
1402  for (int ic = 0; ic < m_Nc; ++ic) {
1403  int ic2 = ic * m_Nvc;
1404 
1405  int ic_r = 2 * ic;
1406  int ic_i = 2 * ic + 1;
1407 
1408  w2[ic_r + id3 + iv] += mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1409  w2[ic_i + id3 + iv] += mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1410  w2[ic_r + id4 + iv] += mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1411  w2[ic_i + id4 + iv] += mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1412  }
1413  }
1414  }
1415  }
1416  }
1417 
1418 
1419 //====================================================================
1421  double *vcp1, const double *v1)
1422  {
1423  const int Nvc2 = 2 * m_Nvc;
1424  const int Nvcd = m_Nvc * m_Nd;
1425  const int Nvcd2 = Nvcd / 2;
1426 
1427  const int id1 = 0;
1428  const int id2 = m_Nvc;
1429 
1430  const int idir = 3;
1431 
1432  const int isite = m_arg[itask].isite;
1433  const int isite_cp = m_arg[itask].isite_cp_t;
1434 
1435  double *w2 = &vcp1[Nvcd2 * isite_cp];
1436  const double *w1 = &v1[Nvcd * isite];
1437  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1438 
1439 
1440  if (m_arg[itask].kt1 == 1) {
1441  const int Nxy = m_Nx * m_Ny;
1442  const int it = m_Mt - 1;
1443 
1444  for (int iz = 0; iz < m_Mz; ++iz) {
1445  for (int ixy = 0; ixy < Nxy; ++ixy) {
1446  int is = ixy + Nxy * (iz + m_Nz * it);
1447  int is2 = ixy + Nxy * iz;
1448  int in = Nvcd * is;
1449  int ig = m_Ndf * is;
1450  int ix1 = Nvc2 * is2;
1451  int ix2 = ix1 + m_Nvc;
1452 
1453  double vt1[m_Nvc], vt2[m_Nvc];
1454 
1455  for (int ic = 0; ic < m_Nc; ++ic) {
1456  int ic_r = 2 * ic;
1457  int ic_i = 2 * ic + 1;
1458 
1459  vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1460  vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1461  vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1462  vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1463  }
1464 
1465  for (int ic = 0; ic < m_Nc; ++ic) {
1466  int ic2 = 2 * ic;
1467 
1468  int ic_r = 2 * ic;
1469  int ic_i = 2 * ic + 1;
1470 
1471  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1472  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1473  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1474  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1475  }
1476  }
1477  }
1478  }
1479  }
1480 
1481 
1482 //====================================================================
1484  double *v2, const double *vcp2)
1485  {
1486  const int Nvc2 = 2 * m_Nvc;
1487  const int Nvcd = m_Nvc * m_Nd;
1488  const int Nvcd2 = Nvcd / 2;
1489 
1490  const int id1 = 0;
1491  const int id2 = m_Nvc;
1492 
1493  const int idir = 3;
1494  const double bc2 = m_boundary_each_node[idir];
1495 
1496  const int isite = m_arg[itask].isite;
1497  const int isite_cp = m_arg[itask].isite_cp_t;
1498 
1499  double *w2 = &v2[Nvcd * isite];
1500  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1501 
1502 
1503  if (m_arg[itask].kt0 == 1) {
1504  const int Nxy = m_Nx * m_Ny;
1505  const int it = 0;
1506 
1507  for (int iz = 0; iz < m_Mz; ++iz) {
1508  for (int ixy = 0; ixy < Nxy; ++ixy) {
1509  int is = ixy + Nxy * (iz + m_Nz * it);
1510  int is2 = ixy + Nxy * iz;
1511  int iv = Nvcd * is;
1512  int ix1 = Nvc2 * is2;
1513  int ix2 = ix1 + m_Nvc;
1514 
1515  for (int ic = 0; ic < m_Nc; ++ic) {
1516  int ic_r = 2 * ic;
1517  int ic_i = 2 * ic + 1;
1518 
1519  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1520  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1521  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1522  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1523  }
1524  }
1525  }
1526  }
1527  }
1528 
1529 
1530 //====================================================================
1532  double *v2, const double *v1)
1533  {
1534  const int Nvcd = m_Nvc * m_Nd;
1535  const int Nxy = m_Nx * m_Ny;
1536  const int Nxyz = Nxy * m_Nz;
1537 
1538  const int id1 = 0;
1539  const int id2 = m_Nvc;
1540 
1541  const int idir = 3;
1542 
1543  const int isite = m_arg[itask].isite;
1544  const int kt0 = m_arg[itask].kt0;
1545 
1546  double *w2 = &v2[Nvcd * isite];
1547  const double *w1 = &v1[Nvcd * isite];
1548  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1549 
1550 
1551  for (int it = kt0; it < m_Mt; ++it) {
1552  for (int iz = 0; iz < m_Mz; ++iz) {
1553  for (int ixy = 0; ixy < Nxy; ++ixy) {
1554  int is = ixy + Nxy * (iz + m_Nz * it);
1555  int iv = Nvcd * is;
1556  int in = Nvcd * (is - Nxyz);
1557  int ig = m_Ndf * (is - Nxyz);
1558 
1559  double vt1[m_Nvc], vt2[m_Nvc];
1560 
1561  for (int ic = 0; ic < m_Nc; ++ic) {
1562  int ic_r = 2 * ic;
1563  int ic_i = 2 * ic + 1;
1564 
1565  vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1566  vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1567  vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1568  vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1569  }
1570 
1571  for (int ic = 0; ic < m_Nc; ++ic) {
1572  int ic2 = 2 * ic;
1573 
1574  int ic_r = 2 * ic;
1575  int ic_i = 2 * ic + 1;
1576 
1577  w2[ic_r + id1 + iv] += mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1578  w2[ic_i + id1 + iv] += mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1579  w2[ic_r + id2 + iv] += mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1580  w2[ic_i + id2 + iv] += mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1581  }
1582  }
1583  }
1584  }
1585  }
1586 
1587 
1588 //====================================================================
1590  double *vcp1, const double *v1)
1591  {
1592  const int Nvc2 = 2 * m_Nvc;
1593  const int Nvcd = m_Nvc * m_Nd;
1594  const int Nvcd2 = Nvcd / 2;
1595 
1596  const int id1 = 0;
1597  const int id2 = m_Nvc;
1598  const int id3 = m_Nvc * 2;
1599  const int id4 = m_Nvc * 3;
1600 
1601  const int idir = 3;
1602  const double bc2 = m_boundary_each_node[idir];
1603 
1604  const int isite = m_arg[itask].isite;
1605  const int isite_cp = m_arg[itask].isite_cp_t;
1606 
1607  double *w2 = &vcp1[Nvcd2 * isite_cp];
1608  const double *w1 = &v1[Nvcd * isite];
1609 
1610 
1611  if (m_arg[itask].kt0 == 1) {
1612  const int Nxy = m_Nx * m_Ny;
1613  const int it = 0;
1614 
1615  for (int iz = 0; iz < m_Mz; ++iz) {
1616  for (int ixy = 0; ixy < Nxy; ++ixy) {
1617  int is = ixy + Nxy * (iz + m_Nz * it);
1618  int is2 = ixy + Nxy * iz;
1619  int in = Nvcd * is;
1620  int ix1 = Nvc2 * is2;
1621  int ix2 = ix1 + m_Nvc;
1622 
1623  for (int ic = 0; ic < m_Nc; ++ic) {
1624  int ic_r = 2 * ic;
1625  int ic_i = 2 * ic + 1;
1626 
1627  w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id3 + in]);
1628  w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id3 + in]);
1629  w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_r + id4 + in]);
1630  w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_i + id4 + in]);
1631  }
1632  }
1633  }
1634  }
1635  }
1636 
1637 
1638 //====================================================================
1640  double *v2, const double *vcp2)
1641  {
1642  const int Nvc2 = 2 * m_Nvc;
1643  const int Nvcd = m_Nvc * m_Nd;
1644  const int Nvcd2 = Nvcd / 2;
1645 
1646  const int id1 = 0;
1647  const int id2 = m_Nvc;
1648  const int id3 = m_Nvc * 2;
1649  const int id4 = m_Nvc * 3;
1650 
1651  const int idir = 3;
1652 
1653  const int isite = m_arg[itask].isite;
1654  const int isite_cp = m_arg[itask].isite_cp_t;
1655 
1656  double *w2 = &v2[Nvcd * isite];
1657  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1658  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1659 
1660 
1661  if (m_arg[itask].kt1 == 1) {
1662  const int Nxy = m_Nx * m_Ny;
1663  const int it = m_Mt - 1;
1664 
1665  for (int iz = 0; iz < m_Mz; ++iz) {
1666  for (int ixy = 0; ixy < Nxy; ++ixy) {
1667  int is = ixy + Nxy * (iz + m_Nz * it);
1668  int is2 = ixy + Nxy * iz;
1669  int iv = Nvcd * is;
1670  int ig = m_Ndf * is;
1671  int ix1 = Nvc2 * is2;
1672  int ix2 = ix1 + m_Nvc;
1673 
1674  for (int ic = 0; ic < m_Nc; ++ic) {
1675  int ic2 = ic * m_Nvc;
1676 
1677  double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1678  double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1679  double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1680  double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1681 
1682  int ic_r = 2 * ic;
1683  int ic_i = 2 * ic + 1;
1684 
1685  w2[ic_r + id1 + iv] += wt1r;
1686  w2[ic_i + id1 + iv] += wt1i;
1687  w2[ic_r + id2 + iv] += wt2r;
1688  w2[ic_i + id2 + iv] += wt2i;
1689 
1690  w2[ic_r + id3 + iv] += wt1r;
1691  w2[ic_i + id3 + iv] += wt1i;
1692  w2[ic_r + id4 + iv] += wt2r;
1693  w2[ic_i + id4 + iv] += wt2i;
1694  }
1695  }
1696  }
1697  }
1698  }
1699 
1700 
1701 //====================================================================
1703  double *v2, const double *v1)
1704  {
1705  const int Nvcd = m_Nvc * m_Nd;
1706  const int Nxy = m_Nx * m_Ny;
1707  const int Nxyz = m_Nx * m_Ny * m_Nz;
1708 
1709  const int id1 = 0;
1710  const int id2 = m_Nvc;
1711  const int id3 = m_Nvc * 2;
1712  const int id4 = m_Nvc * 3;
1713 
1714  const int idir = 3;
1715 
1716  const int isite = m_arg[itask].isite;
1717  const int kt1 = m_arg[itask].kt1;
1718 
1719  double *w2 = &v2[Nvcd * isite];
1720  const double *w1 = &v1[Nvcd * isite];
1721  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1722 
1723 
1724  for (int it = 0; it < m_Mt - kt1; ++it) {
1725  for (int iz = 0; iz < m_Mz; ++iz) {
1726  for (int ixy = 0; ixy < Nxy; ++ixy) {
1727  int is = ixy + Nxy * (iz + m_Nz * it);
1728  int iv = Nvcd * is;
1729  int in = Nvcd * (is + Nxyz);
1730  int ig = m_Ndf * is;
1731 
1732  double vt1[m_Nvc], vt2[m_Nvc];
1733 
1734  for (int ic = 0; ic < m_Nc; ++ic) {
1735  int ic_r = 2 * ic;
1736  int ic_i = 2 * ic + 1;
1737 
1738  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id3 + in];
1739  vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id3 + in];
1740  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id4 + in];
1741  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id4 + in];
1742  }
1743 
1744  for (int ic = 0; ic < m_Nc; ++ic) {
1745  int ic2 = ic * m_Nvc;
1746 
1747  double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1748  double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1749  double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1750  double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1751 
1752  int ic_r = 2 * ic;
1753  int ic_i = 2 * ic + 1;
1754 
1755  w2[ic_r + id1 + iv] += wt1r;
1756  w2[ic_i + id1 + iv] += wt1i;
1757  w2[ic_r + id2 + iv] += wt2r;
1758  w2[ic_i + id2 + iv] += wt2i;
1759 
1760  w2[ic_r + id3 + iv] += wt1r;
1761  w2[ic_i + id3 + iv] += wt1i;
1762  w2[ic_r + id4 + iv] += wt2r;
1763  w2[ic_i + id4 + iv] += wt2i;
1764  }
1765  }
1766  }
1767  }
1768  }
1769 
1770 
1771 //====================================================================
1773  double *vcp1, const double *v1)
1774  {
1775  const int Nvc2 = 2 * m_Nvc;
1776  const int Nvcd = m_Nvc * m_Nd;
1777  const int Nvcd2 = Nvcd / 2;
1778 
1779  const int id1 = 0;
1780  const int id2 = m_Nvc;
1781  const int id3 = m_Nvc * 2;
1782  const int id4 = m_Nvc * 3;
1783 
1784  const int idir = 3;
1785 
1786  const int isite = m_arg[itask].isite;
1787  const int isite_cp = m_arg[itask].isite_cp_t;
1788 
1789  double *w2 = &vcp1[Nvcd2 * isite_cp];
1790  const double *w1 = &v1[Nvcd * isite];
1791  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1792 
1793 
1794  if (m_arg[itask].kt1 == 1) {
1795  const int Nxy = m_Nx * m_Ny;
1796  const int it = m_Mt - 1;
1797 
1798  for (int iz = 0; iz < m_Mz; ++iz) {
1799  for (int ixy = 0; ixy < Nxy; ++ixy) {
1800  int is = ixy + Nxy * (iz + m_Nz * it);
1801  int is2 = ixy + Nxy * iz;
1802  int in = Nvcd * is;
1803  int ig = m_Ndf * is;
1804  int ix1 = Nvc2 * is2;
1805  int ix2 = ix1 + m_Nvc;
1806 
1807  double vt1[m_Nvc], vt2[m_Nvc];
1808 
1809  for (int ic = 0; ic < m_Nc; ++ic) {
1810  int ic_r = 2 * ic;
1811  int ic_i = 2 * ic + 1;
1812 
1813  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
1814  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
1815  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
1816  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
1817  }
1818 
1819  for (int ic = 0; ic < m_Nc; ++ic) {
1820  int ic2 = 2 * ic;
1821 
1822  int ic_r = 2 * ic;
1823  int ic_i = 2 * ic + 1;
1824 
1825  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1826  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1827  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1828  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1829  }
1830  }
1831  }
1832  }
1833  }
1834 
1835 
1836 //====================================================================
1838  double *v2, const double *vcp2)
1839  {
1840  const int Nvc2 = 2 * m_Nvc;
1841  const int Nvcd = m_Nvc * m_Nd;
1842  const int Nvcd2 = Nvcd / 2;
1843 
1844  const int id1 = 0;
1845  const int id2 = m_Nvc;
1846  const int id3 = m_Nvc * 2;
1847  const int id4 = m_Nvc * 3;
1848 
1849  const int idir = 3;
1850  const double bc2 = m_boundary_each_node[idir];
1851 
1852  const int isite = m_arg[itask].isite;
1853  const int isite_cp = m_arg[itask].isite_cp_t;
1854 
1855  double *w2 = &v2[Nvcd * isite];
1856  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1857 
1858 
1859  if (m_arg[itask].kt0 == 1) {
1860  const int Nxy = m_Nx * m_Ny;
1861  const int it = 0;
1862 
1863  for (int iz = 0; iz < m_Mz; ++iz) {
1864  for (int ixy = 0; ixy < Nxy; ++ixy) {
1865  int is = ixy + Nxy * (iz + m_Nz * it);
1866  int is2 = ixy + Nxy * iz;
1867  int iv = Nvcd * is;
1868  int ix1 = Nvc2 * is2;
1869  int ix2 = ix1 + m_Nvc;
1870 
1871  for (int ic = 0; ic < m_Nc; ++ic) {
1872  int ic_r = 2 * ic;
1873  int ic_i = 2 * ic + 1;
1874 
1875  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1876  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1877  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1878  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1879 
1880  w2[ic_r + id3 + iv] -= bc2 * w1[ic_r + ix1];
1881  w2[ic_i + id3 + iv] -= bc2 * w1[ic_i + ix1];
1882  w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix2];
1883  w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix2];
1884  }
1885  }
1886  }
1887  }
1888  }
1889 
1890 
1891 //====================================================================
1893  double *v2, const double *v1)
1894  {
1895  const int Nvcd = m_Nvc * m_Nd;
1896  const int Nxy = m_Nx * m_Ny;
1897  const int Nxyz = Nxy * m_Nz;
1898 
1899  const int id1 = 0;
1900  const int id2 = m_Nvc;
1901  const int id3 = m_Nvc * 2;
1902  const int id4 = m_Nvc * 3;
1903 
1904  const int idir = 3;
1905 
1906  const int isite = m_arg[itask].isite;
1907  const int kt0 = m_arg[itask].kt0;
1908 
1909  double *w2 = &v2[Nvcd * isite];
1910  const double *w1 = &v1[Nvcd * isite];
1911  const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1912 
1913 
1914  for (int it = kt0; it < m_Mt; ++it) {
1915  for (int iz = 0; iz < m_Mz; ++iz) {
1916  for (int ixy = 0; ixy < Nxy; ++ixy) {
1917  int is = ixy + Nxy * (iz + m_Nz * it);
1918  int iv = Nvcd * is;
1919  int in = Nvcd * (is - Nxyz);
1920  int ig = m_Ndf * (is - Nxyz);
1921 
1922  double vt1[m_Nvc], vt2[m_Nvc];
1923 
1924  for (int ic = 0; ic < m_Nc; ++ic) {
1925  int ic_r = 2 * ic;
1926  int ic_i = 2 * ic + 1;
1927 
1928  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
1929  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
1930  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
1931  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
1932  }
1933 
1934  for (int ic = 0; ic < m_Nc; ++ic) {
1935  int ic2 = 2 * ic;
1936 
1937  double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1938  double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1939  double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1940  double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1941 
1942  int ic_r = 2 * ic;
1943  int ic_i = 2 * ic + 1;
1944 
1945  w2[ic_r + id1 + iv] += wt1r;
1946  w2[ic_i + id1 + iv] += wt1i;
1947  w2[ic_r + id2 + iv] += wt2r;
1948  w2[ic_i + id2 + iv] += wt2i;
1949 
1950  w2[ic_r + id3 + iv] -= wt1r;
1951  w2[ic_i + id3 + iv] -= wt1i;
1952  w2[ic_r + id4 + iv] -= wt2r;
1953  w2[ic_i + id4 + iv] -= wt2i;
1954  }
1955  }
1956  }
1957  }
1958  }
1959 
1960 
1961 //====================================================================
1962  void Fopr_Wilson::gm5_dirac_thread(const int itask,
1963  double *v2, const double *v1)
1964  {
1965  const int Nvcd = m_Nvc * m_Nd;
1966  const int Nxy = m_Nx * m_Ny;
1967 
1968  const int id1 = 0;
1969  const int id2 = m_Nvc;
1970  const int id3 = m_Nvc * 2;
1971  const int id4 = m_Nvc * 3;
1972 
1973  const int isite = m_arg[itask].isite;
1974 
1975  double *w2 = &v2[Nvcd * isite];
1976  const double *w1 = &v1[Nvcd * isite];
1977 
1978  for (int it = 0; it < m_Mt; ++it) {
1979  for (int iz = 0; iz < m_Mz; ++iz) {
1980  for (int ixy = 0; ixy < Nxy; ++ixy) {
1981  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1982 
1983  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
1984  w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
1985  w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
1986  w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
1987  w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
1988  }
1989  }
1990  }
1991  }
1992  }
1993 
1994 
1995 //====================================================================
1996  void Fopr_Wilson::gm5_chiral_thread(const int itask,
1997  double *v2, const double *v1)
1998  {
1999  const int Nvcd = m_Nvc * m_Nd;
2000  const int Nxy = m_Nx * m_Ny;
2001 
2002  const int id1 = 0;
2003  const int id2 = m_Nvc;
2004  const int id3 = m_Nvc * 2;
2005  const int id4 = m_Nvc * 3;
2006 
2007  const int isite = m_arg[itask].isite;
2008 
2009  double *w2 = &v2[Nvcd * isite];
2010  const double *w1 = &v1[Nvcd * isite];
2011 
2012 
2013  for (int it = 0; it < m_Mt; ++it) {
2014  for (int iz = 0; iz < m_Mz; ++iz) {
2015  for (int ixy = 0; ixy < Nxy; ++ixy) {
2016  int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2017 
2018  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2019  w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2020  w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2021  w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2022  w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2023  }
2024  }
2025  }
2026  }
2027  }
2028 
2029 
2030 //====================================================================
2031 }
2032 //============================================================END=====
void mult_ypb_thread(const int, double *, const double *)
BridgeIO vout
Definition: bridgeIO.cpp:503
void mult_yp2_thread(const int, double *, const double *)
const double * ptr(const int jin, const int site, const int jex) const
Definition: field.h:153
void mult_xmb_thread(const int, double *, const double *)
void mult_tmb_chiral_thread(const int, double *, const double *)
void general(const char *format,...)
Definition: bridgeIO.cpp:197
void mult_yp1_thread(const int, double *, const double *)
void mult_xpb_thread(const int, double *, const double *)
std::vector< double > m_boundary_each_node
b.c. for each node.
void mult_tp1_chiral_thread(const int, double *, const double *)
Bridge::VerboseLevel m_vl
void clear_thread(const int, double *)
const Field_G * m_U
gauge configuration.
void mult_tpb_chiral_thread(const int, double *, const double *)
void mult_tpb_dirac_thread(const int, double *, const double *)
void mult_zm2_thread(const int, double *, const double *)
void mult_tp1_dirac_thread(const int, double *, const double *)
void mult_zp2_thread(const int, double *, const double *)
void mult_xm1_thread(const int, double *, const double *)
void gm5_chiral_thread(const int, double *, const double *)
void mult_xp2_thread(const int, double *, const double *)
void mult_zmb_thread(const int, double *, const double *)
void mult_ymb_thread(const int, double *, const double *)
void daypx_thread(const int, double *, const double, const double *)
void mult_tm2_dirac_thread(const int, double *, const double *)
void mult_tm1_chiral_thread(const int, double *, const double *)
void mult_xp1_thread(const int, double *, const double *)
std::vector< mult_arg > m_arg
void mult_tmb_dirac_thread(const int, double *, const double *)
void mult_tm1_dirac_thread(const int, double *, const double *)
void mult_tp2_dirac_thread(const int, double *, const double *)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void gm5_dirac_thread(const int, double *, const double *)
void mult_zm1_thread(const int, double *, const double *)
void crucial(const char *format,...)
Definition: bridgeIO.cpp:178
void mult_tm2_chiral_thread(const int, double *, const double *)
void mult_ym2_thread(const int, double *, const double *)
void mult_tp2_chiral_thread(const int, double *, const double *)
void mult_xm2_thread(const int, double *, const double *)
void mult_zp1_thread(const int, double *, const double *)
void mult_zpb_thread(const int, double *, const double *)
void mult_ym1_thread(const int, double *, const double *)
static const std::string class_name