Bridge++  Version 1.5.4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fopr_Wilson_eo_impl_thread.cpp
Go to the documentation of this file.
1 
14 #include "fopr_Wilson_eo_impl.h"
15 
16 #include "Field/field_G.h"
18 
19 namespace Imp {
20 #if defined USE_GROUP_SU3
22 #elif defined USE_GROUP_SU2
24 #elif defined USE_GROUP_SU_N
26 #endif
27 
28 //====================================================================
30  {
32 
33  // The following setup corresponds to uniform division of volume.
34  if (m_Nthread <= m_Nt) {
36  } else if (m_Nthread <= m_Nz * m_Nt) {
37  m_Ntask_t = m_Nt;
38  } else {
39  vout.crucial(m_vl, "Error at %s: Too large Nthread = %d\n", class_name.c_str(), m_Nthread);
40  exit(EXIT_FAILURE);
41  }
42 
44 
45  if (m_Ntask_z * m_Ntask_t != m_Nthread) {
46  vout.crucial(m_vl, "Error at %s: Nz = %d and Nt = %d do not match Nthread = %d\n",
47  class_name.c_str(), m_Nz, m_Nt, m_Nthread);
48  exit(EXIT_FAILURE);
49  }
50 
52  m_Mz = m_Nz / m_Ntask_z;
53  m_Mt = m_Nt / m_Ntask_t;
54 
55  if (m_Mz * m_Ntask_z != m_Nz) {
56  vout.crucial(m_vl, "Error at %s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
57  class_name.c_str(), m_Mz, m_Ntask_z, m_Nz);
58  exit(EXIT_FAILURE);
59  }
60 
61  if (m_Mt * m_Ntask_t != m_Nt) {
62  vout.crucial(m_vl, "Error at %s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
63  class_name.c_str(), m_Mt, m_Ntask_t, m_Nt);
64  exit(EXIT_FAILURE);
65  }
66 
67  vout.general(m_vl, " Nthread = %d\n", m_Nthread);
68  vout.general(m_vl, " Ntask = %d\n", m_Ntask);
69  vout.general(m_vl, " Ntask_z = %d Ntask_t = %d\n", m_Ntask_z, m_Ntask_t);
70  vout.general(m_vl, " Mz = %d Mt = %d\n", m_Mz, m_Mt);
71 
72  // setup of arguments
73  const int Nxy2 = m_Nx2 * m_Ny;
74 
75  m_arg.resize(m_Ntask);
76 
77  for (int ith_t = 0; ith_t < m_Ntask_t; ++ith_t) {
78  for (int ith_z = 0; ith_z < m_Ntask_z; ++ith_z) {
79  int itask = ith_z + m_Ntask_z * ith_t;
80 
81  m_arg[itask].isite = (ith_z * m_Mz + ith_t * (m_Nz * m_Mt)) * Nxy2;
82 
83  m_arg[itask].kt0 = 0;
84  m_arg[itask].kt1 = 0;
85  m_arg[itask].kz0 = 0;
86  m_arg[itask].kz1 = 0;
87  if (ith_t == 0) m_arg[itask].kt0 = 1;
88  if (ith_z == 0) m_arg[itask].kz0 = 1;
89  if (ith_t == m_Ntask_t - 1) m_arg[itask].kt1 = 1;
90  if (ith_z == m_Ntask_z - 1) m_arg[itask].kz1 = 1;
91 
92  m_arg[itask].isite_cp_x = itask * m_Mz * m_Mt * (m_Ny / 2);
93  m_arg[itask].isite_cp_y = itask * m_Mz * m_Mt * m_Nx2;
94  m_arg[itask].isite_cp_z = ith_t * m_Mt * Nxy2;
95  m_arg[itask].isite_cp_t = ith_z * m_Mz * Nxy2;
96  }
97  }
98  }
99 
100 
101 //====================================================================
102  void Fopr_Wilson_eo::scal_thread(const int itask,
103  double *w, const double fac)
104  {
105  const int Nvcd = m_Nvc * m_Nd;
106  const int Nv_xy = Nvcd * m_Nx2 * m_Ny;
107 
108  const int isite = m_arg[itask].isite;
109 
110  double *wp = &w[Nvcd * isite];
111 
112 
113  for (int it = 0; it < m_Mt; ++it) {
114  for (int iz = 0; iz < m_Mz; ++iz) {
115  for (int iv_xy = 0; iv_xy < Nv_xy; ++iv_xy) {
116  int iv = iv_xy + Nv_xy * (iz + m_Nz * it);
117 
118  wp[iv] = fac * wp[iv];
119  }
120  }
121  }
122  }
123 
124 
125 //====================================================================
126  void Fopr_Wilson_eo::clear_thread(const int itask,
127  double *v)
128  {
129  const int Nvcd = m_Nvc * m_Nd;
130  const int Nv_xy = Nvcd * m_Nx2 * m_Ny;
131 
132  const int isite = m_arg[itask].isite;
133 
134  double *wp = &v[Nvcd * isite];
135 
136 
137  for (int it = 0; it < m_Mt; ++it) {
138  for (int iz = 0; iz < m_Mz; ++iz) {
139  for (int iv_xy = 0; iv_xy < Nv_xy; ++iv_xy) {
140  int iv = iv_xy + Nv_xy * (iz + m_Nz * it);
141 
142  wp[iv] = 0.0;
143  }
144  }
145  }
146  }
147 
148 
149 //====================================================================
150  void Fopr_Wilson_eo::mult_xp1_thread(const int itask,
151  double *vcp1, const double *v1, const int ieo)
152  {
153  const int Nvc2 = 2 * m_Nvc;
154  const int Nvcd = m_Nvc * m_Nd;
155  const int Nvcd2 = Nvcd / 2;
156 
157  const int id1 = 0;
158  const int id2 = m_Nvc;
159  const int id3 = m_Nvc * 2;
160  const int id4 = m_Nvc * 3;
161 
162  const int idir = 0;
163  const int ix = 0;
164  const double bc2 = m_boundary_each_node[idir];
165 
166  const int isite = m_arg[itask].isite;
167  const int isite_cp = m_arg[itask].isite_cp_x;
168  const int iyzt0 = isite / m_Nx2;
169 
170  const double *w1 = &v1[Nvcd * isite];
171  double *w2 = &vcp1[Nvcd2 * isite_cp];
172 
173 
174  int ibf = 0;
175 
176  for (int it = 0; it < m_Mt; ++it) {
177  for (int iz = 0; iz < m_Mz; ++iz) {
178  for (int iy = 0; iy < m_Ny; ++iy) {
179  int iyzt = iy + m_Ny * (iz + m_Nz * it);
180  int yzt_eo = ieo + (1 - 2 * ieo) * m_yzt_eo[iyzt0 + iyzt];
181 
182  if (yzt_eo == 1) {
183  int is = ix + m_Nx2 * iyzt;
184  int in = Nvcd * is;
185  int ix1 = Nvc2 * ibf;
186  int ix2 = ix1 + m_Nvc;
187 
188  for (int ic = 0; ic < m_Nc; ++ic) {
189  int ic_r = 2 * ic;
190  int ic_i = 2 * ic + 1;
191 
192  w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] - w1[ic_i + id4 + in]);
193  w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_r + id4 + in]);
194  w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] - w1[ic_i + id3 + in]);
195  w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_r + id3 + in]);
196  }
197 
198  ++ibf;
199  }
200  }
201  }
202  }
203  }
204 
205 
206 //====================================================================
207  void Fopr_Wilson_eo::mult_xp2_thread(const int itask,
208  double *v2, const double *vcp2, const int ieo)
209  {
210  const int Nvc2 = 2 * m_Nvc;
211  const int Nvcd = m_Nvc * m_Nd;
212  const int Nvcd2 = Nvcd / 2;
213 
214  const int id1 = 0;
215  const int id2 = m_Nvc;
216  const int id3 = m_Nvc * 2;
217  const int id4 = m_Nvc * 3;
218 
219  const int idir = 0;
220  const int ix = m_Nx2 - 1;
221 
222  const int isite = m_arg[itask].isite;
223  const int isite_cp = m_arg[itask].isite_cp_x;
224  const int iyzt0 = isite / m_Nx2;
225 
226  const double *w1 = &vcp2[Nvcd2 * isite_cp];
227  double *w2 = &v2[Nvcd * isite];
228  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
229 
230 
231  int ibf = 0;
232 
233  for (int it = 0; it < m_Mt; ++it) {
234  for (int iz = 0; iz < m_Mz; ++iz) {
235  for (int iy = 0; iy < m_Ny; ++iy) {
236  int iyzt = iy + m_Ny * (iz + m_Nz * it);
237  int yzt_eo = ieo + (1 - 2 * ieo) * m_yzt_eo[iyzt0 + iyzt];
238 
239  if (yzt_eo == 1) {
240  int is = ix + m_Nx2 * iyzt;
241  int iv = Nvcd * is;
242  int ig = m_Ndf * is;
243  int ix1 = Nvc2 * ibf;
244  int ix2 = ix1 + m_Nvc;
245 
246  for (int ic = 0; ic < m_Nc; ++ic) {
247  int ic2 = ic * m_Nvc;
248 
249  double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
250  double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
251  double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
252  double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
253 
254  int ic_r = 2 * ic;
255  int ic_i = 2 * ic + 1;
256 
257  w2[ic_r + id1 + iv] += wt1r;
258  w2[ic_i + id1 + iv] += wt1i;
259  w2[ic_r + id2 + iv] += wt2r;
260  w2[ic_i + id2 + iv] += wt2i;
261 
262  w2[ic_r + id3 + iv] += wt2i;
263  w2[ic_i + id3 + iv] -= wt2r;
264  w2[ic_r + id4 + iv] += wt1i;
265  w2[ic_i + id4 + iv] -= wt1r;
266  }
267 
268  ++ibf;
269  }
270  }
271  }
272  }
273  }
274 
275 
276 //====================================================================
277  void Fopr_Wilson_eo::mult_xpb_thread(const int itask,
278  double *v2, const double *v1, const int ieo)
279  {
280  const int Nvcd = m_Nvc * m_Nd;
281 
282  const int id1 = 0;
283  const int id2 = m_Nvc;
284  const int id3 = m_Nvc * 2;
285  const int id4 = m_Nvc * 3;
286 
287  const int idir = 0;
288 
289  const int isite = m_arg[itask].isite;
290  const int iyzt0 = isite / m_Nx2;
291 
292  const double *w1 = &v1[Nvcd * isite];
293  double *w2 = &v2[Nvcd * isite];
294  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
295 
296 
297  for (int it = 0; it < m_Mt; ++it) {
298  for (int iz = 0; iz < m_Mz; ++iz) {
299  for (int iy = 0; iy < m_Ny; ++iy) {
300  int iyzt = iy + m_Ny * (iz + m_Nz * it);
301  int yzt_eo = ieo + (1 - 2 * ieo) * m_yzt_eo[iyzt0 + iyzt];
302 
303  for (int ix = 0; ix < m_Nx2 - yzt_eo; ++ix) {
304  int is = ix + m_Nx2 * iyzt;
305  int iv = Nvcd * is;
306  int in = Nvcd * (is + yzt_eo);
307  int ig = m_Ndf * is;
308 
309  double vt1[m_Nvc], vt2[m_Nvc];
310 
311  for (int ic = 0; ic < m_Nc; ++ic) {
312  int ic_r = 2 * ic;
313  int ic_i = 2 * ic + 1;
314 
315  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_i + id4 + in];
316  vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_r + id4 + in];
317  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_i + id3 + in];
318  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_r + id3 + in];
319  }
320 
321  for (int ic = 0; ic < m_Nc; ++ic) {
322  int ic2 = ic * m_Nvc;
323 
324  double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
325  double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
326  double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
327  double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
328 
329  int ic_r = 2 * ic;
330  int ic_i = 2 * ic + 1;
331 
332  w2[ic_r + id1 + iv] += wt1r;
333  w2[ic_i + id1 + iv] += wt1i;
334  w2[ic_r + id2 + iv] += wt2r;
335  w2[ic_i + id2 + iv] += wt2i;
336 
337  w2[ic_r + id3 + iv] += wt2i;
338  w2[ic_i + id3 + iv] -= wt2r;
339  w2[ic_r + id4 + iv] += wt1i;
340  w2[ic_i + id4 + iv] -= wt1r;
341  }
342  }
343  }
344  }
345  }
346  }
347 
348 
349 //====================================================================
350  void Fopr_Wilson_eo::mult_xm1_thread(const int itask,
351  double *vcp1, const double *v1, const int ieo)
352  {
353  const int Nvc2 = 2 * m_Nvc;
354  const int Nvcd = m_Nvc * m_Nd;
355  const int Nvcd2 = Nvcd / 2;
356 
357  const int id1 = 0;
358  const int id2 = m_Nvc;
359  const int id3 = m_Nvc * 2;
360  const int id4 = m_Nvc * 3;
361 
362  const int idir = 0;
363  const int ix = m_Nx2 - 1;
364 
365  const int isite = m_arg[itask].isite;
366  const int isite_cp = m_arg[itask].isite_cp_x;
367  const int iyzt0 = isite / m_Nx2;
368 
369  const double *w1 = &v1[Nvcd * isite];
370  double *w2 = &vcp1[Nvcd2 * isite_cp];
371  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
372 
373 
374  int ibf = 0;
375 
376  for (int it = 0; it < m_Mt; ++it) {
377  for (int iz = 0; iz < m_Mz; ++iz) {
378  for (int iy = 0; iy < m_Ny; ++iy) {
379  int iyzt = iy + m_Ny * (iz + m_Nz * it);
380  int yzt_eo = ieo + (1 - 2 * ieo) * m_yzt_eo[iyzt0 + iyzt];
381 
382  if (yzt_eo == 0) {
383  int is = ix + m_Nx2 * iyzt;
384  int in = Nvcd * is;
385  int ig = m_Ndf * is;
386  int ix1 = Nvc2 * ibf;
387  int ix2 = ix1 + m_Nvc;
388 
389  double vt1[m_Nvc], vt2[m_Nvc];
390 
391  for (int ic = 0; ic < m_Nc; ++ic) {
392  int ic_r = 2 * ic;
393  int ic_i = 2 * ic + 1;
394 
395  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id4 + in];
396  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id4 + in];
397  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_i + id3 + in];
398  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_r + id3 + in];
399  }
400 
401  for (int ic = 0; ic < m_Nc; ++ic) {
402  int ic2 = 2 * ic;
403 
404  int ic_r = 2 * ic;
405  int ic_i = 2 * ic + 1;
406 
407  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
408  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
409  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
410  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
411  }
412 
413  ++ibf;
414  }
415  }
416  }
417  }
418  }
419 
420 
421 //====================================================================
422  void Fopr_Wilson_eo::mult_xm2_thread(const int itask,
423  double *v2, const double *vcp2, const int ieo)
424  {
425  const int Nvc2 = 2 * m_Nvc;
426  const int Nvcd = m_Nvc * m_Nd;
427  const int Nvcd2 = Nvcd / 2;
428 
429  const int id1 = 0;
430  const int id2 = m_Nvc;
431  const int id3 = m_Nvc * 2;
432  const int id4 = m_Nvc * 3;
433 
434  const int idir = 0;
435  const int ix = 0;
436  const double bc2 = m_boundary_each_node[idir];
437 
438  const int isite = m_arg[itask].isite;
439  const int isite_cp = m_arg[itask].isite_cp_x;
440  const int iyzt0 = isite / m_Nx2;
441 
442  const double *w1 = &vcp2[Nvcd2 * isite_cp];
443  double *w2 = &v2[Nvcd * isite];
444 
445 
446  int ibf = 0;
447 
448  for (int it = 0; it < m_Mt; ++it) {
449  for (int iz = 0; iz < m_Mz; ++iz) {
450  for (int iy = 0; iy < m_Ny; ++iy) {
451  int iyzt = iy + m_Ny * (iz + m_Nz * it);
452  int yzt_eo = ieo + (1 - 2 * ieo) * m_yzt_eo[iyzt0 + iyzt];
453 
454  if (yzt_eo == 0) {
455  int is = ix + m_Nx2 * iyzt;
456  int iv = Nvcd * is;
457 
458  int ix1 = Nvc2 * ibf;
459  int ix2 = ix1 + m_Nvc;
460 
461  for (int ic = 0; ic < m_Nc; ++ic) {
462  int ic_r = 2 * ic;
463  int ic_i = 2 * ic + 1;
464 
465  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
466  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
467  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
468  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
469 
470  w2[ic_r + id3 + iv] -= bc2 * w1[ic_i + ix2];
471  w2[ic_i + id3 + iv] += bc2 * w1[ic_r + ix2];
472  w2[ic_r + id4 + iv] -= bc2 * w1[ic_i + ix1];
473  w2[ic_i + id4 + iv] += bc2 * w1[ic_r + ix1];
474  }
475 
476  ++ibf;
477  }
478  }
479  }
480  }
481  }
482 
483 
484 //====================================================================
485  void Fopr_Wilson_eo::mult_xmb_thread(const int itask,
486  double *v2, const double *v1, const int ieo)
487  {
488  const int Nvcd = m_Nvc * m_Nd;
489 
490  const int id1 = 0;
491  const int id2 = m_Nvc;
492  const int id3 = m_Nvc * 2;
493  const int id4 = m_Nvc * 3;
494 
495  const int idir = 0;
496 
497  const int isite = m_arg[itask].isite;
498  const int iyzt0 = isite / m_Nx2;
499 
500  const double *w1 = &v1[Nvcd * isite];
501  double *w2 = &v2[Nvcd * isite];
502  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
503 
504 
505  for (int it = 0; it < m_Mt; ++it) {
506  for (int iz = 0; iz < m_Mz; ++iz) {
507  for (int iy = 0; iy < m_Ny; ++iy) {
508  int iyzt = iy + m_Ny * (iz + m_Nz * it);
509  int yzt_eo = ieo + (1 - 2 * ieo) * m_yzt_eo[iyzt0 + iyzt];
510  int Meo = 1 - yzt_eo;
511 
512  for (int ix = Meo; ix < m_Nx2; ++ix) {
513  int is = ix + m_Nx2 * iyzt;
514  int iv = Nvcd * is;
515  int in = Nvcd * (is - Meo);
516  int ig = m_Ndf * (is - Meo);
517 
518  double vt1[m_Nvc], vt2[m_Nvc];
519 
520  for (int ic = 0; ic < m_Nc; ++ic) {
521  int ic_r = 2 * ic;
522  int ic_i = 2 * ic + 1;
523 
524  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id4 + in];
525  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id4 + in];
526  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_i + id3 + in];
527  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_r + id3 + in];
528  }
529 
530  for (int ic = 0; ic < m_Nc; ++ic) {
531  int ic2 = 2 * ic;
532 
533  double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
534  double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
535  double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
536  double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
537 
538  int ic_r = 2 * ic;
539  int ic_i = 2 * ic + 1;
540 
541  w2[ic_r + id1 + iv] += wt1r;
542  w2[ic_i + id1 + iv] += wt1i;
543  w2[ic_r + id2 + iv] += wt2r;
544  w2[ic_i + id2 + iv] += wt2i;
545 
546  w2[ic_r + id3 + iv] -= wt2i;
547  w2[ic_i + id3 + iv] += wt2r;
548  w2[ic_r + id4 + iv] -= wt1i;
549  w2[ic_i + id4 + iv] += wt1r;
550  }
551  }
552  }
553  }
554  }
555  }
556 
557 
558 //====================================================================
559  void Fopr_Wilson_eo::mult_yp1_thread(const int itask,
560  double *vcp1, const double *v1, const int ieo)
561  {
562  const int Nvc2 = 2 * m_Nvc;
563  const int Nvcd = m_Nvc * m_Nd;
564  const int Nvcd2 = Nvcd / 2;
565 
566  const int id1 = 0;
567  const int id2 = m_Nvc;
568  const int id3 = m_Nvc * 2;
569  const int id4 = m_Nvc * 3;
570 
571  const int idir = 1;
572  const int iy = 0;
573  const double bc2 = m_boundary_each_node[idir];
574 
575  const int isite = m_arg[itask].isite;
576  const int isite_cp = m_arg[itask].isite_cp_y;
577 
578  const double *w1 = &v1[Nvcd * isite];
579  double *w2 = &vcp1[Nvcd2 * isite_cp];
580 
581 
582  for (int it = 0; it < m_Mt; ++it) {
583  for (int iz = 0; iz < m_Mz; ++iz) {
584  for (int ix = 0; ix < m_Nx2; ++ix) {
585  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
586  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
587  int in = Nvcd * is;
588  int ix1 = Nvc2 * is2;
589  int ix2 = ix1 + m_Nvc;
590 
591  for (int ic = 0; ic < m_Nc; ++ic) {
592  int ic_r = 2 * ic;
593  int ic_i = 2 * ic + 1;
594 
595  w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id4 + in]);
596  w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id4 + in]);
597  w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] - w1[ic_r + id3 + in]);
598  w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] - w1[ic_i + id3 + in]);
599  }
600  }
601  }
602  }
603  }
604 
605 
606 //====================================================================
607  void Fopr_Wilson_eo::mult_yp2_thread(const int itask,
608  double *v2, const double *vcp2, const int ieo)
609  {
610  const int Nvc2 = 2 * m_Nvc;
611  const int Nvcd = m_Nvc * m_Nd;
612  const int Nvcd2 = Nvcd / 2;
613 
614  const int id1 = 0;
615  const int id2 = m_Nvc;
616  const int id3 = m_Nvc * 2;
617  const int id4 = m_Nvc * 3;
618 
619  const int idir = 1;
620  const int iy = m_Ny - 1;
621 
622  const int isite = m_arg[itask].isite;
623  const int isite_cp = m_arg[itask].isite_cp_y;
624 
625  const double *w1 = &vcp2[Nvcd2 * isite_cp];
626  double *w2 = &v2[Nvcd * isite];
627  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
628 
629 
630  for (int it = 0; it < m_Mt; ++it) {
631  for (int iz = 0; iz < m_Mz; ++iz) {
632  for (int ix = 0; ix < m_Nx2; ++ix) {
633  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
634  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
635  int iv = Nvcd * is;
636  int ig = m_Ndf * is;
637  int ix1 = Nvc2 * is2;
638  int ix2 = ix1 + m_Nvc;
639 
640  for (int ic = 0; ic < m_Nc; ++ic) {
641  int ic2 = ic * m_Nvc;
642 
643  double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
644  double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
645  double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
646  double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
647 
648  int ic_r = 2 * ic;
649  int ic_i = 2 * ic + 1;
650 
651  w2[ic_r + id1 + iv] += wt1r;
652  w2[ic_i + id1 + iv] += wt1i;
653  w2[ic_r + id2 + iv] += wt2r;
654  w2[ic_i + id2 + iv] += wt2i;
655 
656  w2[ic_r + id3 + iv] -= wt2r;
657  w2[ic_i + id3 + iv] -= wt2i;
658  w2[ic_r + id4 + iv] += wt1r;
659  w2[ic_i + id4 + iv] += wt1i;
660  }
661  }
662  }
663  }
664  }
665 
666 
667 //====================================================================
668  void Fopr_Wilson_eo::mult_ypb_thread(const int itask,
669  double *v2, const double *v1, const int ieo)
670  {
671  const int Nvcd = m_Nvc * m_Nd;
672 
673  const int id1 = 0;
674  const int id2 = m_Nvc;
675  const int id3 = m_Nvc * 2;
676  const int id4 = m_Nvc * 3;
677 
678  const int idir = 1;
679 
680  const int isite = m_arg[itask].isite;
681 
682  const double *w1 = &v1[Nvcd * isite];
683  double *w2 = &v2[Nvcd * isite];
684  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
685 
686 
687  for (int it = 0; it < m_Mt; ++it) {
688  for (int iz = 0; iz < m_Mz; ++iz) {
689  for (int iy = 0; iy < m_Ny - 1; ++iy) {
690  for (int ix = 0; ix < m_Nx2; ++ix) {
691  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
692  int iv = Nvcd * is;
693  int in = Nvcd * (is + m_Nx2);
694  int ig = m_Ndf * is;
695 
696  double vt1[m_Nvc], vt2[m_Nvc];
697 
698  for (int ic = 0; ic < m_Nc; ++ic) {
699  int ic_r = 2 * ic;
700  int ic_i = 2 * ic + 1;
701 
702  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id4 + in];
703  vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id4 + in];
704  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id3 + in];
705  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id3 + in];
706  }
707 
708  for (int ic = 0; ic < m_Nc; ++ic) {
709  int ic2 = ic * m_Nvc;
710 
711  double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
712  double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
713  double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
714  double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
715 
716  int ic_r = 2 * ic;
717  int ic_i = 2 * ic + 1;
718 
719  w2[ic_r + id1 + iv] += wt1r;
720  w2[ic_i + id1 + iv] += wt1i;
721  w2[ic_r + id2 + iv] += wt2r;
722  w2[ic_i + id2 + iv] += wt2i;
723 
724  w2[ic_r + id3 + iv] -= wt2r;
725  w2[ic_i + id3 + iv] -= wt2i;
726  w2[ic_r + id4 + iv] += wt1r;
727  w2[ic_i + id4 + iv] += wt1i;
728  }
729  }
730  }
731  }
732  }
733  }
734 
735 
736 //====================================================================
737  void Fopr_Wilson_eo::mult_ym1_thread(const int itask,
738  double *vcp1, const double *v1, const int ieo)
739  {
740  const int Nvc2 = 2 * m_Nvc;
741  const int Nvcd = m_Nvc * m_Nd;
742  const int Nvcd2 = Nvcd / 2;
743 
744  const int id1 = 0;
745  const int id2 = m_Nvc;
746  const int id3 = m_Nvc * 2;
747  const int id4 = m_Nvc * 3;
748 
749  const int idir = 1;
750  const int iy = m_Ny - 1;
751 
752  const int isite = m_arg[itask].isite;
753  const int isite_cp = m_arg[itask].isite_cp_y;
754 
755  const double *w1 = &v1[Nvcd * isite];
756  double *w2 = &vcp1[Nvcd2 * isite_cp];
757  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
758 
759 
760  for (int it = 0; it < m_Mt; ++it) {
761  for (int iz = 0; iz < m_Mz; ++iz) {
762  for (int ix = 0; ix < m_Nx2; ++ix) {
763  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
764  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
765  int in = Nvcd * is;
766  int ig = m_Ndf * is;
767  int ix1 = Nvc2 * is2;
768  int ix2 = ix1 + m_Nvc;
769 
770  double vt1[m_Nvc], vt2[m_Nvc];
771 
772  for (int ic = 0; ic < m_Nc; ++ic) {
773  int ic_r = 2 * ic;
774  int ic_i = 2 * ic + 1;
775 
776  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id4 + in];
777  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id4 + in];
778  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id3 + in];
779  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id3 + in];
780  }
781 
782  for (int ic = 0; ic < m_Nc; ++ic) {
783  int ic2 = 2 * ic;
784 
785  int ic_r = 2 * ic;
786  int ic_i = 2 * ic + 1;
787 
788  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
789  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
790  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
791  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
792  }
793  }
794  }
795  }
796  }
797 
798 
799 //====================================================================
800  void Fopr_Wilson_eo::mult_ym2_thread(const int itask,
801  double *v2, const double *vcp2, const int ieo)
802  {
803  const int Nvc2 = 2 * m_Nvc;
804  const int Nvcd = m_Nvc * m_Nd;
805  const int Nvcd2 = Nvcd / 2;
806 
807  const int id1 = 0;
808  const int id2 = m_Nvc;
809  const int id3 = m_Nvc * 2;
810  const int id4 = m_Nvc * 3;
811 
812  const int idir = 1;
813  const int iy = 0;
814  const double bc2 = m_boundary_each_node[idir];
815 
816  const int isite = m_arg[itask].isite;
817  const int isite_cp = m_arg[itask].isite_cp_y;
818 
819  const double *w1 = &vcp2[Nvcd2 * isite_cp];
820  double *w2 = &v2[Nvcd * isite];
821 
822 
823  for (int it = 0; it < m_Mt; ++it) {
824  for (int iz = 0; iz < m_Mz; ++iz) {
825  for (int ix = 0; ix < m_Nx2; ++ix) {
826  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
827  int is2 = ix + m_Nx2 * (iz + m_Mz * it);
828  int iv = Nvcd * is;
829  int ix1 = Nvc2 * is2;
830  int ix2 = ix1 + m_Nvc;
831 
832  for (int ic = 0; ic < m_Nc; ++ic) {
833  int ic_r = 2 * ic;
834  int ic_i = 2 * ic + 1;
835 
836  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
837  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
838  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
839  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
840 
841  w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix2];
842  w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix2];
843  w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix1];
844  w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix1];
845  }
846  }
847  }
848  }
849  }
850 
851 
852 //====================================================================
853  void Fopr_Wilson_eo::mult_ymb_thread(const int itask,
854  double *v2, const double *v1, const int ieo)
855  {
856  const int Nvcd = m_Nvc * m_Nd;
857 
858  const int id1 = 0;
859  const int id2 = m_Nvc;
860  const int id3 = m_Nvc * 2;
861  const int id4 = m_Nvc * 3;
862 
863  const int idir = 1;
864 
865  const int isite = m_arg[itask].isite;
866 
867  const double *w1 = &v1[Nvcd * isite];
868  double *w2 = &v2[Nvcd * isite];
869  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
870 
871 
872  for (int it = 0; it < m_Mt; ++it) {
873  for (int iz = 0; iz < m_Mz; ++iz) {
874  for (int iy = 1; iy < m_Ny; ++iy) {
875  for (int ix = 0; ix < m_Nx2; ++ix) {
876  int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
877  int iv = Nvcd * is;
878  int in = Nvcd * (is - m_Nx2);
879  int ig = m_Ndf * (is - m_Nx2);
880 
881  double vt1[m_Nvc], vt2[m_Nvc];
882 
883  for (int ic = 0; ic < m_Nc; ++ic) {
884  int ic_r = 2 * ic;
885  int ic_i = 2 * ic + 1;
886 
887  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id4 + in];
888  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id4 + in];
889  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id3 + in];
890  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id3 + in];
891  }
892 
893  for (int ic = 0; ic < m_Nc; ++ic) {
894  int ic2 = 2 * ic;
895 
896  double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
897  double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
898  double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
899  double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
900 
901  int ic_r = 2 * ic;
902  int ic_i = 2 * ic + 1;
903 
904  w2[ic_r + id1 + iv] += wt1r;
905  w2[ic_i + id1 + iv] += wt1i;
906  w2[ic_r + id2 + iv] += wt2r;
907  w2[ic_i + id2 + iv] += wt2i;
908 
909  w2[ic_r + id3 + iv] += wt2r;
910  w2[ic_i + id3 + iv] += wt2i;
911  w2[ic_r + id4 + iv] -= wt1r;
912  w2[ic_i + id4 + iv] -= wt1i;
913  }
914  }
915  }
916  }
917  }
918  }
919 
920 
921 //====================================================================
922  void Fopr_Wilson_eo::mult_zp1_thread(const int itask,
923  double *vcp1, const double *v1, const int ieo)
924  {
925  const int Nvc2 = 2 * m_Nvc;
926  const int Nvcd = m_Nvc * m_Nd;
927  const int Nvcd2 = Nvcd / 2;
928 
929  const int id1 = 0;
930  const int id2 = m_Nvc;
931  const int id3 = m_Nvc * 2;
932  const int id4 = m_Nvc * 3;
933 
934  const int idir = 2;
935  const double bc2 = m_boundary_each_node[idir];
936 
937  const int isite = m_arg[itask].isite;
938  const int isite_cp = m_arg[itask].isite_cp_z;
939 
940  const double *w1 = &v1[Nvcd * isite];
941  double *w2 = &vcp1[Nvcd2 * isite_cp];
942 
943 
944  if (m_arg[itask].kz0 == 1) {
945  const int Nxy = m_Nx2 * m_Ny;
946  const int iz = 0;
947 
948  for (int it = 0; it < m_Mt; ++it) {
949  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
950  int is = i_xy + Nxy * (iz + m_Nz * it);
951  int is2 = i_xy + Nxy * it;
952  int in = Nvcd * is;
953  int ix1 = Nvc2 * is2;
954  int ix2 = ix1 + m_Nvc;
955 
956  for (int ic = 0; ic < m_Nc; ++ic) {
957  int ic_r = 2 * ic;
958  int ic_i = 2 * ic + 1;
959 
960  w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] - w1[ic_i + id3 + in]);
961  w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_r + id3 + in]);
962  w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_i + id4 + in]);
963  w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] - w1[ic_r + id4 + in]);
964  }
965  }
966  }
967  }
968  }
969 
970 
971 //====================================================================
972  void Fopr_Wilson_eo::mult_zp2_thread(const int itask,
973  double *v2, const double *vcp2, const int ieo)
974  {
975  const int Nvc2 = 2 * m_Nvc;
976  const int Nvcd = m_Nvc * m_Nd;
977  const int Nvcd2 = Nvcd / 2;
978 
979  const int id1 = 0;
980  const int id2 = m_Nvc;
981  const int id3 = m_Nvc * 2;
982  const int id4 = m_Nvc * 3;
983 
984  const int idir = 2;
985 
986  const int isite = m_arg[itask].isite;
987  const int isite_cp = m_arg[itask].isite_cp_z;
988 
989  const double *w1 = &vcp2[Nvcd2 * isite_cp];
990  double *w2 = &v2[Nvcd * isite];
991  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
992 
993 
994  if (m_arg[itask].kz1 == 1) {
995  const int Nxy = m_Nx2 * m_Ny;
996  const int iz = m_Mz - 1;
997 
998  for (int it = 0; it < m_Mt; ++it) {
999  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1000  int is = i_xy + Nxy * (iz + m_Nz * it);
1001  int is2 = i_xy + Nxy * it;
1002  int iv = Nvcd * is;
1003  int ig = m_Ndf * is;
1004  int ix1 = Nvc2 * is2;
1005  int ix2 = ix1 + m_Nvc;
1006 
1007  for (int ic = 0; ic < m_Nc; ++ic) {
1008  int ic2 = ic * m_Nvc;
1009 
1010  double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1011  double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1012  double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1013  double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1014 
1015  int ic_r = 2 * ic;
1016  int ic_i = 2 * ic + 1;
1017 
1018  w2[ic_r + id1 + iv] += wt1r;
1019  w2[ic_i + id1 + iv] += wt1i;
1020  w2[ic_r + id2 + iv] += wt2r;
1021  w2[ic_i + id2 + iv] += wt2i;
1022 
1023  w2[ic_r + id3 + iv] += wt1i;
1024  w2[ic_i + id3 + iv] -= wt1r;
1025  w2[ic_r + id4 + iv] -= wt2i;
1026  w2[ic_i + id4 + iv] += wt2r;
1027  }
1028  }
1029  }
1030  }
1031  }
1032 
1033 
1034 //====================================================================
1035  void Fopr_Wilson_eo::mult_zpb_thread(const int itask,
1036  double *v2, const double *v1, const int ieo)
1037  {
1038  const int Nvcd = m_Nvc * m_Nd;
1039  const int Nxy = m_Nx2 * m_Ny;
1040 
1041  const int id1 = 0;
1042  const int id2 = m_Nvc;
1043  const int id3 = m_Nvc * 2;
1044  const int id4 = m_Nvc * 3;
1045 
1046  const int idir = 2;
1047 
1048  const int isite = m_arg[itask].isite;
1049  const int kz1 = m_arg[itask].kz1;
1050 
1051  const double *w1 = &v1[Nvcd * isite];
1052  double *w2 = &v2[Nvcd * isite];
1053  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1054 
1055 
1056  for (int it = 0; it < m_Mt; ++it) {
1057  for (int iz = 0; iz < m_Mz - kz1; ++iz) {
1058  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1059  int is = i_xy + Nxy * (iz + m_Nz * it);
1060  int iv = Nvcd * is;
1061  int in = Nvcd * (is + Nxy);
1062  int ig = m_Ndf * is;
1063 
1064  double vt1[m_Nvc], vt2[m_Nvc];
1065 
1066  for (int ic = 0; ic < m_Nc; ++ic) {
1067  int ic_r = 2 * ic;
1068  int ic_i = 2 * ic + 1;
1069 
1070  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_i + id3 + in];
1071  vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_r + id3 + in];
1072  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_i + id4 + in];
1073  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_r + id4 + in];
1074  }
1075 
1076  for (int ic = 0; ic < m_Nc; ++ic) {
1077  int ic2 = ic * m_Nvc;
1078 
1079  double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1080  double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1081  double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1082  double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1083 
1084  int ic_r = 2 * ic;
1085  int ic_i = 2 * ic + 1;
1086 
1087  w2[ic_r + id1 + iv] += wt1r;
1088  w2[ic_i + id1 + iv] += wt1i;
1089  w2[ic_r + id2 + iv] += wt2r;
1090  w2[ic_i + id2 + iv] += wt2i;
1091 
1092  w2[ic_r + id3 + iv] += wt1i;
1093  w2[ic_i + id3 + iv] -= wt1r;
1094  w2[ic_r + id4 + iv] -= wt2i;
1095  w2[ic_i + id4 + iv] += wt2r;
1096  }
1097  }
1098  }
1099  }
1100  }
1101 
1102 
1103 //====================================================================
1104  void Fopr_Wilson_eo::mult_zm1_thread(const int itask,
1105  double *vcp1, const double *v1, const int ieo)
1106  {
1107  const int Nvc2 = 2 * m_Nvc;
1108  const int Nvcd = m_Nvc * m_Nd;
1109  const int Nvcd2 = Nvcd / 2;
1110 
1111  const int id1 = 0;
1112  const int id2 = m_Nvc;
1113  const int id3 = m_Nvc * 2;
1114  const int id4 = m_Nvc * 3;
1115 
1116  const int idir = 2;
1117 
1118  const int isite = m_arg[itask].isite;
1119  const int isite_cp = m_arg[itask].isite_cp_z;
1120 
1121  const double *w1 = &v1[Nvcd * isite];
1122  double *w2 = &vcp1[Nvcd2 * isite_cp];
1123  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1124 
1125 
1126  if (m_arg[itask].kz1 == 1) {
1127  const int Nxy = m_Nx2 * m_Ny;
1128  const int iz = m_Mz - 1;
1129 
1130  for (int it = 0; it < m_Mt; ++it) {
1131  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1132  int is = i_xy + Nxy * (iz + m_Nz * it);
1133  int is2 = i_xy + Nxy * it;
1134  int in = Nvcd * is;
1135  int ig = m_Ndf * is;
1136  int ix1 = Nvc2 * is2;
1137  int ix2 = ix1 + m_Nvc;
1138 
1139  double vt1[m_Nvc], vt2[m_Nvc];
1140 
1141  for (int ic = 0; ic < m_Nc; ++ic) {
1142  int ic_r = 2 * ic;
1143  int ic_i = 2 * ic + 1;
1144 
1145  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id3 + in];
1146  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id3 + in];
1147  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_i + id4 + in];
1148  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_r + id4 + in];
1149  }
1150 
1151  for (int ic = 0; ic < m_Nc; ++ic) {
1152  int ic2 = 2 * ic;
1153 
1154  int ic_r = 2 * ic;
1155  int ic_i = 2 * ic + 1;
1156 
1157  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1158  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1159  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1160  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1161  }
1162  }
1163  }
1164  }
1165  }
1166 
1167 
1168 //====================================================================
1169  void Fopr_Wilson_eo::mult_zm2_thread(const int itask,
1170  double *v2, const double *vcp2, const int ieo)
1171  {
1172  const int Nvc2 = 2 * m_Nvc;
1173  const int Nvcd = m_Nvc * m_Nd;
1174  const int Nvcd2 = Nvcd / 2;
1175 
1176  const int id1 = 0;
1177  const int id2 = m_Nvc;
1178  const int id3 = m_Nvc * 2;
1179  const int id4 = m_Nvc * 3;
1180 
1181  const int idir = 2;
1182  const double bc2 = m_boundary_each_node[idir];
1183 
1184  const int isite = m_arg[itask].isite;
1185  const int isite_cp = m_arg[itask].isite_cp_z;
1186 
1187  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1188  double *w2 = &v2[Nvcd * isite];
1189 
1190 
1191  if (m_arg[itask].kz0 == 1) {
1192  const int Nxy = m_Nx2 * m_Ny;
1193  const int iz = 0;
1194 
1195  for (int it = 0; it < m_Mt; ++it) {
1196  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1197  int is = i_xy + Nxy * (iz + m_Nz * it);
1198  int is2 = i_xy + Nxy * it;
1199  int iv = Nvcd * is;
1200  int ix1 = Nvc2 * is2;
1201  int ix2 = ix1 + m_Nvc;
1202 
1203  for (int ic = 0; ic < m_Nc; ++ic) {
1204  int ic_r = 2 * ic;
1205  int ic_i = 2 * ic + 1;
1206 
1207  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1208  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1209  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1210  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1211 
1212  w2[ic_r + id3 + iv] -= bc2 * w1[ic_i + ix1];
1213  w2[ic_i + id3 + iv] += bc2 * w1[ic_r + ix1];
1214  w2[ic_r + id4 + iv] += bc2 * w1[ic_i + ix2];
1215  w2[ic_i + id4 + iv] -= bc2 * w1[ic_r + ix2];
1216  }
1217  }
1218  }
1219  }
1220  }
1221 
1222 
1223 //====================================================================
1224  void Fopr_Wilson_eo::mult_zmb_thread(const int itask,
1225  double *v2, const double *v1, const int ieo)
1226  {
1227  const int Nvcd = m_Nvc * m_Nd;
1228  const int Nxy = m_Nx2 * m_Ny;
1229 
1230  const int id1 = 0;
1231  const int id2 = m_Nvc;
1232  const int id3 = m_Nvc * 2;
1233  const int id4 = m_Nvc * 3;
1234 
1235  const int idir = 2;
1236 
1237  const int isite = m_arg[itask].isite;
1238  const int kz0 = m_arg[itask].kz0;
1239 
1240  const double *w1 = &v1[Nvcd * isite];
1241  double *w2 = &v2[Nvcd * isite];
1242  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1243 
1244 
1245  for (int it = 0; it < m_Mt; ++it) {
1246  for (int iz = kz0; iz < m_Mz; ++iz) {
1247  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1248  int is = i_xy + Nxy * (iz + m_Nz * it);
1249  int iv = Nvcd * is;
1250  int in = Nvcd * (is - Nxy);
1251  int ig = m_Ndf * (is - Nxy);
1252 
1253  double vt1[m_Nvc], vt2[m_Nvc];
1254 
1255  for (int ic = 0; ic < m_Nc; ++ic) {
1256  int ic_r = 2 * ic;
1257  int ic_i = 2 * ic + 1;
1258 
1259  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id3 + in];
1260  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id3 + in];
1261  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_i + id4 + in];
1262  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_r + id4 + in];
1263  }
1264 
1265  for (int ic = 0; ic < m_Nc; ++ic) {
1266  int ic2 = 2 * ic;
1267 
1268  double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1269  double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1270  double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1271  double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1272 
1273  int ic_r = 2 * ic;
1274  int ic_i = 2 * ic + 1;
1275 
1276  w2[ic_r + id1 + iv] += wt1r;
1277  w2[ic_i + id1 + iv] += wt1i;
1278  w2[ic_r + id2 + iv] += wt2r;
1279  w2[ic_i + id2 + iv] += wt2i;
1280 
1281  w2[ic_r + id3 + iv] -= wt1i;
1282  w2[ic_i + id3 + iv] += wt1r;
1283  w2[ic_r + id4 + iv] += wt2i;
1284  w2[ic_i + id4 + iv] -= wt2r;
1285  }
1286  }
1287  }
1288  }
1289  }
1290 
1291 
1292 //====================================================================
1294  double *vcp1, const double *v1, const int ieo)
1295  {
1296  const int Nvc2 = 2 * m_Nvc;
1297  const int Nvcd = m_Nvc * m_Nd;
1298  const int Nvcd2 = Nvcd / 2;
1299 
1300  const int id3 = m_Nvc * 2;
1301  const int id4 = m_Nvc * 3;
1302 
1303  const int idir = 3;
1304  const double bc2 = m_boundary_each_node[idir];
1305 
1306  const int isite = m_arg[itask].isite;
1307  const int isite_cp = m_arg[itask].isite_cp_t;
1308 
1309  const double *w1 = &v1[Nvcd * isite];
1310  double *w2 = &vcp1[Nvcd2 * isite_cp];
1311 
1312 
1313  if (m_arg[itask].kt0 == 1) {
1314  const int Nxy = m_Nx2 * m_Ny;
1315  const int it = 0;
1316 
1317  for (int iz = 0; iz < m_Mz; ++iz) {
1318  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1319  int is = i_xy + Nxy * (iz + m_Nz * it);
1320  int is2 = i_xy + Nxy * iz;
1321  int in = Nvcd * is;
1322  int ix1 = Nvc2 * is2;
1323  int ix2 = ix1 + m_Nvc;
1324 
1325  for (int ic = 0; ic < m_Nc; ++ic) {
1326  int ic_r = 2 * ic;
1327  int ic_i = 2 * ic + 1;
1328 
1329  w2[ic_r + ix1] = 2.0 * bc2 * w1[ic_r + id3 + in];
1330  w2[ic_i + ix1] = 2.0 * bc2 * w1[ic_i + id3 + in];
1331  w2[ic_r + ix2] = 2.0 * bc2 * w1[ic_r + id4 + in];
1332  w2[ic_i + ix2] = 2.0 * bc2 * w1[ic_i + id4 + in];
1333  }
1334  }
1335  }
1336  }
1337  }
1338 
1339 
1340 //====================================================================
1342  double *v2, const double *vcp2, const int ieo)
1343  {
1344  const int Nvc2 = 2 * m_Nvc;
1345  const int Nvcd = m_Nvc * m_Nd;
1346  const int Nvcd2 = Nvcd / 2;
1347 
1348  const int id3 = m_Nvc * 2;
1349  const int id4 = m_Nvc * 3;
1350 
1351  const int idir = 3;
1352 
1353  const int isite = m_arg[itask].isite;
1354  const int isite_cp = m_arg[itask].isite_cp_t;
1355 
1356  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1357  double *w2 = &v2[Nvcd * isite];
1358  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1359 
1360 
1361  if (m_arg[itask].kt1 == 1) {
1362  const int Nxy = m_Nx2 * m_Ny;
1363  const int it = m_Mt - 1;
1364 
1365  for (int iz = 0; iz < m_Mz; ++iz) {
1366  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1367  int is = i_xy + Nxy * (iz + m_Nz * it);
1368  int is2 = i_xy + Nxy * iz;
1369  int iv = Nvcd * is;
1370  int ig = m_Ndf * is;
1371  int ix1 = Nvc2 * is2;
1372  int ix2 = ix1 + m_Nvc;
1373 
1374  for (int ic = 0; ic < m_Nc; ++ic) {
1375  int ic2 = ic * m_Nvc;
1376 
1377  int ic_r = 2 * ic;
1378  int ic_i = 2 * ic + 1;
1379 
1380  w2[ic_r + id3 + iv] += mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1381  w2[ic_i + id3 + iv] += mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1382  w2[ic_r + id4 + iv] += mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1383  w2[ic_i + id4 + iv] += mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1384  }
1385  }
1386  }
1387  }
1388  }
1389 
1390 
1391 //====================================================================
1393  double *v2, const double *v1, const int ieo)
1394  {
1395  const int Nvcd = m_Nvc * m_Nd;
1396  const int Nxy = m_Nx2 * m_Ny;
1397  const int Nxyz = Nxy * m_Nz;
1398 
1399  const int id3 = m_Nvc * 2;
1400  const int id4 = m_Nvc * 3;
1401 
1402  const int idir = 3;
1403 
1404  const int isite = m_arg[itask].isite;
1405  const int kt1 = m_arg[itask].kt1;
1406 
1407  const double *w1 = &v1[Nvcd * isite];
1408  double *w2 = &v2[Nvcd * isite];
1409  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1410 
1411 
1412  for (int it = 0; it < m_Mt - kt1; ++it) {
1413  for (int iz = 0; iz < m_Mz; ++iz) {
1414  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1415  int is = i_xy + Nxy * (iz + m_Nz * it);
1416  int iv = Nvcd * is;
1417  int in = Nvcd * (is + Nxyz);
1418  int ig = m_Ndf * is;
1419 
1420  double vt1[m_Nvc], vt2[m_Nvc];
1421 
1422  for (int ic = 0; ic < m_Nc; ++ic) {
1423  int ic_r = 2 * ic;
1424  int ic_i = 2 * ic + 1;
1425 
1426  vt1[ic_r] = 2.0 * w1[ic_r + id3 + in];
1427  vt1[ic_i] = 2.0 * w1[ic_i + id3 + in];
1428  vt2[ic_r] = 2.0 * w1[ic_r + id4 + in];
1429  vt2[ic_i] = 2.0 * w1[ic_i + id4 + in];
1430  }
1431 
1432  for (int ic = 0; ic < m_Nc; ++ic) {
1433  int ic2 = ic * m_Nvc;
1434 
1435  int ic_r = 2 * ic;
1436  int ic_i = 2 * ic + 1;
1437 
1438  w2[ic_r + id3 + iv] += mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1439  w2[ic_i + id3 + iv] += mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1440  w2[ic_r + id4 + iv] += mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1441  w2[ic_i + id4 + iv] += mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1442  }
1443  }
1444  }
1445  }
1446  }
1447 
1448 
1449 //====================================================================
1451  double *vcp1, const double *v1, const int ieo)
1452  {
1453  const int Nvc2 = 2 * m_Nvc;
1454  const int Nvcd = m_Nvc * m_Nd;
1455  const int Nvcd2 = Nvcd / 2;
1456 
1457  const int id1 = 0;
1458  const int id2 = m_Nvc;
1459 
1460  const int idir = 3;
1461 
1462  const int isite = m_arg[itask].isite;
1463  const int isite_cp = m_arg[itask].isite_cp_t;
1464 
1465  const double *w1 = &v1[Nvcd * isite];
1466  double *w2 = &vcp1[Nvcd2 * isite_cp];
1467  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1468 
1469 
1470  if (m_arg[itask].kt1 == 1) {
1471  const int Nxy = m_Nx2 * m_Ny;
1472  const int it = m_Mt - 1;
1473 
1474  for (int iz = 0; iz < m_Mz; ++iz) {
1475  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1476  int is = i_xy + Nxy * (iz + m_Nz * it);
1477  int is2 = i_xy + Nxy * iz;
1478  int in = Nvcd * is;
1479  int ig = m_Ndf * is;
1480  int ix1 = Nvc2 * is2;
1481  int ix2 = ix1 + m_Nvc;
1482 
1483  double vt1[m_Nvc], vt2[m_Nvc];
1484 
1485  for (int ic = 0; ic < m_Nc; ++ic) {
1486  int ic_r = 2 * ic;
1487  int ic_i = 2 * ic + 1;
1488 
1489  vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1490  vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1491  vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1492  vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1493  }
1494 
1495  for (int ic = 0; ic < m_Nc; ++ic) {
1496  int ic2 = 2 * ic;
1497 
1498  int ic_r = 2 * ic;
1499  int ic_i = 2 * ic + 1;
1500 
1501  w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1502  w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1503  w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1504  w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1505  }
1506  }
1507  }
1508  }
1509  }
1510 
1511 
1512 //====================================================================
1514  double *v2, const double *vcp2, const int ieo)
1515  {
1516  const int Nvc2 = 2 * m_Nvc;
1517  const int Nvcd = m_Nvc * m_Nd;
1518  const int Nvcd2 = Nvcd / 2;
1519 
1520  const int id1 = 0;
1521  const int id2 = m_Nvc;
1522 
1523  const int idir = 3;
1524  const double bc2 = m_boundary_each_node[idir];
1525 
1526  const int isite = m_arg[itask].isite;
1527  const int isite_cp = m_arg[itask].isite_cp_t;
1528 
1529  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1530  double *w2 = &v2[Nvcd * isite];
1531 
1532 
1533  if (m_arg[itask].kt0 == 1) {
1534  const int Nxy = m_Nx2 * m_Ny;
1535  const int it = 0;
1536 
1537  for (int iz = 0; iz < m_Mz; ++iz) {
1538  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1539  int is = i_xy + Nxy * (iz + m_Nz * it);
1540  int is2 = i_xy + Nxy * iz;
1541  int iv = Nvcd * is;
1542  int ix1 = Nvc2 * is2;
1543  int ix2 = ix1 + m_Nvc;
1544 
1545  for (int ic = 0; ic < m_Nc; ++ic) {
1546  int ic_r = 2 * ic;
1547  int ic_i = 2 * ic + 1;
1548 
1549  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1550  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1551  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1552  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1553  }
1554  }
1555  }
1556  }
1557  }
1558 
1559 
1560 //====================================================================
1562  double *v2, const double *v1, const int ieo)
1563  {
1564  const int Nvcd = m_Nvc * m_Nd;
1565  const int Nxy = m_Nx2 * m_Ny;
1566  const int Nxyz = Nxy * m_Nz;
1567 
1568  const int id1 = 0;
1569  const int id2 = m_Nvc;
1570 
1571  const int idir = 3;
1572 
1573  const int isite = m_arg[itask].isite;
1574  const int kt0 = m_arg[itask].kt0;
1575 
1576  const double *w1 = &v1[Nvcd * isite];
1577  double *w2 = &v2[Nvcd * isite];
1578  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1579 
1580 
1581  for (int it = kt0; it < m_Mt; ++it) {
1582  for (int iz = 0; iz < m_Mz; ++iz) {
1583  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1584  int is = i_xy + Nxy * (iz + m_Nz * it);
1585  int iv = Nvcd * is;
1586  int in = Nvcd * (is - Nxyz);
1587  int ig = m_Ndf * (is - Nxyz);
1588 
1589  double vt1[m_Nvc], vt2[m_Nvc];
1590 
1591  for (int ic = 0; ic < m_Nc; ++ic) {
1592  int ic_r = 2 * ic;
1593  int ic_i = 2 * ic + 1;
1594 
1595  vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1596  vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1597  vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1598  vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1599  }
1600 
1601  for (int ic = 0; ic < m_Nc; ++ic) {
1602  int ic2 = 2 * ic;
1603 
1604  int ic_r = 2 * ic;
1605  int ic_i = 2 * ic + 1;
1606 
1607  w2[ic_r + id1 + iv] += mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1608  w2[ic_i + id1 + iv] += mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1609  w2[ic_r + id2 + iv] += mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1610  w2[ic_i + id2 + iv] += mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1611  }
1612  }
1613  }
1614  }
1615  }
1616 
1617 
1618 //====================================================================
1620  double *vcp1, const double *v1, const int ieo)
1621  {
1622  const int Nvc2 = 2 * m_Nvc;
1623  const int Nvcd = m_Nvc * m_Nd;
1624  const int Nvcd2 = Nvcd / 2;
1625 
1626  const int id1 = 0;
1627  const int id2 = m_Nvc;
1628  const int id3 = m_Nvc * 2;
1629  const int id4 = m_Nvc * 3;
1630 
1631  const int idir = 3;
1632  const double bc2 = m_boundary_each_node[idir];
1633 
1634  const int isite = m_arg[itask].isite;
1635  const int isite_cp = m_arg[itask].isite_cp_t;
1636 
1637  const double *w1 = &v1[Nvcd * isite];
1638  double *w2 = &vcp1[Nvcd2 * isite_cp];
1639 
1640 
1641  if (m_arg[itask].kt0 == 1) {
1642  const int Nxy = m_Nx2 * m_Ny;
1643  const int it = 0;
1644 
1645  for (int iz = 0; iz < m_Mz; ++iz) {
1646  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1647  int is = i_xy + Nxy * (iz + m_Nz * it);
1648  int is2 = i_xy + Nxy * iz;
1649  int in = Nvcd * is;
1650  int ix1 = Nvc2 * is2;
1651  int ix2 = ix1 + m_Nvc;
1652 
1653  for (int ic = 0; ic < m_Nc; ++ic) {
1654  int ic_r = 2 * ic;
1655  int ic_i = 2 * ic + 1;
1656 
1657  w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id3 + in]);
1658  w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id3 + in]);
1659  w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_r + id4 + in]);
1660  w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_i + id4 + in]);
1661  }
1662  }
1663  }
1664  }
1665  }
1666 
1667 
1668 //====================================================================
1670  double *v2, const double *vcp2, const int ieo)
1671  {
1672  const int Nvc2 = 2 * m_Nvc;
1673  const int Nvcd = m_Nvc * m_Nd;
1674  const int Nvcd2 = Nvcd / 2;
1675 
1676  const int id1 = 0;
1677  const int id2 = m_Nvc;
1678  const int id3 = m_Nvc * 2;
1679  const int id4 = m_Nvc * 3;
1680 
1681  const int idir = 3;
1682 
1683  const int isite = m_arg[itask].isite;
1684  const int isite_cp = m_arg[itask].isite_cp_t;
1685 
1686  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1687  double *w2 = &v2[Nvcd * isite];
1688  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1689 
1690 
1691  if (m_arg[itask].kt1 == 1) {
1692  const int Nxy = m_Nx2 * m_Ny;
1693  const int it = m_Mt - 1;
1694 
1695  for (int iz = 0; iz < m_Mz; ++iz) {
1696  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1697  int is = i_xy + Nxy * (iz + m_Nz * it);
1698  int is2 = i_xy + Nxy * iz;
1699  int iv = Nvcd * is;
1700  int ig = m_Ndf * is;
1701  int ix1 = Nvc2 * is2;
1702  int ix2 = ix1 + m_Nvc;
1703 
1704  for (int ic = 0; ic < m_Nc; ++ic) {
1705  int ic2 = ic * m_Nvc;
1706 
1707  double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1708  double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1709  double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1710  double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1711 
1712  int ic_r = 2 * ic;
1713  int ic_i = 2 * ic + 1;
1714 
1715  w2[ic_r + id1 + iv] += wt1r;
1716  w2[ic_i + id1 + iv] += wt1i;
1717  w2[ic_r + id2 + iv] += wt2r;
1718  w2[ic_i + id2 + iv] += wt2i;
1719 
1720  w2[ic_r + id3 + iv] += wt1r;
1721  w2[ic_i + id3 + iv] += wt1i;
1722  w2[ic_r + id4 + iv] += wt2r;
1723  w2[ic_i + id4 + iv] += wt2i;
1724  }
1725  }
1726  }
1727  }
1728  }
1729 
1730 
1731 //====================================================================
1733  double *v2, const double *v1, const int ieo)
1734  {
1735  const int Nvcd = m_Nvc * m_Nd;
1736  const int Nxy = m_Nx2 * m_Ny;
1737  const int Nxyz = Nxy * m_Nz;
1738 
1739  const int id1 = 0;
1740  const int id2 = m_Nvc;
1741  const int id3 = m_Nvc * 2;
1742  const int id4 = m_Nvc * 3;
1743 
1744  const int idir = 3;
1745 
1746  const int isite = m_arg[itask].isite;
1747  const int kt1 = m_arg[itask].kt1;
1748 
1749  const double *w1 = &v1[Nvcd * isite];
1750  double *w2 = &v2[Nvcd * isite];
1751  const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1752 
1753 
1754  for (int it = 0; it < m_Mt - kt1; ++it) {
1755  for (int iz = 0; iz < m_Mz; ++iz) {
1756  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1757  int is = i_xy + Nxy * (iz + m_Nz * it);
1758  int iv = Nvcd * is;
1759  int in = Nvcd * (is + Nxyz);
1760  int ig = m_Ndf * is;
1761 
1762  double vt1[m_Nvc], vt2[m_Nvc];
1763 
1764  for (int ic = 0; ic < m_Nc; ++ic) {
1765  int ic_r = 2 * ic;
1766  int ic_i = 2 * ic + 1;
1767 
1768  vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id3 + in];
1769  vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id3 + in];
1770  vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id4 + in];
1771  vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id4 + in];
1772  }
1773 
1774  for (int ic = 0; ic < m_Nc; ++ic) {
1775  int ic2 = ic * m_Nvc;
1776 
1777  double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1778  double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1779  double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1780  double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1781 
1782  int ic_r = 2 * ic;
1783  int ic_i = 2 * ic + 1;
1784 
1785  w2[ic_r + id1 + iv] += wt1r;
1786  w2[ic_i + id1 + iv] += wt1i;
1787  w2[ic_r + id2 + iv] += wt2r;
1788  w2[ic_i + id2 + iv] += wt2i;
1789 
1790  w2[ic_r + id3 + iv] += wt1r;
1791  w2[ic_i + id3 + iv] += wt1i;
1792  w2[ic_r + id4 + iv] += wt2r;
1793  w2[ic_i + id4 + iv] += wt2i;
1794  }
1795  }
1796  }
1797  }
1798  }
1799 
1800 
1801 //====================================================================
1803  double *vcp1, const double *v1, const int ieo)
1804  {
1805  const int Nvc2 = 2 * m_Nvc;
1806  const int Nvcd = m_Nvc * m_Nd;
1807  const int Nvcd2 = Nvcd / 2;
1808 
1809  const int id1 = 0;
1810  const int id2 = m_Nvc;
1811  const int id3 = m_Nvc * 2;
1812  const int id4 = m_Nvc * 3;
1813 
1814  const int idir = 3;
1815 
1816  const int isite = m_arg[itask].isite;
1817  const int isite_cp = m_arg[itask].isite_cp_t;
1818 
1819  const double *w1 = &v1[Nvcd * isite];
1820  double *w2 = &vcp1[Nvcd2 * isite_cp];
1821  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1822 
1823 
1824  if (m_arg[itask].kt1 == 1) {
1825  const int Nxy = m_Nx2 * m_Ny;
1826  const int it = m_Mt - 1;
1827 
1828  for (int iz = 0; iz < m_Mz; ++iz) {
1829  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1830  int is = i_xy + Nxy * (iz + m_Nz * it);
1831  int is2 = i_xy + Nxy * iz;
1832  int in = Nvcd * is;
1833  int ig = m_Ndf * is;
1834  int ix1 = Nvc2 * is2;
1835  int ix2 = ix1 + m_Nvc;
1836 
1837  double vt1[m_Nvc], vt2[m_Nvc];
1838 
1839  for (int ic = 0; ic < m_Nc; ++ic) {
1840  int ic_r = 2 * ic;
1841  int ic_i = 2 * ic + 1;
1842 
1843  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
1844  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
1845  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
1846  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
1847  }
1848 
1849  for (int ic = 0; ic < m_Nc; ++ic) {
1850  int ic2 = 2 * ic;
1851 
1852  int ic_r = 2 * ic;
1853  int ic_i = 2 * ic + 1;
1854 
1855  w2[ic_r + ix1] = mult_udagv_r(&u[ic_r + ig], vt1, m_Nc);
1856  w2[ic_i + ix1] = mult_udagv_i(&u[ic_r + ig], vt1, m_Nc);
1857  w2[ic_r + ix2] = mult_udagv_r(&u[ic_r + ig], vt2, m_Nc);
1858  w2[ic_i + ix2] = mult_udagv_i(&u[ic_r + ig], vt2, m_Nc);
1859  }
1860  }
1861  }
1862  }
1863  }
1864 
1865 
1866 //====================================================================
1868  double *v2, const double *vcp2, const int ieo)
1869  {
1870  const int Nvc2 = 2 * m_Nvc;
1871  const int Nvcd = m_Nvc * m_Nd;
1872  const int Nvcd2 = Nvcd / 2;
1873 
1874  const int id1 = 0;
1875  const int id2 = m_Nvc;
1876  const int id3 = m_Nvc * 2;
1877  const int id4 = m_Nvc * 3;
1878 
1879  const int idir = 3;
1880  const double bc2 = m_boundary_each_node[idir];
1881 
1882  const int isite = m_arg[itask].isite;
1883  const int isite_cp = m_arg[itask].isite_cp_t;
1884 
1885  const double *w1 = &vcp2[Nvcd2 * isite_cp];
1886  double *w2 = &v2[Nvcd * isite];
1887 
1888 
1889  if (m_arg[itask].kt0 == 1) {
1890  const int Nxy = m_Nx2 * m_Ny;
1891  const int it = 0;
1892 
1893  for (int iz = 0; iz < m_Mz; ++iz) {
1894  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1895  int is = i_xy + Nxy * (iz + m_Nz * it);
1896  int is2 = i_xy + Nxy * iz;
1897  int iv = Nvcd * is;
1898  int ix1 = Nvc2 * is2;
1899  int ix2 = ix1 + m_Nvc;
1900 
1901  for (int ic = 0; ic < m_Nc; ++ic) {
1902  int ic_r = 2 * ic;
1903  int ic_i = 2 * ic + 1;
1904 
1905  w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1906  w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1907  w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1908  w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1909 
1910  w2[ic_r + id3 + iv] -= bc2 * w1[ic_r + ix1];
1911  w2[ic_i + id3 + iv] -= bc2 * w1[ic_i + ix1];
1912  w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix2];
1913  w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix2];
1914  }
1915  }
1916  }
1917  }
1918  }
1919 
1920 
1921 //====================================================================
1923  double *v2, const double *v1, const int ieo)
1924  {
1925  const int Nvcd = m_Nvc * m_Nd;
1926  const int Nxy = m_Nx2 * m_Ny;
1927  const int Nxyz = Nxy * m_Nz;
1928 
1929  const int id1 = 0;
1930  const int id2 = m_Nvc;
1931  const int id3 = m_Nvc * 2;
1932  const int id4 = m_Nvc * 3;
1933 
1934  const int idir = 3;
1935 
1936  const int isite = m_arg[itask].isite;
1937  const int kt0 = m_arg[itask].kt0;
1938 
1939  const double *w1 = &v1[Nvcd * isite];
1940  double *w2 = &v2[Nvcd * isite];
1941  const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1942 
1943 
1944  for (int it = kt0; it < m_Mt; ++it) {
1945  for (int iz = 0; iz < m_Mz; ++iz) {
1946  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
1947  int is = i_xy + Nxy * (iz + m_Nz * it);
1948  int iv = Nvcd * is;
1949  int in = Nvcd * (is - Nxyz);
1950  int ig = m_Ndf * (is - Nxyz);
1951 
1952  double vt1[m_Nvc], vt2[m_Nvc];
1953 
1954  for (int ic = 0; ic < m_Nc; ++ic) {
1955  int ic_r = 2 * ic;
1956  int ic_i = 2 * ic + 1;
1957 
1958  vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
1959  vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
1960  vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
1961  vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
1962  }
1963 
1964  for (int ic = 0; ic < m_Nc; ++ic) {
1965  int ic2 = 2 * ic;
1966 
1967  double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1968  double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1969  double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1970  double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1971 
1972  int ic_r = 2 * ic;
1973  int ic_i = 2 * ic + 1;
1974 
1975  w2[ic_r + id1 + iv] += wt1r;
1976  w2[ic_i + id1 + iv] += wt1i;
1977  w2[ic_r + id2 + iv] += wt2r;
1978  w2[ic_i + id2 + iv] += wt2i;
1979 
1980  w2[ic_r + id3 + iv] -= wt1r;
1981  w2[ic_i + id3 + iv] -= wt1i;
1982  w2[ic_r + id4 + iv] -= wt2r;
1983  w2[ic_i + id4 + iv] -= wt2i;
1984  }
1985  }
1986  }
1987  }
1988  }
1989 
1990 
1991 //====================================================================
1992  void Fopr_Wilson_eo::gm5_dirac_thread(const int itask,
1993  double *v2, const double *v1)
1994  {
1995  const int Nvcd = m_Nvc * m_Nd;
1996  const int Nxy = m_Nx2 * m_Ny;
1997 
1998  const int id1 = 0;
1999  const int id2 = m_Nvc;
2000  const int id3 = m_Nvc * 2;
2001  const int id4 = m_Nvc * 3;
2002 
2003  const int isite = m_arg[itask].isite;
2004 
2005  const double *w1 = &v1[Nvcd * isite];
2006  double *w2 = &v2[Nvcd * isite];
2007 
2008 
2009  for (int it = 0; it < m_Mt; ++it) {
2010  for (int iz = 0; iz < m_Mz; ++iz) {
2011  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
2012  int iv = Nvcd * (i_xy + Nxy * (iz + m_Nz * it));
2013 
2014  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2015  w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2016  w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2017 
2018  w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2019  w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
2020  }
2021  }
2022  }
2023  }
2024  }
2025 
2026 
2027 //====================================================================
2029  double *v2, const double *v1)
2030  {
2031  const int Nvcd = m_Nvc * m_Nd;
2032  const int Nxy = m_Nx2 * m_Ny;
2033 
2034  const int id1 = 0;
2035  const int id2 = m_Nvc;
2036  const int id3 = m_Nvc * 2;
2037  const int id4 = m_Nvc * 3;
2038 
2039  const int isite = m_arg[itask].isite;
2040 
2041  const double *w1 = &v1[Nvcd * isite];
2042  double *w2 = &v2[Nvcd * isite];
2043 
2044 
2045  for (int it = 0; it < m_Mt; ++it) {
2046  for (int iz = 0; iz < m_Mz; ++iz) {
2047  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
2048  int iv = Nvcd * (i_xy + Nxy * (iz + m_Nz * it));
2049 
2050  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2051  w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2052  w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2053 
2054  w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2055  w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2056  }
2057  }
2058  }
2059  }
2060  }
2061 
2062 
2063 //====================================================================
2064  void Fopr_Wilson_eo::gm5_dirac_thread(const int itask,
2065  double *v1)
2066  {
2067  const int Nvcd = m_Nvc * m_Nd;
2068  const int Nxy = m_Nx2 * m_Ny;
2069 
2070  const int id1 = 0;
2071  const int id2 = m_Nvc;
2072  const int id3 = m_Nvc * 2;
2073  const int id4 = m_Nvc * 3;
2074 
2075  const int isite = m_arg[itask].isite;
2076 
2077  double *w1 = &v1[Nvcd * isite];
2078 
2079 
2080  for (int it = 0; it < m_Mt; ++it) {
2081  for (int iz = 0; iz < m_Mz; ++iz) {
2082  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
2083  int iv = Nvcd * (i_xy + Nxy * (iz + m_Nz * it));
2084  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2085  double wt1 = w1[ivc + id1 + iv];
2086  double wt2 = w1[ivc + id2 + iv];
2087 
2088  w1[ivc + id1 + iv] = w1[ivc + id3 + iv];
2089  w1[ivc + id2 + iv] = w1[ivc + id4 + iv];
2090 
2091  w1[ivc + id3 + iv] = wt1;
2092  w1[ivc + id4 + iv] = wt2;
2093  }
2094  }
2095  }
2096  }
2097  }
2098 
2099 
2100 //====================================================================
2102  double *v1)
2103  {
2104  const int Nvcd = m_Nvc * m_Nd;
2105  const int Nxy = m_Nx2 * m_Ny;
2106 
2107  const int id3 = m_Nvc * 2;
2108  const int id4 = m_Nvc * 3;
2109 
2110  const int isite = m_arg[itask].isite;
2111 
2112  double *w1 = &v1[Nvcd * isite];
2113 
2114 
2115  for (int it = 0; it < m_Mt; ++it) {
2116  for (int iz = 0; iz < m_Mz; ++iz) {
2117  for (int i_xy = 0; i_xy < Nxy; ++i_xy) {
2118  int iv = Nvcd * (i_xy + Nxy * (iz + m_Nz * it));
2119  for (int ivc = 0; ivc < m_Nvc; ++ivc) {
2120  w1[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2121  w1[ivc + id4 + iv] = -w1[ivc + id4 + iv];
2122  }
2123  }
2124  }
2125  }
2126  }
2127 
2128 
2129 //====================================================================
2130 }
2131 //============================================================END=====
void mult_zp2_thread(const int, double *, const double *, const int)
void mult_tp1_dirac_thread(const int, double *, const double *, const int)
BridgeIO vout
Definition: bridgeIO.cpp:503
void mult_zpb_thread(const int, double *, const double *, const int)
void mult_ymb_thread(const int, double *, const double *, const int)
const double * ptr(const int jin, const int site, const int jex) const
Definition: field.h:153
void gm5_chiral_thread(const int, double *, const double *)
void mult_zm2_thread(const int, double *, const double *, const int)
void clear_thread(const int, double *)
void general(const char *format,...)
Definition: bridgeIO.cpp:197
void mult_xp2_thread(const int, double *, const double *, const int)
std::vector< double > m_boundary_each_node
b.c. for each node.
void mult_tmb_chiral_thread(const int, double *, const double *, const int)
void mult_zp1_thread(const int, double *, const double *, const int)
void mult_tp2_dirac_thread(const int, double *, const double *, const int)
std::vector< int > m_yzt_eo
void mult_xp1_thread(const int, double *, const double *, const int)
void mult_ym1_thread(const int, double *, const double *, const int)
void Meo(Field &, const Field &, const int ieo)
void gm5_dirac_thread(const int, double *, const double *)
void mult_xm1_thread(const int, double *, const double *, const int)
void mult_tpb_dirac_thread(const int, double *, const double *, const int)
void mult_tm2_dirac_thread(const int, double *, const double *, const int)
void mult_tp2_chiral_thread(const int, double *, const double *, const int)
void mult_yp1_thread(const int, double *, const double *, const int)
Bridge::VerboseLevel m_vl
Definition: fopr.h:127
void mult_zmb_thread(const int, double *, const double *, const int)
void mult_xm2_thread(const int, double *, const double *, const int)
Field_G * m_U
dummy: pointing m_Ueo.
void mult_tm1_dirac_thread(const int, double *, const double *, const int)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_tpb_chiral_thread(const int, double *, const double *, const int)
void mult_yp2_thread(const int, double *, const double *, const int)
void mult_tp1_chiral_thread(const int, double *, const double *, const int)
void crucial(const char *format,...)
Definition: bridgeIO.cpp:178
std::vector< mult_arg > m_arg
void mult_tm2_chiral_thread(const int, double *, const double *, const int)
void mult_tm1_chiral_thread(const int, double *, const double *, const int)
void mult_xmb_thread(const int, double *, const double *, const int)
void mult_xpb_thread(const int, double *, const double *, const int)
void scal_thread(const int, double *, const double)
void mult_ym2_thread(const int, double *, const double *, const int)
static const std::string class_name
void mult_tmb_dirac_thread(const int, double *, const double *, const int)
void mult_ypb_thread(const int, double *, const double *, const int)
void mult_zm1_thread(const int, double *, const double *, const int)