16 #if defined USE_GROUP_SU3
17 #include "fopr_Wilson_impl_SU3.inc"
18 #elif defined USE_GROUP_SU2
19 #include "fopr_Wilson_impl_SU2.inc"
20 #elif defined USE_GROUP_SU_N
21 #include "fopr_Wilson_impl_SU_N.inc"
44 vout.
crucial(
m_vl,
"%s: Nz = %d and Nt = %d do not match Nthread = %d\n",
54 vout.
crucial(
m_vl,
"%s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
60 vout.
crucial(
m_vl,
"%s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
90 for (
int ithread_t = 0; ithread_t <
m_Ntask_t; ++ithread_t) {
91 for (
int ithread_z = 0; ithread_z <
m_Ntask_z; ++ithread_z) {
92 int itask = ithread_z + m_Ntask_z * ithread_t;
100 if (ithread_t == 0)
m_arg[itask].kt0 = 1;
101 if (ithread_z == 0)
m_arg[itask].kz0 = 1;
102 if (ithread_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
103 if (ithread_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
107 m_arg[itask].isite_cp_z = ithread_t *
m_Mt * Nxy;
108 m_arg[itask].isite_cp_t = ithread_z *
m_Mz * Nxy;
116 int itask,
double *v2,
double fac,
const double *v1)
118 int Nvcd = m_Nvc * m_Nd;
119 int Nvxy = Nvcd * m_Nx * m_Ny;
121 int isite = m_arg[itask].isite;
123 const double *w1 = &v1[Nvcd * isite];
124 double *w2 = &v2[Nvcd * isite];
126 for (
int it = 0; it < m_Mt; ++it) {
127 for (
int iz = 0; iz < m_Mz; ++iz) {
128 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
129 int iv = ivxy + Nvxy * (iz + m_Nz * it);
130 w2[iv] += fac * w1[iv];
139 int itask,
double *v2,
double fac,
const double *v1)
141 int Nvcd = m_Nvc * m_Nd;
142 int Nvxy = Nvcd * m_Nx * m_Ny;
144 int isite = m_arg[itask].isite;
145 const double *w1 = &v1[Nvcd * isite];
146 double *w2 = &v2[Nvcd * isite];
148 for (
int it = 0; it < m_Mt; ++it) {
149 for (
int iz = 0; iz < m_Mz; ++iz) {
150 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
151 int iv = ivxy + Nvxy * (iz + m_Nz * it);
152 w2[iv] = fac * w2[iv] + w1[iv];
161 double *v,
double fac)
163 int Nvcd = m_Nvc * m_Nd;
164 int Nvxy = Nvcd * m_Nx * m_Ny;
166 int isite = m_arg[itask].isite;
167 double *w = &v[Nvcd * isite];
169 for (
int it = 0; it < m_Mt; ++it) {
170 for (
int iz = 0; iz < m_Mz; ++iz) {
171 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
172 int iv = ivxy + Nvxy * (iz + m_Nz * it);
184 int Nvcd = m_Nvc * m_Nd;
185 int Nvxy = Nvcd * m_Nx * m_Ny;
187 int isite = m_arg[itask].isite;
188 double *w2 = &v2[Nvcd * isite];
190 for (
int it = 0; it < m_Mt; ++it) {
191 for (
int iz = 0; iz < m_Mz; ++iz) {
192 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
193 int iv = ivxy + Nvxy * (iz + m_Nz * it);
203 int itask,
double *vcp1,
const double *v1)
205 int Nvcd = m_Nvc * m_Nd;
212 int isite = m_arg[itask].isite;
213 int isite_cp = m_arg[itask].isite_cp_x;
215 const double *w1 = &v1[Nvcd * isite];
216 double *w2 = &vcp1[Nvcd * isite_cp];
219 double bc2 = m_boundary2[idir];
223 for (
int it = 0; it < m_Mt; ++it) {
224 for (
int iz = 0; iz < m_Mz; ++iz) {
225 for (
int iy = 0; iy < m_Ny; ++iy) {
226 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
227 int is2 = iy + m_Ny * (iz + m_Mz * it);
229 int ix1 = Nvcd * is2;
230 int ix2 = ix1 + m_Nvc;
231 int ix3 = ix2 + m_Nvc;
232 int ix4 = ix3 + m_Nvc;
234 for (
int ic = 0; ic < m_Nc; ++ic) {
236 int ic_i = 2 * ic + 1;
238 w2[ic_r + ix1] = bc2 * (m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id4 + in]);
239 w2[ic_i + ix1] = bc2 * (m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id4 + in]);
240 w2[ic_r + ix2] = bc2 * (m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id3 + in]);
241 w2[ic_i + ix2] = bc2 * (m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id3 + in]);
243 w2[ic_r + ix3] = bc2 * (m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id2 + in]);
244 w2[ic_i + ix3] = bc2 * (m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id2 + in]);
245 w2[ic_r + ix4] = bc2 * (m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id1 + in]);
246 w2[ic_i + ix4] = bc2 * (m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id1 + in]);
256 int itask,
double *v2,
const double *vcp2)
258 int Nvcd = m_Nvc * m_Nd;
267 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
269 int isite = m_arg[itask].isite;
270 int isite_cp = m_arg[itask].isite_cp_x;
272 const double *w1 = &vcp2[Nvcd * isite_cp];
273 double *w2 = &v2[Nvcd * isite];
274 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
278 for (
int it = 0; it < m_Mt; ++it) {
279 for (
int iz = 0; iz < m_Mz; ++iz) {
280 for (
int iy = 0; iy < m_Ny; ++iy) {
281 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
282 int is2 = iy + m_Ny * (iz + m_Mz * it);
285 int ix1 = Nvcd * is2;
286 int ix2 = ix1 + m_Nvc;
287 int ix3 = ix2 + m_Nvc;
288 int ix4 = ix3 + m_Nvc;
290 for (
int ic = 0; ic < m_Nc; ++ic) {
291 int ic2 = ic * m_Nvc;
293 wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
294 wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
295 wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
296 wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
298 wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
299 wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
300 wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
301 wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
304 int ic_i = 2 * ic + 1;
306 w2[ic_r + id1 + iv] += wt1_r;
307 w2[ic_i + id1 + iv] += wt1_i;
308 w2[ic_r + id2 + iv] += wt2_r;
309 w2[ic_i + id2 + iv] += wt2_i;
311 w2[ic_r + id3 + iv] += wt3_r;
312 w2[ic_i + id3 + iv] += wt3_i;
313 w2[ic_r + id4 + iv] += wt4_r;
314 w2[ic_i + id4 + iv] += wt4_i;
324 int itask,
double *v2,
const double *v1)
326 int Nvcd = m_Nvc * m_Nd;
335 double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
336 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
338 int isite = m_arg[itask].isite;
340 const double *w1 = &v1[Nvcd * isite];
341 double *w2 = &v2[Nvcd * isite];
342 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
344 for (
int it = 0; it < m_Mt; ++it) {
345 for (
int iz = 0; iz < m_Mz; ++iz) {
346 for (
int iy = 0; iy < m_Ny; ++iy) {
347 for (
int ix = 0; ix < m_Nx - 1; ++ix) {
348 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
350 int in = Nvcd * (is + 1);
353 for (
int ic = 0; ic < m_Nc; ++ic) {
355 int ic_i = 2 * ic + 1;
357 vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id4 + in];
358 vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id4 + in];
359 vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id3 + in];
360 vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id3 + in];
362 vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id2 + in];
363 vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id2 + in];
364 vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id1 + in];
365 vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id1 + in];
368 for (
int ic = 0; ic < m_Nc; ++ic) {
369 int ic2 = ic * m_Nvc;
371 wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
372 wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
373 wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
374 wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
376 wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
377 wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
378 wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
379 wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
382 int ic_i = 2 * ic + 1;
384 w2[ic_r + id1 + iv] += wt1_r;
385 w2[ic_i + id1 + iv] += wt1_i;
386 w2[ic_r + id2 + iv] += wt2_r;
387 w2[ic_i + id2 + iv] += wt2_i;
389 w2[ic_r + id3 + iv] += wt3_r;
390 w2[ic_i + id3 + iv] += wt3_i;
391 w2[ic_r + id4 + iv] += wt4_r;
392 w2[ic_i + id4 + iv] += wt4_i;
403 int itask,
double *vcp1,
const double *v1)
405 int Nvcd = m_Nvc * m_Nd;
414 int isite = m_arg[itask].isite;
415 int isite_cp = m_arg[itask].isite_cp_x;
417 const double *w1 = &v1[Nvcd * isite];
418 double *w2 = &vcp1[Nvcd * isite_cp];
419 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
421 double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
425 for (
int it = 0; it < m_Mt; ++it) {
426 for (
int iz = 0; iz < m_Mz; ++iz) {
427 for (
int iy = 0; iy < m_Ny; ++iy) {
428 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
429 int is2 = iy + m_Ny * (iz + m_Mz * it);
432 int ix1 = Nvcd * is2;
433 int ix2 = ix1 + m_Nvc;
434 int ix3 = ix2 + m_Nvc;
435 int ix4 = ix3 + m_Nvc;
437 for (
int ic = 0; ic < m_Nc; ++ic) {
439 int ic_i = 2 * ic + 1;
441 vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id4 + in];
442 vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
443 vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
444 vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id3 + in];
446 vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id2 + in];
447 vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
448 vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
449 vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id1 + in];
452 for (
int ic = 0; ic < m_Nc; ++ic) {
456 int ic_i = 2 * ic + 1;
458 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
459 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
460 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
461 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
463 w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
464 w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
465 w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
466 w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
476 int itask,
double *v2,
const double *vcp2)
478 int Nvcd = m_Nvc * m_Nd;
486 double bc2 = m_boundary2[idir];
490 int isite = m_arg[itask].isite;
491 int isite_cp = m_arg[itask].isite_cp_x;
493 const double *w1 = &vcp2[Nvcd * isite_cp];
494 double *w2 = &v2[Nvcd * isite];
498 for (
int it = 0; it < m_Mt; ++it) {
499 for (
int iz = 0; iz < m_Mz; ++iz) {
500 for (
int iy = 0; iy < m_Ny; ++iy) {
501 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
502 int is2 = iy + m_Ny * (iz + m_Mz * it);
504 int ix1 = Nvcd * is2;
505 int ix2 = ix1 + m_Nvc;
506 int ix3 = ix2 + m_Nvc;
507 int ix4 = ix3 + m_Nvc;
509 for (
int ic = 0; ic < m_Nc; ++ic) {
511 int ic_i = 2 * ic + 1;
513 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
514 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
515 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
516 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
518 w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
519 w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
520 w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
521 w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
531 int itask,
double *v2,
const double *v1)
533 int Nvcd = m_Nvc * m_Nd;
542 double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
543 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
545 int isite = m_arg[itask].isite;
547 const double *w1 = &v1[Nvcd * isite];
548 double *w2 = &v2[Nvcd * isite];
549 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
551 for (
int it = 0; it < m_Mt; ++it) {
552 for (
int iz = 0; iz < m_Mz; ++iz) {
553 for (
int iy = 0; iy < m_Ny; ++iy) {
554 for (
int ix = 1; ix < m_Nx; ++ix) {
555 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
557 int in = Nvcd * (is - 1);
558 int ig = m_Ndf * (is - 1);
560 for (
int ic = 0; ic < m_Nc; ++ic) {
562 int ic_i = 2 * ic + 1;
564 vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id4 + in];
565 vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
566 vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
567 vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id3 + in];
569 vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id2 + in];
570 vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
571 vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
572 vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id1 + in];
575 for (
int ic = 0; ic < m_Nc; ++ic) {
578 wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
579 wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
580 wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
581 wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
583 wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
584 wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
585 wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
586 wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
589 int ic_i = 2 * ic + 1;
591 w2[ic_r + id1 + iv] += wt1_r;
592 w2[ic_i + id1 + iv] += wt1_i;
593 w2[ic_r + id2 + iv] += wt2_r;
594 w2[ic_i + id2 + iv] += wt2_i;
596 w2[ic_r + id3 + iv] += wt3_r;
597 w2[ic_i + id3 + iv] += wt3_i;
598 w2[ic_r + id4 + iv] += wt4_r;
599 w2[ic_i + id4 + iv] += wt4_i;
610 int itask,
double *vcp1,
const double *v1)
612 int Nvcd = m_Nvc * m_Nd;
619 int isite = m_arg[itask].isite;
620 int isite_cp = m_arg[itask].isite_cp_y;
622 const double *w1 = &v1[Nvcd * isite];
623 double *w2 = &vcp1[Nvcd * isite_cp];
626 double bc2 = m_boundary2[idir];
630 for (
int it = 0; it < m_Mt; ++it) {
631 for (
int iz = 0; iz < m_Mz; ++iz) {
632 for (
int ix = 0; ix < m_Nx; ++ix) {
633 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
634 int is2 = ix + m_Nx * (iz + m_Mz * it);
636 int ix1 = Nvcd * is2;
637 int ix2 = ix1 + m_Nvc;
638 int ix3 = ix2 + m_Nvc;
639 int ix4 = ix3 + m_Nvc;
641 for (
int ic = 0; ic < m_Nc; ++ic) {
643 int ic_i = 2 * ic + 1;
645 w2[ic_r + ix1] = bc2 * (m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_r + id4 + in]);
646 w2[ic_i + ix1] = bc2 * (m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_i + id4 + in]);
647 w2[ic_r + ix2] = bc2 * (m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_r + id3 + in]);
648 w2[ic_i + ix2] = bc2 * (m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_i + id3 + in]);
650 w2[ic_r + ix3] = bc2 * (m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_r + id2 + in]);
651 w2[ic_i + ix3] = bc2 * (m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_i + id2 + in]);
652 w2[ic_r + ix4] = bc2 * (m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_r + id1 + in]);
653 w2[ic_i + ix4] = bc2 * (m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_i + id1 + in]);
663 int itask,
double *v2,
const double *vcp2)
665 int Nvcd = m_Nvc * m_Nd;
674 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
676 int isite = m_arg[itask].isite;
677 int isite_cp = m_arg[itask].isite_cp_y;
679 const double *w1 = &vcp2[Nvcd * isite_cp];
680 double *w2 = &v2[Nvcd * isite];
681 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
685 for (
int it = 0; it < m_Mt; ++it) {
686 for (
int iz = 0; iz < m_Mz; ++iz) {
687 for (
int ix = 0; ix < m_Nx; ++ix) {
688 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
689 int is2 = ix + m_Nx * (iz + m_Mz * it);
692 int ix1 = Nvcd * is2;
693 int ix2 = ix1 + m_Nvc;
694 int ix3 = ix2 + m_Nvc;
695 int ix4 = ix3 + m_Nvc;
697 for (
int ic = 0; ic < m_Nc; ++ic) {
698 int ic2 = ic * m_Nvc;
700 wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
701 wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
702 wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
703 wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
705 wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
706 wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
707 wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
708 wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
711 int ic_i = 2 * ic + 1;
713 w2[ic_r + id1 + iv] += wt1_r;
714 w2[ic_i + id1 + iv] += wt1_i;
715 w2[ic_r + id2 + iv] += wt2_r;
716 w2[ic_i + id2 + iv] += wt2_i;
718 w2[ic_r + id3 + iv] += wt3_r;
719 w2[ic_i + id3 + iv] += wt3_i;
720 w2[ic_r + id4 + iv] += wt4_r;
721 w2[ic_i + id4 + iv] += wt4_i;
731 int itask,
double *v2,
const double *v1)
733 int Nvcd = m_Nvc * m_Nd;
742 double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
743 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
745 int isite = m_arg[itask].isite;
747 const double *w1 = &v1[Nvcd * isite];
748 double *w2 = &v2[Nvcd * isite];
749 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
751 for (
int it = 0; it < m_Mt; ++it) {
752 for (
int iz = 0; iz < m_Mz; ++iz) {
753 for (
int iy = 0; iy < m_Ny - 1; ++iy) {
754 for (
int ix = 0; ix < m_Nx; ++ix) {
755 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
757 int in = Nvcd * (is + m_Nx);
760 for (
int ic = 0; ic < m_Nc; ++ic) {
762 int ic_i = 2 * ic + 1;
764 vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_r + id4 + in];
765 vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_i + id4 + in];
766 vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_r + id3 + in];
767 vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_i + id3 + in];
769 vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_r + id2 + in];
770 vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_i + id2 + in];
771 vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_r + id1 + in];
772 vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_i + id1 + in];
775 for (
int ic = 0; ic < m_Nc; ++ic) {
776 int ic2 = ic * m_Nvc;
778 wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
779 wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
780 wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
781 wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
783 wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
784 wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
785 wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
786 wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
789 int ic_i = 2 * ic + 1;
791 w2[ic_r + id1 + iv] += wt1_r;
792 w2[ic_i + id1 + iv] += wt1_i;
793 w2[ic_r + id2 + iv] += wt2_r;
794 w2[ic_i + id2 + iv] += wt2_i;
796 w2[ic_r + id3 + iv] += wt3_r;
797 w2[ic_i + id3 + iv] += wt3_i;
798 w2[ic_r + id4 + iv] += wt4_r;
799 w2[ic_i + id4 + iv] += wt4_i;
810 int itask,
double *vcp1,
const double *v1)
812 int Nvcd = m_Nvc * m_Nd;
821 int isite = m_arg[itask].isite;
822 int isite_cp = m_arg[itask].isite_cp_y;
824 const double *w1 = &v1[Nvcd * isite];
825 double *w2 = &vcp1[Nvcd * isite_cp];
826 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
828 double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
832 for (
int it = 0; it < m_Mt; ++it) {
833 for (
int iz = 0; iz < m_Mz; ++iz) {
834 for (
int ix = 0; ix < m_Nx; ++ix) {
835 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
836 int is2 = ix + m_Nx * (iz + m_Mz * it);
839 int ix1 = Nvcd * is2;
840 int ix2 = ix1 + m_Nvc;
841 int ix3 = ix2 + m_Nvc;
842 int ix4 = ix3 + m_Nvc;
844 for (
int ic = 0; ic < m_Nc; ++ic) {
846 int ic_i = 2 * ic + 1;
848 vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
849 vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_i + id4 + in];
850 vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_r + id3 + in];
851 vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
853 vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
854 vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_i + id2 + in];
855 vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_r + id1 + in];
856 vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
859 for (
int ic = 0; ic < m_Nc; ++ic) {
863 int ic_i = 2 * ic + 1;
865 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
866 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
867 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
868 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
870 w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
871 w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
872 w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
873 w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
883 int itask,
double *v2,
const double *vcp2)
885 int Nvcd = m_Nvc * m_Nd;
893 double bc2 = m_boundary2[idir];
897 int isite = m_arg[itask].isite;
898 int isite_cp = m_arg[itask].isite_cp_y;
900 const double *w1 = &vcp2[Nvcd * isite_cp];
901 double *w2 = &v2[Nvcd * isite];
905 for (
int it = 0; it < m_Mt; ++it) {
906 for (
int iz = 0; iz < m_Mz; ++iz) {
907 for (
int ix = 0; ix < m_Nx; ++ix) {
908 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
909 int is2 = ix + m_Nx * (iz + m_Mz * it);
911 int ix1 = Nvcd * is2;
912 int ix2 = ix1 + m_Nvc;
913 int ix3 = ix2 + m_Nvc;
914 int ix4 = ix3 + m_Nvc;
916 for (
int ic = 0; ic < m_Nc; ++ic) {
918 int ic_i = 2 * ic + 1;
920 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
921 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
922 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
923 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
925 w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
926 w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
927 w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
928 w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
938 int itask,
double *v2,
const double *v1)
940 int Nvcd = m_Nvc * m_Nd;
949 double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
950 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
952 int isite = m_arg[itask].isite;
954 const double *w1 = &v1[Nvcd * isite];
955 double *w2 = &v2[Nvcd * isite];
956 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
958 for (
int it = 0; it < m_Mt; ++it) {
959 for (
int iz = 0; iz < m_Mz; ++iz) {
960 for (
int iy = 1; iy < m_Ny; ++iy) {
961 for (
int ix = 0; ix < m_Nx; ++ix) {
962 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
964 int in = Nvcd * (is - m_Nx);
965 int ig = m_Ndf * (is - m_Nx);
967 for (
int ic = 0; ic < m_Nc; ++ic) {
969 int ic_i = 2 * ic + 1;
971 vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_r + id4 + in];
972 vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_i + id4 + in];
973 vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_r + id3 + in];
974 vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_i + id3 + in];
976 vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_r + id2 + in];
977 vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_i + id2 + in];
978 vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_r + id1 + in];
979 vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_i + id1 + in];
982 for (
int ic = 0; ic < m_Nc; ++ic) {
985 wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
986 wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
987 wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
988 wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
990 wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
991 wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
992 wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
993 wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
996 int ic_i = 2 * ic + 1;
998 w2[ic_r + id1 + iv] += wt1_r;
999 w2[ic_i + id1 + iv] += wt1_i;
1000 w2[ic_r + id2 + iv] += wt2_r;
1001 w2[ic_i + id2 + iv] += wt2_i;
1003 w2[ic_r + id3 + iv] += wt3_r;
1004 w2[ic_i + id3 + iv] += wt3_i;
1005 w2[ic_r + id4 + iv] += wt4_r;
1006 w2[ic_i + id4 + iv] += wt4_i;
1017 int itask,
double *vcp1,
const double *v1)
1019 int Nvcd = m_Nvc * m_Nd;
1023 int id3 = m_Nvc * 2;
1024 int id4 = m_Nvc * 3;
1026 int isite = m_arg[itask].isite;
1027 int isite_cp = m_arg[itask].isite_cp_z;
1029 const double *w1 = &v1[Nvcd * isite];
1030 double *w2 = &vcp1[Nvcd * isite_cp];
1033 double bc2 = m_boundary2[idir];
1035 if (m_arg[itask].kz0 == 1) {
1036 int Nxy = m_Nx * m_Ny;
1038 for (
int it = 0; it < m_Mt; ++it) {
1039 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1040 int is = ixy + Nxy * (iz + m_Nz * it);
1041 int is2 = ixy + Nxy * it;
1044 int ix1 = Nvcd * is2;
1045 int ix2 = ix1 + m_Nvc;
1046 int ix3 = ix2 + m_Nvc;
1047 int ix4 = ix3 + m_Nvc;
1049 for (
int ic = 0; ic < m_Nc; ++ic) {
1051 int ic_i = 2 * ic + 1;
1053 w2[ic_r + ix1] = bc2 * (m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id3 + in]);
1054 w2[ic_i + ix1] = bc2 * (m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id3 + in]);
1055 w2[ic_r + ix2] = bc2 * (m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id4 + in]);
1056 w2[ic_i + ix2] = bc2 * (m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id4 + in]);
1058 w2[ic_r + ix3] = bc2 * (m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id1 + in]);
1059 w2[ic_i + ix3] = bc2 * (m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id1 + in]);
1060 w2[ic_r + ix4] = bc2 * (m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id2 + in]);
1061 w2[ic_i + ix4] = bc2 * (m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id2 + in]);
1071 int itask,
double *v2,
const double *vcp2)
1073 int Nvcd = m_Nvc * m_Nd;
1077 int id3 = m_Nvc * 2;
1078 int id4 = m_Nvc * 3;
1082 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
1084 int isite = m_arg[itask].isite;
1085 int isite_cp = m_arg[itask].isite_cp_z;
1087 const double *w1 = &vcp2[Nvcd * isite_cp];
1088 double *w2 = &v2[Nvcd * isite];
1089 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1091 if (m_arg[itask].kz1 == 1) {
1092 int Nxy = m_Nx * m_Ny;
1094 for (
int it = 0; it < m_Mt; ++it) {
1095 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1096 int is = ixy + Nxy * (iz + m_Nz * it);
1097 int is2 = ixy + Nxy * it;
1099 int ig = m_Ndf * is;
1100 int ix1 = Nvcd * is2;
1101 int ix2 = ix1 + m_Nvc;
1102 int ix3 = ix2 + m_Nvc;
1103 int ix4 = ix3 + m_Nvc;
1105 for (
int ic = 0; ic < m_Nc; ++ic) {
1106 int ic2 = ic * m_Nvc;
1108 wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1109 wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1110 wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1111 wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1113 wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
1114 wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
1115 wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
1116 wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
1119 int ic_i = 2 * ic + 1;
1121 w2[ic_r + id1 + iv] += wt1_r;
1122 w2[ic_i + id1 + iv] += wt1_i;
1123 w2[ic_r + id2 + iv] += wt2_r;
1124 w2[ic_i + id2 + iv] += wt2_i;
1126 w2[ic_r + id3 + iv] += wt3_r;
1127 w2[ic_i + id3 + iv] += wt3_i;
1128 w2[ic_r + id4 + iv] += wt4_r;
1129 w2[ic_i + id4 + iv] += wt4_i;
1139 int itask,
double *v2,
const double *v1)
1141 int Nvcd = m_Nvc * m_Nd;
1145 int id3 = m_Nvc * 2;
1146 int id4 = m_Nvc * 3;
1150 double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
1151 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
1153 int isite = m_arg[itask].isite;
1155 const double *w1 = &v1[Nvcd * isite];
1156 double *w2 = &v2[Nvcd * isite];
1157 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1159 int kz1 = m_arg[itask].kz1;
1160 int Nxy = m_Nx * m_Ny;
1162 for (
int it = 0; it < m_Mt; ++it) {
1163 for (
int iz = 0; iz < m_Mz - kz1; ++iz) {
1164 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1165 int is = ixy + Nxy * (iz + m_Nz * it);
1167 int in = Nvcd * (is + Nxy);
1168 int ig = m_Ndf * is;
1170 for (
int ic = 0; ic < m_Nc; ++ic) {
1172 int ic_i = 2 * ic + 1;
1174 vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] - m_nu_s * w1[ic_i + id3 + in];
1175 vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] + m_nu_s * w1[ic_r + id3 + in];
1176 vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] + m_nu_s * w1[ic_i + id4 + in];
1177 vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] - m_nu_s * w1[ic_r + id4 + in];
1179 vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] + m_nu_s * w1[ic_i + id1 + in];
1180 vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] - m_nu_s * w1[ic_r + id1 + in];
1181 vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] - m_nu_s * w1[ic_i + id2 + in];
1182 vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] + m_nu_s * w1[ic_r + id2 + in];
1185 for (
int ic = 0; ic < m_Nc; ++ic) {
1186 int ic2 = ic * m_Nvc;
1188 wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1189 wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1190 wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1191 wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1193 wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
1194 wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
1195 wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
1196 wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
1199 int ic_i = 2 * ic + 1;
1201 w2[ic_r + id1 + iv] += wt1_r;
1202 w2[ic_i + id1 + iv] += wt1_i;
1203 w2[ic_r + id2 + iv] += wt2_r;
1204 w2[ic_i + id2 + iv] += wt2_i;
1206 w2[ic_r + id3 + iv] += wt3_r;
1207 w2[ic_i + id3 + iv] += wt3_i;
1208 w2[ic_r + id4 + iv] += wt4_r;
1209 w2[ic_i + id4 + iv] += wt4_i;
1219 int itask,
double *vcp1,
const double *v1)
1221 int Nvcd = m_Nvc * m_Nd;
1225 int id3 = m_Nvc * 2;
1226 int id4 = m_Nvc * 3;
1230 int isite = m_arg[itask].isite;
1231 int isite_cp = m_arg[itask].isite_cp_z;
1233 const double *w1 = &v1[Nvcd * isite];
1234 double *w2 = &vcp1[Nvcd * isite_cp];
1235 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1237 double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
1239 if (m_arg[itask].kz1 == 1) {
1240 int Nxy = m_Nx * m_Ny;
1242 for (
int it = 0; it < m_Mt; ++it) {
1243 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1244 int is = ixy + Nxy * (iz + m_Nz * it);
1245 int is2 = ixy + Nxy * it;
1247 int ig = m_Ndf * is;
1248 int ix1 = Nvcd * is2;
1249 int ix2 = ix1 + m_Nvc;
1250 int ix3 = ix2 + m_Nvc;
1251 int ix4 = ix3 + m_Nvc;
1253 for (
int ic = 0; ic < m_Nc; ++ic) {
1255 int ic_i = 2 * ic + 1;
1257 vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id3 + in];
1258 vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id3 + in];
1259 vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id4 + in];
1260 vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id4 + in];
1262 vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id1 + in];
1263 vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id1 + in];
1264 vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id2 + in];
1265 vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id2 + in];
1268 for (
int ic = 0; ic < m_Nc; ++ic) {
1272 int ic_i = 2 * ic + 1;
1274 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1275 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1276 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1277 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1279 w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
1280 w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
1281 w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
1282 w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
1292 int itask,
double *v2,
const double *vcp2)
1294 int Nvcd = m_Nvc * m_Nd;
1298 int id3 = m_Nvc * 2;
1299 int id4 = m_Nvc * 3;
1302 double bc2 = m_boundary2[idir];
1306 int isite = m_arg[itask].isite;
1307 int isite_cp = m_arg[itask].isite_cp_z;
1309 const double *w1 = &vcp2[Nvcd * isite_cp];
1310 double *w2 = &v2[Nvcd * isite];
1312 if (m_arg[itask].kz0 == 1) {
1313 int Nxy = m_Nx * m_Ny;
1316 for (
int it = 0; it < m_Mt; ++it) {
1317 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1318 int is = ixy + Nxy * (iz + m_Nz * it);
1319 int is2 = ixy + Nxy * it;
1321 int ix1 = Nvcd * is2;
1322 int ix2 = ix1 + m_Nvc;
1323 int ix3 = ix2 + m_Nvc;
1324 int ix4 = ix3 + m_Nvc;
1326 for (
int ic = 0; ic < m_Nc; ++ic) {
1328 int ic_i = 2 * ic + 1;
1330 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1331 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1332 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1333 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1335 w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
1336 w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
1337 w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
1338 w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
1348 int itask,
double *v2,
const double *v1)
1350 int Nvcd = m_Nvc * m_Nd;
1354 int id3 = m_Nvc * 2;
1355 int id4 = m_Nvc * 3;
1359 double vt1[m_Nvc], vt2[m_Nvc], vt3[m_Nvc], vt4[m_Nvc];
1360 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
1362 int isite = m_arg[itask].isite;
1364 const double *w1 = &v1[Nvcd * isite];
1365 double *w2 = &v2[Nvcd * isite];
1366 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1368 int kz0 = m_arg[itask].kz0;
1369 int Nxy = m_Nx * m_Ny;
1371 for (
int it = 0; it < m_Mt; ++it) {
1372 for (
int iz = kz0; iz < m_Mz; ++iz) {
1373 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1374 int is = ixy + Nxy * (iz + m_Nz * it);
1376 int in = Nvcd * (is - Nxy);
1377 int ig = m_Ndf * (is - Nxy);
1379 for (
int ic = 0; ic < m_Nc; ++ic) {
1381 int ic_i = 2 * ic + 1;
1383 vt1[ic_r] = m_r_s * w1[ic_r + id1 + in] + m_nu_s * w1[ic_i + id3 + in];
1384 vt1[ic_i] = m_r_s * w1[ic_i + id1 + in] - m_nu_s * w1[ic_r + id3 + in];
1385 vt2[ic_r] = m_r_s * w1[ic_r + id2 + in] - m_nu_s * w1[ic_i + id4 + in];
1386 vt2[ic_i] = m_r_s * w1[ic_i + id2 + in] + m_nu_s * w1[ic_r + id4 + in];
1388 vt3[ic_r] = m_r_s * w1[ic_r + id3 + in] - m_nu_s * w1[ic_i + id1 + in];
1389 vt3[ic_i] = m_r_s * w1[ic_i + id3 + in] + m_nu_s * w1[ic_r + id1 + in];
1390 vt4[ic_r] = m_r_s * w1[ic_r + id4 + in] + m_nu_s * w1[ic_i + id2 + in];
1391 vt4[ic_i] = m_r_s * w1[ic_i + id4 + in] - m_nu_s * w1[ic_r + id2 + in];
1394 for (
int ic = 0; ic < m_Nc; ++ic) {
1397 wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1398 wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1399 wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1400 wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1402 wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
1403 wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
1404 wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
1405 wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
1408 int ic_i = 2 * ic + 1;
1410 w2[ic_r + id1 + iv] += wt1_r;
1411 w2[ic_i + id1 + iv] += wt1_i;
1412 w2[ic_r + id2 + iv] += wt2_r;
1413 w2[ic_i + id2 + iv] += wt2_i;
1415 w2[ic_r + id3 + iv] += wt3_r;
1416 w2[ic_i + id3 + iv] += wt3_i;
1417 w2[ic_r + id4 + iv] += wt4_r;
1418 w2[ic_i + id4 + iv] += wt4_i;
1428 int itask,
double *vcp1,
const double *v1)
1430 int Nvc2 = 2 * m_Nvc;
1431 int Nvcd = m_Nvc * m_Nd;
1432 int Nvcd2 = Nvcd / 2;
1436 int id3 = m_Nvc * 2;
1437 int id4 = m_Nvc * 3;
1439 int isite = m_arg[itask].isite;
1440 int isite_cp = m_arg[itask].isite_cp_t;
1442 const double *w1 = &v1[Nvcd * isite];
1443 double *w2 = &vcp1[Nvcd2 * isite_cp];
1446 double bc2 = m_boundary2[idir];
1448 if (m_arg[itask].kt0 == 1) {
1449 int Nxy = m_Nx * m_Ny;
1451 for (
int iz = 0; iz < m_Mz; ++iz) {
1452 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1453 int is = ixy + Nxy * (iz + m_Nz * it);
1454 int is2 = ixy + Nxy * iz;
1457 int ix1 = Nvc2 * is2;
1458 int ix2 = ix1 + m_Nvc;
1460 for (
int ic = 0; ic < m_Nc; ++ic) {
1462 int ic_i = 2 * ic + 1;
1464 w2[ic_r + ix1] = 2.0 * bc2 * w1[ic_r + id3 + in];
1465 w2[ic_i + ix1] = 2.0 * bc2 * w1[ic_i + id3 + in];
1466 w2[ic_r + ix2] = 2.0 * bc2 * w1[ic_r + id4 + in];
1467 w2[ic_i + ix2] = 2.0 * bc2 * w1[ic_i + id4 + in];
1477 int itask,
double *v2,
const double *vcp2)
1479 int Nvc2 = 2 * m_Nvc;
1480 int Nvcd = m_Nvc * m_Nd;
1481 int Nvcd2 = Nvcd / 2;
1485 int id3 = m_Nvc * 2;
1486 int id4 = m_Nvc * 3;
1490 double wt1_r, wt1_i, wt2_r, wt2_i;
1492 int isite = m_arg[itask].isite;
1493 int isite_cp = m_arg[itask].isite_cp_t;
1495 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1496 double *w2 = &v2[Nvcd * isite];
1497 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1499 if (m_arg[itask].kt1 == 1) {
1500 int Nxy = m_Nx * m_Ny;
1502 for (
int iz = 0; iz < m_Mz; ++iz) {
1503 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1504 int is = ixy + Nxy * (iz + m_Nz * it);
1505 int is2 = ixy + Nxy * iz;
1507 int ig = m_Ndf * is;
1508 int ix1 = Nvc2 * is2;
1509 int ix2 = ix1 + m_Nvc;
1511 for (
int ic = 0; ic < m_Nc; ++ic) {
1512 int ic2 = ic * m_Nvc;
1514 wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1515 wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1516 wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1517 wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1520 int ic_i = 2 * ic + 1;
1522 w2[ic_r + id3 + iv] += wt1_r;
1523 w2[ic_i + id3 + iv] += wt1_i;
1524 w2[ic_r + id4 + iv] += wt2_r;
1525 w2[ic_i + id4 + iv] += wt2_i;
1535 int itask,
double *v2,
const double *v1)
1537 int Nvcd = m_Nvc * m_Nd;
1541 int id3 = m_Nvc * 2;
1542 int id4 = m_Nvc * 3;
1546 double vt1[m_Nvc], vt2[m_Nvc];
1547 double wt1_r, wt1_i, wt2_r, wt2_i;
1549 int isite = m_arg[itask].isite;
1551 const double *w1 = &v1[Nvcd * isite];
1552 double *w2 = &v2[Nvcd * isite];
1553 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1555 int kt1 = m_arg[itask].kt1;
1556 int Nxy = m_Nx * m_Ny;
1557 int Nxyz = Nxy * m_Nz;
1559 for (
int it = 0; it < m_Mt - kt1; ++it) {
1560 for (
int iz = 0; iz < m_Mz; ++iz) {
1561 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1562 int is = ixy + Nxy * (iz + m_Nz * it);
1564 int in = Nvcd * (is + Nxyz);
1565 int ig = m_Ndf * is;
1567 for (
int ic = 0; ic < m_Nc; ++ic) {
1569 int ic_i = 2 * ic + 1;
1571 vt1[ic_r] = 2.0 * w1[ic_r + id3 + in];
1572 vt1[ic_i] = 2.0 * w1[ic_i + id3 + in];
1573 vt2[ic_r] = 2.0 * w1[ic_r + id4 + in];
1574 vt2[ic_i] = 2.0 * w1[ic_i + id4 + in];
1577 for (
int ic = 0; ic < m_Nc; ++ic) {
1578 int ic2 = ic * m_Nvc;
1580 wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1581 wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1582 wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1583 wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1586 int ic_i = 2 * ic + 1;
1588 w2[ic_r + id3 + iv] += wt1_r;
1589 w2[ic_i + id3 + iv] += wt1_i;
1590 w2[ic_r + id4 + iv] += wt2_r;
1591 w2[ic_i + id4 + iv] += wt2_i;
1601 int itask,
double *vcp1,
const double *v1)
1603 int Nvc2 = 2 * m_Nvc;
1604 int Nvcd = m_Nvc * m_Nd;
1605 int Nvcd2 = Nvcd / 2;
1614 int isite = m_arg[itask].isite;
1615 int isite_cp = m_arg[itask].isite_cp_t;
1617 const double *w1 = &v1[Nvcd * isite];
1618 double *w2 = &vcp1[Nvcd2 * isite_cp];
1619 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1621 double vt1[m_Nvc], vt2[m_Nvc];
1623 if (m_arg[itask].kt1 == 1) {
1624 int Nxy = m_Nx * m_Ny;
1626 for (
int iz = 0; iz < m_Mz; ++iz) {
1627 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1628 int is = ixy + Nxy * (iz + m_Nz * it);
1629 int is2 = ixy + Nxy * iz;
1631 int ig = m_Ndf * is;
1632 int ix1 = Nvc2 * is2;
1633 int ix2 = ix1 + m_Nvc;
1635 for (
int ic = 0; ic < m_Nc; ++ic) {
1637 int ic_i = 2 * ic + 1;
1639 vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1640 vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1641 vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1642 vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1645 for (
int ic = 0; ic < m_Nc; ++ic) {
1649 int ic_i = 2 * ic + 1;
1651 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1652 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1653 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1654 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1664 int itask,
double *v2,
const double *vcp2)
1666 int Nvc2 = 2 * m_Nvc;
1667 int Nvcd = m_Nvc * m_Nd;
1668 int Nvcd2 = Nvcd / 2;
1676 double bc2 = m_boundary2[idir];
1680 int isite = m_arg[itask].isite;
1681 int isite_cp = m_arg[itask].isite_cp_t;
1683 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1684 double *w2 = &v2[Nvcd * isite];
1686 if (m_arg[itask].kt0 == 1) {
1687 int Nxy = m_Nx * m_Ny;
1689 for (
int iz = 0; iz < m_Mz; ++iz) {
1690 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1691 int is = ixy + Nxy * (iz + m_Nz * it);
1692 int is2 = ixy + Nxy * iz;
1694 int ix1 = Nvc2 * is2;
1695 int ix2 = ix1 + m_Nvc;
1697 for (
int ic = 0; ic < m_Nc; ++ic) {
1699 int ic_i = 2 * ic + 1;
1701 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1702 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1703 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1704 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1714 int itask,
double *v2,
const double *v1)
1716 int Nvcd = m_Nvc * m_Nd;
1725 double vt1[m_Nvc], vt2[m_Nvc];
1726 double wt1_r, wt1_i, wt2_r, wt2_i;
1728 int isite = m_arg[itask].isite;
1730 const double *w1 = &v1[Nvcd * isite];
1731 double *w2 = &v2[Nvcd * isite];
1732 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1734 int kt0 = m_arg[itask].kt0;
1735 int Nxy = m_Nx * m_Ny;
1736 int Nxyz = Nxy * m_Nz;
1738 for (
int it = kt0; it < m_Mt; ++it) {
1739 for (
int iz = 0; iz < m_Mz; ++iz) {
1740 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1741 int is = ixy + Nxy * (iz + m_Nz * it);
1743 int in = Nvcd * (is - Nxyz);
1744 int ig = m_Ndf * (is - Nxyz);
1746 for (
int ic = 0; ic < m_Nc; ++ic) {
1748 int ic_i = 2 * ic + 1;
1750 vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1751 vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1752 vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1753 vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1756 for (
int ic = 0; ic < m_Nc; ++ic) {
1759 wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1760 wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1761 wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1762 wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1765 int ic_i = 2 * ic + 1;
1767 w2[ic_r + id1 + iv] += wt1_r;
1768 w2[ic_i + id1 + iv] += wt1_i;
1769 w2[ic_r + id2 + iv] += wt2_r;
1770 w2[ic_i + id2 + iv] += wt2_i;
1780 int itask,
double *vcp1,
const double *v1)
1782 int Nvc2 = 2 * m_Nvc;
1783 int Nvcd = m_Nvc * m_Nd;
1784 int Nvcd2 = Nvcd / 2;
1788 int id3 = m_Nvc * 2;
1789 int id4 = m_Nvc * 3;
1791 int isite = m_arg[itask].isite;
1792 int isite_cp = m_arg[itask].isite_cp_t;
1794 const double *w1 = &v1[Nvcd * isite];
1795 double *w2 = &vcp1[Nvcd2 * isite_cp];
1798 double bc2 = m_boundary2[idir];
1800 if (m_arg[itask].kt0 == 1) {
1801 int Nxy = m_Nx * m_Ny;
1803 for (
int iz = 0; iz < m_Mz; ++iz) {
1804 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1805 int is = ixy + Nxy * (iz + m_Nz * it);
1806 int is2 = ixy + Nxy * iz;
1809 int ix1 = Nvc2 * is2;
1810 int ix2 = ix1 + m_Nvc;
1812 for (
int ic = 0; ic < m_Nc; ++ic) {
1814 int ic_i = 2 * ic + 1;
1816 w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id3 + in]);
1817 w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id3 + in]);
1818 w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_r + id4 + in]);
1819 w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_i + id4 + in]);
1829 int itask,
double *v2,
const double *vcp2)
1831 int Nvc2 = 2 * m_Nvc;
1832 int Nvcd = m_Nvc * m_Nd;
1833 int Nvcd2 = Nvcd / 2;
1837 int id3 = m_Nvc * 2;
1838 int id4 = m_Nvc * 3;
1842 double wt1_r, wt1_i, wt2_r, wt2_i;
1844 int isite = m_arg[itask].isite;
1845 int isite_cp = m_arg[itask].isite_cp_t;
1848 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1849 double *w2 = &v2[Nvcd * isite];
1850 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1852 if (m_arg[itask].kt1 == 1) {
1853 int Nxy = m_Nx * m_Ny;
1855 for (
int iz = 0; iz < m_Mz; ++iz) {
1856 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1857 int is = ixy + Nxy * (iz + m_Nz * it);
1858 int is2 = ixy + Nxy * iz;
1860 int ig = m_Ndf * is;
1861 int ix1 = Nvc2 * is2;
1862 int ix2 = ix1 + m_Nvc;
1864 for (
int ic = 0; ic < m_Nc; ++ic) {
1865 int ic2 = ic * m_Nvc;
1867 wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1868 wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1869 wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1870 wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1873 int ic_i = 2 * ic + 1;
1875 w2[ic_r + id1 + iv] += wt1_r;
1876 w2[ic_i + id1 + iv] += wt1_i;
1877 w2[ic_r + id2 + iv] += wt2_r;
1878 w2[ic_i + id2 + iv] += wt2_i;
1880 w2[ic_r + id3 + iv] += wt1_r;
1881 w2[ic_i + id3 + iv] += wt1_i;
1882 w2[ic_r + id4 + iv] += wt2_r;
1883 w2[ic_i + id4 + iv] += wt2_i;
1893 int itask,
double *v2,
const double *v1)
1895 int Nvcd = m_Nvc * m_Nd;
1899 int id3 = m_Nvc * 2;
1900 int id4 = m_Nvc * 3;
1904 double vt1[m_Nvc], vt2[m_Nvc];
1905 double wt1_r, wt1_i, wt2_r, wt2_i;
1907 int isite = m_arg[itask].isite;
1909 const double *w1 = &v1[Nvcd * isite];
1910 double *w2 = &v2[Nvcd * isite];
1911 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1913 int kt1 = m_arg[itask].kt1;
1914 int Nxy = m_Nx * m_Ny;
1915 int Nxyz = Nxy * m_Nz;
1917 for (
int it = 0; it < m_Mt - kt1; ++it) {
1918 for (
int iz = 0; iz < m_Mz; ++iz) {
1919 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1920 int is = ixy + Nxy * (iz + m_Nz * it);
1922 int in = Nvcd * (is + Nxyz);
1923 int ig = m_Ndf * is;
1925 for (
int ic = 0; ic < m_Nc; ++ic) {
1927 int ic_i = 2 * ic + 1;
1929 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id3 + in];
1930 vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id3 + in];
1931 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id4 + in];
1932 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id4 + in];
1935 for (
int ic = 0; ic < m_Nc; ++ic) {
1936 int ic2 = ic * m_Nvc;
1938 wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1939 wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1940 wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1941 wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1944 int ic_i = 2 * ic + 1;
1946 w2[ic_r + id1 + iv] += wt1_r;
1947 w2[ic_i + id1 + iv] += wt1_i;
1948 w2[ic_r + id2 + iv] += wt2_r;
1949 w2[ic_i + id2 + iv] += wt2_i;
1951 w2[ic_r + id3 + iv] += wt1_r;
1952 w2[ic_i + id3 + iv] += wt1_i;
1953 w2[ic_r + id4 + iv] += wt2_r;
1954 w2[ic_i + id4 + iv] += wt2_i;
1964 int itask,
double *vcp1,
const double *v1)
1966 int Nvc2 = 2 * m_Nvc;
1967 int Nvcd = m_Nvc * m_Nd;
1968 int Nvcd2 = Nvcd / 2;
1972 int id3 = m_Nvc * 2;
1973 int id4 = m_Nvc * 3;
1977 int isite = m_arg[itask].isite;
1978 int isite_cp = m_arg[itask].isite_cp_t;
1981 const double *w1 = &v1[Nvcd * isite];
1982 double *w2 = &vcp1[Nvcd2 * isite_cp];
1983 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1985 double vt1[m_Nvc], vt2[m_Nvc];
1987 if (m_arg[itask].kt1 == 1) {
1988 int Nxy = m_Nx * m_Ny;
1990 for (
int iz = 0; iz < m_Mz; ++iz) {
1991 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1992 int is = ixy + Nxy * (iz + m_Nz * it);
1993 int is2 = ixy + Nxy * iz;
1995 int ig = m_Ndf * is;
1996 int ix1 = Nvc2 * is2;
1997 int ix2 = ix1 + m_Nvc;
1999 for (
int ic = 0; ic < m_Nc; ++ic) {
2001 int ic_i = 2 * ic + 1;
2003 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
2004 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
2005 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
2006 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
2009 for (
int ic = 0; ic < m_Nc; ++ic) {
2013 int ic_i = 2 * ic + 1;
2015 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
2016 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
2017 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2018 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2028 int itask,
double *v2,
const double *vcp2)
2030 int Nvc2 = 2 * m_Nvc;
2031 int Nvcd = m_Nvc * m_Nd;
2032 int Nvcd2 = Nvcd / 2;
2036 int id3 = m_Nvc * 2;
2037 int id4 = m_Nvc * 3;
2040 double bc2 = m_boundary2[idir];
2044 int isite = m_arg[itask].isite;
2045 int isite_cp = m_arg[itask].isite_cp_t;
2047 const double *w1 = &vcp2[Nvcd2 * isite_cp];
2048 double *w2 = &v2[Nvcd * isite];
2050 if (m_arg[itask].kt0 == 1) {
2051 int Nxy = m_Nx * m_Ny;
2053 for (
int iz = 0; iz < m_Mz; ++iz) {
2054 for (
int ixy = 0; ixy < Nxy; ++ixy) {
2055 int is = ixy + Nxy * (iz + m_Nz * it);
2056 int is2 = ixy + Nxy * iz;
2058 int ix1 = Nvc2 * is2;
2059 int ix2 = ix1 + m_Nvc;
2061 for (
int ic = 0; ic < m_Nc; ++ic) {
2063 int ic_i = 2 * ic + 1;
2065 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
2066 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
2067 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
2068 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
2070 w2[ic_r + id3 + iv] -= bc2 * w1[ic_r + ix1];
2071 w2[ic_i + id3 + iv] -= bc2 * w1[ic_i + ix1];
2072 w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix2];
2073 w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix2];
2083 int itask,
double *v2,
const double *v1)
2085 int Nvcd = m_Nvc * m_Nd;
2089 int id3 = m_Nvc * 2;
2090 int id4 = m_Nvc * 3;
2094 double vt1[m_Nvc], vt2[m_Nvc];
2095 double wt1_r, wt1_i, wt2_r, wt2_i;
2097 int isite = m_arg[itask].isite;
2099 const double *w1 = &v1[Nvcd * isite];
2100 double *w2 = &v2[Nvcd * isite];
2101 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
2103 int kt0 = m_arg[itask].kt0;
2104 int Nxy = m_Nx * m_Ny;
2105 int Nxyz = Nxy * m_Nz;
2107 for (
int it = kt0; it < m_Mt; ++it) {
2108 for (
int iz = 0; iz < m_Mz; ++iz) {
2109 for (
int ixy = 0; ixy < Nxy; ++ixy) {
2110 int is = ixy + Nxy * (iz + m_Nz * it);
2112 int in = Nvcd * (is - Nxyz);
2113 int ig = m_Ndf * (is - Nxyz);
2115 for (
int ic = 0; ic < m_Nc; ++ic) {
2117 int ic_i = 2 * ic + 1;
2119 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
2120 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
2121 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
2122 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
2125 for (
int ic = 0; ic < m_Nc; ++ic) {
2128 wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
2129 wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
2130 wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2131 wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2134 int ic_i = 2 * ic + 1;
2136 w2[ic_r + id1 + iv] += wt1_r;
2137 w2[ic_i + id1 + iv] += wt1_i;
2138 w2[ic_r + id2 + iv] += wt2_r;
2139 w2[ic_i + id2 + iv] += wt2_i;
2141 w2[ic_r + id3 + iv] -= wt1_r;
2142 w2[ic_i + id3 + iv] -= wt1_i;
2143 w2[ic_r + id4 + iv] -= wt2_r;
2144 w2[ic_i + id4 + iv] -= wt2_i;
2154 int itask,
double *v2,
const double *v1)
2156 int Nvcd = m_Nvc * m_Nd;
2157 int Nxy = m_Nx * m_Ny;
2161 int id3 = m_Nvc * 2;
2162 int id4 = m_Nvc * 3;
2164 int isite = m_arg[itask].isite;
2166 const double *w1 = &v1[Nvcd * isite];
2167 double *w2 = &v2[Nvcd * isite];
2169 for (
int it = 0; it < m_Mt; ++it) {
2170 for (
int iz = 0; iz < m_Mz; ++iz) {
2171 for (
int ixy = 0; ixy < Nxy; ++ixy) {
2172 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2173 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2174 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2175 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2176 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2177 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
2187 int itask,
double *v2,
const double *v1)
2189 int Nvcd = m_Nvc * m_Nd;
2190 int Nxy = m_Nx * m_Ny;
2194 int id3 = m_Nvc * 2;
2195 int id4 = m_Nvc * 3;
2197 int isite = m_arg[itask].isite;
2199 const double *w1 = &v1[Nvcd * isite];
2200 double *w2 = &v2[Nvcd * isite];
2202 for (
int it = 0; it < m_Mt; ++it) {
2203 for (
int iz = 0; iz < m_Mz; ++iz) {
2204 for (
int ixy = 0; ixy < Nxy; ++ixy) {
2205 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2206 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2207 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2208 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2209 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2210 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_t_plus_bulk_chiral_thread(int, double *, const double *)
void scal_thread(int, double *, double)
void clear_thread(int, double *)
void mult_y_minus2_thread(int, double *, const double *)
void mult_y_plus2_thread(int, double *, const double *)
void general(const char *format,...)
void mult_y_plus1_thread(int, double *, const double *)
void gm5_dirac_thread(int, double *, const double *)
void mult_x_plus1_thread(int, double *, const double *)
void mult_y_plus_bulk_thread(int, double *, const double *)
void mult_t_plus2_dirac_thread(int, double *, const double *)
void mult_y_minus1_thread(int, double *, const double *)
void mult_z_plus2_thread(int, double *, const double *)
void daypx_thread(int, double *, double, const double *)
void mult_z_plus1_thread(int, double *, const double *)
void gm5_chiral_thread(int, double *, const double *)
void mult_z_plus_bulk_thread(int, double *, const double *)
void mult_t_minus1_chiral_thread(int, double *, const double *)
void mult_z_minus_bulk_thread(int, double *, const double *)
void mult_x_minus1_thread(int, double *, const double *)
void mult_x_plus2_thread(int, double *, const double *)
void mult_t_plus1_chiral_thread(int, double *, const double *)
void daxpy_thread(int, double *, double, const double *)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_t_plus_bulk_dirac_thread(int, double *, const double *)
void mult_t_minus_bulk_dirac_thread(int, double *, const double *)
void mult_t_minus2_dirac_thread(int, double *, const double *)
void mult_t_plus2_chiral_thread(int, double *, const double *)
void crucial(const char *format,...)
static const std::string class_name
void mult_x_minus_bulk_thread(int, double *, const double *)
void mult_z_minus1_thread(int, double *, const double *)
void mult_x_minus2_thread(int, double *, const double *)
void mult_t_minus2_chiral_thread(int, double *, const double *)
void mult_t_minus1_dirac_thread(int, double *, const double *)
void mult_y_minus_bulk_thread(int, double *, const double *)
Bridge::VerboseLevel m_vl
void mult_t_minus_bulk_chiral_thread(int, double *, const double *)
void mult_t_plus1_dirac_thread(int, double *, const double *)
void mult_z_minus2_thread(int, double *, const double *)
void mult_x_plus_bulk_thread(int, double *, const double *)
std::vector< mult_arg > m_arg