17 #if defined USE_GROUP_SU3
18 #include "fopr_Wilson_impl_SU3.inc"
19 #elif defined USE_GROUP_SU2
20 #include "fopr_Wilson_impl_SU2.inc"
21 #elif defined USE_GROUP_SU_N
22 #include "fopr_Wilson_impl_SU_N.inc"
45 vout.
crucial(
m_vl,
"Error at %s: Nz = %d and Nt = %d do not match Nthread = %d\n",
55 vout.
crucial(
m_vl,
"Error at %s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
61 vout.
crucial(
m_vl,
"Error at %s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
91 for (
int ithread_t = 0; ithread_t <
m_Ntask_t; ++ithread_t) {
92 for (
int ithread_z = 0; ithread_z <
m_Ntask_z; ++ithread_z) {
93 int itask = ithread_z + m_Ntask_z * ithread_t;
100 m_arg[itask].kz1 = 0;
101 if (ithread_t == 0)
m_arg[itask].kt0 = 1;
102 if (ithread_z == 0)
m_arg[itask].kz0 = 1;
103 if (ithread_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
104 if (ithread_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
108 m_arg[itask].isite_cp_z = ithread_t *
m_Mt * Nxy;
109 m_arg[itask].isite_cp_t = ithread_z *
m_Mz * Nxy;
117 int itask,
double *v2,
double fac,
const double *v1)
122 int isite =
m_arg[itask].isite;
124 const double *w1 = &v1[Nvcd * isite];
125 double *w2 = &v2[Nvcd * isite];
127 for (
int it = 0; it <
m_Mt; ++it) {
128 for (
int iz = 0; iz <
m_Mz; ++iz) {
129 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
130 int iv = ivxy + Nvxy * (iz +
m_Nz * it);
131 w2[iv] += fac * w1[iv];
140 int itask,
double *v2,
double fac,
const double *v1)
145 int isite =
m_arg[itask].isite;
146 const double *w1 = &v1[Nvcd * isite];
147 double *w2 = &v2[Nvcd * isite];
149 for (
int it = 0; it <
m_Mt; ++it) {
150 for (
int iz = 0; iz <
m_Mz; ++iz) {
151 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
152 int iv = ivxy + Nvxy * (iz +
m_Nz * it);
153 w2[iv] = fac * w2[iv] + w1[iv];
162 double *v,
double fac)
167 int isite =
m_arg[itask].isite;
168 double *w = &v[Nvcd * isite];
170 for (
int it = 0; it <
m_Mt; ++it) {
171 for (
int iz = 0; iz <
m_Mz; ++iz) {
172 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
173 int iv = ivxy + Nvxy * (iz +
m_Nz * it);
188 int isite =
m_arg[itask].isite;
189 double *w2 = &v2[Nvcd * isite];
191 for (
int it = 0; it <
m_Mt; ++it) {
192 for (
int iz = 0; iz <
m_Mz; ++iz) {
193 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
194 int iv = ivxy + Nvxy * (iz +
m_Nz * it);
204 int itask,
double *vcp1,
const double *v1)
213 int isite =
m_arg[itask].isite;
214 int isite_cp =
m_arg[itask].isite_cp_x;
216 const double *w1 = &v1[Nvcd * isite];
217 double *w2 = &vcp1[Nvcd * isite_cp];
224 for (
int it = 0; it <
m_Mt; ++it) {
225 for (
int iz = 0; iz <
m_Mz; ++iz) {
226 for (
int iy = 0; iy <
m_Ny; ++iy) {
227 int is = ix +
m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
228 int is2 = iy + m_Ny * (iz + m_Mz * it);
230 int ix1 = Nvcd * is2;
231 int ix2 = ix1 +
m_Nvc;
232 int ix3 = ix2 +
m_Nvc;
233 int ix4 = ix3 +
m_Nvc;
235 for (
int ic = 0; ic <
m_Nc; ++ic) {
237 int ic_i = 2 * ic + 1;
239 w2[ic_r + ix1] = bc2 * (
m_r_s * w1[ic_r + id1 + in] -
m_nu_s * w1[ic_i + id4 + in]);
240 w2[ic_i + ix1] = bc2 * (
m_r_s * w1[ic_i + id1 + in] +
m_nu_s * w1[ic_r + id4 + in]);
241 w2[ic_r + ix2] = bc2 * (
m_r_s * w1[ic_r + id2 + in] -
m_nu_s * w1[ic_i + id3 + in]);
242 w2[ic_i + ix2] = bc2 * (
m_r_s * w1[ic_i + id2 + in] +
m_nu_s * w1[ic_r + id3 + in]);
244 w2[ic_r + ix3] = bc2 * (
m_r_s * w1[ic_r + id3 + in] +
m_nu_s * w1[ic_i + id2 + in]);
245 w2[ic_i + ix3] = bc2 * (
m_r_s * w1[ic_i + id3 + in] -
m_nu_s * w1[ic_r + id2 + in]);
246 w2[ic_r + ix4] = bc2 * (
m_r_s * w1[ic_r + id4 + in] +
m_nu_s * w1[ic_i + id1 + in]);
247 w2[ic_i + ix4] = bc2 * (
m_r_s * w1[ic_i + id4 + in] -
m_nu_s * w1[ic_r + id1 + in]);
257 int itask,
double *v2,
const double *vcp2)
268 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
270 int isite =
m_arg[itask].isite;
271 int isite_cp =
m_arg[itask].isite_cp_x;
273 const double *w1 = &vcp2[Nvcd * isite_cp];
274 double *w2 = &v2[Nvcd * isite];
279 for (
int it = 0; it <
m_Mt; ++it) {
280 for (
int iz = 0; iz <
m_Mz; ++iz) {
281 for (
int iy = 0; iy <
m_Ny; ++iy) {
282 int is = ix +
m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
283 int is2 = iy + m_Ny * (iz + m_Mz * it);
286 int ix1 = Nvcd * is2;
287 int ix2 = ix1 +
m_Nvc;
288 int ix3 = ix2 +
m_Nvc;
289 int ix4 = ix3 +
m_Nvc;
291 for (
int ic = 0; ic <
m_Nc; ++ic) {
292 int ic2 = ic *
m_Nvc;
294 wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
295 wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
296 wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
297 wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
299 wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
300 wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
301 wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
302 wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
305 int ic_i = 2 * ic + 1;
307 w2[ic_r + id1 + iv] += wt1_r;
308 w2[ic_i + id1 + iv] += wt1_i;
309 w2[ic_r + id2 + iv] += wt2_r;
310 w2[ic_i + id2 + iv] += wt2_i;
312 w2[ic_r + id3 + iv] += wt3_r;
313 w2[ic_i + id3 + iv] += wt3_i;
314 w2[ic_r + id4 + iv] += wt4_r;
315 w2[ic_i + id4 + iv] += wt4_i;
325 int itask,
double *v2,
const double *v1)
337 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
339 int isite =
m_arg[itask].isite;
341 const double *w1 = &v1[Nvcd * isite];
342 double *w2 = &v2[Nvcd * isite];
345 for (
int it = 0; it <
m_Mt; ++it) {
346 for (
int iz = 0; iz <
m_Mz; ++iz) {
347 for (
int iy = 0; iy <
m_Ny; ++iy) {
348 for (
int ix = 0; ix <
m_Nx - 1; ++ix) {
349 int is = ix + m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
351 int in = Nvcd * (is + 1);
354 for (
int ic = 0; ic <
m_Nc; ++ic) {
356 int ic_i = 2 * ic + 1;
358 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] -
m_nu_s * w1[ic_i + id4 + in];
359 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] +
m_nu_s * w1[ic_r + id4 + in];
360 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] -
m_nu_s * w1[ic_i + id3 + in];
361 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] +
m_nu_s * w1[ic_r + id3 + in];
363 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] +
m_nu_s * w1[ic_i + id2 + in];
364 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] -
m_nu_s * w1[ic_r + id2 + in];
365 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] +
m_nu_s * w1[ic_i + id1 + in];
366 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] -
m_nu_s * w1[ic_r + id1 + in];
369 for (
int ic = 0; ic <
m_Nc; ++ic) {
370 int ic2 = ic *
m_Nvc;
372 wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
373 wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
374 wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
375 wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
377 wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
378 wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
379 wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
380 wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
383 int ic_i = 2 * ic + 1;
385 w2[ic_r + id1 + iv] += wt1_r;
386 w2[ic_i + id1 + iv] += wt1_i;
387 w2[ic_r + id2 + iv] += wt2_r;
388 w2[ic_i + id2 + iv] += wt2_i;
390 w2[ic_r + id3 + iv] += wt3_r;
391 w2[ic_i + id3 + iv] += wt3_i;
392 w2[ic_r + id4 + iv] += wt4_r;
393 w2[ic_i + id4 + iv] += wt4_i;
404 int itask,
double *vcp1,
const double *v1)
415 int isite =
m_arg[itask].isite;
416 int isite_cp =
m_arg[itask].isite_cp_x;
418 const double *w1 = &v1[Nvcd * isite];
419 double *w2 = &vcp1[Nvcd * isite_cp];
426 for (
int it = 0; it <
m_Mt; ++it) {
427 for (
int iz = 0; iz <
m_Mz; ++iz) {
428 for (
int iy = 0; iy <
m_Ny; ++iy) {
429 int is = ix +
m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
430 int is2 = iy + m_Ny * (iz + m_Mz * it);
433 int ix1 = Nvcd * is2;
434 int ix2 = ix1 +
m_Nvc;
435 int ix3 = ix2 +
m_Nvc;
436 int ix4 = ix3 +
m_Nvc;
438 for (
int ic = 0; ic <
m_Nc; ++ic) {
440 int ic_i = 2 * ic + 1;
442 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] +
m_nu_s * w1[ic_i + id4 + in];
443 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] -
m_nu_s * w1[ic_r + id4 + in];
444 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] +
m_nu_s * w1[ic_i + id3 + in];
445 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] -
m_nu_s * w1[ic_r + id3 + in];
447 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] -
m_nu_s * w1[ic_i + id2 + in];
448 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] +
m_nu_s * w1[ic_r + id2 + in];
449 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] -
m_nu_s * w1[ic_i + id1 + in];
450 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] +
m_nu_s * w1[ic_r + id1 + in];
453 for (
int ic = 0; ic <
m_Nc; ++ic) {
457 int ic_i = 2 * ic + 1;
459 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
460 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
461 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
462 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
464 w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
465 w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
466 w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
467 w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
477 int itask,
double *v2,
const double *vcp2)
491 int isite =
m_arg[itask].isite;
492 int isite_cp =
m_arg[itask].isite_cp_x;
494 const double *w1 = &vcp2[Nvcd * isite_cp];
495 double *w2 = &v2[Nvcd * isite];
499 for (
int it = 0; it <
m_Mt; ++it) {
500 for (
int iz = 0; iz <
m_Mz; ++iz) {
501 for (
int iy = 0; iy <
m_Ny; ++iy) {
502 int is = ix +
m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
503 int is2 = iy + m_Ny * (iz + m_Mz * it);
505 int ix1 = Nvcd * is2;
506 int ix2 = ix1 +
m_Nvc;
507 int ix3 = ix2 +
m_Nvc;
508 int ix4 = ix3 +
m_Nvc;
510 for (
int ic = 0; ic <
m_Nc; ++ic) {
512 int ic_i = 2 * ic + 1;
514 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
515 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
516 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
517 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
519 w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
520 w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
521 w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
522 w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
532 int itask,
double *v2,
const double *v1)
544 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
546 int isite =
m_arg[itask].isite;
548 const double *w1 = &v1[Nvcd * isite];
549 double *w2 = &v2[Nvcd * isite];
552 for (
int it = 0; it <
m_Mt; ++it) {
553 for (
int iz = 0; iz <
m_Mz; ++iz) {
554 for (
int iy = 0; iy <
m_Ny; ++iy) {
555 for (
int ix = 1; ix <
m_Nx; ++ix) {
556 int is = ix + m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
558 int in = Nvcd * (is - 1);
559 int ig =
m_Ndf * (is - 1);
561 for (
int ic = 0; ic <
m_Nc; ++ic) {
563 int ic_i = 2 * ic + 1;
565 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] +
m_nu_s * w1[ic_i + id4 + in];
566 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] -
m_nu_s * w1[ic_r + id4 + in];
567 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] +
m_nu_s * w1[ic_i + id3 + in];
568 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] -
m_nu_s * w1[ic_r + id3 + in];
570 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] -
m_nu_s * w1[ic_i + id2 + in];
571 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] +
m_nu_s * w1[ic_r + id2 + in];
572 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] -
m_nu_s * w1[ic_i + id1 + in];
573 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] +
m_nu_s * w1[ic_r + id1 + in];
576 for (
int ic = 0; ic <
m_Nc; ++ic) {
579 wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
580 wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
581 wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
582 wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
584 wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
585 wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
586 wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
587 wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
590 int ic_i = 2 * ic + 1;
592 w2[ic_r + id1 + iv] += wt1_r;
593 w2[ic_i + id1 + iv] += wt1_i;
594 w2[ic_r + id2 + iv] += wt2_r;
595 w2[ic_i + id2 + iv] += wt2_i;
597 w2[ic_r + id3 + iv] += wt3_r;
598 w2[ic_i + id3 + iv] += wt3_i;
599 w2[ic_r + id4 + iv] += wt4_r;
600 w2[ic_i + id4 + iv] += wt4_i;
611 int itask,
double *vcp1,
const double *v1)
620 int isite =
m_arg[itask].isite;
621 int isite_cp =
m_arg[itask].isite_cp_y;
623 const double *w1 = &v1[Nvcd * isite];
624 double *w2 = &vcp1[Nvcd * isite_cp];
631 for (
int it = 0; it <
m_Mt; ++it) {
632 for (
int iz = 0; iz <
m_Mz; ++iz) {
633 for (
int ix = 0; ix <
m_Nx; ++ix) {
634 int is = ix + m_Nx * (iy +
m_Ny * (iz +
m_Nz * it));
635 int is2 = ix + m_Nx * (iz + m_Mz * it);
637 int ix1 = Nvcd * is2;
638 int ix2 = ix1 +
m_Nvc;
639 int ix3 = ix2 +
m_Nvc;
640 int ix4 = ix3 +
m_Nvc;
642 for (
int ic = 0; ic <
m_Nc; ++ic) {
644 int ic_i = 2 * ic + 1;
646 w2[ic_r + ix1] = bc2 * (
m_r_s * w1[ic_r + id1 + in] +
m_nu_s * w1[ic_r + id4 + in]);
647 w2[ic_i + ix1] = bc2 * (
m_r_s * w1[ic_i + id1 + in] +
m_nu_s * w1[ic_i + id4 + in]);
648 w2[ic_r + ix2] = bc2 * (
m_r_s * w1[ic_r + id2 + in] -
m_nu_s * w1[ic_r + id3 + in]);
649 w2[ic_i + ix2] = bc2 * (
m_r_s * w1[ic_i + id2 + in] -
m_nu_s * w1[ic_i + id3 + in]);
651 w2[ic_r + ix3] = bc2 * (
m_r_s * w1[ic_r + id3 + in] -
m_nu_s * w1[ic_r + id2 + in]);
652 w2[ic_i + ix3] = bc2 * (
m_r_s * w1[ic_i + id3 + in] -
m_nu_s * w1[ic_i + id2 + in]);
653 w2[ic_r + ix4] = bc2 * (
m_r_s * w1[ic_r + id4 + in] +
m_nu_s * w1[ic_r + id1 + in]);
654 w2[ic_i + ix4] = bc2 * (
m_r_s * w1[ic_i + id4 + in] +
m_nu_s * w1[ic_i + id1 + in]);
664 int itask,
double *v2,
const double *vcp2)
675 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
677 int isite =
m_arg[itask].isite;
678 int isite_cp =
m_arg[itask].isite_cp_y;
680 const double *w1 = &vcp2[Nvcd * isite_cp];
681 double *w2 = &v2[Nvcd * isite];
686 for (
int it = 0; it <
m_Mt; ++it) {
687 for (
int iz = 0; iz <
m_Mz; ++iz) {
688 for (
int ix = 0; ix <
m_Nx; ++ix) {
689 int is = ix + m_Nx * (iy +
m_Ny * (iz +
m_Nz * it));
690 int is2 = ix + m_Nx * (iz + m_Mz * it);
693 int ix1 = Nvcd * is2;
694 int ix2 = ix1 +
m_Nvc;
695 int ix3 = ix2 +
m_Nvc;
696 int ix4 = ix3 +
m_Nvc;
698 for (
int ic = 0; ic <
m_Nc; ++ic) {
699 int ic2 = ic *
m_Nvc;
701 wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
702 wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
703 wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
704 wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
706 wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
707 wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
708 wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
709 wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
712 int ic_i = 2 * ic + 1;
714 w2[ic_r + id1 + iv] += wt1_r;
715 w2[ic_i + id1 + iv] += wt1_i;
716 w2[ic_r + id2 + iv] += wt2_r;
717 w2[ic_i + id2 + iv] += wt2_i;
719 w2[ic_r + id3 + iv] += wt3_r;
720 w2[ic_i + id3 + iv] += wt3_i;
721 w2[ic_r + id4 + iv] += wt4_r;
722 w2[ic_i + id4 + iv] += wt4_i;
732 int itask,
double *v2,
const double *v1)
744 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
746 int isite =
m_arg[itask].isite;
748 const double *w1 = &v1[Nvcd * isite];
749 double *w2 = &v2[Nvcd * isite];
752 for (
int it = 0; it <
m_Mt; ++it) {
753 for (
int iz = 0; iz <
m_Mz; ++iz) {
754 for (
int iy = 0; iy <
m_Ny - 1; ++iy) {
755 for (
int ix = 0; ix <
m_Nx; ++ix) {
756 int is = ix + m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
758 int in = Nvcd * (is +
m_Nx);
761 for (
int ic = 0; ic <
m_Nc; ++ic) {
763 int ic_i = 2 * ic + 1;
765 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] +
m_nu_s * w1[ic_r + id4 + in];
766 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] +
m_nu_s * w1[ic_i + id4 + in];
767 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] -
m_nu_s * w1[ic_r + id3 + in];
768 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] -
m_nu_s * w1[ic_i + id3 + in];
770 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] -
m_nu_s * w1[ic_r + id2 + in];
771 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] -
m_nu_s * w1[ic_i + id2 + in];
772 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] +
m_nu_s * w1[ic_r + id1 + in];
773 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] +
m_nu_s * w1[ic_i + id1 + in];
776 for (
int ic = 0; ic <
m_Nc; ++ic) {
777 int ic2 = ic *
m_Nvc;
779 wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
780 wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
781 wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
782 wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
784 wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
785 wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
786 wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
787 wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
790 int ic_i = 2 * ic + 1;
792 w2[ic_r + id1 + iv] += wt1_r;
793 w2[ic_i + id1 + iv] += wt1_i;
794 w2[ic_r + id2 + iv] += wt2_r;
795 w2[ic_i + id2 + iv] += wt2_i;
797 w2[ic_r + id3 + iv] += wt3_r;
798 w2[ic_i + id3 + iv] += wt3_i;
799 w2[ic_r + id4 + iv] += wt4_r;
800 w2[ic_i + id4 + iv] += wt4_i;
811 int itask,
double *vcp1,
const double *v1)
822 int isite =
m_arg[itask].isite;
823 int isite_cp =
m_arg[itask].isite_cp_y;
825 const double *w1 = &v1[Nvcd * isite];
826 double *w2 = &vcp1[Nvcd * isite_cp];
833 for (
int it = 0; it <
m_Mt; ++it) {
834 for (
int iz = 0; iz <
m_Mz; ++iz) {
835 for (
int ix = 0; ix <
m_Nx; ++ix) {
836 int is = ix + m_Nx * (iy +
m_Ny * (iz +
m_Nz * it));
837 int is2 = ix + m_Nx * (iz + m_Mz * it);
840 int ix1 = Nvcd * is2;
841 int ix2 = ix1 +
m_Nvc;
842 int ix3 = ix2 +
m_Nvc;
843 int ix4 = ix3 +
m_Nvc;
845 for (
int ic = 0; ic <
m_Nc; ++ic) {
847 int ic_i = 2 * ic + 1;
849 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] -
m_nu_s * w1[ic_r + id4 + in];
850 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] -
m_nu_s * w1[ic_i + id4 + in];
851 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] +
m_nu_s * w1[ic_r + id3 + in];
852 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] +
m_nu_s * w1[ic_i + id3 + in];
854 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] +
m_nu_s * w1[ic_r + id2 + in];
855 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] +
m_nu_s * w1[ic_i + id2 + in];
856 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] -
m_nu_s * w1[ic_r + id1 + in];
857 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] -
m_nu_s * w1[ic_i + id1 + in];
860 for (
int ic = 0; ic <
m_Nc; ++ic) {
864 int ic_i = 2 * ic + 1;
866 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
867 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
868 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
869 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
871 w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
872 w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
873 w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
874 w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
884 int itask,
double *v2,
const double *vcp2)
898 int isite =
m_arg[itask].isite;
899 int isite_cp =
m_arg[itask].isite_cp_y;
901 const double *w1 = &vcp2[Nvcd * isite_cp];
902 double *w2 = &v2[Nvcd * isite];
906 for (
int it = 0; it <
m_Mt; ++it) {
907 for (
int iz = 0; iz <
m_Mz; ++iz) {
908 for (
int ix = 0; ix <
m_Nx; ++ix) {
909 int is = ix + m_Nx * (iy +
m_Ny * (iz +
m_Nz * it));
910 int is2 = ix + m_Nx * (iz + m_Mz * it);
912 int ix1 = Nvcd * is2;
913 int ix2 = ix1 +
m_Nvc;
914 int ix3 = ix2 +
m_Nvc;
915 int ix4 = ix3 +
m_Nvc;
917 for (
int ic = 0; ic <
m_Nc; ++ic) {
919 int ic_i = 2 * ic + 1;
921 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
922 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
923 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
924 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
926 w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
927 w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
928 w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
929 w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
939 int itask,
double *v2,
const double *v1)
951 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
953 int isite =
m_arg[itask].isite;
955 const double *w1 = &v1[Nvcd * isite];
956 double *w2 = &v2[Nvcd * isite];
959 for (
int it = 0; it <
m_Mt; ++it) {
960 for (
int iz = 0; iz <
m_Mz; ++iz) {
961 for (
int iy = 1; iy <
m_Ny; ++iy) {
962 for (
int ix = 0; ix <
m_Nx; ++ix) {
963 int is = ix + m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
965 int in = Nvcd * (is -
m_Nx);
968 for (
int ic = 0; ic <
m_Nc; ++ic) {
970 int ic_i = 2 * ic + 1;
972 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] -
m_nu_s * w1[ic_r + id4 + in];
973 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] -
m_nu_s * w1[ic_i + id4 + in];
974 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] +
m_nu_s * w1[ic_r + id3 + in];
975 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] +
m_nu_s * w1[ic_i + id3 + in];
977 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] +
m_nu_s * w1[ic_r + id2 + in];
978 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] +
m_nu_s * w1[ic_i + id2 + in];
979 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] -
m_nu_s * w1[ic_r + id1 + in];
980 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] -
m_nu_s * w1[ic_i + id1 + in];
983 for (
int ic = 0; ic <
m_Nc; ++ic) {
986 wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
987 wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
988 wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
989 wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
991 wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
992 wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
993 wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
994 wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
997 int ic_i = 2 * ic + 1;
999 w2[ic_r + id1 + iv] += wt1_r;
1000 w2[ic_i + id1 + iv] += wt1_i;
1001 w2[ic_r + id2 + iv] += wt2_r;
1002 w2[ic_i + id2 + iv] += wt2_i;
1004 w2[ic_r + id3 + iv] += wt3_r;
1005 w2[ic_i + id3 + iv] += wt3_i;
1006 w2[ic_r + id4 + iv] += wt4_r;
1007 w2[ic_i + id4 + iv] += wt4_i;
1018 int itask,
double *vcp1,
const double *v1)
1024 int id3 =
m_Nvc * 2;
1025 int id4 =
m_Nvc * 3;
1027 int isite =
m_arg[itask].isite;
1028 int isite_cp =
m_arg[itask].isite_cp_z;
1030 const double *w1 = &v1[Nvcd * isite];
1031 double *w2 = &vcp1[Nvcd * isite_cp];
1036 if (
m_arg[itask].kz0 == 1) {
1039 for (
int it = 0; it <
m_Mt; ++it) {
1040 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1041 int is = ixy + Nxy * (iz +
m_Nz * it);
1042 int is2 = ixy + Nxy * it;
1045 int ix1 = Nvcd * is2;
1046 int ix2 = ix1 +
m_Nvc;
1047 int ix3 = ix2 +
m_Nvc;
1048 int ix4 = ix3 +
m_Nvc;
1050 for (
int ic = 0; ic <
m_Nc; ++ic) {
1052 int ic_i = 2 * ic + 1;
1054 w2[ic_r + ix1] = bc2 * (
m_r_s * w1[ic_r + id1 + in] -
m_nu_s * w1[ic_i + id3 + in]);
1055 w2[ic_i + ix1] = bc2 * (
m_r_s * w1[ic_i + id1 + in] +
m_nu_s * w1[ic_r + id3 + in]);
1056 w2[ic_r + ix2] = bc2 * (
m_r_s * w1[ic_r + id2 + in] +
m_nu_s * w1[ic_i + id4 + in]);
1057 w2[ic_i + ix2] = bc2 * (
m_r_s * w1[ic_i + id2 + in] -
m_nu_s * w1[ic_r + id4 + in]);
1059 w2[ic_r + ix3] = bc2 * (
m_r_s * w1[ic_r + id3 + in] +
m_nu_s * w1[ic_i + id1 + in]);
1060 w2[ic_i + ix3] = bc2 * (
m_r_s * w1[ic_i + id3 + in] -
m_nu_s * w1[ic_r + id1 + in]);
1061 w2[ic_r + ix4] = bc2 * (
m_r_s * w1[ic_r + id4 + in] -
m_nu_s * w1[ic_i + id2 + in]);
1062 w2[ic_i + ix4] = bc2 * (
m_r_s * w1[ic_i + id4 + in] +
m_nu_s * w1[ic_r + id2 + in]);
1072 int itask,
double *v2,
const double *vcp2)
1078 int id3 =
m_Nvc * 2;
1079 int id4 =
m_Nvc * 3;
1083 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
1085 int isite =
m_arg[itask].isite;
1086 int isite_cp =
m_arg[itask].isite_cp_z;
1088 const double *w1 = &vcp2[Nvcd * isite_cp];
1089 double *w2 = &v2[Nvcd * isite];
1092 if (
m_arg[itask].kz1 == 1) {
1095 for (
int it = 0; it <
m_Mt; ++it) {
1096 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1097 int is = ixy + Nxy * (iz +
m_Nz * it);
1098 int is2 = ixy + Nxy * it;
1100 int ig =
m_Ndf * is;
1101 int ix1 = Nvcd * is2;
1102 int ix2 = ix1 +
m_Nvc;
1103 int ix3 = ix2 +
m_Nvc;
1104 int ix4 = ix3 +
m_Nvc;
1106 for (
int ic = 0; ic <
m_Nc; ++ic) {
1107 int ic2 = ic *
m_Nvc;
1109 wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1110 wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1111 wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1112 wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1114 wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
1115 wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
1116 wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
1117 wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
1120 int ic_i = 2 * ic + 1;
1122 w2[ic_r + id1 + iv] += wt1_r;
1123 w2[ic_i + id1 + iv] += wt1_i;
1124 w2[ic_r + id2 + iv] += wt2_r;
1125 w2[ic_i + id2 + iv] += wt2_i;
1127 w2[ic_r + id3 + iv] += wt3_r;
1128 w2[ic_i + id3 + iv] += wt3_i;
1129 w2[ic_r + id4 + iv] += wt4_r;
1130 w2[ic_i + id4 + iv] += wt4_i;
1140 int itask,
double *v2,
const double *v1)
1146 int id3 =
m_Nvc * 2;
1147 int id4 =
m_Nvc * 3;
1152 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
1154 int isite =
m_arg[itask].isite;
1156 const double *w1 = &v1[Nvcd * isite];
1157 double *w2 = &v2[Nvcd * isite];
1160 int kz1 =
m_arg[itask].kz1;
1163 for (
int it = 0; it <
m_Mt; ++it) {
1164 for (
int iz = 0; iz <
m_Mz - kz1; ++iz) {
1165 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1166 int is = ixy + Nxy * (iz +
m_Nz * it);
1168 int in = Nvcd * (is + Nxy);
1169 int ig =
m_Ndf * is;
1171 for (
int ic = 0; ic <
m_Nc; ++ic) {
1173 int ic_i = 2 * ic + 1;
1175 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] -
m_nu_s * w1[ic_i + id3 + in];
1176 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] +
m_nu_s * w1[ic_r + id3 + in];
1177 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] +
m_nu_s * w1[ic_i + id4 + in];
1178 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] -
m_nu_s * w1[ic_r + id4 + in];
1180 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] +
m_nu_s * w1[ic_i + id1 + in];
1181 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] -
m_nu_s * w1[ic_r + id1 + in];
1182 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] -
m_nu_s * w1[ic_i + id2 + in];
1183 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] +
m_nu_s * w1[ic_r + id2 + in];
1186 for (
int ic = 0; ic <
m_Nc; ++ic) {
1187 int ic2 = ic *
m_Nvc;
1189 wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1190 wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1191 wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1192 wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1194 wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
1195 wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
1196 wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
1197 wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
1200 int ic_i = 2 * ic + 1;
1202 w2[ic_r + id1 + iv] += wt1_r;
1203 w2[ic_i + id1 + iv] += wt1_i;
1204 w2[ic_r + id2 + iv] += wt2_r;
1205 w2[ic_i + id2 + iv] += wt2_i;
1207 w2[ic_r + id3 + iv] += wt3_r;
1208 w2[ic_i + id3 + iv] += wt3_i;
1209 w2[ic_r + id4 + iv] += wt4_r;
1210 w2[ic_i + id4 + iv] += wt4_i;
1220 int itask,
double *vcp1,
const double *v1)
1226 int id3 =
m_Nvc * 2;
1227 int id4 =
m_Nvc * 3;
1231 int isite =
m_arg[itask].isite;
1232 int isite_cp =
m_arg[itask].isite_cp_z;
1234 const double *w1 = &v1[Nvcd * isite];
1235 double *w2 = &vcp1[Nvcd * isite_cp];
1240 if (
m_arg[itask].kz1 == 1) {
1243 for (
int it = 0; it <
m_Mt; ++it) {
1244 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1245 int is = ixy + Nxy * (iz +
m_Nz * it);
1246 int is2 = ixy + Nxy * it;
1248 int ig =
m_Ndf * is;
1249 int ix1 = Nvcd * is2;
1250 int ix2 = ix1 +
m_Nvc;
1251 int ix3 = ix2 +
m_Nvc;
1252 int ix4 = ix3 +
m_Nvc;
1254 for (
int ic = 0; ic <
m_Nc; ++ic) {
1256 int ic_i = 2 * ic + 1;
1258 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] +
m_nu_s * w1[ic_i + id3 + in];
1259 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] -
m_nu_s * w1[ic_r + id3 + in];
1260 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] -
m_nu_s * w1[ic_i + id4 + in];
1261 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] +
m_nu_s * w1[ic_r + id4 + in];
1263 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] -
m_nu_s * w1[ic_i + id1 + in];
1264 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] +
m_nu_s * w1[ic_r + id1 + in];
1265 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] +
m_nu_s * w1[ic_i + id2 + in];
1266 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] -
m_nu_s * w1[ic_r + id2 + in];
1269 for (
int ic = 0; ic <
m_Nc; ++ic) {
1273 int ic_i = 2 * ic + 1;
1275 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1276 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1277 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1278 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1280 w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
1281 w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
1282 w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
1283 w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
1293 int itask,
double *v2,
const double *vcp2)
1299 int id3 =
m_Nvc * 2;
1300 int id4 =
m_Nvc * 3;
1307 int isite =
m_arg[itask].isite;
1308 int isite_cp =
m_arg[itask].isite_cp_z;
1310 const double *w1 = &vcp2[Nvcd * isite_cp];
1311 double *w2 = &v2[Nvcd * isite];
1313 if (
m_arg[itask].kz0 == 1) {
1317 for (
int it = 0; it <
m_Mt; ++it) {
1318 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1319 int is = ixy + Nxy * (iz +
m_Nz * it);
1320 int is2 = ixy + Nxy * it;
1322 int ix1 = Nvcd * is2;
1323 int ix2 = ix1 +
m_Nvc;
1324 int ix3 = ix2 +
m_Nvc;
1325 int ix4 = ix3 +
m_Nvc;
1327 for (
int ic = 0; ic <
m_Nc; ++ic) {
1329 int ic_i = 2 * ic + 1;
1331 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1332 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1333 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1334 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1336 w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
1337 w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
1338 w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
1339 w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
1349 int itask,
double *v2,
const double *v1)
1355 int id3 =
m_Nvc * 2;
1356 int id4 =
m_Nvc * 3;
1361 double wt1_r, wt1_i, wt2_r, wt2_i, wt3_r, wt3_i, wt4_r, wt4_i;
1363 int isite =
m_arg[itask].isite;
1365 const double *w1 = &v1[Nvcd * isite];
1366 double *w2 = &v2[Nvcd * isite];
1369 int kz0 =
m_arg[itask].kz0;
1372 for (
int it = 0; it <
m_Mt; ++it) {
1373 for (
int iz = kz0; iz <
m_Mz; ++iz) {
1374 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1375 int is = ixy + Nxy * (iz +
m_Nz * it);
1377 int in = Nvcd * (is - Nxy);
1378 int ig =
m_Ndf * (is - Nxy);
1380 for (
int ic = 0; ic <
m_Nc; ++ic) {
1382 int ic_i = 2 * ic + 1;
1384 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] +
m_nu_s * w1[ic_i + id3 + in];
1385 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] -
m_nu_s * w1[ic_r + id3 + in];
1386 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] -
m_nu_s * w1[ic_i + id4 + in];
1387 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] +
m_nu_s * w1[ic_r + id4 + in];
1389 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] -
m_nu_s * w1[ic_i + id1 + in];
1390 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] +
m_nu_s * w1[ic_r + id1 + in];
1391 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] +
m_nu_s * w1[ic_i + id2 + in];
1392 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] -
m_nu_s * w1[ic_r + id2 + in];
1395 for (
int ic = 0; ic <
m_Nc; ++ic) {
1398 wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1399 wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1400 wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1401 wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1403 wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
1404 wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
1405 wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
1406 wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
1409 int ic_i = 2 * ic + 1;
1411 w2[ic_r + id1 + iv] += wt1_r;
1412 w2[ic_i + id1 + iv] += wt1_i;
1413 w2[ic_r + id2 + iv] += wt2_r;
1414 w2[ic_i + id2 + iv] += wt2_i;
1416 w2[ic_r + id3 + iv] += wt3_r;
1417 w2[ic_i + id3 + iv] += wt3_i;
1418 w2[ic_r + id4 + iv] += wt4_r;
1419 w2[ic_i + id4 + iv] += wt4_i;
1429 int itask,
double *vcp1,
const double *v1)
1431 int Nvc2 = 2 *
m_Nvc;
1433 int Nvcd2 = Nvcd / 2;
1437 int id3 =
m_Nvc * 2;
1438 int id4 =
m_Nvc * 3;
1440 int isite =
m_arg[itask].isite;
1441 int isite_cp =
m_arg[itask].isite_cp_t;
1443 const double *w1 = &v1[Nvcd * isite];
1444 double *w2 = &vcp1[Nvcd2 * isite_cp];
1449 if (
m_arg[itask].kt0 == 1) {
1452 for (
int iz = 0; iz <
m_Mz; ++iz) {
1453 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1454 int is = ixy + Nxy * (iz +
m_Nz * it);
1455 int is2 = ixy + Nxy * iz;
1458 int ix1 = Nvc2 * is2;
1459 int ix2 = ix1 +
m_Nvc;
1461 for (
int ic = 0; ic <
m_Nc; ++ic) {
1463 int ic_i = 2 * ic + 1;
1465 w2[ic_r + ix1] = 2.0 * bc2 * w1[ic_r + id3 + in];
1466 w2[ic_i + ix1] = 2.0 * bc2 * w1[ic_i + id3 + in];
1467 w2[ic_r + ix2] = 2.0 * bc2 * w1[ic_r + id4 + in];
1468 w2[ic_i + ix2] = 2.0 * bc2 * w1[ic_i + id4 + in];
// Boundary "unpack" kernel for the t-direction: for the task `itask`, applies
// mult_uv_r / mult_uv_i with the array u to the buffered data in vcp2 and
// accumulates the result into the id3 and id4 blocks of v2.
// NOTE(review): the function-name line is not part of this listing; likely
// mult_t_plus2_dirac_thread -- confirm against the full file.
// NOTE(review): `u`, `it`, `iv` and `ic_r` are declared on lines not visible
// here; u is presumably the gauge-field data -- confirm.
1478 int itask,
double *v2,
const double *vcp2)
1480 int Nvc2 = 2 *
m_Nvc;
1482 int Nvcd2 = Nvcd / 2;
1486 int id3 =
m_Nvc * 2;
1487 int id4 =
m_Nvc * 3;
1491 double wt1_r, wt1_i, wt2_r, wt2_i;
1493 int isite =
m_arg[itask].isite;
1494 int isite_cp =
m_arg[itask].isite_cp_t;
// w1 reads from the buffer, w2 writes into the output field.
1496 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1497 double *w2 = &v2[Nvcd * isite];
// Only tasks flagged kt1 == 1 do any work (the setup code sets kt1 for
// ithread_t == m_Ntask_t - 1).
1500 if (
m_arg[itask].kt1 == 1) {
1503 for (
int iz = 0; iz <
m_Mz; ++iz) {
1504 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1505 int is = ixy + Nxy * (iz +
m_Nz * it);
1506 int is2 = ixy + Nxy * iz;
// ig: offset of this site's data in u (m_Ndf entries per site).
1508 int ig =
m_Ndf * is;
1509 int ix1 = Nvc2 * is2;
1510 int ix2 = ix1 +
m_Nvc;
1512 for (
int ic = 0; ic <
m_Nc; ++ic) {
1513 int ic2 = ic *
m_Nvc;
1515 wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1516 wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1517 wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1518 wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1521 int ic_i = 2 * ic + 1;
// Accumulate (not overwrite) into the id3 / id4 blocks of the output.
1523 w2[ic_r + id3 + iv] += wt1_r;
1524 w2[ic_i + id3 + iv] += wt1_i;
1525 w2[ic_r + id4 + iv] += wt2_r;
1526 w2[ic_i + id4 + iv] += wt2_i;
// Bulk kernel for the forward t-direction: for every site of the task's region
// whose t-neighbour (+1) lies inside the region, reads the id3 / id4 blocks of
// v1 at the neighbour, applies mult_uv_r / mult_uv_i with u at the current
// site, and accumulates into the id3 / id4 blocks of v2.
// NOTE(review): the function-name line is not part of this listing; likely
// mult_t_plus_bulk_dirac_thread -- confirm against the full file.
// NOTE(review): `u`, `vt1`, `vt2`, `iv` and `ic_r` are declared on lines not
// visible here.
1536 int itask,
double *v2,
const double *v1)
1542 int id3 =
m_Nvc * 2;
1543 int id4 =
m_Nvc * 3;
1548 double wt1_r, wt1_i, wt2_r, wt2_i;
1550 int isite =
m_arg[itask].isite;
1552 const double *w1 = &v1[Nvcd * isite];
1553 double *w2 = &v2[Nvcd * isite];
1556 int kt1 =
m_arg[itask].kt1;
// Number of sites in one t-slice of the field (stride of one step in t).
1558 int Nxyz = Nxy *
m_Nz;
// When kt1 == 1 the last t-slice of the task is left out of this loop.
1560 for (
int it = 0; it <
m_Mt - kt1; ++it) {
1561 for (
int iz = 0; iz <
m_Mz; ++iz) {
1562 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1563 int is = ixy + Nxy * (iz + m_Nz * it);
// in: offset of the site one step forward in t; ig: offset into u at `is`.
1565 int in = Nvcd * (is + Nxyz);
1566 int ig =
m_Ndf * is;
1568 for (
int ic = 0; ic <
m_Nc; ++ic) {
1570 int ic_i = 2 * ic + 1;
// Gather twice the id3 / id4 blocks of the neighbour into vt1 / vt2.
1572 vt1[ic_r] = 2.0 * w1[ic_r + id3 + in];
1573 vt1[ic_i] = 2.0 * w1[ic_i + id3 + in];
1574 vt2[ic_r] = 2.0 * w1[ic_r + id4 + in];
1575 vt2[ic_i] = 2.0 * w1[ic_i + id4 + in];
1578 for (
int ic = 0; ic <
m_Nc; ++ic) {
1579 int ic2 = ic *
m_Nvc;
1581 wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1582 wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1583 wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1584 wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1587 int ic_i = 2 * ic + 1;
1589 w2[ic_r + id3 + iv] += wt1_r;
1590 w2[ic_i + id3 + iv] += wt1_i;
1591 w2[ic_r + id4 + iv] += wt2_r;
1592 w2[ic_i + id4 + iv] += wt2_i;
// Boundary "pack" kernel for the backward t-direction: for the task `itask`,
// takes twice the id1 / id2 blocks of v1, applies mult_udagv_r / mult_udagv_i
// with u, and stores the result in the buffer vcp1.
// NOTE(review): the function-name line is not part of this listing; likely
// mult_t_minus1_dirac_thread -- confirm against the full file.
// NOTE(review): `u`, `vt1`, `vt2`, `id1`, `id2`, `it`, `in`, `ic_r` and `ic2`
// are declared on lines not visible here.
1602 int itask,
double *vcp1,
const double *v1)
1604 int Nvc2 = 2 *
m_Nvc;
1606 int Nvcd2 = Nvcd / 2;
1615 int isite =
m_arg[itask].isite;
1616 int isite_cp =
m_arg[itask].isite_cp_t;
1618 const double *w1 = &v1[Nvcd * isite];
1619 double *w2 = &vcp1[Nvcd2 * isite_cp];
// Only tasks flagged kt1 == 1 do any work.
1624 if (
m_arg[itask].kt1 == 1) {
1627 for (
int iz = 0; iz <
m_Mz; ++iz) {
1628 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1629 int is = ixy + Nxy * (iz +
m_Nz * it);
1630 int is2 = ixy + Nxy * iz;
1632 int ig =
m_Ndf * is;
// Buffer layout: Nvc2 doubles per (ixy, iz) site, block ix1 followed by ix2.
1633 int ix1 = Nvc2 * is2;
1634 int ix2 = ix1 +
m_Nvc;
1636 for (
int ic = 0; ic <
m_Nc; ++ic) {
1638 int ic_i = 2 * ic + 1;
1640 vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1641 vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1642 vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1643 vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1646 for (
int ic = 0; ic <
m_Nc; ++ic) {
1650 int ic_i = 2 * ic + 1;
// The buffer entries are overwritten (assignment, not accumulation).
1652 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1653 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1654 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1655 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// Boundary "unpack" kernel for the backward t-direction: for the task `itask`,
// adds bc2 times the buffered data in vcp2 to the id1 / id2 blocks of v2.
// No multiplication by u happens in the visible lines.
// NOTE(review): the function-name line is not part of this listing; likely
// mult_t_minus2_dirac_thread -- confirm against the full file.
// NOTE(review): `id1`, `id2`, `bc2`, `it`, `iv` and `ic_r` are declared on
// lines not visible here.
1665 int itask,
double *v2,
const double *vcp2)
1667 int Nvc2 = 2 *
m_Nvc;
1669 int Nvcd2 = Nvcd / 2;
1681 int isite =
m_arg[itask].isite;
1682 int isite_cp =
m_arg[itask].isite_cp_t;
1684 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1685 double *w2 = &v2[Nvcd * isite];
// Only tasks flagged kt0 == 1 do any work.
1687 if (
m_arg[itask].kt0 == 1) {
1690 for (
int iz = 0; iz <
m_Mz; ++iz) {
1691 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1692 int is = ixy + Nxy * (iz +
m_Nz * it);
1693 int is2 = ixy + Nxy * iz;
1695 int ix1 = Nvc2 * is2;
1696 int ix2 = ix1 +
m_Nvc;
1698 for (
int ic = 0; ic <
m_Nc; ++ic) {
1700 int ic_i = 2 * ic + 1;
1702 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1703 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1704 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1705 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
// Bulk kernel for the backward t-direction: for every site of the task's
// region whose t-neighbour (-1) lies inside the region, reads the id1 / id2
// blocks of v1 at the neighbour, applies mult_udagv_r / mult_udagv_i with u
// taken at the neighbour site, and accumulates into the id1 / id2 blocks of v2.
// NOTE(review): the function-name line is not part of this listing; likely
// mult_t_minus_bulk_dirac_thread -- confirm against the full file.
// NOTE(review): `u`, `vt1`, `vt2`, `id1`, `id2`, `iv`, `ic_r` and `ic2` are
// declared on lines not visible here.
1715 int itask,
double *v2,
const double *v1)
1727 double wt1_r, wt1_i, wt2_r, wt2_i;
1729 int isite =
m_arg[itask].isite;
1731 const double *w1 = &v1[Nvcd * isite];
1732 double *w2 = &v2[Nvcd * isite];
1735 int kt0 =
m_arg[itask].kt0;
// Number of sites in one t-slice of the field (stride of one step in t).
1737 int Nxyz = Nxy *
m_Nz;
// When kt0 == 1 the first t-slice of the task is left out of this loop.
1739 for (
int it = kt0; it <
m_Mt; ++it) {
1740 for (
int iz = 0; iz <
m_Mz; ++iz) {
1741 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1742 int is = ixy + Nxy * (iz + m_Nz * it);
// Both the field offset and the u offset refer to the site one step back in t.
1744 int in = Nvcd * (is - Nxyz);
1745 int ig =
m_Ndf * (is - Nxyz);
1747 for (
int ic = 0; ic <
m_Nc; ++ic) {
1749 int ic_i = 2 * ic + 1;
1751 vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1752 vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1753 vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1754 vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1757 for (
int ic = 0; ic <
m_Nc; ++ic) {
1760 wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1761 wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1762 wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1763 wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1766 int ic_i = 2 * ic + 1;
1768 w2[ic_r + id1 + iv] += wt1_r;
1769 w2[ic_i + id1 + iv] += wt1_i;
1770 w2[ic_r + id2 + iv] += wt2_r;
1771 w2[ic_i + id2 + iv] += wt2_i;
// Boundary "pack" kernel for the t-direction: for the task `itask`, stores
// bc2 * (block id1 + block id3) and bc2 * (block id2 + block id4) of v1 into
// the buffer vcp1.
// NOTE(review): the function-name line is not part of this listing; likely
// mult_t_plus1_chiral_thread -- confirm against the full file.
// NOTE(review): `id1`, `id2`, `bc2`, `it`, `in` and `ic_r` are declared on
// lines not visible here.
1781 int itask,
double *vcp1,
const double *v1)
1783 int Nvc2 = 2 *
m_Nvc;
1785 int Nvcd2 = Nvcd / 2;
1789 int id3 =
m_Nvc * 2;
1790 int id4 =
m_Nvc * 3;
1792 int isite =
m_arg[itask].isite;
1793 int isite_cp =
m_arg[itask].isite_cp_t;
1795 const double *w1 = &v1[Nvcd * isite];
1796 double *w2 = &vcp1[Nvcd2 * isite_cp];
// Only tasks flagged kt0 == 1 do any work.
1801 if (
m_arg[itask].kt0 == 1) {
1804 for (
int iz = 0; iz <
m_Mz; ++iz) {
1805 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1806 int is = ixy + Nxy * (iz +
m_Nz * it);
1807 int is2 = ixy + Nxy * iz;
// Buffer layout: Nvc2 doubles per (ixy, iz) site, block ix1 followed by ix2.
1810 int ix1 = Nvc2 * is2;
1811 int ix2 = ix1 +
m_Nvc;
1813 for (
int ic = 0; ic <
m_Nc; ++ic) {
1815 int ic_i = 2 * ic + 1;
1817 w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id3 + in]);
1818 w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id3 + in]);
1819 w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_r + id4 + in]);
1820 w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_i + id4 + in]);
// Boundary "unpack" kernel for the t-direction: for the task `itask`, applies
// mult_uv_r / mult_uv_i with u to the buffered data in vcp2 and adds the same
// result to both the (id1, id2) and the (id3, id4) blocks of v2.
// NOTE(review): the function-name line is not part of this listing; likely
// mult_t_plus2_chiral_thread -- confirm against the full file.
// NOTE(review): `u`, `id1`, `id2`, `it`, `iv` and `ic_r` are declared on lines
// not visible here.
1830 int itask,
double *v2,
const double *vcp2)
1832 int Nvc2 = 2 *
m_Nvc;
1834 int Nvcd2 = Nvcd / 2;
1838 int id3 =
m_Nvc * 2;
1839 int id4 =
m_Nvc * 3;
1843 double wt1_r, wt1_i, wt2_r, wt2_i;
1845 int isite =
m_arg[itask].isite;
1846 int isite_cp =
m_arg[itask].isite_cp_t;
1849 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1850 double *w2 = &v2[Nvcd * isite];
// Only tasks flagged kt1 == 1 do any work.
1853 if (
m_arg[itask].kt1 == 1) {
1856 for (
int iz = 0; iz <
m_Mz; ++iz) {
1857 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1858 int is = ixy + Nxy * (iz +
m_Nz * it);
1859 int is2 = ixy + Nxy * iz;
1861 int ig =
m_Ndf * is;
1862 int ix1 = Nvc2 * is2;
1863 int ix2 = ix1 +
m_Nvc;
1865 for (
int ic = 0; ic <
m_Nc; ++ic) {
1866 int ic2 = ic *
m_Nvc;
1868 wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1869 wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1870 wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1871 wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1874 int ic_i = 2 * ic + 1;
// wt1 goes to blocks id1 and id3, wt2 to blocks id2 and id4.
1876 w2[ic_r + id1 + iv] += wt1_r;
1877 w2[ic_i + id1 + iv] += wt1_i;
1878 w2[ic_r + id2 + iv] += wt2_r;
1879 w2[ic_i + id2 + iv] += wt2_i;
1881 w2[ic_r + id3 + iv] += wt1_r;
1882 w2[ic_i + id3 + iv] += wt1_i;
1883 w2[ic_r + id4 + iv] += wt2_r;
1884 w2[ic_i + id4 + iv] += wt2_i;
// Bulk kernel for the forward t-direction: for every site of the task's region
// whose t-neighbour (+1) lies inside the region, forms (id1 + id3) and
// (id2 + id4) of v1 at the neighbour, applies mult_uv_r / mult_uv_i with u at
// the current site, and adds the result to all four blocks of v2.
// NOTE(review): the function-name line is not part of this listing; likely
// mult_t_plus_bulk_chiral_thread -- confirm against the full file.
// NOTE(review): `u`, `vt1`, `vt2`, `id1`, `id2`, `iv` and `ic_r` are declared
// on lines not visible here.
1894 int itask,
double *v2,
const double *v1)
1900 int id3 =
m_Nvc * 2;
1901 int id4 =
m_Nvc * 3;
1906 double wt1_r, wt1_i, wt2_r, wt2_i;
1908 int isite =
m_arg[itask].isite;
1910 const double *w1 = &v1[Nvcd * isite];
1911 double *w2 = &v2[Nvcd * isite];
1914 int kt1 =
m_arg[itask].kt1;
// Number of sites in one t-slice of the field (stride of one step in t).
1916 int Nxyz = Nxy *
m_Nz;
// When kt1 == 1 the last t-slice of the task is left out of this loop.
1918 for (
int it = 0; it <
m_Mt - kt1; ++it) {
1919 for (
int iz = 0; iz <
m_Mz; ++iz) {
1920 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1921 int is = ixy + Nxy * (iz + m_Nz * it);
// in: offset of the site one step forward in t; ig: offset into u at `is`.
1923 int in = Nvcd * (is + Nxyz);
1924 int ig =
m_Ndf * is;
1926 for (
int ic = 0; ic <
m_Nc; ++ic) {
1928 int ic_i = 2 * ic + 1;
1930 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id3 + in];
1931 vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id3 + in];
1932 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id4 + in];
1933 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id4 + in];
1936 for (
int ic = 0; ic <
m_Nc; ++ic) {
1937 int ic2 = ic *
m_Nvc;
1939 wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1940 wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1941 wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1942 wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1945 int ic_i = 2 * ic + 1;
1947 w2[ic_r + id1 + iv] += wt1_r;
1948 w2[ic_i + id1 + iv] += wt1_i;
1949 w2[ic_r + id2 + iv] += wt2_r;
1950 w2[ic_i + id2 + iv] += wt2_i;
1952 w2[ic_r + id3 + iv] += wt1_r;
1953 w2[ic_i + id3 + iv] += wt1_i;
1954 w2[ic_r + id4 + iv] += wt2_r;
1955 w2[ic_i + id4 + iv] += wt2_i;
// Boundary "pack" kernel for the backward t-direction: for the task `itask`,
// forms (id1 - id3) and (id2 - id4) of v1, applies mult_udagv_r /
// mult_udagv_i with u, and stores the result in the buffer vcp1.
// NOTE(review): the function-name line is not part of this listing; likely
// mult_t_minus1_chiral_thread -- confirm against the full file.
// NOTE(review): `u`, `vt1`, `vt2`, `id1`, `id2`, `it`, `in`, `ic_r` and `ic2`
// are declared on lines not visible here.
1965 int itask,
double *vcp1,
const double *v1)
1967 int Nvc2 = 2 *
m_Nvc;
1969 int Nvcd2 = Nvcd / 2;
1973 int id3 =
m_Nvc * 2;
1974 int id4 =
m_Nvc * 3;
1978 int isite =
m_arg[itask].isite;
1979 int isite_cp =
m_arg[itask].isite_cp_t;
1982 const double *w1 = &v1[Nvcd * isite];
1983 double *w2 = &vcp1[Nvcd2 * isite_cp];
// Only tasks flagged kt1 == 1 do any work.
1988 if (
m_arg[itask].kt1 == 1) {
1991 for (
int iz = 0; iz <
m_Mz; ++iz) {
1992 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1993 int is = ixy + Nxy * (iz +
m_Nz * it);
1994 int is2 = ixy + Nxy * iz;
1996 int ig =
m_Ndf * is;
1997 int ix1 = Nvc2 * is2;
1998 int ix2 = ix1 +
m_Nvc;
2000 for (
int ic = 0; ic <
m_Nc; ++ic) {
2002 int ic_i = 2 * ic + 1;
2004 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
2005 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
2006 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
2007 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
2010 for (
int ic = 0; ic <
m_Nc; ++ic) {
2014 int ic_i = 2 * ic + 1;
// The buffer entries are overwritten (assignment, not accumulation).
2016 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
2017 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
2018 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2019 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// Boundary "unpack" kernel for the backward t-direction: for the task `itask`,
// adds bc2 times the buffered data in vcp2 to the id1 / id2 blocks of v2 and
// subtracts the same quantity from the id3 / id4 blocks.
// NOTE(review): the function-name line is not part of this listing; likely
// mult_t_minus2_chiral_thread -- confirm against the full file.
// NOTE(review): `id1`, `id2`, `bc2`, `it`, `iv` and `ic_r` are declared on
// lines not visible here.
2029 int itask,
double *v2,
const double *vcp2)
2031 int Nvc2 = 2 *
m_Nvc;
2033 int Nvcd2 = Nvcd / 2;
2037 int id3 =
m_Nvc * 2;
2038 int id4 =
m_Nvc * 3;
2045 int isite =
m_arg[itask].isite;
2046 int isite_cp =
m_arg[itask].isite_cp_t;
2048 const double *w1 = &vcp2[Nvcd2 * isite_cp];
2049 double *w2 = &v2[Nvcd * isite];
// Only tasks flagged kt0 == 1 do any work.
2051 if (
m_arg[itask].kt0 == 1) {
2054 for (
int iz = 0; iz <
m_Mz; ++iz) {
2055 for (
int ixy = 0; ixy < Nxy; ++ixy) {
2056 int is = ixy + Nxy * (iz +
m_Nz * it);
2057 int is2 = ixy + Nxy * iz;
2059 int ix1 = Nvc2 * is2;
2060 int ix2 = ix1 +
m_Nvc;
2062 for (
int ic = 0; ic <
m_Nc; ++ic) {
2064 int ic_i = 2 * ic + 1;
2066 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
2067 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
2068 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
2069 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
// Same buffered values, opposite sign, for the id3 / id4 blocks.
2071 w2[ic_r + id3 + iv] -= bc2 * w1[ic_r + ix1];
2072 w2[ic_i + id3 + iv] -= bc2 * w1[ic_i + ix1];
2073 w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix2];
2074 w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix2];
// Bulk kernel for the backward t-direction: for every site of the task's
// region whose t-neighbour (-1) lies inside the region, forms (id1 - id3) and
// (id2 - id4) of v1 at the neighbour, applies mult_udagv_r / mult_udagv_i with
// u taken at the neighbour site, then adds the result to the id1 / id2 blocks
// of v2 and subtracts it from the id3 / id4 blocks.
// NOTE(review): the function-name line is not part of this listing; likely
// mult_t_minus_bulk_chiral_thread -- confirm against the full file.
// NOTE(review): `u`, `vt1`, `vt2`, `id1`, `id2`, `iv`, `ic_r` and `ic2` are
// declared on lines not visible here.
2084 int itask,
double *v2,
const double *v1)
2090 int id3 =
m_Nvc * 2;
2091 int id4 =
m_Nvc * 3;
2096 double wt1_r, wt1_i, wt2_r, wt2_i;
2098 int isite =
m_arg[itask].isite;
2100 const double *w1 = &v1[Nvcd * isite];
2101 double *w2 = &v2[Nvcd * isite];
2104 int kt0 =
m_arg[itask].kt0;
// Number of sites in one t-slice of the field (stride of one step in t).
2106 int Nxyz = Nxy *
m_Nz;
// When kt0 == 1 the first t-slice of the task is left out of this loop.
2108 for (
int it = kt0; it <
m_Mt; ++it) {
2109 for (
int iz = 0; iz <
m_Mz; ++iz) {
2110 for (
int ixy = 0; ixy < Nxy; ++ixy) {
2111 int is = ixy + Nxy * (iz + m_Nz * it);
// Both the field offset and the u offset refer to the site one step back in t.
2113 int in = Nvcd * (is - Nxyz);
2114 int ig =
m_Ndf * (is - Nxyz);
2116 for (
int ic = 0; ic <
m_Nc; ++ic) {
2118 int ic_i = 2 * ic + 1;
2120 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
2121 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
2122 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
2123 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
2126 for (
int ic = 0; ic <
m_Nc; ++ic) {
2129 wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
2130 wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
2131 wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2132 wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2135 int ic_i = 2 * ic + 1;
2137 w2[ic_r + id1 + iv] += wt1_r;
2138 w2[ic_i + id1 + iv] += wt1_i;
2139 w2[ic_r + id2 + iv] += wt2_r;
2140 w2[ic_i + id2 + iv] += wt2_i;
2142 w2[ic_r + id3 + iv] -= wt1_r;
2143 w2[ic_i + id3 + iv] -= wt1_i;
2144 w2[ic_r + id4 + iv] -= wt2_r;
2145 w2[ic_i + id4 + iv] -= wt2_i;
// Per-site block swap over the task's region (m_Mt x m_Mz x Nxy sites): v2
// receives v1 with the id1 <-> id3 and id2 <-> id4 blocks exchanged.
// NOTE(review): the function-name line is not part of this listing; likely
// gm5_dirac_thread -- confirm against the full file.
// NOTE(review): `id1` and `id2` are declared on lines not visible here.
// NOTE(review): if v1 and v2 refer to the same storage, the last two
// assignments read values already overwritten by the first two -- callers
// presumably pass distinct buffers; confirm.
2155 int itask,
double *v2,
const double *v1)
2162 int id3 =
m_Nvc * 2;
2163 int id4 =
m_Nvc * 3;
2165 int isite =
m_arg[itask].isite;
2167 const double *w1 = &v1[Nvcd * isite];
2168 double *w2 = &v2[Nvcd * isite];
2170 for (
int it = 0; it <
m_Mt; ++it) {
2171 for (
int iz = 0; iz <
m_Mz; ++iz) {
2172 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: offset of this site's Nvcd doubles relative to the task's start.
2173 int iv = Nvcd * (ixy + Nxy * (iz +
m_Nz * it));
2174 for (
int ivc = 0; ivc <
m_Nvc; ++ivc) {
2175 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2176 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2177 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2178 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
// Per-site copy with sign flip over the task's region (m_Mt x m_Mz x Nxy
// sites): v2 receives the id1 / id2 blocks of v1 unchanged and the id3 / id4
// blocks negated.
// NOTE(review): the function-name line is not part of this listing; likely
// gm5_chiral_thread -- confirm against the full file.
// NOTE(review): `id1` and `id2` are declared on lines not visible here.
2188 int itask,
double *v2,
const double *v1)
2195 int id3 =
m_Nvc * 2;
2196 int id4 =
m_Nvc * 3;
2198 int isite =
m_arg[itask].isite;
2200 const double *w1 = &v1[Nvcd * isite];
2201 double *w2 = &v2[Nvcd * isite];
2203 for (
int it = 0; it <
m_Mt; ++it) {
2204 for (
int iz = 0; iz <
m_Mz; ++iz) {
2205 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: offset of this site's Nvcd doubles relative to the task's start.
2206 int iv = Nvcd * (ixy + Nxy * (iz +
m_Nz * it));
2207 for (
int ivc = 0; ivc <
m_Nvc; ++ivc) {
2208 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2209 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2210 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2211 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_t_plus1_chiral_thread(int, double *, const double *)
const double * ptr(const int jin, const int site, const int jex) const
void mult_y_plus2_thread(int, double *, const double *)
void mult_t_plus2_dirac_thread(int, double *, const double *)
void general(const char *format,...)
Bridge::VerboseLevel m_vl
void mult_x_minus_bulk_thread(int, double *, const double *)
void mult_x_plus2_thread(int, double *, const double *)
void mult_t_plus_bulk_dirac_thread(int, double *, const double *)
void mult_t_minus2_chiral_thread(int, double *, const double *)
void gm5_dirac_thread(int, double *, const double *)
void daxpy_thread(int, double *, double, const double *)
void gm5_chiral_thread(int, double *, const double *)
void mult_x_minus1_thread(int, double *, const double *)
void mult_y_plus_bulk_thread(int, double *, const double *)
void mult_x_minus2_thread(int, double *, const double *)
void mult_z_plus_bulk_thread(int, double *, const double *)
void mult_y_minus_bulk_thread(int, double *, const double *)
void mult_z_minus1_thread(int, double *, const double *)
void mult_t_minus1_dirac_thread(int, double *, const double *)
void mult_t_minus_bulk_chiral_thread(int, double *, const double *)
void mult_x_plus1_thread(int, double *, const double *)
const Field_G * m_U
gauge configuration.
void daypx_thread(int, double *, double, const double *)
std::vector< double > m_boundary2
b.c. for each node.
void mult_t_plus2_chiral_thread(int, double *, const double *)
void mult_x_plus_bulk_thread(int, double *, const double *)
void clear_thread(int, double *)
void mult_t_minus2_dirac_thread(int, double *, const double *)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_y_minus1_thread(int, double *, const double *)
void crucial(const char *format,...)
void mult_y_minus2_thread(int, double *, const double *)
void mult_z_plus2_thread(int, double *, const double *)
void scal_thread(int, double *, double)
static const std::string class_name
void mult_t_plus_bulk_chiral_thread(int, double *, const double *)
void mult_z_minus_bulk_thread(int, double *, const double *)
void mult_z_plus1_thread(int, double *, const double *)
std::vector< mult_arg > m_arg
void mult_y_plus1_thread(int, double *, const double *)
void mult_t_minus1_chiral_thread(int, double *, const double *)
void mult_t_plus1_dirac_thread(int, double *, const double *)
void mult_t_minus_bulk_dirac_thread(int, double *, const double *)
void mult_z_minus2_thread(int, double *, const double *)