17 #if defined USE_GROUP_SU3
19 #elif defined USE_GROUP_SU2
21 #elif defined USE_GROUP_SU_N
45 vout.
crucial(
m_vl,
"Error at %s: Nz = %d and Nt = %d do not match Nthread = %d\n",
55 vout.
crucial(
m_vl,
"Error at %s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
61 vout.
crucial(
m_vl,
"Error at %s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
91 for (
int ithread_t = 0; ithread_t <
m_Ntask_t; ++ithread_t) {
92 for (
int ithread_z = 0; ithread_z <
m_Ntask_z; ++ithread_z) {
93 int itask = ithread_z + m_Ntask_z * ithread_t;
100 m_arg[itask].kz1 = 0;
101 if (ithread_t == 0)
m_arg[itask].kt0 = 1;
102 if (ithread_z == 0)
m_arg[itask].kz0 = 1;
103 if (ithread_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
104 if (ithread_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
108 m_arg[itask].isite_cp_z = ithread_t *
m_Mt * Nxy;
109 m_arg[itask].isite_cp_t = ithread_z *
m_Mz * Nxy;
117 double *v2,
const double fac,
const double *v1)
120 const int Nvxy = Nvcd *
m_Nx *
m_Ny;
122 const int isite =
m_arg[itask].isite;
124 const double *w1 = &v1[Nvcd * isite];
125 double *w2 = &v2[Nvcd * isite];
127 for (
int it = 0; it <
m_Mt; ++it) {
128 for (
int iz = 0; iz <
m_Mz; ++iz) {
129 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
130 int iv = ivxy + Nvxy * (iz +
m_Nz * it);
131 w2[iv] += fac * w1[iv];
140 double *v2,
const double fac,
const double *v1)
143 const int Nvxy = Nvcd *
m_Nx *
m_Ny;
145 const int isite =
m_arg[itask].isite;
146 const double *w1 = &v1[Nvcd * isite];
147 double *w2 = &v2[Nvcd * isite];
149 for (
int it = 0; it <
m_Mt; ++it) {
150 for (
int iz = 0; iz <
m_Mz; ++iz) {
151 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
152 int iv = ivxy + Nvxy * (iz +
m_Nz * it);
153 w2[iv] = fac * w2[iv] + w1[iv];
162 double *v,
const double fac)
165 const int Nvxy = Nvcd *
m_Nx *
m_Ny;
167 const int isite =
m_arg[itask].isite;
168 double *w = &v[Nvcd * isite];
170 for (
int it = 0; it <
m_Mt; ++it) {
171 for (
int iz = 0; iz <
m_Mz; ++iz) {
172 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
173 int iv = ivxy + Nvxy * (iz +
m_Nz * it);
186 const int Nvxy = Nvcd *
m_Nx *
m_Ny;
188 const int isite =
m_arg[itask].isite;
189 double *w2 = &v2[Nvcd * isite];
191 for (
int it = 0; it <
m_Mt; ++it) {
192 for (
int iz = 0; iz <
m_Mz; ++iz) {
193 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
194 int iv = ivxy + Nvxy * (iz +
m_Nz * it);
204 double *vcp1,
const double *v1)
209 const int id2 =
m_Nvc;
210 const int id3 =
m_Nvc * 2;
211 const int id4 =
m_Nvc * 3;
217 const int isite =
m_arg[itask].isite;
218 const int isite_cp =
m_arg[itask].isite_cp_x;
220 const double *w1 = &v1[Nvcd * isite];
221 double *w2 = &vcp1[Nvcd * isite_cp];
224 for (
int it = 0; it <
m_Mt; ++it) {
225 for (
int iz = 0; iz <
m_Mz; ++iz) {
226 for (
int iy = 0; iy <
m_Ny; ++iy) {
227 int is = ix +
m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
228 int is2 = iy + m_Ny * (iz + m_Mz * it);
230 int ix1 = Nvcd * is2;
231 int ix2 = ix1 +
m_Nvc;
232 int ix3 = ix2 +
m_Nvc;
233 int ix4 = ix3 +
m_Nvc;
235 for (
int ic = 0; ic <
m_Nc; ++ic) {
237 int ic_i = 2 * ic + 1;
239 w2[ic_r + ix1] = bc2 * (
m_r_s * w1[ic_r + id1 + in] -
m_nu_s * w1[ic_i + id4 + in]);
240 w2[ic_i + ix1] = bc2 * (
m_r_s * w1[ic_i + id1 + in] +
m_nu_s * w1[ic_r + id4 + in]);
241 w2[ic_r + ix2] = bc2 * (
m_r_s * w1[ic_r + id2 + in] -
m_nu_s * w1[ic_i + id3 + in]);
242 w2[ic_i + ix2] = bc2 * (
m_r_s * w1[ic_i + id2 + in] +
m_nu_s * w1[ic_r + id3 + in]);
244 w2[ic_r + ix3] = bc2 * (
m_r_s * w1[ic_r + id3 + in] +
m_nu_s * w1[ic_i + id2 + in]);
245 w2[ic_i + ix3] = bc2 * (
m_r_s * w1[ic_i + id3 + in] -
m_nu_s * w1[ic_r + id2 + in]);
246 w2[ic_r + ix4] = bc2 * (
m_r_s * w1[ic_r + id4 + in] +
m_nu_s * w1[ic_i + id1 + in]);
247 w2[ic_i + ix4] = bc2 * (
m_r_s * w1[ic_i + id4 + in] -
m_nu_s * w1[ic_r + id1 + in]);
257 double *v2,
const double *vcp2)
262 const int id2 =
m_Nvc;
263 const int id3 =
m_Nvc * 2;
264 const int id4 =
m_Nvc * 3;
267 const int ix =
m_Nx - 1;
269 const int isite =
m_arg[itask].isite;
270 const int isite_cp =
m_arg[itask].isite_cp_x;
272 const double *w1 = &vcp2[Nvcd * isite_cp];
273 double *w2 = &v2[Nvcd * isite];
277 for (
int it = 0; it <
m_Mt; ++it) {
278 for (
int iz = 0; iz <
m_Mz; ++iz) {
279 for (
int iy = 0; iy <
m_Ny; ++iy) {
280 int is = ix +
m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
281 int is2 = iy + m_Ny * (iz + m_Mz * it);
284 int ix1 = Nvcd * is2;
285 int ix2 = ix1 +
m_Nvc;
286 int ix3 = ix2 +
m_Nvc;
287 int ix4 = ix3 +
m_Nvc;
289 for (
int ic = 0; ic <
m_Nc; ++ic) {
290 int ic2 = ic *
m_Nvc;
292 double wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
293 double wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
294 double wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
295 double wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
297 double wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
298 double wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
299 double wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
300 double wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
303 int ic_i = 2 * ic + 1;
305 w2[ic_r + id1 + iv] += wt1_r;
306 w2[ic_i + id1 + iv] += wt1_i;
307 w2[ic_r + id2 + iv] += wt2_r;
308 w2[ic_i + id2 + iv] += wt2_i;
310 w2[ic_r + id3 + iv] += wt3_r;
311 w2[ic_i + id3 + iv] += wt3_i;
312 w2[ic_r + id4 + iv] += wt4_r;
313 w2[ic_i + id4 + iv] += wt4_i;
323 double *v2,
const double *v1)
328 const int id2 =
m_Nvc;
329 const int id3 =
m_Nvc * 2;
330 const int id4 =
m_Nvc * 3;
334 const int isite =
m_arg[itask].isite;
336 const double *w1 = &v1[Nvcd * isite];
337 double *w2 = &v2[Nvcd * isite];
341 for (
int it = 0; it <
m_Mt; ++it) {
342 for (
int iz = 0; iz <
m_Mz; ++iz) {
343 for (
int iy = 0; iy <
m_Ny; ++iy) {
344 for (
int ix = 0; ix <
m_Nx - 1; ++ix) {
345 int is = ix + m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
347 int in = Nvcd * (is + 1);
352 for (
int ic = 0; ic <
m_Nc; ++ic) {
354 int ic_i = 2 * ic + 1;
356 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] -
m_nu_s * w1[ic_i + id4 + in];
357 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] +
m_nu_s * w1[ic_r + id4 + in];
358 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] -
m_nu_s * w1[ic_i + id3 + in];
359 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] +
m_nu_s * w1[ic_r + id3 + in];
361 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] +
m_nu_s * w1[ic_i + id2 + in];
362 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] -
m_nu_s * w1[ic_r + id2 + in];
363 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] +
m_nu_s * w1[ic_i + id1 + in];
364 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] -
m_nu_s * w1[ic_r + id1 + in];
367 for (
int ic = 0; ic <
m_Nc; ++ic) {
368 int ic2 = ic *
m_Nvc;
370 double wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
371 double wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
372 double wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
373 double wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
375 double wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
376 double wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
377 double wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
378 double wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
381 int ic_i = 2 * ic + 1;
383 w2[ic_r + id1 + iv] += wt1_r;
384 w2[ic_i + id1 + iv] += wt1_i;
385 w2[ic_r + id2 + iv] += wt2_r;
386 w2[ic_i + id2 + iv] += wt2_i;
388 w2[ic_r + id3 + iv] += wt3_r;
389 w2[ic_i + id3 + iv] += wt3_i;
390 w2[ic_r + id4 + iv] += wt4_r;
391 w2[ic_i + id4 + iv] += wt4_i;
402 double *vcp1,
const double *v1)
407 const int id2 =
m_Nvc;
408 const int id3 =
m_Nvc * 2;
409 const int id4 =
m_Nvc * 3;
412 const int ix =
m_Nx - 1;
414 const int isite =
m_arg[itask].isite;
415 const int isite_cp =
m_arg[itask].isite_cp_x;
417 const double *w1 = &v1[Nvcd * isite];
418 double *w2 = &vcp1[Nvcd * isite_cp];
422 for (
int it = 0; it <
m_Mt; ++it) {
423 for (
int iz = 0; iz <
m_Mz; ++iz) {
424 for (
int iy = 0; iy <
m_Ny; ++iy) {
425 int is = ix +
m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
426 int is2 = iy + m_Ny * (iz + m_Mz * it);
429 int ix1 = Nvcd * is2;
430 int ix2 = ix1 +
m_Nvc;
431 int ix3 = ix2 +
m_Nvc;
432 int ix4 = ix3 +
m_Nvc;
436 for (
int ic = 0; ic <
m_Nc; ++ic) {
438 int ic_i = 2 * ic + 1;
440 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] +
m_nu_s * w1[ic_i + id4 + in];
441 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] -
m_nu_s * w1[ic_r + id4 + in];
442 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] +
m_nu_s * w1[ic_i + id3 + in];
443 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] -
m_nu_s * w1[ic_r + id3 + in];
445 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] -
m_nu_s * w1[ic_i + id2 + in];
446 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] +
m_nu_s * w1[ic_r + id2 + in];
447 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] -
m_nu_s * w1[ic_i + id1 + in];
448 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] +
m_nu_s * w1[ic_r + id1 + in];
451 for (
int ic = 0; ic <
m_Nc; ++ic) {
455 int ic_i = 2 * ic + 1;
457 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
458 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
459 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
460 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
462 w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
463 w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
464 w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
465 w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
475 double *v2,
const double *vcp2)
480 const int id2 =
m_Nvc;
481 const int id3 =
m_Nvc * 2;
482 const int id4 =
m_Nvc * 3;
488 const int isite =
m_arg[itask].isite;
489 const int isite_cp =
m_arg[itask].isite_cp_x;
491 const double *w1 = &vcp2[Nvcd * isite_cp];
492 double *w2 = &v2[Nvcd * isite];
495 for (
int it = 0; it <
m_Mt; ++it) {
496 for (
int iz = 0; iz <
m_Mz; ++iz) {
497 for (
int iy = 0; iy <
m_Ny; ++iy) {
498 int is = ix +
m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
499 int is2 = iy + m_Ny * (iz + m_Mz * it);
501 int ix1 = Nvcd * is2;
502 int ix2 = ix1 +
m_Nvc;
503 int ix3 = ix2 +
m_Nvc;
504 int ix4 = ix3 +
m_Nvc;
506 for (
int ic = 0; ic <
m_Nc; ++ic) {
508 int ic_i = 2 * ic + 1;
510 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
511 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
512 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
513 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
515 w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
516 w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
517 w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
518 w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
528 double *v2,
const double *v1)
533 const int id2 =
m_Nvc;
534 const int id3 =
m_Nvc * 2;
535 const int id4 =
m_Nvc * 3;
539 const int isite =
m_arg[itask].isite;
541 const double *w1 = &v1[Nvcd * isite];
542 double *w2 = &v2[Nvcd * isite];
546 for (
int it = 0; it <
m_Mt; ++it) {
547 for (
int iz = 0; iz <
m_Mz; ++iz) {
548 for (
int iy = 0; iy <
m_Ny; ++iy) {
549 for (
int ix = 1; ix <
m_Nx; ++ix) {
550 int is = ix + m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
552 int in = Nvcd * (is - 1);
553 int ig =
m_Ndf * (is - 1);
557 for (
int ic = 0; ic <
m_Nc; ++ic) {
559 int ic_i = 2 * ic + 1;
561 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] +
m_nu_s * w1[ic_i + id4 + in];
562 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] -
m_nu_s * w1[ic_r + id4 + in];
563 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] +
m_nu_s * w1[ic_i + id3 + in];
564 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] -
m_nu_s * w1[ic_r + id3 + in];
566 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] -
m_nu_s * w1[ic_i + id2 + in];
567 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] +
m_nu_s * w1[ic_r + id2 + in];
568 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] -
m_nu_s * w1[ic_i + id1 + in];
569 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] +
m_nu_s * w1[ic_r + id1 + in];
572 for (
int ic = 0; ic <
m_Nc; ++ic) {
575 double wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
576 double wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
577 double wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
578 double wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
580 double wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
581 double wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
582 double wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
583 double wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
586 int ic_i = 2 * ic + 1;
588 w2[ic_r + id1 + iv] += wt1_r;
589 w2[ic_i + id1 + iv] += wt1_i;
590 w2[ic_r + id2 + iv] += wt2_r;
591 w2[ic_i + id2 + iv] += wt2_i;
593 w2[ic_r + id3 + iv] += wt3_r;
594 w2[ic_i + id3 + iv] += wt3_i;
595 w2[ic_r + id4 + iv] += wt4_r;
596 w2[ic_i + id4 + iv] += wt4_i;
607 double *vcp1,
const double *v1)
612 const int id2 =
m_Nvc;
613 const int id3 =
m_Nvc * 2;
614 const int id4 =
m_Nvc * 3;
620 const int isite =
m_arg[itask].isite;
621 const int isite_cp =
m_arg[itask].isite_cp_y;
623 const double *w1 = &v1[Nvcd * isite];
624 double *w2 = &vcp1[Nvcd * isite_cp];
627 for (
int it = 0; it <
m_Mt; ++it) {
628 for (
int iz = 0; iz <
m_Mz; ++iz) {
629 for (
int ix = 0; ix <
m_Nx; ++ix) {
630 int is = ix + m_Nx * (iy +
m_Ny * (iz +
m_Nz * it));
631 int is2 = ix + m_Nx * (iz + m_Mz * it);
633 int ix1 = Nvcd * is2;
634 int ix2 = ix1 +
m_Nvc;
635 int ix3 = ix2 +
m_Nvc;
636 int ix4 = ix3 +
m_Nvc;
638 for (
int ic = 0; ic <
m_Nc; ++ic) {
640 int ic_i = 2 * ic + 1;
642 w2[ic_r + ix1] = bc2 * (
m_r_s * w1[ic_r + id1 + in] +
m_nu_s * w1[ic_r + id4 + in]);
643 w2[ic_i + ix1] = bc2 * (
m_r_s * w1[ic_i + id1 + in] +
m_nu_s * w1[ic_i + id4 + in]);
644 w2[ic_r + ix2] = bc2 * (
m_r_s * w1[ic_r + id2 + in] -
m_nu_s * w1[ic_r + id3 + in]);
645 w2[ic_i + ix2] = bc2 * (
m_r_s * w1[ic_i + id2 + in] -
m_nu_s * w1[ic_i + id3 + in]);
647 w2[ic_r + ix3] = bc2 * (
m_r_s * w1[ic_r + id3 + in] -
m_nu_s * w1[ic_r + id2 + in]);
648 w2[ic_i + ix3] = bc2 * (
m_r_s * w1[ic_i + id3 + in] -
m_nu_s * w1[ic_i + id2 + in]);
649 w2[ic_r + ix4] = bc2 * (
m_r_s * w1[ic_r + id4 + in] +
m_nu_s * w1[ic_r + id1 + in]);
650 w2[ic_i + ix4] = bc2 * (
m_r_s * w1[ic_i + id4 + in] +
m_nu_s * w1[ic_i + id1 + in]);
660 const int itask,
double *v2,
const double *vcp2)
665 const int id2 =
m_Nvc;
666 const int id3 =
m_Nvc * 2;
667 const int id4 =
m_Nvc * 3;
670 const int iy =
m_Ny - 1;
672 const int isite =
m_arg[itask].isite;
673 const int isite_cp =
m_arg[itask].isite_cp_y;
675 const double *w1 = &vcp2[Nvcd * isite_cp];
676 double *w2 = &v2[Nvcd * isite];
680 for (
int it = 0; it <
m_Mt; ++it) {
681 for (
int iz = 0; iz <
m_Mz; ++iz) {
682 for (
int ix = 0; ix <
m_Nx; ++ix) {
683 int is = ix + m_Nx * (iy +
m_Ny * (iz +
m_Nz * it));
684 int is2 = ix + m_Nx * (iz + m_Mz * it);
687 int ix1 = Nvcd * is2;
688 int ix2 = ix1 +
m_Nvc;
689 int ix3 = ix2 +
m_Nvc;
690 int ix4 = ix3 +
m_Nvc;
692 for (
int ic = 0; ic <
m_Nc; ++ic) {
693 int ic2 = ic *
m_Nvc;
695 double wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
696 double wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
697 double wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
698 double wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
700 double wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
701 double wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
702 double wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
703 double wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
706 int ic_i = 2 * ic + 1;
708 w2[ic_r + id1 + iv] += wt1_r;
709 w2[ic_i + id1 + iv] += wt1_i;
710 w2[ic_r + id2 + iv] += wt2_r;
711 w2[ic_i + id2 + iv] += wt2_i;
713 w2[ic_r + id3 + iv] += wt3_r;
714 w2[ic_i + id3 + iv] += wt3_i;
715 w2[ic_r + id4 + iv] += wt4_r;
716 w2[ic_i + id4 + iv] += wt4_i;
726 double *v2,
const double *v1)
731 const int id2 =
m_Nvc;
732 const int id3 =
m_Nvc * 2;
733 const int id4 =
m_Nvc * 3;
737 const int isite =
m_arg[itask].isite;
739 const double *w1 = &v1[Nvcd * isite];
740 double *w2 = &v2[Nvcd * isite];
744 for (
int it = 0; it <
m_Mt; ++it) {
745 for (
int iz = 0; iz <
m_Mz; ++iz) {
746 for (
int iy = 0; iy <
m_Ny - 1; ++iy) {
747 for (
int ix = 0; ix <
m_Nx; ++ix) {
748 int is = ix + m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
750 int in = Nvcd * (is +
m_Nx);
755 for (
int ic = 0; ic <
m_Nc; ++ic) {
757 int ic_i = 2 * ic + 1;
759 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] +
m_nu_s * w1[ic_r + id4 + in];
760 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] +
m_nu_s * w1[ic_i + id4 + in];
761 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] -
m_nu_s * w1[ic_r + id3 + in];
762 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] -
m_nu_s * w1[ic_i + id3 + in];
764 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] -
m_nu_s * w1[ic_r + id2 + in];
765 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] -
m_nu_s * w1[ic_i + id2 + in];
766 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] +
m_nu_s * w1[ic_r + id1 + in];
767 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] +
m_nu_s * w1[ic_i + id1 + in];
770 for (
int ic = 0; ic <
m_Nc; ++ic) {
771 int ic2 = ic *
m_Nvc;
773 double wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
774 double wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
775 double wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
776 double wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
778 double wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
779 double wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
780 double wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
781 double wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
784 int ic_i = 2 * ic + 1;
786 w2[ic_r + id1 + iv] += wt1_r;
787 w2[ic_i + id1 + iv] += wt1_i;
788 w2[ic_r + id2 + iv] += wt2_r;
789 w2[ic_i + id2 + iv] += wt2_i;
791 w2[ic_r + id3 + iv] += wt3_r;
792 w2[ic_i + id3 + iv] += wt3_i;
793 w2[ic_r + id4 + iv] += wt4_r;
794 w2[ic_i + id4 + iv] += wt4_i;
805 double *vcp1,
const double *v1)
810 const int id2 =
m_Nvc;
811 const int id3 =
m_Nvc * 2;
812 const int id4 =
m_Nvc * 3;
815 const int iy =
m_Ny - 1;
817 const int isite =
m_arg[itask].isite;
818 const int isite_cp =
m_arg[itask].isite_cp_y;
820 const double *w1 = &v1[Nvcd * isite];
821 double *w2 = &vcp1[Nvcd * isite_cp];
825 for (
int it = 0; it <
m_Mt; ++it) {
826 for (
int iz = 0; iz <
m_Mz; ++iz) {
827 for (
int ix = 0; ix <
m_Nx; ++ix) {
828 int is = ix + m_Nx * (iy +
m_Ny * (iz +
m_Nz * it));
829 int is2 = ix + m_Nx * (iz + m_Mz * it);
832 int ix1 = Nvcd * is2;
833 int ix2 = ix1 +
m_Nvc;
834 int ix3 = ix2 +
m_Nvc;
835 int ix4 = ix3 +
m_Nvc;
839 for (
int ic = 0; ic <
m_Nc; ++ic) {
841 int ic_i = 2 * ic + 1;
843 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] -
m_nu_s * w1[ic_r + id4 + in];
844 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] -
m_nu_s * w1[ic_i + id4 + in];
845 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] +
m_nu_s * w1[ic_r + id3 + in];
846 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] +
m_nu_s * w1[ic_i + id3 + in];
848 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] +
m_nu_s * w1[ic_r + id2 + in];
849 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] +
m_nu_s * w1[ic_i + id2 + in];
850 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] -
m_nu_s * w1[ic_r + id1 + in];
851 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] -
m_nu_s * w1[ic_i + id1 + in];
854 for (
int ic = 0; ic <
m_Nc; ++ic) {
858 int ic_i = 2 * ic + 1;
860 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
861 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
862 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
863 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
865 w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
866 w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
867 w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
868 w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
878 double *v2,
const double *vcp2)
883 const int id2 =
m_Nvc;
884 const int id3 =
m_Nvc * 2;
885 const int id4 =
m_Nvc * 3;
891 const int isite =
m_arg[itask].isite;
892 const int isite_cp =
m_arg[itask].isite_cp_y;
894 const double *w1 = &vcp2[Nvcd * isite_cp];
895 double *w2 = &v2[Nvcd * isite];
898 for (
int it = 0; it <
m_Mt; ++it) {
899 for (
int iz = 0; iz <
m_Mz; ++iz) {
900 for (
int ix = 0; ix <
m_Nx; ++ix) {
901 int is = ix + m_Nx * (iy +
m_Ny * (iz +
m_Nz * it));
902 int is2 = ix + m_Nx * (iz + m_Mz * it);
904 int ix1 = Nvcd * is2;
905 int ix2 = ix1 +
m_Nvc;
906 int ix3 = ix2 +
m_Nvc;
907 int ix4 = ix3 +
m_Nvc;
909 for (
int ic = 0; ic <
m_Nc; ++ic) {
911 int ic_i = 2 * ic + 1;
913 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
914 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
915 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
916 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
918 w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
919 w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
920 w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
921 w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
931 double *v2,
const double *v1)
936 const int id2 =
m_Nvc;
937 const int id3 =
m_Nvc * 2;
938 const int id4 =
m_Nvc * 3;
942 const int isite =
m_arg[itask].isite;
944 const double *w1 = &v1[Nvcd * isite];
945 double *w2 = &v2[Nvcd * isite];
949 for (
int it = 0; it <
m_Mt; ++it) {
950 for (
int iz = 0; iz <
m_Mz; ++iz) {
951 for (
int iy = 1; iy <
m_Ny; ++iy) {
952 for (
int ix = 0; ix <
m_Nx; ++ix) {
953 int is = ix + m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
955 int in = Nvcd * (is -
m_Nx);
960 for (
int ic = 0; ic <
m_Nc; ++ic) {
962 int ic_i = 2 * ic + 1;
964 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] -
m_nu_s * w1[ic_r + id4 + in];
965 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] -
m_nu_s * w1[ic_i + id4 + in];
966 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] +
m_nu_s * w1[ic_r + id3 + in];
967 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] +
m_nu_s * w1[ic_i + id3 + in];
969 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] +
m_nu_s * w1[ic_r + id2 + in];
970 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] +
m_nu_s * w1[ic_i + id2 + in];
971 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] -
m_nu_s * w1[ic_r + id1 + in];
972 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] -
m_nu_s * w1[ic_i + id1 + in];
975 for (
int ic = 0; ic <
m_Nc; ++ic) {
978 double wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
979 double wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
980 double wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
981 double wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
983 double wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
984 double wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
985 double wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
986 double wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
989 int ic_i = 2 * ic + 1;
991 w2[ic_r + id1 + iv] += wt1_r;
992 w2[ic_i + id1 + iv] += wt1_i;
993 w2[ic_r + id2 + iv] += wt2_r;
994 w2[ic_i + id2 + iv] += wt2_i;
996 w2[ic_r + id3 + iv] += wt3_r;
997 w2[ic_i + id3 + iv] += wt3_i;
998 w2[ic_r + id4 + iv] += wt4_r;
999 w2[ic_i + id4 + iv] += wt4_i;
1010 double *vcp1,
const double *v1)
1015 const int id2 =
m_Nvc;
1016 const int id3 =
m_Nvc * 2;
1017 const int id4 =
m_Nvc * 3;
1022 const int isite =
m_arg[itask].isite;
1023 const int isite_cp =
m_arg[itask].isite_cp_z;
1025 const double *w1 = &v1[Nvcd * isite];
1026 double *w2 = &vcp1[Nvcd * isite_cp];
1029 if (
m_arg[itask].kz0 == 1) {
1033 for (
int it = 0; it <
m_Mt; ++it) {
1034 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1035 int is = ixy + Nxy * (iz +
m_Nz * it);
1036 int is2 = ixy + Nxy * it;
1039 int ix1 = Nvcd * is2;
1040 int ix2 = ix1 +
m_Nvc;
1041 int ix3 = ix2 +
m_Nvc;
1042 int ix4 = ix3 +
m_Nvc;
1044 for (
int ic = 0; ic <
m_Nc; ++ic) {
1046 int ic_i = 2 * ic + 1;
1048 w2[ic_r + ix1] = bc2 * (
m_r_s * w1[ic_r + id1 + in] -
m_nu_s * w1[ic_i + id3 + in]);
1049 w2[ic_i + ix1] = bc2 * (
m_r_s * w1[ic_i + id1 + in] +
m_nu_s * w1[ic_r + id3 + in]);
1050 w2[ic_r + ix2] = bc2 * (
m_r_s * w1[ic_r + id2 + in] +
m_nu_s * w1[ic_i + id4 + in]);
1051 w2[ic_i + ix2] = bc2 * (
m_r_s * w1[ic_i + id2 + in] -
m_nu_s * w1[ic_r + id4 + in]);
1053 w2[ic_r + ix3] = bc2 * (
m_r_s * w1[ic_r + id3 + in] +
m_nu_s * w1[ic_i + id1 + in]);
1054 w2[ic_i + ix3] = bc2 * (
m_r_s * w1[ic_i + id3 + in] -
m_nu_s * w1[ic_r + id1 + in]);
1055 w2[ic_r + ix4] = bc2 * (
m_r_s * w1[ic_r + id4 + in] -
m_nu_s * w1[ic_i + id2 + in]);
1056 w2[ic_i + ix4] = bc2 * (
m_r_s * w1[ic_i + id4 + in] +
m_nu_s * w1[ic_r + id2 + in]);
1066 double *v2,
const double *vcp2)
1071 const int id2 =
m_Nvc;
1072 const int id3 =
m_Nvc * 2;
1073 const int id4 =
m_Nvc * 3;
1077 const int isite =
m_arg[itask].isite;
1078 const int isite_cp =
m_arg[itask].isite_cp_z;
1080 const double *w1 = &vcp2[Nvcd * isite_cp];
1081 double *w2 = &v2[Nvcd * isite];
1085 if (
m_arg[itask].kz1 == 1) {
1087 const int iz =
m_Mz - 1;
1089 for (
int it = 0; it <
m_Mt; ++it) {
1090 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1091 int is = ixy + Nxy * (iz +
m_Nz * it);
1092 int is2 = ixy + Nxy * it;
1094 int ig =
m_Ndf * is;
1095 int ix1 = Nvcd * is2;
1096 int ix2 = ix1 +
m_Nvc;
1097 int ix3 = ix2 +
m_Nvc;
1098 int ix4 = ix3 +
m_Nvc;
1100 for (
int ic = 0; ic <
m_Nc; ++ic) {
1101 int ic2 = ic *
m_Nvc;
1103 double wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1104 double wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1105 double wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1106 double wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1108 double wt3_r = mult_uv_r(&u[ic2 + ig], &w1[ix3], m_Nc);
1109 double wt3_i = mult_uv_i(&u[ic2 + ig], &w1[ix3], m_Nc);
1110 double wt4_r = mult_uv_r(&u[ic2 + ig], &w1[ix4], m_Nc);
1111 double wt4_i = mult_uv_i(&u[ic2 + ig], &w1[ix4], m_Nc);
1114 int ic_i = 2 * ic + 1;
1116 w2[ic_r + id1 + iv] += wt1_r;
1117 w2[ic_i + id1 + iv] += wt1_i;
1118 w2[ic_r + id2 + iv] += wt2_r;
1119 w2[ic_i + id2 + iv] += wt2_i;
1121 w2[ic_r + id3 + iv] += wt3_r;
1122 w2[ic_i + id3 + iv] += wt3_i;
1123 w2[ic_r + id4 + iv] += wt4_r;
1124 w2[ic_i + id4 + iv] += wt4_i;
1134 double *v2,
const double *v1)
1139 const int id2 =
m_Nvc;
1140 const int id3 =
m_Nvc * 2;
1141 const int id4 =
m_Nvc * 3;
1146 const int isite =
m_arg[itask].isite;
1147 const int kz1 =
m_arg[itask].kz1;
1149 const double *w1 = &v1[Nvcd * isite];
1150 double *w2 = &v2[Nvcd * isite];
1154 for (
int it = 0; it <
m_Mt; ++it) {
1155 for (
int iz = 0; iz <
m_Mz - kz1; ++iz) {
1156 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1157 int is = ixy + Nxy * (iz +
m_Nz * it);
1159 int in = Nvcd * (is + Nxy);
1160 int ig =
m_Ndf * is;
1164 for (
int ic = 0; ic <
m_Nc; ++ic) {
1166 int ic_i = 2 * ic + 1;
1168 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] -
m_nu_s * w1[ic_i + id3 + in];
1169 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] +
m_nu_s * w1[ic_r + id3 + in];
1170 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] +
m_nu_s * w1[ic_i + id4 + in];
1171 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] -
m_nu_s * w1[ic_r + id4 + in];
1173 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] +
m_nu_s * w1[ic_i + id1 + in];
1174 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] -
m_nu_s * w1[ic_r + id1 + in];
1175 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] -
m_nu_s * w1[ic_i + id2 + in];
1176 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] +
m_nu_s * w1[ic_r + id2 + in];
1179 for (
int ic = 0; ic <
m_Nc; ++ic) {
1180 int ic2 = ic *
m_Nvc;
1182 double wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1183 double wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1184 double wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1185 double wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1187 double wt3_r = mult_uv_r(&u[ic2 + ig], vt3, m_Nc);
1188 double wt3_i = mult_uv_i(&u[ic2 + ig], vt3, m_Nc);
1189 double wt4_r = mult_uv_r(&u[ic2 + ig], vt4, m_Nc);
1190 double wt4_i = mult_uv_i(&u[ic2 + ig], vt4, m_Nc);
1193 int ic_i = 2 * ic + 1;
1195 w2[ic_r + id1 + iv] += wt1_r;
1196 w2[ic_i + id1 + iv] += wt1_i;
1197 w2[ic_r + id2 + iv] += wt2_r;
1198 w2[ic_i + id2 + iv] += wt2_i;
1200 w2[ic_r + id3 + iv] += wt3_r;
1201 w2[ic_i + id3 + iv] += wt3_i;
1202 w2[ic_r + id4 + iv] += wt4_r;
1203 w2[ic_i + id4 + iv] += wt4_i;
1213 double *vcp1,
const double *v1)
1218 const int id2 =
m_Nvc;
1219 const int id3 =
m_Nvc * 2;
1220 const int id4 =
m_Nvc * 3;
1224 const int isite =
m_arg[itask].isite;
1225 const int isite_cp =
m_arg[itask].isite_cp_z;
1227 const double *w1 = &v1[Nvcd * isite];
1228 double *w2 = &vcp1[Nvcd * isite_cp];
1232 if (
m_arg[itask].kz1 == 1) {
1234 const int iz =
m_Mz - 1;
1236 for (
int it = 0; it <
m_Mt; ++it) {
1237 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1238 int is = ixy + Nxy * (iz +
m_Nz * it);
1239 int is2 = ixy + Nxy * it;
1241 int ig =
m_Ndf * is;
1242 int ix1 = Nvcd * is2;
1243 int ix2 = ix1 +
m_Nvc;
1244 int ix3 = ix2 +
m_Nvc;
1245 int ix4 = ix3 +
m_Nvc;
1249 for (
int ic = 0; ic <
m_Nc; ++ic) {
1251 int ic_i = 2 * ic + 1;
1253 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] +
m_nu_s * w1[ic_i + id3 + in];
1254 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] -
m_nu_s * w1[ic_r + id3 + in];
1255 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] -
m_nu_s * w1[ic_i + id4 + in];
1256 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] +
m_nu_s * w1[ic_r + id4 + in];
1258 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] -
m_nu_s * w1[ic_i + id1 + in];
1259 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] +
m_nu_s * w1[ic_r + id1 + in];
1260 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] +
m_nu_s * w1[ic_i + id2 + in];
1261 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] -
m_nu_s * w1[ic_r + id2 + in];
1264 for (
int ic = 0; ic <
m_Nc; ++ic) {
1268 int ic_i = 2 * ic + 1;
1270 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1271 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1272 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1273 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1275 w2[ic_r + ix3] = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
1276 w2[ic_i + ix3] = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
1277 w2[ic_r + ix4] = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
1278 w2[ic_i + ix4] = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
1288 double *v2,
const double *vcp2)
1293 const int id2 =
m_Nvc;
1294 const int id3 =
m_Nvc * 2;
1295 const int id4 =
m_Nvc * 3;
1300 const int isite =
m_arg[itask].isite;
1301 const int isite_cp =
m_arg[itask].isite_cp_z;
1303 const double *w1 = &vcp2[Nvcd * isite_cp];
1304 double *w2 = &v2[Nvcd * isite];
1307 if (
m_arg[itask].kz0 == 1) {
1311 for (
int it = 0; it <
m_Mt; ++it) {
1312 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1313 int is = ixy + Nxy * (iz +
m_Nz * it);
1314 int is2 = ixy + Nxy * it;
1316 int ix1 = Nvcd * is2;
1317 int ix2 = ix1 +
m_Nvc;
1318 int ix3 = ix2 +
m_Nvc;
1319 int ix4 = ix3 +
m_Nvc;
1321 for (
int ic = 0; ic <
m_Nc; ++ic) {
1323 int ic_i = 2 * ic + 1;
1325 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1326 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1327 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1328 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1330 w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix3];
1331 w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix3];
1332 w2[ic_r + id4 + iv] += bc2 * w1[ic_r + ix4];
1333 w2[ic_i + id4 + iv] += bc2 * w1[ic_i + ix4];
1343 double *v2,
const double *v1)
1348 const int id2 =
m_Nvc;
1349 const int id3 =
m_Nvc * 2;
1350 const int id4 =
m_Nvc * 3;
1355 const int isite =
m_arg[itask].isite;
1356 const int kz0 =
m_arg[itask].kz0;
1358 const double *w1 = &v1[Nvcd * isite];
1359 double *w2 = &v2[Nvcd * isite];
1363 for (
int it = 0; it <
m_Mt; ++it) {
1364 for (
int iz = kz0; iz <
m_Mz; ++iz) {
1365 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1366 int is = ixy + Nxy * (iz +
m_Nz * it);
1368 int in = Nvcd * (is - Nxy);
1369 int ig =
m_Ndf * (is - Nxy);
1373 for (
int ic = 0; ic <
m_Nc; ++ic) {
1375 int ic_i = 2 * ic + 1;
1377 vt1[ic_r] =
m_r_s * w1[ic_r + id1 + in] +
m_nu_s * w1[ic_i + id3 + in];
1378 vt1[ic_i] =
m_r_s * w1[ic_i + id1 + in] -
m_nu_s * w1[ic_r + id3 + in];
1379 vt2[ic_r] =
m_r_s * w1[ic_r + id2 + in] -
m_nu_s * w1[ic_i + id4 + in];
1380 vt2[ic_i] =
m_r_s * w1[ic_i + id2 + in] +
m_nu_s * w1[ic_r + id4 + in];
1382 vt3[ic_r] =
m_r_s * w1[ic_r + id3 + in] -
m_nu_s * w1[ic_i + id1 + in];
1383 vt3[ic_i] =
m_r_s * w1[ic_i + id3 + in] +
m_nu_s * w1[ic_r + id1 + in];
1384 vt4[ic_r] =
m_r_s * w1[ic_r + id4 + in] +
m_nu_s * w1[ic_i + id2 + in];
1385 vt4[ic_i] =
m_r_s * w1[ic_i + id4 + in] -
m_nu_s * w1[ic_r + id2 + in];
1388 for (
int ic = 0; ic <
m_Nc; ++ic) {
1391 double wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1392 double wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1393 double wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1394 double wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1396 double wt3_r = mult_udagv_r(&u[ic2 + ig], vt3, m_Nc);
1397 double wt3_i = mult_udagv_i(&u[ic2 + ig], vt3, m_Nc);
1398 double wt4_r = mult_udagv_r(&u[ic2 + ig], vt4, m_Nc);
1399 double wt4_i = mult_udagv_i(&u[ic2 + ig], vt4, m_Nc);
1402 int ic_i = 2 * ic + 1;
1404 w2[ic_r + id1 + iv] += wt1_r;
1405 w2[ic_i + id1 + iv] += wt1_i;
1406 w2[ic_r + id2 + iv] += wt2_r;
1407 w2[ic_i + id2 + iv] += wt2_i;
1409 w2[ic_r + id3 + iv] += wt3_r;
1410 w2[ic_i + id3 + iv] += wt3_i;
1411 w2[ic_r + id4 + iv] += wt4_r;
1412 w2[ic_i + id4 + iv] += wt4_i;
// NOTE(review): listing fragment -- the first signature line (function name and
// the itask parameter) and several body lines (declarations of Nvcd, Nxy, it,
// in, ic_r, bc2, and the closing braces) are absent from this excerpt.
// Presumably mult_t_plus1_dirac_thread, judging by the member list; confirm.
//
// For a task whose kt0 flag is 1, fills the buffer vcp1 (starting at offset
// Nvcd2 * isite_cp_t) with 2.0 * bc2 times the id3 and id4 segments of v1,
// writing 2 * m_Nvc values per (iz, ixy) position.
1422 double *vcp1,
const double *v1)
1424 const int Nvc2 = 2 *
m_Nvc;
1426 const int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth m_Nvc-sized segments within one site.
1428 const int id3 =
m_Nvc * 2;
1429 const int id4 =
m_Nvc * 3;
1434 const int isite =
m_arg[itask].isite;
1435 const int isite_cp =
m_arg[itask].isite_cp_t;
// w1 reads the task's part of v1; w2 writes the task's part of the buffer.
1437 const double *w1 = &v1[Nvcd * isite];
1438 double *w2 = &vcp1[Nvcd2 * isite_cp];
// Only tasks flagged kt0 (set in the task setup for ithread_t == 0) do work.
1441 if (
m_arg[itask].kt0 == 1) {
1445 for (
int iz = 0; iz <
m_Mz; ++iz) {
1446 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1447 int is = ixy + Nxy * (iz +
m_Nz * it);
// is2 indexes the buffer, which has no t extent.
1448 int is2 = ixy + Nxy * iz;
1451 int ix1 = Nvc2 * is2;
1452 int ix2 = ix1 +
m_Nvc;
1454 for (
int ic = 0; ic <
m_Nc; ++ic) {
1456 int ic_i = 2 * ic + 1;
1458 w2[ic_r + ix1] = 2.0 * bc2 * w1[ic_r + id3 + in];
1459 w2[ic_i + ix1] = 2.0 * bc2 * w1[ic_i + id3 + in];
1460 w2[ic_r + ix2] = 2.0 * bc2 * w1[ic_r + id4 + in];
1461 w2[ic_i + ix2] = 2.0 * bc2 * w1[ic_i + id4 + in];
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, iv, ic_r, u, and the closing braces) are
// absent from this excerpt.
// Presumably mult_t_plus2_dirac_thread, judging by the member list; confirm.
//
// For a task whose kt1 flag is 1, works on the slice it = m_Mt - 1: applies
// mult_uv_r / mult_uv_i to the buffer vcp2 with the link data at u[ic2 + ig]
// and accumulates the results into the id3 and id4 segments of v2.
1471 double *v2,
const double *vcp2)
1473 const int Nvc2 = 2 *
m_Nvc;
1475 const int Nvcd2 = Nvcd / 2;
1477 const int id3 =
m_Nvc * 2;
1478 const int id4 =
m_Nvc * 3;
1482 const int isite =
m_arg[itask].isite;
1483 const int isite_cp =
m_arg[itask].isite_cp_t;
// w1 reads the task's part of the buffer; w2 accumulates into v2.
1485 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1486 double *w2 = &v2[Nvcd * isite];
// Only tasks flagged kt1 (set in the task setup for the last ithread_t) do work.
1490 if (
m_arg[itask].kt1 == 1) {
1492 const int it =
m_Mt - 1;
1494 for (
int iz = 0; iz <
m_Mz; ++iz) {
1495 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1496 int is = ixy + Nxy * (iz +
m_Nz * it);
1497 int is2 = ixy + Nxy * iz;
// ig: offset of this site's data in u; ix1/ix2: the two buffer segments.
1499 int ig =
m_Ndf * is;
1500 int ix1 = Nvc2 * is2;
1501 int ix2 = ix1 +
m_Nvc;
1503 for (
int ic = 0; ic <
m_Nc; ++ic) {
1504 int ic2 = ic *
m_Nvc;
1507 int ic_i = 2 * ic + 1;
// mult_uv_r / mult_uv_i: presumably real / imaginary part of one row of U * v.
1509 w2[ic_r + id3 + iv] += mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1510 w2[ic_i + id3 + iv] += mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1511 w2[ic_r + id4 + iv] += mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1512 w2[ic_i + id4 + iv] += mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, iv, ic_r, u, vt1, vt2, and the closing
// braces) are absent from this excerpt.
// Presumably mult_t_plus_bulk_dirac_thread, judging by the member list; confirm.
//
// For every site of the task except the last t-slice when kt1 == 1, reads the
// id3 and id4 segments of v1 at the site one Nxyz stride ahead, doubles them,
// applies mult_uv_r / mult_uv_i with the link data at the current site, and
// accumulates into the id3 and id4 segments of v2.
1522 double *v2,
const double *v1)
// Stride between consecutive t-slices, in sites.
1526 const int Nxyz = Nxy *
m_Nz;
1528 const int id3 =
m_Nvc * 2;
1529 const int id4 =
m_Nvc * 3;
1533 const int isite =
m_arg[itask].isite;
1534 const int kt1 =
m_arg[itask].kt1;
1536 const double *w1 = &v1[Nvcd * isite];
1537 double *w2 = &v2[Nvcd * isite];
// The upper bound drops the last slice when kt1 is 1, so `in` stays in range.
1541 for (
int it = 0; it <
m_Mt - kt1; ++it) {
1542 for (
int iz = 0; iz <
m_Mz; ++iz) {
1543 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1544 int is = ixy + Nxy * (iz + m_Nz * it);
// in: neighbour site one t-slice ahead; ig: link data at the current site.
1546 int in = Nvcd * (is + Nxyz);
1547 int ig =
m_Ndf * is;
1551 for (
int ic = 0; ic <
m_Nc; ++ic) {
1553 int ic_i = 2 * ic + 1;
1555 vt1[ic_r] = 2.0 * w1[ic_r + id3 + in];
1556 vt1[ic_i] = 2.0 * w1[ic_i + id3 + in];
1557 vt2[ic_r] = 2.0 * w1[ic_r + id4 + in];
1558 vt2[ic_i] = 2.0 * w1[ic_i + id4 + in];
1561 for (
int ic = 0; ic <
m_Nc; ++ic) {
1562 int ic2 = ic *
m_Nvc;
1565 int ic_i = 2 * ic + 1;
1567 w2[ic_r + id3 + iv] += mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1568 w2[ic_i + id3 + iv] += mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1569 w2[ic_r + id4 + iv] += mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1570 w2[ic_i + id4 + iv] += mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, id1, in, ic_r, ic2, u, vt1, vt2, and the
// closing braces) are absent from this excerpt.
// Presumably mult_t_minus1_dirac_thread, judging by the member list; confirm.
//
// For a task whose kt1 flag is 1, works on the slice it = m_Mt - 1: doubles the
// id1 and id2 segments of v1, applies mult_udagv_r / mult_udagv_i with the
// link data at the same site, and stores the result in the buffer vcp1.
1580 double *vcp1,
const double *v1)
1582 const int Nvc2 = 2 *
m_Nvc;
1584 const int Nvcd2 = Nvcd / 2;
1587 const int id2 =
m_Nvc;
1591 const int isite =
m_arg[itask].isite;
1592 const int isite_cp =
m_arg[itask].isite_cp_t;
1594 const double *w1 = &v1[Nvcd * isite];
1595 double *w2 = &vcp1[Nvcd2 * isite_cp];
// Only tasks flagged kt1 (set in the task setup for the last ithread_t) do work.
1599 if (
m_arg[itask].kt1 == 1) {
1601 const int it =
m_Mt - 1;
1603 for (
int iz = 0; iz <
m_Mz; ++iz) {
1604 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1605 int is = ixy + Nxy * (iz +
m_Nz * it);
1606 int is2 = ixy + Nxy * iz;
1608 int ig =
m_Ndf * is;
1609 int ix1 = Nvc2 * is2;
1610 int ix2 = ix1 +
m_Nvc;
// First pass: gather the doubled id1/id2 segments into vt1/vt2.
1614 for (
int ic = 0; ic <
m_Nc; ++ic) {
1616 int ic_i = 2 * ic + 1;
1618 vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1619 vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1620 vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1621 vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
// Second pass: mult_udagv_* presumably gives one row of U^dagger * v; the
// result overwrites (does not accumulate into) the buffer.
1624 for (
int ic = 0; ic <
m_Nc; ++ic) {
1628 int ic_i = 2 * ic + 1;
1630 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1631 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1632 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1633 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, id1, it, iv, ic_r, bc2, and the closing
// braces) are absent from this excerpt.
// Presumably mult_t_minus2_dirac_thread, judging by the member list; confirm.
//
// For a task whose kt0 flag is 1, adds bc2 times the two segments of the buffer
// vcp2 into the id1 and id2 segments of v2, per (iz, ixy) position.
1643 double *v2,
const double *vcp2)
1645 const int Nvc2 = 2 *
m_Nvc;
1647 const int Nvcd2 = Nvcd / 2;
1650 const int id2 =
m_Nvc;
1655 const int isite =
m_arg[itask].isite;
1656 const int isite_cp =
m_arg[itask].isite_cp_t;
// w1 reads the task's part of the buffer; w2 accumulates into v2.
1658 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1659 double *w2 = &v2[Nvcd * isite];
// Only tasks flagged kt0 (set in the task setup for ithread_t == 0) do work.
1662 if (
m_arg[itask].kt0 == 1) {
1666 for (
int iz = 0; iz <
m_Mz; ++iz) {
1667 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1668 int is = ixy + Nxy * (iz +
m_Nz * it);
1669 int is2 = ixy + Nxy * iz;
1671 int ix1 = Nvc2 * is2;
1672 int ix2 = ix1 +
m_Nvc;
1674 for (
int ic = 0; ic <
m_Nc; ++ic) {
1676 int ic_i = 2 * ic + 1;
1678 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1679 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1680 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1681 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, id1, iv, ic_r, ic2, u, vt1, vt2, and the
// closing braces) are absent from this excerpt.
// Presumably mult_t_minus_bulk_dirac_thread, judging by the member list; confirm.
//
// For every site of the task except the first t-slice when kt0 == 1, reads the
// id1 and id2 segments of v1 at the site one Nxyz stride behind, doubles them,
// applies mult_udagv_r / mult_udagv_i with the link data of that same
// neighbour site, and accumulates into the id1 and id2 segments of v2.
1691 double *v2,
const double *v1)
// Stride between consecutive t-slices, in sites.
1695 const int Nxyz = Nxy *
m_Nz;
1698 const int id2 =
m_Nvc;
1702 const int isite =
m_arg[itask].isite;
1703 const int kt0 =
m_arg[itask].kt0;
1705 const double *w1 = &v1[Nvcd * isite];
1706 double *w2 = &v2[Nvcd * isite];
// Starting at kt0 skips the first slice when kt0 is 1, so is - Nxyz stays >= 0.
1710 for (
int it = kt0; it <
m_Mt; ++it) {
1711 for (
int iz = 0; iz <
m_Mz; ++iz) {
1712 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1713 int is = ixy + Nxy * (iz + m_Nz * it);
// Both the vector data (in) and the link data (ig) come from the previous slice.
1715 int in = Nvcd * (is - Nxyz);
1716 int ig =
m_Ndf * (is - Nxyz);
1720 for (
int ic = 0; ic <
m_Nc; ++ic) {
1722 int ic_i = 2 * ic + 1;
1724 vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1725 vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1726 vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1727 vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1730 for (
int ic = 0; ic <
m_Nc; ++ic) {
1734 int ic_i = 2 * ic + 1;
1736 w2[ic_r + id1 + iv] += mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1737 w2[ic_i + id1 + iv] += mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1738 w2[ic_r + id2 + iv] += mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1739 w2[ic_i + id2 + iv] += mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, id1, it, in, ic_r, bc2, and the closing
// braces) are absent from this excerpt.
// Presumably mult_t_plus1_chiral_thread, judging by the member list; confirm.
//
// For a task whose kt0 flag is 1, fills the buffer vcp1 with bc2 times the
// sums (id1 + id3) and (id2 + id4) of the v1 segments, per (iz, ixy) position.
1749 double *vcp1,
const double *v1)
1751 const int Nvc2 = 2 *
m_Nvc;
1753 const int Nvcd2 = Nvcd / 2;
// Offsets of the second, third and fourth m_Nvc-sized segments within a site.
1756 const int id2 =
m_Nvc;
1757 const int id3 =
m_Nvc * 2;
1758 const int id4 =
m_Nvc * 3;
1763 const int isite =
m_arg[itask].isite;
1764 const int isite_cp =
m_arg[itask].isite_cp_t;
1766 const double *w1 = &v1[Nvcd * isite];
1767 double *w2 = &vcp1[Nvcd2 * isite_cp];
// Only tasks flagged kt0 (set in the task setup for ithread_t == 0) do work.
1770 if (
m_arg[itask].kt0 == 1) {
1774 for (
int iz = 0; iz <
m_Mz; ++iz) {
1775 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1776 int is = ixy + Nxy * (iz +
m_Nz * it);
1777 int is2 = ixy + Nxy * iz;
1780 int ix1 = Nvc2 * is2;
1781 int ix2 = ix1 +
m_Nvc;
1783 for (
int ic = 0; ic <
m_Nc; ++ic) {
1785 int ic_i = 2 * ic + 1;
1787 w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id3 + in]);
1788 w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id3 + in]);
1789 w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_r + id4 + in]);
1790 w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_i + id4 + in]);
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, id1, iv, ic_r, u, and the closing braces)
// are absent from this excerpt.
// Presumably mult_t_plus2_chiral_thread, judging by the member list; confirm.
//
// For a task whose kt1 flag is 1, works on the slice it = m_Mt - 1: applies
// mult_uv_r / mult_uv_i to the two segments of the buffer vcp2 and adds each
// result to two segments of v2 (wt1 to id1 and id3, wt2 to id2 and id4).
1800 double *v2,
const double *vcp2)
1802 const int Nvc2 = 2 *
m_Nvc;
1804 const int Nvcd2 = Nvcd / 2;
1807 const int id2 =
m_Nvc;
1808 const int id3 =
m_Nvc * 2;
1809 const int id4 =
m_Nvc * 3;
1813 const int isite =
m_arg[itask].isite;
1814 const int isite_cp =
m_arg[itask].isite_cp_t;
// w1 reads the task's part of the buffer; w2 accumulates into v2.
1816 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1817 double *w2 = &v2[Nvcd * isite];
// Only tasks flagged kt1 (set in the task setup for the last ithread_t) do work.
1821 if (
m_arg[itask].kt1 == 1) {
1823 const int it =
m_Mt - 1;
1825 for (
int iz = 0; iz <
m_Mz; ++iz) {
1826 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1827 int is = ixy + Nxy * (iz +
m_Nz * it);
1828 int is2 = ixy + Nxy * iz;
1830 int ig =
m_Ndf * is;
1831 int ix1 = Nvc2 * is2;
1832 int ix2 = ix1 +
m_Nvc;
1834 for (
int ic = 0; ic <
m_Nc; ++ic) {
1835 int ic2 = ic *
m_Nvc;
// The four products are computed once and reused for both target segments.
1837 double wt1_r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1838 double wt1_i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1839 double wt2_r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1840 double wt2_i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1843 int ic_i = 2 * ic + 1;
1845 w2[ic_r + id1 + iv] += wt1_r;
1846 w2[ic_i + id1 + iv] += wt1_i;
1847 w2[ic_r + id2 + iv] += wt2_r;
1848 w2[ic_i + id2 + iv] += wt2_i;
1850 w2[ic_r + id3 + iv] += wt1_r;
1851 w2[ic_i + id3 + iv] += wt1_i;
1852 w2[ic_r + id4 + iv] += wt2_r;
1853 w2[ic_i + id4 + iv] += wt2_i;
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, id1, iv, ic_r, u, vt1, vt2, and the
// closing braces) are absent from this excerpt.
// Presumably mult_t_plus_bulk_chiral_thread, judging by the member list; confirm.
//
// For every site of the task except the last t-slice when kt1 == 1, forms the
// sums (id1 + id3) and (id2 + id4) of the v1 segments at the site one Nxyz
// stride ahead, applies mult_uv_r / mult_uv_i with the link data at the
// current site, and adds each result to two segments of v2 (wt1 to id1 and
// id3, wt2 to id2 and id4).
1863 double *v2,
const double *v1)
// Stride between consecutive t-slices, in sites.
1867 const int Nxyz =
m_Nx * m_Ny *
m_Nz;
1870 const int id2 =
m_Nvc;
1871 const int id3 =
m_Nvc * 2;
1872 const int id4 =
m_Nvc * 3;
1876 const int isite =
m_arg[itask].isite;
1877 const int kt1 =
m_arg[itask].kt1;
1879 const double *w1 = &v1[Nvcd * isite];
1880 double *w2 = &v2[Nvcd * isite];
// The upper bound drops the last slice when kt1 is 1, so `in` stays in range.
1884 for (
int it = 0; it <
m_Mt - kt1; ++it) {
1885 for (
int iz = 0; iz <
m_Mz; ++iz) {
1886 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1887 int is = ixy + Nxy * (iz + m_Nz * it);
// in: neighbour site one t-slice ahead; ig: link data at the current site.
1889 int in = Nvcd * (is + Nxyz);
1890 int ig =
m_Ndf * is;
1894 for (
int ic = 0; ic <
m_Nc; ++ic) {
1896 int ic_i = 2 * ic + 1;
1898 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id3 + in];
1899 vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id3 + in];
1900 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id4 + in];
1901 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id4 + in];
1904 for (
int ic = 0; ic <
m_Nc; ++ic) {
1905 int ic2 = ic *
m_Nvc;
1907 double wt1_r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1908 double wt1_i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1909 double wt2_r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1910 double wt2_i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1913 int ic_i = 2 * ic + 1;
1915 w2[ic_r + id1 + iv] += wt1_r;
1916 w2[ic_i + id1 + iv] += wt1_i;
1917 w2[ic_r + id2 + iv] += wt2_r;
1918 w2[ic_i + id2 + iv] += wt2_i;
1920 w2[ic_r + id3 + iv] += wt1_r;
1921 w2[ic_i + id3 + iv] += wt1_i;
1922 w2[ic_r + id4 + iv] += wt2_r;
1923 w2[ic_i + id4 + iv] += wt2_i;
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, id1, in, ic_r, ic2, u, vt1, vt2, and the
// closing braces) are absent from this excerpt.
// Presumably mult_t_minus1_chiral_thread, judging by the member list; confirm.
//
// For a task whose kt1 flag is 1, works on the slice it = m_Mt - 1: forms the
// differences (id1 - id3) and (id2 - id4) of the v1 segments, applies
// mult_udagv_r / mult_udagv_i with the link data at the same site, and stores
// the result in the buffer vcp1.
1933 double *vcp1,
const double *v1)
1935 const int Nvc2 = 2 *
m_Nvc;
1937 const int Nvcd2 = Nvcd / 2;
1940 const int id2 =
m_Nvc;
1941 const int id3 =
m_Nvc * 2;
1942 const int id4 =
m_Nvc * 3;
1946 const int isite =
m_arg[itask].isite;
1947 const int isite_cp =
m_arg[itask].isite_cp_t;
1949 const double *w1 = &v1[Nvcd * isite];
1950 double *w2 = &vcp1[Nvcd2 * isite_cp];
// Only tasks flagged kt1 (set in the task setup for the last ithread_t) do work.
1954 if (
m_arg[itask].kt1 == 1) {
1956 const int it =
m_Mt - 1;
1958 for (
int iz = 0; iz <
m_Mz; ++iz) {
1959 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1960 int is = ixy + Nxy * (iz +
m_Nz * it);
1961 int is2 = ixy + Nxy * iz;
1963 int ig =
m_Ndf * is;
1964 int ix1 = Nvc2 * is2;
1965 int ix2 = ix1 +
m_Nvc;
// First pass: gather the segment differences into vt1/vt2.
1969 for (
int ic = 0; ic <
m_Nc; ++ic) {
1971 int ic_i = 2 * ic + 1;
1973 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
1974 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
1975 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
1976 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
// Second pass: the result overwrites (does not accumulate into) the buffer.
1979 for (
int ic = 0; ic <
m_Nc; ++ic) {
1983 int ic_i = 2 * ic + 1;
1985 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1986 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1987 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1988 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, id1, it, iv, ic_r, bc2, and the closing
// braces) are absent from this excerpt.
// Presumably mult_t_minus2_chiral_thread, judging by the member list; confirm.
//
// For a task whose kt0 flag is 1, adds bc2 times the two segments of the buffer
// vcp2 to the id1 and id2 segments of v2 and subtracts the same values from
// the id3 and id4 segments, per (iz, ixy) position.
1998 double *v2,
const double *vcp2)
2000 const int Nvc2 = 2 *
m_Nvc;
2002 const int Nvcd2 = Nvcd / 2;
2005 const int id2 =
m_Nvc;
2006 const int id3 =
m_Nvc * 2;
2007 const int id4 =
m_Nvc * 3;
2012 const int isite =
m_arg[itask].isite;
2013 const int isite_cp =
m_arg[itask].isite_cp_t;
// w1 reads the task's part of the buffer; w2 accumulates into v2.
2015 const double *w1 = &vcp2[Nvcd2 * isite_cp];
2016 double *w2 = &v2[Nvcd * isite];
// Only tasks flagged kt0 (set in the task setup for ithread_t == 0) do work.
2019 if (
m_arg[itask].kt0 == 1) {
2023 for (
int iz = 0; iz <
m_Mz; ++iz) {
2024 for (
int ixy = 0; ixy < Nxy; ++ixy) {
2025 int is = ixy + Nxy * (iz +
m_Nz * it);
2026 int is2 = ixy + Nxy * iz;
2028 int ix1 = Nvc2 * is2;
2029 int ix2 = ix1 +
m_Nvc;
2031 for (
int ic = 0; ic <
m_Nc; ++ic) {
2033 int ic_i = 2 * ic + 1;
2035 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
2036 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
2037 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
2038 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
// Same buffer values, opposite sign, for the id3/id4 segments.
2040 w2[ic_r + id3 + iv] -= bc2 * w1[ic_r + ix1];
2041 w2[ic_i + id3 + iv] -= bc2 * w1[ic_i + ix1];
2042 w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix2];
2043 w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix2];
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, id1, iv, ic_r, ic2, u, vt1, vt2, and the
// closing braces) are absent from this excerpt.
// Presumably mult_t_minus_bulk_chiral_thread, judging by the member list; confirm.
//
// For every site of the task except the first t-slice when kt0 == 1, forms the
// differences (id1 - id3) and (id2 - id4) of the v1 segments at the site one
// Nxyz stride behind, applies mult_udagv_r / mult_udagv_i with the link data
// of that same neighbour site, then adds the results to the id1/id2 segments
// of v2 and subtracts them from the id3/id4 segments.
2053 double *v2,
const double *v1)
// Stride between consecutive t-slices, in sites.
2057 const int Nxyz =
m_Nx * m_Ny *
m_Nz;
2060 const int id2 =
m_Nvc;
2061 const int id3 =
m_Nvc * 2;
2062 const int id4 =
m_Nvc * 3;
2066 const int isite =
m_arg[itask].isite;
2067 const int kt0 =
m_arg[itask].kt0;
2069 const double *w1 = &v1[Nvcd * isite];
2070 double *w2 = &v2[Nvcd * isite];
// Starting at kt0 skips the first slice when kt0 is 1, so is - Nxyz stays >= 0.
2074 for (
int it = kt0; it <
m_Mt; ++it) {
2075 for (
int iz = 0; iz <
m_Mz; ++iz) {
2076 for (
int ixy = 0; ixy < Nxy; ++ixy) {
2077 int is = ixy + Nxy * (iz + m_Nz * it);
// Both the vector data (in) and the link data (ig) come from the previous slice.
2079 int in = Nvcd * (is - Nxyz);
2080 int ig =
m_Ndf * (is - Nxyz);
2084 for (
int ic = 0; ic <
m_Nc; ++ic) {
2086 int ic_i = 2 * ic + 1;
2088 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
2089 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
2090 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
2091 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
2094 for (
int ic = 0; ic <
m_Nc; ++ic) {
2097 double wt1_r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
2098 double wt1_i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
2099 double wt2_r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2100 double wt2_i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2103 int ic_i = 2 * ic + 1;
2105 w2[ic_r + id1 + iv] += wt1_r;
2106 w2[ic_i + id1 + iv] += wt1_i;
2107 w2[ic_r + id2 + iv] += wt2_r;
2108 w2[ic_i + id2 + iv] += wt2_i;
2110 w2[ic_r + id3 + iv] -= wt1_r;
2111 w2[ic_i + id3 + iv] -= wt1_i;
2112 w2[ic_r + id4 + iv] -= wt2_r;
2113 w2[ic_i + id4 + iv] -= wt2_i;
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, id1, and the closing braces) are absent
// from this excerpt.
// Presumably gm5_dirac_thread, judging by the member list; confirm.
//
// For every site of the task, copies v1 into v2 with the segment pairs
// exchanged: id1 <-> id3 and id2 <-> id4.
// NOTE(review): v2 is written while v1 is read at the same indices, so this
// looks unsafe if v1 and v2 alias -- confirm callers pass distinct arrays.
2123 double *v2,
const double *v1)
2129 const int id2 =
m_Nvc;
2130 const int id3 =
m_Nvc * 2;
2131 const int id4 =
m_Nvc * 3;
2133 const int isite =
m_arg[itask].isite;
2135 const double *w1 = &v1[Nvcd * isite];
2136 double *w2 = &v2[Nvcd * isite];
2139 for (
int it = 0; it <
m_Mt; ++it) {
2140 for (
int iz = 0; iz <
m_Mz; ++iz) {
2141 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: offset of this site's first value, relative to the task's base site.
2142 int iv = Nvcd * (ixy + Nxy * (iz +
m_Nz * it));
2144 for (
int ivc = 0; ivc <
m_Nvc; ++ivc) {
2145 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2146 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2147 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2148 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
// NOTE(review): listing fragment -- the first signature line and several body
// lines (declarations of Nvcd, Nxy, id1, and the closing braces) are absent
// from this excerpt.
// Presumably gm5_chiral_thread, judging by the member list; confirm.
//
// For every site of the task, copies v1 into v2 keeping the id1 and id2
// segments unchanged and negating the id3 and id4 segments.
2158 double *v2,
const double *v1)
2164 const int id2 =
m_Nvc;
2165 const int id3 =
m_Nvc * 2;
2166 const int id4 =
m_Nvc * 3;
2168 const int isite =
m_arg[itask].isite;
2170 const double *w1 = &v1[Nvcd * isite];
2171 double *w2 = &v2[Nvcd * isite];
2174 for (
int it = 0; it <
m_Mt; ++it) {
2175 for (
int iz = 0; iz <
m_Mz; ++iz) {
2176 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: offset of this site's first value, relative to the task's base site.
2177 int iv = Nvcd * (ixy + Nxy * (iz +
m_Nz * it));
2179 for (
int ivc = 0; ivc <
m_Nvc; ++ivc) {
2180 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2181 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2182 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2183 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_x_minus2_thread(const int, double *, const double *)
void mult_z_minus_bulk_thread(const int, double *, const double *)
const double * ptr(const int jin, const int site, const int jex) const
void mult_x_plus_bulk_thread(const int, double *, const double *)
void clear_thread(const int, double *)
void mult_x_plus2_thread(const int, double *, const double *)
void mult_z_minus1_thread(const int, double *, const double *)
void general(const char *format,...)
void mult_t_minus2_chiral_thread(const int, double *, const double *)
Bridge::VerboseLevel m_vl
void mult_z_minus2_thread(const int, double *, const double *)
void mult_t_plus1_dirac_thread(const int, double *, const double *)
void mult_t_plus2_dirac_thread(const int, double *, const double *)
void mult_y_plus2_thread(const int, double *, const double *)
void mult_x_minus_bulk_thread(const int, double *, const double *)
void mult_x_plus1_thread(const int, double *, const double *)
void mult_t_minus2_dirac_thread(const int, double *, const double *)
void gm5_dirac_thread(const int, double *, const double *)
void mult_y_minus2_thread(const int, double *, const double *)
void mult_t_minus1_dirac_thread(const int, double *, const double *)
void mult_z_plus2_thread(const int, double *, const double *)
void daypx_thread(const int, double *, const double, const double *)
void daxpy_thread(const int, double *, const double, const double *)
void scal_thread(const int, double *, const double)
void gm5_chiral_thread(const int, double *, const double *)
void mult_z_plus_bulk_thread(const int, double *, const double *)
const Field_G * m_U
gauge configuration.
void mult_y_minus1_thread(const int, double *, const double *)
void mult_z_plus1_thread(const int, double *, const double *)
void mult_t_minus1_chiral_thread(const int, double *, const double *)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_x_minus1_thread(const int, double *, const double *)
void mult_t_plus_bulk_chiral_thread(const int, double *, const double *)
void crucial(const char *format,...)
std::vector< double > m_boundary_each_node
boundary condition for each node.
void mult_y_plus_bulk_thread(const int, double *, const double *)
void mult_t_minus_bulk_dirac_thread(const int, double *, const double *)
void mult_t_minus_bulk_chiral_thread(const int, double *, const double *)
static const std::string class_name
void mult_t_plus1_chiral_thread(const int, double *, const double *)
void mult_t_plus2_chiral_thread(const int, double *, const double *)
std::vector< mult_arg > m_arg
void mult_t_plus_bulk_dirac_thread(const int, double *, const double *)
void mult_y_minus_bulk_thread(const int, double *, const double *)
void mult_y_plus1_thread(const int, double *, const double *)