22 #if defined USE_GROUP_SU3
23 #include "fopr_Wilson_impl_SU3.inc"
24 #elif defined USE_GROUP_SU2
25 #include "fopr_Wilson_impl_SU2.inc"
26 #elif defined USE_GROUP_SU_N
27 #include "fopr_Wilson_impl_SU_N.inc"
62 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
63 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
64 int itask = ith_z + m_Ntask_z * ith_t;
72 if (ith_t == 0)
m_arg[itask].kt0 = 1;
73 if (ith_z == 0)
m_arg[itask].kz0 = 1;
74 if (ith_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
75 if (ith_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
79 m_arg[itask].isite_cpz = ith_t *
m_Mt * Nxy2;
80 m_arg[itask].isite_cpt = ith_z *
m_Mz * Nxy2;
87 int Nvcd2 = 2 * Nc * Nd / 2;
89 std::vector<int> destid(
m_Ntask);
90 std::vector<int> offset(
m_Ntask);
91 std::vector<int> datasize(
m_Ntask);
92 std::vector<int> offset_up(
m_Ntask);
93 std::vector<int> offset_lw(
m_Ntask);
94 std::vector<int> datasize_up(
m_Ntask);
95 std::vector<int> datasize_lw(
m_Ntask);
98 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
99 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
101 int isite_cp = itask *
m_Mz *
m_Mt * (m_Ny / 2);
102 destid[itask] = itask;
103 offset[itask] =
sizeof(double) * Nvcd2 * isite_cp;
104 datasize[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Mt * (m_Ny / 2);
113 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
114 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
117 destid[itask] = itask;
118 offset[itask] =
sizeof(double) * Nvcd2 * isite_cp;
119 datasize[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Mt * m_Nx2;
128 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
129 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
130 int itask = ith_z + m_Ntask_z * ith_t;
132 offset_up[itask] = 0;
133 offset_lw[itask] = 0;
134 datasize_up[itask] = 0;
135 datasize_lw[itask] = 0;
137 destid[itask] = (m_Ntask_z - 1) + ith_t * m_Ntask_z;
138 offset_lw[itask] =
sizeof(double) * Nvcd2 * ith_t *
m_Mt *
m_Nx2 * m_Ny;
139 datasize_lw[itask] =
sizeof(double) * Nvcd2 *
m_Mt *
m_Nx2 * m_Ny;
141 if (ith_z == m_Ntask_z - 1) {
143 offset_up[itask] =
sizeof(double) * Nvcd2 * ith_t *
m_Mt *
m_Nx2 * m_Ny;
144 datasize_up[itask] =
sizeof(double) * Nvcd2 *
m_Mt *
m_Nx2 * m_Ny;
154 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
155 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
156 int itask = ith_z + m_Ntask_z * ith_t;
158 offset_up[itask] = 0;
159 offset_lw[itask] = 0;
160 datasize_up[itask] = 0;
161 datasize_lw[itask] = 0;
163 destid[itask] = ith_z + (m_Ntask_t - 1) * m_Ntask_z;
164 offset_lw[itask] =
sizeof(double) * Nvcd2 * ith_z *
m_Mz *
m_Nx2 * m_Ny;
165 datasize_lw[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Nx2 * m_Ny;
167 if (ith_t == m_Ntask_t - 1) {
168 destid[itask] = ith_z;
169 offset_up[itask] =
sizeof(double) * Nvcd2 * ith_z *
m_Mz *
m_Nx2 * m_Ny;
170 datasize_up[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Nx2 * m_Ny;
183 double *w,
double fac)
185 int Nvcd = m_Nvc *
m_Nd;
186 int Nvxy = Nvcd * m_Nx2 * m_Ny;
188 int isite = m_arg[itask].isite;
189 double *wp = &w[Nvcd * isite];
191 for (
int it = 0; it < m_Mt; ++it) {
192 for (
int iz = 0; iz < m_Mz; ++iz) {
193 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
194 int iv = ivxy + Nvxy * (iz + m_Nz * it);
195 wp[iv] = fac * wp[iv];
206 int Nvcd = m_Nvc *
m_Nd;
207 int Nvxy = Nvcd * m_Nx2 * m_Ny;
209 int isite = m_arg[itask].isite;
210 double *wp = &v[Nvcd * isite];
212 for (
int it = 0; it < m_Mt; ++it) {
213 for (
int iz = 0; iz < m_Mz; ++iz) {
214 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
215 int iv = ivxy + Nvxy * (iz + m_Nz * it);
225 int itask,
double *vcp1,
const double *
v1,
int ieo)
227 int Nvc2 = 2 * m_Nvc;
228 int Nvcd = m_Nvc *
m_Nd;
229 int Nvcd2 = Nvcd / 2;
238 int isite = m_arg[itask].isite;
239 int isite_cp = m_arg[itask].isite_cpx;
240 int iyzt0 = isite / m_Nx2;
244 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
245 const double *w1 = &v1[Nvcd * isite];
247 double bc2 = m_boundary2[idir];
252 for (
int it = 0; it < m_Mt; ++it) {
253 for (
int iz = 0; iz < m_Mz; ++iz) {
254 for (
int iy = 0; iy < m_Ny; ++iy) {
255 int iyzt = iy + m_Ny * (iz + m_Nz * it);
256 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
258 int is = ix + m_Nx2 * iyzt;
261 int ix1 = Nvc2 * ibf;
262 int ix2 = ix1 + m_Nvc;
264 for (
int ic = 0; ic <
m_Nc; ++ic) {
265 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
266 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
267 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
268 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
276 m_bw_send[idir]->start_thread(itask);
282 int itask,
double *
v2,
const double *vcp2,
int ieo)
284 int Nvc2 = 2 * m_Nvc;
285 int Nvcd = m_Nvc *
m_Nd;
286 int Nvcd2 = Nvcd / 2;
295 double wt1r, wt1i, wt2r, wt2i;
297 int isite = m_arg[itask].isite;
298 int isite_cp = m_arg[itask].isite_cpx;
299 int iyzt0 = isite / m_Nx2;
301 double *w2 = &v2[Nvcd * isite];
304 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
305 const double *u = m_U->ptr(m_Ndf * (isite + ieo *
m_Nvol / 2 + idir *
m_Nvol));
307 m_bw_recv[idir]->wait_thread(itask);
311 for (
int it = 0; it < m_Mt; ++it) {
312 for (
int iz = 0; iz < m_Mz; ++iz) {
313 for (
int iy = 0; iy < m_Ny; ++iy) {
314 int iyzt = iy + m_Ny * (iz + m_Nz * it);
315 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
318 int is = ix + m_Nx2 * iyzt;
321 int ix1 = Nvc2 * ibf;
322 int ix2 = ix1 + m_Nvc;
324 for (
int ic = 0; ic <
m_Nc; ++ic) {
325 int ic2 = ic * m_Nvc;
326 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
327 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
328 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
329 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
330 w2[2 * ic + id1 + iv] += wt1r;
331 w2[2 * ic + 1 + id1 + iv] += wt1i;
332 w2[2 * ic + id2 + iv] += wt2r;
333 w2[2 * ic + 1 + id2 + iv] += wt2i;
334 w2[2 * ic + id3 + iv] += wt2i;
335 w2[2 * ic + 1 + id3 + iv] += -wt2r;
336 w2[2 * ic + id4 + iv] += wt1i;
337 w2[2 * ic + 1 + id4 + iv] += -wt1r;
349 int itask,
double *v2,
const double *v1,
int ieo)
351 int Nvcd = m_Nvc *
m_Nd;
360 double vt1[m_Nvc], vt2[m_Nvc];
361 double wt1r, wt1i, wt2r, wt2i;
363 int isite = m_arg[itask].isite;
364 int iyzt0 = isite / m_Nx2;
366 double *w2 = &v2[Nvcd * isite];
367 const double *w1 = &v1[Nvcd * isite];
368 const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
370 for (
int it = 0; it < m_Mt; ++it) {
371 for (
int iz = 0; iz < m_Mz; ++iz) {
372 for (
int iy = 0; iy < m_Ny; ++iy) {
373 int iyzt = iy + m_Ny * (iz + m_Nz * it);
374 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
375 for (
int ix = 0; ix < m_Nx2 - Leo; ++ix) {
376 int is = ix + m_Nx2 * iyzt;
378 int in = Nvcd * (is + Leo);
381 for (
int ic = 0; ic <
m_Nc; ++ic) {
382 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
383 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
384 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
385 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
388 for (
int ic = 0; ic <
m_Nc; ++ic) {
389 int ic2 = ic * m_Nvc;
391 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
392 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
393 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
394 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
396 w2[2 * ic + id1 + iv] += wt1r;
397 w2[2 * ic + 1 + id1 + iv] += wt1i;
398 w2[2 * ic + id2 + iv] += wt2r;
399 w2[2 * ic + 1 + id2 + iv] += wt2i;
400 w2[2 * ic + id3 + iv] += wt2i;
401 w2[2 * ic + 1 + id3 + iv] += -wt2r;
402 w2[2 * ic + id4 + iv] += wt1i;
403 w2[2 * ic + 1 + id4 + iv] += -wt1r;
414 int itask,
double *vcp1,
const double *v1,
int ieo)
416 int Nvc2 = 2 * m_Nvc;
417 int Nvcd = m_Nvc *
m_Nd;
418 int Nvcd2 = Nvcd / 2;
427 int isite = m_arg[itask].isite;
428 int isite_cp = m_arg[itask].isite_cpx;
429 int iyzt0 = isite / m_Nx2;
433 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
434 const double *w1 = &v1[Nvcd * isite];
435 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
437 double vt1[m_Nvc], vt2[m_Nvc];
442 for (
int it = 0; it < m_Mt; ++it) {
443 for (
int iz = 0; iz < m_Mz; ++iz) {
444 for (
int iy = 0; iy < m_Ny; ++iy) {
445 int iyzt = iy + m_Ny * (iz + m_Nz * it);
446 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
448 int is = ix + m_Nx2 * iyzt;
452 int ix1 = Nvc2 * ibf;
453 int ix2 = ix1 + m_Nvc;
455 for (
int ic = 0; ic <
m_Nc; ++ic) {
456 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
457 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
458 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
459 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
462 for (
int ic = 0; ic <
m_Nc; ++ic) {
464 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
465 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
466 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
467 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
475 m_fw_send[idir]->start_thread(itask);
481 int itask,
double *v2,
const double *vcp2,
int ieo)
483 int Nvc2 = 2 * m_Nvc;
484 int Nvcd = m_Nvc *
m_Nd;
485 int Nvcd2 = Nvcd / 2;
493 double bc2 = m_boundary2[idir];
495 double wt1r, wt1i, wt2r, wt2i;
497 int isite = m_arg[itask].isite;
498 int isite_cp = m_arg[itask].isite_cpx;
499 int iyzt0 = isite / m_Nx2;
501 double *w2 = &v2[Nvcd * isite];
504 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
506 m_fw_recv[idir]->wait_thread(itask);
510 for (
int it = 0; it < m_Mt; ++it) {
511 for (
int iz = 0; iz < m_Mz; ++iz) {
512 for (
int iy = 0; iy < m_Ny; ++iy) {
513 int iyzt = iy + m_Ny * (iz + m_Nz * it);
514 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
516 int is = ix + m_Nx2 * iyzt;
519 int ix1 = Nvc2 * ibf;
520 int ix2 = ix1 + m_Nvc;
522 for (
int ic = 0; ic <
m_Nc; ++ic) {
524 int ici = 2 * ic + 1;
525 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
526 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
527 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
528 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
529 w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
530 w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
531 w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
532 w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
544 int itask,
double *v2,
const double *v1,
int ieo)
546 int Nvcd = m_Nvc *
m_Nd;
555 double vt1[m_Nvc], vt2[m_Nvc];
556 double wt1r, wt1i, wt2r, wt2i;
558 int isite = m_arg[itask].isite;
559 int iyzt0 = isite / m_Nx2;
561 double *w2 = &v2[Nvcd * isite];
562 const double *w1 = &v1[Nvcd * isite];
563 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
565 for (
int it = 0; it < m_Mt; ++it) {
566 for (
int iz = 0; iz < m_Mz; ++iz) {
567 for (
int iy = 0; iy < m_Ny; ++iy) {
568 int iyzt = iy + m_Ny * (iz + m_Nz * it);
569 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
571 for (
int ix = Meo; ix < m_Nx2; ++ix) {
572 int is = ix + m_Nx2 * iyzt;
574 int in = Nvcd * (is -
Meo);
575 int ig = m_Ndf * (is -
Meo);
577 for (
int ic = 0; ic <
m_Nc; ++ic) {
578 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
579 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
580 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
581 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
584 for (
int ic = 0; ic <
m_Nc; ++ic) {
587 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
588 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
589 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
590 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
592 w2[2 * ic + id1 + iv] += wt1r;
593 w2[2 * ic + 1 + id1 + iv] += wt1i;
594 w2[2 * ic + id2 + iv] += wt2r;
595 w2[2 * ic + 1 + id2 + iv] += wt2i;
596 w2[2 * ic + id3 + iv] += -wt2i;
597 w2[2 * ic + 1 + id3 + iv] += +wt2r;
598 w2[2 * ic + id4 + iv] += -wt1i;
599 w2[2 * ic + 1 + id4 + iv] += +wt1r;
610 int itask,
double *vcp1,
const double *v1,
int ieo)
612 int Nvc2 = 2 * m_Nvc;
613 int Nvcd = m_Nvc *
m_Nd;
614 int Nvcd2 = Nvcd / 2;
623 int isite = m_arg[itask].isite;
624 int isite_cp = m_arg[itask].isite_cpy;
628 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
629 const double *w1 = &v1[Nvcd * isite];
631 double bc2 = m_boundary2[idir];
635 for (
int it = 0; it < m_Mt; ++it) {
636 for (
int iz = 0; iz < m_Mz; ++iz) {
637 for (
int ix = 0; ix < m_Nx2; ++ix) {
638 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
639 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
641 int ix1 = Nvc2 * is2;
642 int ix2 = ix1 + m_Nvc;
644 for (
int ic = 0; ic <
m_Nc; ++ic) {
645 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
646 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
647 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
648 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
654 m_bw_send[idir]->start_thread(itask);
660 int itask,
double *v2,
const double *vcp2,
int ieo)
662 int Nvc2 = 2 * m_Nvc;
663 int Nvcd = m_Nvc *
m_Nd;
664 int Nvcd2 = Nvcd / 2;
673 double wt1r, wt1i, wt2r, wt2i;
675 int isite = m_arg[itask].isite;
676 int isite_cp = m_arg[itask].isite_cpy;
678 double *w2 = &v2[Nvcd * isite];
681 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
682 const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
684 m_bw_recv[idir]->wait_thread(itask);
687 for (
int it = 0; it < m_Mt; ++it) {
688 for (
int iz = 0; iz < m_Mz; ++iz) {
689 for (
int ix = 0; ix < m_Nx2; ++ix) {
690 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
691 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
694 int ix1 = Nvc2 * is2;
695 int ix2 = ix1 + m_Nvc;
697 for (
int ic = 0; ic <
m_Nc; ++ic) {
698 int ic2 = ic * m_Nvc;
700 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
701 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
702 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
703 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
705 w2[2 * ic + id1 + iv] += wt1r;
706 w2[2 * ic + 1 + id1 + iv] += wt1i;
707 w2[2 * ic + id2 + iv] += wt2r;
708 w2[2 * ic + 1 + id2 + iv] += wt2i;
709 w2[2 * ic + id3 + iv] += -wt2r;
710 w2[2 * ic + 1 + id3 + iv] += -wt2i;
711 w2[2 * ic + id4 + iv] += wt1r;
712 w2[2 * ic + 1 + id4 + iv] += wt1i;
722 int itask,
double *v2,
const double *v1,
int ieo)
724 int Nvcd = m_Nvc *
m_Nd;
733 double vt1[m_Nvc], vt2[m_Nvc];
734 double wt1r, wt1i, wt2r, wt2i;
736 int isite = m_arg[itask].isite;
738 double *w2 = &v2[Nvcd * isite];
739 const double *w1 = &v1[Nvcd * isite];
740 const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
742 for (
int it = 0; it < m_Mt; ++it) {
743 for (
int iz = 0; iz < m_Mz; ++iz) {
744 for (
int iy = 0; iy < m_Ny - 1; ++iy) {
745 for (
int ix = 0; ix < m_Nx2; ++ix) {
746 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
748 int in = Nvcd * (is + m_Nx2);
751 for (
int ic = 0; ic <
m_Nc; ++ic) {
752 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
753 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
754 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
755 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
758 for (
int ic = 0; ic <
m_Nc; ++ic) {
759 int ic2 = ic * m_Nvc;
761 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
762 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
763 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
764 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
766 w2[2 * ic + id1 + iv] += wt1r;
767 w2[2 * ic + 1 + id1 + iv] += wt1i;
768 w2[2 * ic + id2 + iv] += wt2r;
769 w2[2 * ic + 1 + id2 + iv] += wt2i;
770 w2[2 * ic + id3 + iv] += -wt2r;
771 w2[2 * ic + 1 + id3 + iv] += -wt2i;
772 w2[2 * ic + id4 + iv] += wt1r;
773 w2[2 * ic + 1 + id4 + iv] += wt1i;
784 int itask,
double *vcp1,
const double *v1,
int ieo)
786 int Nvc2 = 2 * m_Nvc;
787 int Nvcd = m_Nvc *
m_Nd;
788 int Nvcd2 = Nvcd / 2;
797 int isite = m_arg[itask].isite;
798 int isite_cp = m_arg[itask].isite_cpy;
802 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
803 const double *w1 = &v1[Nvcd * isite];
804 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
806 double vt1[m_Nvc], vt2[m_Nvc];
810 for (
int it = 0; it < m_Mt; ++it) {
811 for (
int iz = 0; iz < m_Mz; ++iz) {
812 for (
int ix = 0; ix < m_Nx2; ++ix) {
813 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
814 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
817 int ix1 = Nvc2 * is2;
818 int ix2 = ix1 + m_Nvc;
820 for (
int ic = 0; ic <
m_Nc; ++ic) {
821 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
822 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
823 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
824 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
827 for (
int ic = 0; ic <
m_Nc; ++ic) {
829 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
830 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
831 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
832 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
838 m_fw_send[idir]->start_thread(itask);
844 int itask,
double *v2,
const double *vcp2,
int ieo)
846 int Nvc2 = 2 * m_Nvc;
847 int Nvcd = m_Nvc *
m_Nd;
848 int Nvcd2 = Nvcd / 2;
856 double bc2 = m_boundary2[idir];
858 double wt1r, wt1i, wt2r, wt2i;
860 int isite = m_arg[itask].isite;
861 int isite_cp = m_arg[itask].isite_cpy;
863 double *w2 = &v2[Nvcd * isite];
866 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
868 m_fw_recv[idir]->wait_thread(itask);
871 for (
int it = 0; it < m_Mt; ++it) {
872 for (
int iz = 0; iz < m_Mz; ++iz) {
873 for (
int ix = 0; ix < m_Nx2; ++ix) {
874 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
875 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
877 int ix1 = Nvc2 * is2;
878 int ix2 = ix1 + m_Nvc;
880 for (
int ic = 0; ic <
m_Nc; ++ic) {
882 int ici = 2 * ic + 1;
883 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
884 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
885 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
886 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
887 w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
888 w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
889 w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
890 w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
900 int itask,
double *v2,
const double *v1,
int ieo)
902 int Nvcd = m_Nvc *
m_Nd;
911 double vt1[m_Nvc], vt2[m_Nvc];
912 double wt1r, wt1i, wt2r, wt2i;
914 int isite = m_arg[itask].isite;
916 double *w2 = &v2[Nvcd * isite];
917 const double *w1 = &v1[Nvcd * isite];
918 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
920 for (
int it = 0; it < m_Mt; ++it) {
921 for (
int iz = 0; iz < m_Mz; ++iz) {
922 for (
int iy = 1; iy < m_Ny; ++iy) {
923 for (
int ix = 0; ix < m_Nx2; ++ix) {
924 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
926 int in = Nvcd * (is - m_Nx2);
927 int ig = m_Ndf * (is - m_Nx2);
929 for (
int ic = 0; ic <
m_Nc; ++ic) {
930 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
931 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
932 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
933 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
936 for (
int ic = 0; ic <
m_Nc; ++ic) {
938 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
939 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
940 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
941 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
943 w2[ic2 + id1 + iv] += wt1r;
944 w2[ic2 + 1 + id1 + iv] += wt1i;
945 w2[ic2 + id2 + iv] += wt2r;
946 w2[ic2 + 1 + id2 + iv] += wt2i;
947 w2[ic2 + id3 + iv] += wt2r;
948 w2[ic2 + 1 + id3 + iv] += wt2i;
949 w2[ic2 + id4 + iv] += -wt1r;
950 w2[ic2 + 1 + id4 + iv] += -wt1i;
961 int itask,
double *vcp1,
const double *v1,
int ieo)
963 int Nvc2 = 2 * m_Nvc;
964 int Nvcd = m_Nvc *
m_Nd;
965 int Nvcd2 = Nvcd / 2;
974 int isite = m_arg[itask].isite;
975 int isite_cp = m_arg[itask].isite_cpz;
979 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
980 const double *w1 = &v1[Nvcd * isite];
982 double bc2 = m_boundary2[idir];
984 if (m_arg[itask].kz0 == 1) {
985 int Nxy = m_Nx2 * m_Ny;
987 for (
int it = 0; it < m_Mt; ++it) {
988 for (
int ixy = 0; ixy < Nxy; ++ixy) {
989 int is = ixy + Nxy * (iz + m_Nz * it);
990 int is2 = ixy + Nxy * it;
993 int ix1 = Nvc2 * is2;
994 int ix2 = ix1 + m_Nvc;
996 for (
int ic = 0; ic <
m_Nc; ++ic) {
997 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
998 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
999 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1000 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
1006 m_bw_send[idir]->start_thread(itask);
1012 int itask,
double *v2,
const double *vcp2,
int ieo)
1014 int Nvc2 = 2 * m_Nvc;
1015 int Nvcd = m_Nvc *
m_Nd;
1016 int Nvcd2 = Nvcd / 2;
1020 int id3 = m_Nvc * 2;
1021 int id4 = m_Nvc * 3;
1025 double wt1r, wt1i, wt2r, wt2i;
1027 int isite = m_arg[itask].isite;
1028 int isite_cp = m_arg[itask].isite_cpz;
1030 double *w2 = &v2[Nvcd * isite];
1033 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1034 const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1036 m_bw_recv[idir]->wait_thread(itask);
1038 if (m_arg[itask].kz1 == 1) {
1039 int Nxy = m_Nx2 * m_Ny;
1041 for (
int it = 0; it < m_Mt; ++it) {
1042 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1043 int is = ixy + Nxy * (iz + m_Nz * it);
1044 int is2 = ixy + Nxy * it;
1046 int ig = m_Ndf * is;
1047 int ix1 = Nvc2 * is2;
1048 int ix2 = ix1 + m_Nvc;
1050 for (
int ic = 0; ic <
m_Nc; ++ic) {
1051 int ic2 = ic * m_Nvc;
1053 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1054 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1055 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1056 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1058 w2[2 * ic + id1 + iv] += wt1r;
1059 w2[2 * ic + 1 + id1 + iv] += wt1i;
1060 w2[2 * ic + id2 + iv] += wt2r;
1061 w2[2 * ic + 1 + id2 + iv] += wt2i;
1062 w2[2 * ic + id3 + iv] += wt1i;
1063 w2[2 * ic + 1 + id3 + iv] += -wt1r;
1064 w2[2 * ic + id4 + iv] += -wt2i;
1065 w2[2 * ic + 1 + id4 + iv] += wt2r;
1075 int itask,
double *v2,
const double *v1,
int ieo)
1077 int Nvcd = m_Nvc *
m_Nd;
1081 int id3 = m_Nvc * 2;
1082 int id4 = m_Nvc * 3;
1086 double vt1[m_Nvc], vt2[m_Nvc];
1087 double wt1r, wt1i, wt2r, wt2i;
1089 int isite = m_arg[itask].isite;
1091 double *w2 = &v2[Nvcd * isite];
1092 const double *w1 = &v1[Nvcd * isite];
1093 const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1095 int kz1 = m_arg[itask].kz1;
1096 int Nxy = m_Nx2 * m_Ny;
1098 for (
int it = 0; it < m_Mt; ++it) {
1099 for (
int iz = 0; iz < m_Mz - kz1; ++iz) {
1100 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1101 int is = ixy + Nxy * (iz + m_Nz * it);
1103 int in = Nvcd * (is + Nxy);
1104 int ig = m_Ndf * is;
1106 for (
int ic = 0; ic <
m_Nc; ++ic) {
1107 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
1108 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
1109 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
1110 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
1113 for (
int ic = 0; ic <
m_Nc; ++ic) {
1114 int ic2 = ic * m_Nvc;
1116 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1117 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1118 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1119 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1121 w2[2 * ic + id1 + iv] += wt1r;
1122 w2[2 * ic + 1 + id1 + iv] += wt1i;
1123 w2[2 * ic + id2 + iv] += wt2r;
1124 w2[2 * ic + 1 + id2 + iv] += wt2i;
1125 w2[2 * ic + id3 + iv] += wt1i;
1126 w2[2 * ic + 1 + id3 + iv] += -wt1r;
1127 w2[2 * ic + id4 + iv] += -wt2i;
1128 w2[2 * ic + 1 + id4 + iv] += wt2r;
1138 int itask,
double *vcp1,
const double *v1,
int ieo)
1140 int Nvc2 = 2 * m_Nvc;
1141 int Nvcd = m_Nvc *
m_Nd;
1142 int Nvcd2 = Nvcd / 2;
1146 int id3 = m_Nvc * 2;
1147 int id4 = m_Nvc * 3;
1151 int isite = m_arg[itask].isite;
1152 int isite_cp = m_arg[itask].isite_cpz;
1156 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1157 const double *w1 = &v1[Nvcd * isite];
1158 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1160 double vt1[m_Nvc], vt2[m_Nvc];
1162 if (m_arg[itask].kz1 == 1) {
1163 int Nxy = m_Nx2 * m_Ny;
1165 for (
int it = 0; it < m_Mt; ++it) {
1166 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1167 int is = ixy + Nxy * (iz + m_Nz * it);
1168 int is2 = ixy + Nxy * it;
1170 int ig = m_Ndf * is;
1171 int ix1 = Nvc2 * is2;
1172 int ix2 = ix1 + m_Nvc;
1174 for (
int ic = 0; ic <
m_Nc; ++ic) {
1175 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1176 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1177 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1178 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1181 for (
int ic = 0; ic <
m_Nc; ++ic) {
1183 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1184 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1185 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1186 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1192 m_fw_send[idir]->start_thread(itask);
1198 int itask,
double *v2,
const double *vcp2,
int ieo)
1200 int Nvc2 = 2 * m_Nvc;
1201 int Nvcd = m_Nvc *
m_Nd;
1202 int Nvcd2 = Nvcd / 2;
1206 int id3 = m_Nvc * 2;
1207 int id4 = m_Nvc * 3;
1210 double bc2 = m_boundary2[idir];
1212 double wt1r, wt1i, wt2r, wt2i;
1214 int isite = m_arg[itask].isite;
1215 int isite_cp = m_arg[itask].isite_cpz;
1217 double *w2 = &v2[Nvcd * isite];
1220 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1222 m_fw_recv[idir]->wait_thread(itask);
1224 if (m_arg[itask].kz0 == 1) {
1225 int Nxy = m_Nx2 * m_Ny;
1228 for (
int it = 0; it < m_Mt; ++it) {
1229 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1230 int is = ixy + Nxy * (iz + m_Nz * it);
1231 int is2 = ixy + Nxy * it;
1233 int ix1 = Nvc2 * is2;
1234 int ix2 = ix1 + m_Nvc;
1236 for (
int ic = 0; ic <
m_Nc; ++ic) {
1238 int ici = 2 * ic + 1;
1239 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1240 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1241 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1242 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1243 w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1244 w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1245 w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1246 w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
1256 int itask,
double *v2,
const double *v1,
int ieo)
1258 int Nvcd = m_Nvc *
m_Nd;
1262 int id3 = m_Nvc * 2;
1263 int id4 = m_Nvc * 3;
1267 double vt1[m_Nvc], vt2[m_Nvc];
1268 double wt1r, wt1i, wt2r, wt2i;
1270 int isite = m_arg[itask].isite;
1272 double *w2 = &v2[Nvcd * isite];
1273 const double *w1 = &v1[Nvcd * isite];
1274 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1276 int kz0 = m_arg[itask].kz0;
1277 int Nxy = m_Nx2 * m_Ny;
1279 for (
int it = 0; it < m_Mt; ++it) {
1280 for (
int iz = kz0; iz < m_Mz; ++iz) {
1281 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1282 int is = ixy + Nxy * (iz + m_Nz * it);
1284 int in = Nvcd * (is - Nxy);
1285 int ig = m_Ndf * (is - Nxy);
1287 for (
int ic = 0; ic <
m_Nc; ++ic) {
1288 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1289 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1290 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1291 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1294 for (
int ic = 0; ic <
m_Nc; ++ic) {
1296 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1297 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1298 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1299 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1301 w2[ic2 + id1 + iv] += wt1r;
1302 w2[ic2 + 1 + id1 + iv] += wt1i;
1303 w2[ic2 + id2 + iv] += wt2r;
1304 w2[ic2 + 1 + id2 + iv] += wt2i;
1305 w2[ic2 + id3 + iv] += -wt1i;
1306 w2[ic2 + 1 + id3 + iv] += wt1r;
1307 w2[ic2 + id4 + iv] += wt2i;
1308 w2[ic2 + 1 + id4 + iv] += -wt2r;
1318 int itask,
double *vcp1,
const double *v1,
int ieo)
1320 int Nvc2 = 2 * m_Nvc;
1321 int Nvcd = m_Nvc *
m_Nd;
1322 int Nvcd2 = Nvcd / 2;
1326 int id3 = m_Nvc * 2;
1327 int id4 = m_Nvc * 3;
1331 int isite = m_arg[itask].isite;
1332 int isite_cp = m_arg[itask].isite_cpt;
1336 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1337 const double *w1 = &v1[Nvcd * isite];
1339 double bc2 = m_boundary2[idir];
1341 if (m_arg[itask].kt0 == 1) {
1342 int Nxy = m_Nx2 * m_Ny;
1344 for (
int iz = 0; iz < m_Mz; ++iz) {
1345 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1346 int is = ixy + Nxy * (iz + m_Nz * it);
1347 int is2 = ixy + Nxy * iz;
1350 int ix1 = Nvc2 * is2;
1351 int ix2 = ix1 + m_Nvc;
1353 for (
int ic = 0; ic <
m_Nc; ++ic) {
1354 w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1355 w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1356 w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1357 w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1363 m_bw_send[idir]->start_thread(itask);
1369 int itask,
double *v2,
const double *vcp2,
int ieo)
1371 int Nvc2 = 2 * m_Nvc;
1372 int Nvcd = m_Nvc *
m_Nd;
1373 int Nvcd2 = Nvcd / 2;
1377 int id3 = m_Nvc * 2;
1378 int id4 = m_Nvc * 3;
1382 double wt1r, wt1i, wt2r, wt2i;
1384 int isite = m_arg[itask].isite;
1385 int isite_cp = m_arg[itask].isite_cpt;
1387 double *w2 = &v2[Nvcd * isite];
1390 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1391 const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1393 m_bw_recv[idir]->wait_thread(itask);
1395 if (m_arg[itask].kt1 == 1) {
1396 int Nxy = m_Nx2 * m_Ny;
1398 for (
int iz = 0; iz < m_Mz; ++iz) {
1399 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1400 int is = ixy + Nxy * (iz + m_Nz * it);
1401 int is2 = ixy + Nxy * iz;
1403 int ig = m_Ndf * is;
1404 int ix1 = Nvc2 * is2;
1405 int ix2 = ix1 + m_Nvc;
1407 for (
int ic = 0; ic <
m_Nc; ++ic) {
1408 int ic2 = ic * m_Nvc;
1410 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1411 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1412 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1413 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1415 w2[2 * ic + id3 + iv] += wt1r;
1416 w2[2 * ic + 1 + id3 + iv] += wt1i;
1417 w2[2 * ic + id4 + iv] += wt2r;
1418 w2[2 * ic + 1 + id4 + iv] += wt2i;
1428 int itask,
double *v2,
const double *v1,
int ieo)
1430 int Nvcd = m_Nvc *
m_Nd;
1434 int id3 = m_Nvc * 2;
1435 int id4 = m_Nvc * 3;
1439 double vt1[m_Nvc], vt2[m_Nvc];
1440 double wt1r, wt1i, wt2r, wt2i;
1442 int isite = m_arg[itask].isite;
1444 double *w2 = &v2[Nvcd * isite];
1445 const double *w1 = &v1[Nvcd * isite];
1446 const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1448 int kt1 = m_arg[itask].kt1;
1449 int Nxy = m_Nx2 * m_Ny;
1450 int Nxyz = Nxy * m_Nz;
1452 for (
int it = 0; it < m_Mt - kt1; ++it) {
1453 for (
int iz = 0; iz < m_Mz; ++iz) {
1454 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1455 int is = ixy + Nxy * (iz + m_Nz * it);
1457 int in = Nvcd * (is + Nxyz);
1458 int ig = m_Ndf * is;
1460 for (
int ic = 0; ic <
m_Nc; ++ic) {
1461 vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1462 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1463 vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1464 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1467 for (
int ic = 0; ic <
m_Nc; ++ic) {
1468 int ic2 = ic * m_Nvc;
1470 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1471 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1472 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1473 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1475 w2[2 * ic + id3 + iv] += wt1r;
1476 w2[2 * ic + 1 + id3 + iv] += wt1i;
1477 w2[2 * ic + id4 + iv] += wt2r;
1478 w2[2 * ic + 1 + id4 + iv] += wt2i;
// NOTE(review): the signature line and several body lines (definitions of idir,
// id1, id2, it, in, icr and the declaration that receives the send-buffer
// pointer) are not visible in this listing — confirm against the full file.
//
// Fills this task's slice of the m_fw_send[idir] buffer and then starts the
// channel for this task. Only tasks with kt1 == 1 write data: for each (iz, ixy)
// the doubled components id1/id2 of v1 are multiplied through
// mult_udagv_r/mult_udagv_i with the link at that site, and the result is
// stored (plain assignment) at a packed offset of Nvc2 doubles per site.
//
//   itask : index into m_arg (first site, buffer site offset, kt1 flag)
//   vcp1  : not referenced in the visible lines
//   v1    : input array, read only
//   ieo   : enters the gauge-field offset as (1 - ieo) * m_Nvol / 2
1488 int itask,
double *vcp1,
const double *v1,
int ieo)
1490 int Nvc2 = 2 * m_Nvc;
1491 int Nvcd = m_Nvc *
m_Nd;
1492 int Nvcd2 = Nvcd / 2;
1496 int id3 = m_Nvc * 2;
1497 int id4 = m_Nvc * 3;
1501 int isite = m_arg[itask].isite;
1502 int isite_cp = m_arg[itask].isite_cpt;
// Byte offset into the send buffer: Nvcd2 doubles per buffer site.
1506 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1507 const double *w1 = &v1[Nvcd * isite];
1508 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
// NOTE(review): variable-length arrays unless m_Nvc is a compile-time constant.
1510 double vt1[m_Nvc], vt2[m_Nvc];
// Only tasks flagged kt1 contribute data to the buffer.
1512 if (m_arg[itask].kt1 == 1) {
1513 int Nxy = m_Nx2 * m_Ny;
1515 for (
int iz = 0; iz < m_Mz; ++iz) {
1516 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: site index within the task's block; is2: index within the packed 3d slice.
1517 int is = ixy + Nxy * (iz + m_Nz * it);
1518 int is2 = ixy + Nxy * iz;
1520 int ig = m_Ndf * is;
1521 int ix1 = Nvc2 * is2;
1522 int ix2 = ix1 + m_Nvc;
1524 for (
int ic = 0; ic <
m_Nc; ++ic) {
1525 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1526 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1527 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1528 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1531 for (
int ic = 0; ic <
m_Nc; ++ic) {
1533 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1534 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1535 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1536 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
// Hand this task's part of the buffer to the channel.
1542 m_fw_send[idir]->start_thread(itask);
// NOTE(review): the signature line and several body lines (definitions of idir,
// id1, id2, it, iv, icr and the declaration that receives the receive-buffer
// pointer, presumably w1) are not visible in this listing — confirm.
//
// Waits on m_fw_recv[idir] for this task and, for tasks with kt0 == 1, adds the
// packed buffer contents scaled by the boundary factor bc2 into components
// id1/id2 of v2. No link multiplication is done in the visible lines.
//
//   itask : index into m_arg (first site, buffer site offset, kt0 flag)
//   v2    : output array, updated with +=
//   vcp2  : not referenced in the visible lines
//   ieo   : not referenced in the visible lines
1548 int itask,
double *v2,
const double *vcp2,
int ieo)
1550 int Nvc2 = 2 * m_Nvc;
1551 int Nvcd = m_Nvc *
m_Nd;
1552 int Nvcd2 = Nvcd / 2;
1556 int id3 = m_Nvc * 2;
1557 int id4 = m_Nvc * 3;
1560 double bc2 = m_boundary2[idir];
// NOTE(review): wt1r..wt2i are not used in the visible lines.
1562 double wt1r, wt1i, wt2r, wt2i;
1564 int isite = m_arg[itask].isite;
1565 int isite_cp = m_arg[itask].isite_cpt;
1567 double *w2 = &v2[Nvcd * isite];
// Byte offset into the receive buffer: Nvcd2 doubles per buffer site.
1570 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// The buffer is read only after the wait for this task returns.
1572 m_fw_recv[idir]->wait_thread(itask);
1574 if (m_arg[itask].kt0 == 1) {
1575 int Nxy = m_Nx2 * m_Ny;
1577 for (
int iz = 0; iz < m_Mz; ++iz) {
1578 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1579 int is = ixy + Nxy * (iz + m_Nz * it);
1580 int is2 = ixy + Nxy * iz;
// ix1/ix2: offsets of the two m_Nvc-sized vectors of this site in the buffer.
1582 int ix1 = Nvc2 * is2;
1583 int ix2 = ix1 + m_Nvc;
1585 for (
int ic = 0; ic <
m_Nc; ++ic) {
1587 int ici = 2 * ic + 1;
1588 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1589 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1590 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1591 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
// NOTE(review): the signature line and several body lines (definitions of idir,
// id1, id2, iv and the ic2 used in the second loop) are not visible in this
// listing — confirm against the full file.
//
// Accumulates a t-direction hopping contribution into v2 using data that is
// addressed locally: for each site of the task, components id1/id2 of the site
// one t-slice behind (offset -Nxyz) are doubled, multiplied through
// mult_udagv_r/mult_udagv_i with the link stored at that same earlier site, and
// added to components id1/id2 of v2.
//
//   itask : index into m_arg; supplies the task's first site and its kt0 flag
//   v2    : output array, updated with +=
//   v1    : input array, read only
//   ieo   : enters the gauge-field offset as (1 - ieo) * m_Nvol / 2
1601 int itask,
double *v2,
const double *v1,
int ieo)
1603 int Nvcd = m_Nvc *
m_Nd;
1607 int id3 = m_Nvc * 2;
1608 int id4 = m_Nvc * 3;
// NOTE(review): variable-length arrays unless m_Nvc is a compile-time constant.
1612 double vt1[m_Nvc], vt2[m_Nvc];
1613 double wt1r, wt1i, wt2r, wt2i;
1615 int isite = m_arg[itask].isite;
1617 double *w2 = &v2[Nvcd * isite];
1618 const double *w1 = &v1[Nvcd * isite];
1619 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1621 int kt0 = m_arg[itask].kt0;
1622 int Nxy = m_Nx2 * m_Ny;
1623 int Nxyz = Nxy * m_Nz;
// When kt0 == 1 the first local t-slice is skipped; its backward neighbour
// would lie outside the range addressed here (presumably handled by the
// boundary-exchange routines — confirm).
1625 for (
int it = kt0; it < m_Mt; ++it) {
1626 for (
int iz = 0; iz < m_Mz; ++iz) {
1627 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1628 int is = ixy + Nxy * (iz + m_Nz * it);
// Both the spinor and the link are taken from the site one t-slice behind.
1630 int in = Nvcd * (is - Nxyz);
1631 int ig = m_Ndf * (is - Nxyz);
1633 for (
int ic = 0; ic <
m_Nc; ++ic) {
1634 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1635 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1636 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1637 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1640 for (
int ic = 0; ic <
m_Nc; ++ic) {
1642 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1643 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1644 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1645 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1647 w2[ic2 + id1 + iv] += wt1r;
1648 w2[ic2 + 1 + id1 + iv] += wt1i;
1649 w2[ic2 + id2 + iv] += wt2r;
1650 w2[ic2 + 1 + id2 + iv] += wt2i;
// NOTE(review): the signature line and several body lines (definitions of idir,
// id1, id2, it, in and the declaration that receives the send-buffer pointer,
// presumably w2) are not visible in this listing — confirm.
//
// Fills this task's slice of the m_bw_send[idir] buffer and then starts the
// channel for this task. Only tasks with kt0 == 1 write data: for each (iz, ixy)
// the sums (id1 + id3) and (id2 + id4) of the v1 components are scaled by the
// boundary factor bc2 and stored at a packed offset of Nvc2 doubles per site.
// No link multiplication is done in the visible lines.
//
//   itask : index into m_arg (first site, buffer site offset, kt0 flag)
//   vcp1  : not referenced in the visible lines
//   v1    : input array, read only
//   ieo   : not referenced in the visible lines
1660 int itask,
double *vcp1,
const double *v1,
int ieo)
1662 int Nvc2 = 2 * m_Nvc;
1663 int Nvcd = m_Nvc *
m_Nd;
1664 int Nvcd2 = Nvcd / 2;
1668 int id3 = m_Nvc * 2;
1669 int id4 = m_Nvc * 3;
1673 int isite = m_arg[itask].isite;
1674 int isite_cp = m_arg[itask].isite_cpt;
// Byte offset into the send buffer: Nvcd2 doubles per buffer site.
1678 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1679 const double *w1 = &v1[Nvcd * isite];
1681 double bc2 = m_boundary2[idir];
// Only tasks flagged kt0 contribute data to the buffer.
1683 if (m_arg[itask].kt0 == 1) {
1684 int Nxy = m_Nx2 * m_Ny;
1686 for (
int iz = 0; iz < m_Mz; ++iz) {
1687 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1688 int is = ixy + Nxy * (iz + m_Nz * it);
1689 int is2 = ixy + Nxy * iz;
1692 int ix1 = Nvc2 * is2;
1693 int ix2 = ix1 + m_Nvc;
1695 for (
int ic = 0; ic <
m_Nc; ++ic) {
1696 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1697 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1698 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1699 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
// Hand this task's part of the buffer to the channel.
1705 m_bw_send[idir]->start_thread(itask);
// NOTE(review): the signature line and several body lines (definitions of idir,
// id1, id2, it, iv and the declaration that receives the receive-buffer
// pointer, presumably w1) are not visible in this listing — confirm.
//
// Waits on m_bw_recv[idir] for this task and, for tasks with kt1 == 1,
// multiplies the two packed buffer vectors of each site by the local link
// through mult_uv_r/mult_uv_i. The first product is added to components id1
// and id3 of v2, the second to components id2 and id4.
//
//   itask : index into m_arg (first site, buffer site offset, kt1 flag)
//   v2    : output array, updated with +=
//   vcp2  : not referenced in the visible lines
//   ieo   : enters the gauge-field offset as ieo * m_Nvol / 2
1711 int itask,
double *v2,
const double *vcp2,
int ieo)
1713 int Nvc2 = 2 * m_Nvc;
1714 int Nvcd = m_Nvc *
m_Nd;
1715 int Nvcd2 = Nvcd / 2;
1719 int id3 = m_Nvc * 2;
1720 int id4 = m_Nvc * 3;
1724 double wt1r, wt1i, wt2r, wt2i;
1726 int isite = m_arg[itask].isite;
1727 int isite_cp = m_arg[itask].isite_cpt;
1729 double *w2 = &v2[Nvcd * isite];
// Byte offset into the receive buffer: Nvcd2 doubles per buffer site.
1732 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1733 const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
// The buffer is read only after the wait for this task returns.
1735 m_bw_recv[idir]->wait_thread(itask);
1737 if (m_arg[itask].kt1 == 1) {
1738 int Nxy = m_Nx2 * m_Ny;
1740 for (
int iz = 0; iz < m_Mz; ++iz) {
1741 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1742 int is = ixy + Nxy * (iz + m_Nz * it);
1743 int is2 = ixy + Nxy * iz;
1745 int ig = m_Ndf * is;
1746 int ix1 = Nvc2 * is2;
1747 int ix2 = ix1 + m_Nvc;
1749 for (
int ic = 0; ic <
m_Nc; ++ic) {
1750 int ic2 = ic * m_Nvc;
1752 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1753 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1754 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1755 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// The same products are added to both halves of the output spinor.
1757 w2[2 * ic + id1 + iv] += wt1r;
1758 w2[2 * ic + 1 + id1 + iv] += wt1i;
1759 w2[2 * ic + id2 + iv] += wt2r;
1760 w2[2 * ic + 1 + id2 + iv] += wt2i;
1761 w2[2 * ic + id3 + iv] += wt1r;
1762 w2[2 * ic + 1 + id3 + iv] += wt1i;
1763 w2[2 * ic + id4 + iv] += wt2r;
1764 w2[2 * ic + 1 + id4 + iv] += wt2i;
// NOTE(review): the signature line and several body lines (e.g. the definitions
// of idir, id1, id2 and iv) are not visible in this listing — confirm.
//
// Accumulates a t-direction hopping contribution into v2 using data that is
// addressed locally: for each site of the task, the sums (id1 + id3) and
// (id2 + id4) of the components of the site one t-slice ahead (offset Nxyz) are
// multiplied by the link at the current site through mult_uv_r/mult_uv_i. The
// first product is added to components id1 and id3 of v2, the second to id2
// and id4.
//
//   itask : index into m_arg; supplies the task's first site and its kt1 flag
//   v2    : output array, updated with +=
//   v1    : input array, read only
//   ieo   : enters the gauge-field offset as ieo * m_Nvol / 2
1774 int itask,
double *v2,
const double *v1,
int ieo)
1776 int Nvcd = m_Nvc *
m_Nd;
1780 int id3 = m_Nvc * 2;
1781 int id4 = m_Nvc * 3;
// NOTE(review): variable-length arrays unless m_Nvc is a compile-time constant.
1785 double vt1[m_Nvc], vt2[m_Nvc];
1786 double wt1r, wt1i, wt2r, wt2i;
1788 int isite = m_arg[itask].isite;
1790 double *w2 = &v2[Nvcd * isite];
1791 const double *w1 = &v1[Nvcd * isite];
1792 const double *u = m_U->ptr(m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1794 int kt1 = m_arg[itask].kt1;
1795 int Nxy = m_Nx2 * m_Ny;
1796 int Nxyz = Nxy * m_Nz;
// When kt1 == 1 the last local t-slice is left out of this loop (presumably
// handled by the boundary-exchange routines — confirm).
1798 for (
int it = 0; it < m_Mt - kt1; ++it) {
1799 for (
int iz = 0; iz < m_Mz; ++iz) {
1800 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1801 int is = ixy + Nxy * (iz + m_Nz * it);
// in: offset of the neighbour one t-slice ahead; ig: gauge offset of this site.
1803 int in = Nvcd * (is + Nxyz);
1804 int ig = m_Ndf * is;
1806 for (
int ic = 0; ic <
m_Nc; ++ic) {
1807 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1808 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1809 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1810 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
1813 for (
int ic = 0; ic <
m_Nc; ++ic) {
1814 int ic2 = ic * m_Nvc;
1816 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1817 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1818 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1819 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
// The same products are added to both halves of the output spinor.
1821 w2[2 * ic + id1 + iv] += wt1r;
1822 w2[2 * ic + 1 + id1 + iv] += wt1i;
1823 w2[2 * ic + id2 + iv] += wt2r;
1824 w2[2 * ic + 1 + id2 + iv] += wt2i;
1825 w2[2 * ic + id3 + iv] += wt1r;
1826 w2[2 * ic + 1 + id3 + iv] += wt1i;
1827 w2[2 * ic + id4 + iv] += wt2r;
1828 w2[2 * ic + 1 + id4 + iv] += wt2i;
// NOTE(review): the signature line and several body lines (definitions of idir,
// id1, id2, it, in, icr and the declaration that receives the send-buffer
// pointer, presumably w2) are not visible in this listing — confirm.
//
// Fills this task's slice of the m_fw_send[idir] buffer and then starts the
// channel for this task. Only tasks with kt1 == 1 write data: for each (iz, ixy)
// the differences (id1 - id3) and (id2 - id4) of the v1 components are
// multiplied through mult_udagv_r/mult_udagv_i with the link at that site, and
// the result is stored (plain assignment) at a packed offset of Nvc2 doubles
// per site.
//
//   itask : index into m_arg (first site, buffer site offset, kt1 flag)
//   vcp1  : not referenced in the visible lines
//   v1    : input array, read only
//   ieo   : enters the gauge-field offset as (1 - ieo) * m_Nvol / 2
1838 int itask,
double *vcp1,
const double *v1,
int ieo)
1840 int Nvc2 = 2 * m_Nvc;
1841 int Nvcd = m_Nvc *
m_Nd;
1842 int Nvcd2 = Nvcd / 2;
1846 int id3 = m_Nvc * 2;
1847 int id4 = m_Nvc * 3;
1851 int isite = m_arg[itask].isite;
1852 int isite_cp = m_arg[itask].isite_cpt;
// Byte offset into the send buffer: Nvcd2 doubles per buffer site.
1856 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1857 const double *w1 = &v1[Nvcd * isite];
1858 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
// NOTE(review): variable-length arrays unless m_Nvc is a compile-time constant.
1860 double vt1[m_Nvc], vt2[m_Nvc];
// Only tasks flagged kt1 contribute data to the buffer.
1862 if (m_arg[itask].kt1 == 1) {
1863 int Nxy = m_Nx2 * m_Ny;
1865 for (
int iz = 0; iz < m_Mz; ++iz) {
1866 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: site index within the task's block; is2: index within the packed 3d slice.
1867 int is = ixy + Nxy * (iz + m_Nz * it);
1868 int is2 = ixy + Nxy * iz;
1870 int ig = m_Ndf * is;
1871 int ix1 = Nvc2 * is2;
1872 int ix2 = ix1 + m_Nvc;
1874 for (
int ic = 0; ic <
m_Nc; ++ic) {
1875 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1876 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1877 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1878 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1881 for (
int ic = 0; ic <
m_Nc; ++ic) {
1883 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1884 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1885 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1886 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
// Hand this task's part of the buffer to the channel.
1892 m_fw_send[idir]->start_thread(itask);
// NOTE(review): the signature line and several body lines (definitions of idir,
// id1, id2, it, iv, icr and the declaration that receives the receive-buffer
// pointer, presumably w1) are not visible in this listing — confirm.
//
// Waits on m_fw_recv[idir] for this task and, for tasks with kt0 == 1, combines
// the packed buffer contents scaled by the boundary factor bc2 into v2: the
// first buffer vector is added to component id1 and subtracted from id3, the
// second is added to id2 and subtracted from id4. No link multiplication is
// done in the visible lines.
//
//   itask : index into m_arg (first site, buffer site offset, kt0 flag)
//   v2    : output array, updated with += / -=
//   vcp2  : not referenced in the visible lines
//   ieo   : not referenced in the visible lines
1898 int itask,
double *v2,
const double *vcp2,
int ieo)
1900 int Nvc2 = 2 * m_Nvc;
1901 int Nvcd = m_Nvc *
m_Nd;
1902 int Nvcd2 = Nvcd / 2;
1906 int id3 = m_Nvc * 2;
1907 int id4 = m_Nvc * 3;
1910 double bc2 = m_boundary2[idir];
// NOTE(review): wt1r..wt2i are not used in the visible lines.
1912 double wt1r, wt1i, wt2r, wt2i;
1914 int isite = m_arg[itask].isite;
1915 int isite_cp = m_arg[itask].isite_cpt;
1917 double *w2 = &v2[Nvcd * isite];
// Byte offset into the receive buffer: Nvcd2 doubles per buffer site.
1920 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// The buffer is read only after the wait for this task returns.
1922 m_fw_recv[idir]->wait_thread(itask);
1924 if (m_arg[itask].kt0 == 1) {
1925 int Nxy = m_Nx2 * m_Ny;
1927 for (
int iz = 0; iz < m_Mz; ++iz) {
1928 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1929 int is = ixy + Nxy * (iz + m_Nz * it);
1930 int is2 = ixy + Nxy * iz;
// ix1/ix2: offsets of the two m_Nvc-sized vectors of this site in the buffer.
1932 int ix1 = Nvc2 * is2;
1933 int ix2 = ix1 + m_Nvc;
1935 for (
int ic = 0; ic <
m_Nc; ++ic) {
1937 int ici = 2 * ic + 1;
1938 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1939 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1940 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1941 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1942 w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1943 w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1944 w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1945 w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
// NOTE(review): the signature line and several body lines (definitions of idir,
// id1, id2, iv and the ic2 used in the second loop) are not visible in this
// listing — confirm against the full file.
//
// Accumulates a t-direction hopping contribution into v2 using data that is
// addressed locally: for each site of the task, the differences (id1 - id3) and
// (id2 - id4) of the components of the site one t-slice behind (offset -Nxyz)
// are multiplied through mult_udagv_r/mult_udagv_i with the link stored at that
// same earlier site. The first product is added to id1 and subtracted from
// id3, the second is added to id2 and subtracted from id4.
//
//   itask : index into m_arg; supplies the task's first site and its kt0 flag
//   v2    : output array, updated with += / -=
//   v1    : input array, read only
//   ieo   : enters the gauge-field offset as (1 - ieo) * m_Nvol / 2
1955 int itask,
double *v2,
const double *v1,
int ieo)
1957 int Nvcd = m_Nvc *
m_Nd;
1961 int id3 = m_Nvc * 2;
1962 int id4 = m_Nvc * 3;
// NOTE(review): variable-length arrays unless m_Nvc is a compile-time constant.
1966 double vt1[m_Nvc], vt2[m_Nvc];
1967 double wt1r, wt1i, wt2r, wt2i;
1969 int isite = m_arg[itask].isite;
1971 double *w2 = &v2[Nvcd * isite];
1972 const double *w1 = &v1[Nvcd * isite];
1973 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1975 int kt0 = m_arg[itask].kt0;
1976 int Nxy = m_Nx2 * m_Ny;
1977 int Nxyz = Nxy * m_Nz;
// When kt0 == 1 the first local t-slice is skipped (presumably handled by the
// boundary-exchange routines — confirm).
1979 for (
int it = kt0; it < m_Mt; ++it) {
1980 for (
int iz = 0; iz < m_Mz; ++iz) {
1981 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1982 int is = ixy + Nxy * (iz + m_Nz * it);
// Both the spinor and the link are taken from the site one t-slice behind.
1984 int in = Nvcd * (is - Nxyz);
1985 int ig = m_Ndf * (is - Nxyz);
1987 for (
int ic = 0; ic <
m_Nc; ++ic) {
1988 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1989 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1990 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1991 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1994 for (
int ic = 0; ic <
m_Nc; ++ic) {
1996 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1997 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1998 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1999 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2001 w2[ic2 + id1 + iv] += wt1r;
2002 w2[ic2 + 1 + id1 + iv] += wt1i;
2003 w2[ic2 + id2 + iv] += wt2r;
2004 w2[ic2 + 1 + id2 + iv] += wt2i;
2005 w2[ic2 + id3 + iv] -= wt1r;
2006 w2[ic2 + 1 + id3 + iv] -= wt1i;
2007 w2[ic2 + id4 + iv] -= wt2r;
2008 w2[ic2 + 1 + id4 + iv] -= wt2i;
// NOTE(review): the signature line and the definitions of id1 and id2 are not
// visible in this listing — confirm against the full file.
//
// Out-of-place block swap over the sites of one task: for every site, the
// m_Nvc doubles at offsets id3/id4 of v1 are copied to offsets id1/id2 of v2,
// and those at id1/id2 of v1 are copied to id3/id4 of v2.
//
//   itask : index into m_arg; supplies the task's first site
//   v2    : output array, overwritten for the task's sites
//   v1    : input array, read only
2018 int itask,
double *v2,
const double *v1)
2020 int Nvcd = m_Nvc *
m_Nd;
2021 int Nxy = m_Nx2 * m_Ny;
2025 int id3 = m_Nvc * 2;
2026 int id4 = m_Nvc * 3;
2028 int isite = m_arg[itask].isite;
// Both pointers are rebased to the first site owned by this task.
2029 double *w2 = &v2[Nvcd * isite];
2030 const double *w1 = &v1[Nvcd * isite];
2032 for (
int it = 0; it < m_Mt; ++it) {
2033 for (
int iz = 0; iz < m_Mz; ++iz) {
2034 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: offset of this site's first double; t uses the m_Nz stride, not m_Mz.
2035 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2036 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2037 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2038 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2039 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2040 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
// NOTE(review): the signature line and the definitions of id1 and id2 are not
// visible in this listing — confirm against the full file.
//
// Out-of-place sign flip over the sites of one task: for every site, the m_Nvc
// doubles at offsets id1/id2 of v1 are copied unchanged to v2, and those at
// offsets id3/id4 are copied with their sign reversed.
//
//   itask : index into m_arg; supplies the task's first site
//   v2    : output array, overwritten for the task's sites
//   v1    : input array, read only
2050 int itask,
double *v2,
const double *v1)
2052 int Nvcd = m_Nvc *
m_Nd;
2053 int Nxy = m_Nx2 * m_Ny;
2057 int id3 = m_Nvc * 2;
2058 int id4 = m_Nvc * 3;
2060 int isite = m_arg[itask].isite;
// Both pointers are rebased to the first site owned by this task.
2061 double *w2 = &v2[Nvcd * isite];
2062 const double *w1 = &v1[Nvcd * isite];
2064 for (
int it = 0; it < m_Mt; ++it) {
2065 for (
int iz = 0; iz < m_Mz; ++iz) {
2066 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: offset of this site's first double; t uses the m_Nz stride, not m_Mz.
2067 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2068 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2069 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2070 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2071 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2072 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
// NOTE(review): the whole signature and the definitions of id1 and id2 are not
// visible in this listing; itask and v1 are presumably parameters — confirm.
//
// In-place block swap over the sites of one task: for every site, the m_Nvc
// doubles at offsets id1/id2 of v1 are exchanged with those at offsets id3/id4.
2084 int Nvcd = m_Nvc *
m_Nd;
2085 int Nxy = m_Nx2 * m_Ny;
2089 int id3 = m_Nvc * 2;
2090 int id4 = m_Nvc * 3;
2092 int isite = m_arg[itask].isite;
// Rebase to the first site owned by this task; the array is modified in place.
2093 double *w1 = &v1[Nvcd * isite];
2095 for (
int it = 0; it < m_Mt; ++it) {
2096 for (
int iz = 0; iz < m_Mz; ++iz) {
2097 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: offset of this site's first double; t uses the m_Nz stride, not m_Mz.
2098 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2099 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
// Save the id1/id2 values before they are overwritten, then complete the swap.
2100 double wt1 = w1[ivc + id1 + iv];
2101 double wt2 = w1[ivc + id2 + iv];
2102 w1[ivc + id1 + iv] = w1[ivc + id3 + iv];
2103 w1[ivc + id2 + iv] = w1[ivc + id4 + iv];
2104 w1[ivc + id3 + iv] = wt1;
2105 w1[ivc + id4 + iv] = wt2;
// NOTE(review): the whole signature is not visible in this listing; itask and
// v1 are presumably parameters — confirm against the full file.
//
// In-place sign flip over the sites of one task: for every site, the m_Nvc
// doubles at offsets id3 and id4 of v1 are negated; the first two blocks of the
// site are not touched in the visible lines.
2117 int Nvcd = m_Nvc *
m_Nd;
2118 int Nxy = m_Nx2 * m_Ny;
2122 int id3 = m_Nvc * 2;
2123 int id4 = m_Nvc * 3;
2125 int isite = m_arg[itask].isite;
// Rebase to the first site owned by this task; the array is modified in place.
2126 double *w1 = &v1[Nvcd * isite];
2128 for (
int it = 0; it < m_Mt; ++it) {
2129 for (
int iz = 0; iz < m_Mz; ++iz) {
2130 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: offset of this site's first double; t uses the m_Nz stride, not m_Mz.
2131 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2132 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2133 w1[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2134 w1[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_xpb_thread(int, double *, const double *, int)
void mult_tpb_dirac_thread(int, double *, const double *, int)
void mult_tmb_chiral_thread(int, double *, const double *, int)
void mult_tp2_chiral_thread(int, double *, const double *, int)
void mult_ym1_thread(int, double *, const double *, int)
void mult_ymb_thread(int, double *, const double *, int)
void mult_tmb_dirac_thread(int, double *, const double *, int)
void general(const char *format,...)
void mult_xm2_thread(int, double *, const double *, int)
void mult_zm2_thread(int, double *, const double *, int)
void mult_zm1_thread(int, double *, const double *, int)
void clear_thread(int, double *)
void mult_zp1_thread(int, double *, const double *, int)
void mult_tm2_chiral_thread(int, double *, const double *, int)
void mult_tp2_dirac_thread(int, double *, const double *, int)
void mult_ypb_thread(int, double *, const double *, int)
void mult_tm1_dirac_thread(int, double *, const double *, int)
void mult_tm2_dirac_thread(int, double *, const double *, int)
void mult_tp1_chiral_thread(int, double *, const double *, int)
void mult_yp1_thread(int, double *, const double *, int)
void mult_zmb_thread(int, double *, const double *, int)
std::vector< Channel * > m_bw_recv
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_zpb_thread(int, double *, const double *, int)
void mult_xm1_thread(int, double *, const double *, int)
void mult_yp2_thread(int, double *, const double *, int)
void crucial(const char *format,...)
void mult_tm1_chiral_thread(int, double *, const double *, int)
std::vector< mult_arg > m_arg
void mult_xmb_thread(int, double *, const double *, int)
std::vector< Channel * > m_fw_send
std::vector< Channel * > m_fw_recv
void gm5_dirac_thread(int, double *, const double *)
void Meo(Field &, const Field &, const int ieo)
even-odd operator: ieo=0: even <– odd, ieo=1: odd <– even
void mult_tp1_dirac_thread(int, double *, const double *, int)
Bridge::VerboseLevel m_vl
void mult_zp2_thread(int, double *, const double *, int)
void mult_tpb_chiral_thread(int, double *, const double *, int)
std::vector< Channel * > m_bw_send
void mult_xp2_thread(int, double *, const double *, int)
void scal_thread(int, double *, double)
void mult_ym2_thread(int, double *, const double *, int)
void gm5_chiral_thread(int, double *, const double *)
void mult_xp1_thread(int, double *, const double *, int)