19 #if defined USE_GROUP_SU3
21 #elif defined USE_GROUP_SU2
23 #elif defined USE_GROUP_SU_N
60 vout.
crucial(
"Error in %s: irrelevant mult mode = %s\n",
140 vout.
crucial(
"Error at %s: input parameter not found.\n",
152 const std::vector<int> bc)
154 assert(bc.size() ==
m_Ndim);
169 for (
int mu = 0; mu <
m_Ndim; ++mu) {
265 if (ith == 0)
m_mode = mode;
296 }
else if (
m_repr ==
"Chiral") {
308 const double *v1 = f.
ptr(0);
309 double *v2 = w.
ptr(0);
313 const int id3 = Nvc * 2;
314 const int id4 = Nvc * 3;
320 const int is =
m_Nvol * i_thread / Nthread;
321 const int ns =
m_Nvol * (i_thread + 1) / Nthread - is;
323 for (
int site = is; site < is + ns; ++site) {
325 for (
int icc = 0; icc < Nvc; icc++) {
326 int in = Nvc * Nd * site;
328 v2[icc + id1 + in] = v1[icc + id3 + in];
329 v2[icc + id2 + in] = v1[icc + id4 + in];
330 v2[icc + id3 + in] = v1[icc + id1 + in];
331 v2[icc + id4 + in] = v1[icc + id2 + in];
343 const double *v1 = f.
ptr(0);
344 double *v2 = w.
ptr(0);
348 const int id3 = Nvc * 2;
349 const int id4 = Nvc * 3;
355 const int is =
m_Nvol * i_thread / Nthread;
356 const int ns =
m_Nvol * (i_thread + 1) / Nthread - is;
358 for (
int site = is; site < is + ns; ++site) {
360 for (
int icc = 0; icc < Nvc; icc++) {
361 int in = Nvc * Nd * site;
363 v2[icc + id1 + in] = v1[icc + id1 + in];
364 v2[icc + id2 + in] = v1[icc + id2 + in];
365 v2[icc + id3 + in] = -v1[icc + id3 + in];
366 v2[icc + id4 + in] = -v1[icc + id4 + in];
374 const int mu,
const int nu)
399 }
else if (
m_repr ==
"Chiral") {
410 assert(w.
nex() == 1);
413 const int Nvc = 2 * Nc;
414 const int Ndf = 2 * Nc * Nc;
416 const int Nvol = w.
nvol();
420 const int id3 = Nvc * 2;
421 const int id4 = Nvc * 3;
425 const double *w2 = w.
ptr(0);
426 double *v2 = v.
ptr(0);
439 const int is =
m_Nvol * i_thread / Nthread;
440 const int ns =
m_Nvol * (i_thread + 1) / Nthread - is;
442 for (
int site = is; site < is + ns; ++site) {
443 int iv = Nvc * Nd * site;
446 for (
int ic = 0; ic < Nc; ++ic) {
449 int ic_g = ic * Nvc + ig;
451 v2[ic_r + id1 + iv] = 0.0;
452 v2[ic_i + id1 + iv] = 0.0;
453 v2[ic_r + id2 + iv] = 0.0;
454 v2[ic_i + id2 + iv] = 0.0;
456 v2[ic_r + id3 + iv] = 0.0;
457 v2[ic_i + id3 + iv] = 0.0;
458 v2[ic_r + id4 + iv] = 0.0;
459 v2[ic_i + id4 + iv] = 0.0;
462 v2[ic_r + id1 + iv] -= mult_uv_i(&Bx[ic_g], &w2[id2 + iv], Nc);
463 v2[ic_i + id1 + iv] += mult_uv_r(&Bx[ic_g], &w2[id2 + iv], Nc);
464 v2[ic_r + id2 + iv] -= mult_uv_i(&Bx[ic_g], &w2[id1 + iv], Nc);
465 v2[ic_i + id2 + iv] += mult_uv_r(&Bx[ic_g], &w2[id1 + iv], Nc);
467 v2[ic_r + id3 + iv] -= mult_uv_i(&Bx[ic_g], &w2[id4 + iv], Nc);
468 v2[ic_i + id3 + iv] += mult_uv_r(&Bx[ic_g], &w2[id4 + iv], Nc);
469 v2[ic_r + id4 + iv] -= mult_uv_i(&Bx[ic_g], &w2[id3 + iv], Nc);
470 v2[ic_i + id4 + iv] += mult_uv_r(&Bx[ic_g], &w2[id3 + iv], Nc);
473 v2[ic_r + id1 + iv] += mult_uv_r(&By[ic_g], &w2[id2 + iv], Nc);
474 v2[ic_i + id1 + iv] += mult_uv_i(&By[ic_g], &w2[id2 + iv], Nc);
475 v2[ic_r + id2 + iv] -= mult_uv_r(&By[ic_g], &w2[id1 + iv], Nc);
476 v2[ic_i + id2 + iv] -= mult_uv_i(&By[ic_g], &w2[id1 + iv], Nc);
478 v2[ic_r + id3 + iv] += mult_uv_r(&By[ic_g], &w2[id4 + iv], Nc);
479 v2[ic_i + id3 + iv] += mult_uv_i(&By[ic_g], &w2[id4 + iv], Nc);
480 v2[ic_r + id4 + iv] -= mult_uv_r(&By[ic_g], &w2[id3 + iv], Nc);
481 v2[ic_i + id4 + iv] -= mult_uv_i(&By[ic_g], &w2[id3 + iv], Nc);
484 v2[ic_r + id1 + iv] -= mult_uv_i(&Bz[ic_g], &w2[id1 + iv], Nc);
485 v2[ic_i + id1 + iv] += mult_uv_r(&Bz[ic_g], &w2[id1 + iv], Nc);
486 v2[ic_r + id2 + iv] += mult_uv_i(&Bz[ic_g], &w2[id2 + iv], Nc);
487 v2[ic_i + id2 + iv] -= mult_uv_r(&Bz[ic_g], &w2[id2 + iv], Nc);
489 v2[ic_r + id3 + iv] -= mult_uv_i(&Bz[ic_g], &w2[id3 + iv], Nc);
490 v2[ic_i + id3 + iv] += mult_uv_r(&Bz[ic_g], &w2[id3 + iv], Nc);
491 v2[ic_r + id4 + iv] += mult_uv_i(&Bz[ic_g], &w2[id4 + iv], Nc);
492 v2[ic_i + id4 + iv] -= mult_uv_r(&Bz[ic_g], &w2[id4 + iv], Nc);
495 v2[ic_r + id1 + iv] += mult_uv_i(&Ex[ic_g], &w2[id4 + iv], Nc);
496 v2[ic_i + id1 + iv] -= mult_uv_r(&Ex[ic_g], &w2[id4 + iv], Nc);
497 v2[ic_r + id2 + iv] += mult_uv_i(&Ex[ic_g], &w2[id3 + iv], Nc);
498 v2[ic_i + id2 + iv] -= mult_uv_r(&Ex[ic_g], &w2[id3 + iv], Nc);
500 v2[ic_r + id3 + iv] += mult_uv_i(&Ex[ic_g], &w2[id2 + iv], Nc);
501 v2[ic_i + id3 + iv] -= mult_uv_r(&Ex[ic_g], &w2[id2 + iv], Nc);
502 v2[ic_r + id4 + iv] += mult_uv_i(&Ex[ic_g], &w2[id1 + iv], Nc);
503 v2[ic_i + id4 + iv] -= mult_uv_r(&Ex[ic_g], &w2[id1 + iv], Nc);
506 v2[ic_r + id1 + iv] -= mult_uv_r(&Ey[ic_g], &w2[id4 + iv], Nc);
507 v2[ic_i + id1 + iv] -= mult_uv_i(&Ey[ic_g], &w2[id4 + iv], Nc);
508 v2[ic_r + id2 + iv] += mult_uv_r(&Ey[ic_g], &w2[id3 + iv], Nc);
509 v2[ic_i + id2 + iv] += mult_uv_i(&Ey[ic_g], &w2[id3 + iv], Nc);
511 v2[ic_r + id3 + iv] -= mult_uv_r(&Ey[ic_g], &w2[id2 + iv], Nc);
512 v2[ic_i + id3 + iv] -= mult_uv_i(&Ey[ic_g], &w2[id2 + iv], Nc);
513 v2[ic_r + id4 + iv] += mult_uv_r(&Ey[ic_g], &w2[id1 + iv], Nc);
514 v2[ic_i + id4 + iv] += mult_uv_i(&Ey[ic_g], &w2[id1 + iv], Nc);
517 v2[ic_r + id1 + iv] += mult_uv_i(&Ez[ic_g], &w2[id3 + iv], Nc);
518 v2[ic_i + id1 + iv] -= mult_uv_r(&Ez[ic_g], &w2[id3 + iv], Nc);
519 v2[ic_r + id2 + iv] -= mult_uv_i(&Ez[ic_g], &w2[id4 + iv], Nc);
520 v2[ic_i + id2 + iv] += mult_uv_r(&Ez[ic_g], &w2[id4 + iv], Nc);
522 v2[ic_r + id3 + iv] += mult_uv_i(&Ez[ic_g], &w2[id1 + iv], Nc);
523 v2[ic_i + id3 + iv] -= mult_uv_r(&Ez[ic_g], &w2[id1 + iv], Nc);
524 v2[ic_r + id4 + iv] -= mult_uv_i(&Ez[ic_g], &w2[id2 + iv], Nc);
525 v2[ic_i + id4 + iv] += mult_uv_r(&Ez[ic_g], &w2[id2 + iv], Nc);
528 v2[ic_r + id1 + iv] *= kappa_cSW;
529 v2[ic_i + id1 + iv] *= kappa_cSW;
530 v2[ic_r + id2 + iv] *= kappa_cSW;
531 v2[ic_i + id2 + iv] *= kappa_cSW;
533 v2[ic_r + id3 + iv] *= kappa_cSW;
534 v2[ic_i + id3 + iv] *= kappa_cSW;
535 v2[ic_r + id4 + iv] *= kappa_cSW;
536 v2[ic_i + id4 + iv] *= kappa_cSW;
548 assert(w.
nex() == 1);
551 const int Nvc = 2 * Nc;
552 const int Ndf = 2 * Nc * Nc;
554 const int Nvol = w.
nvol();
558 const int id3 = Nvc * 2;
559 const int id4 = Nvc * 3;
563 const double *w2 = w.
ptr(0);
564 double *v2 = v.
ptr(0);
573 int ith, nth, is, ns;
574 set_threadtask(ith, nth, is, ns,
m_Nvol);
578 for (
int site = is; site < ns; ++site) {
579 int iv = Nvc * Nd * site;
582 for (
int ic = 0; ic < Nc; ++ic) {
585 int ic_g = ic * Nvc + ig;
587 v2[ic_r + id1 + iv] = 0.0;
588 v2[ic_i + id1 + iv] = 0.0;
589 v2[ic_r + id2 + iv] = 0.0;
590 v2[ic_i + id2 + iv] = 0.0;
592 v2[ic_r + id3 + iv] = 0.0;
593 v2[ic_i + id3 + iv] = 0.0;
594 v2[ic_r + id4 + iv] = 0.0;
595 v2[ic_i + id4 + iv] = 0.0;
598 v2[ic_r + id1 + iv] -= mult_uv_i(&Bx[ic_g], &w2[id2 + iv], Nc);
599 v2[ic_i + id1 + iv] += mult_uv_r(&Bx[ic_g], &w2[id2 + iv], Nc);
600 v2[ic_r + id2 + iv] -= mult_uv_i(&Bx[ic_g], &w2[id1 + iv], Nc);
601 v2[ic_i + id2 + iv] += mult_uv_r(&Bx[ic_g], &w2[id1 + iv], Nc);
603 v2[ic_r + id3 + iv] -= mult_uv_i(&Bx[ic_g], &w2[id4 + iv], Nc);
604 v2[ic_i + id3 + iv] += mult_uv_r(&Bx[ic_g], &w2[id4 + iv], Nc);
605 v2[ic_r + id4 + iv] -= mult_uv_i(&Bx[ic_g], &w2[id3 + iv], Nc);
606 v2[ic_i + id4 + iv] += mult_uv_r(&Bx[ic_g], &w2[id3 + iv], Nc);
609 v2[ic_r + id1 + iv] += mult_uv_r(&By[ic_g], &w2[id2 + iv], Nc);
610 v2[ic_i + id1 + iv] += mult_uv_i(&By[ic_g], &w2[id2 + iv], Nc);
611 v2[ic_r + id2 + iv] -= mult_uv_r(&By[ic_g], &w2[id1 + iv], Nc);
612 v2[ic_i + id2 + iv] -= mult_uv_i(&By[ic_g], &w2[id1 + iv], Nc);
614 v2[ic_r + id3 + iv] += mult_uv_r(&By[ic_g], &w2[id4 + iv], Nc);
615 v2[ic_i + id3 + iv] += mult_uv_i(&By[ic_g], &w2[id4 + iv], Nc);
616 v2[ic_r + id4 + iv] -= mult_uv_r(&By[ic_g], &w2[id3 + iv], Nc);
617 v2[ic_i + id4 + iv] -= mult_uv_i(&By[ic_g], &w2[id3 + iv], Nc);
620 v2[ic_r + id1 + iv] -= mult_uv_i(&Bz[ic_g], &w2[id1 + iv], Nc);
621 v2[ic_i + id1 + iv] += mult_uv_r(&Bz[ic_g], &w2[id1 + iv], Nc);
622 v2[ic_r + id2 + iv] += mult_uv_i(&Bz[ic_g], &w2[id2 + iv], Nc);
623 v2[ic_i + id2 + iv] -= mult_uv_r(&Bz[ic_g], &w2[id2 + iv], Nc);
625 v2[ic_r + id3 + iv] -= mult_uv_i(&Bz[ic_g], &w2[id3 + iv], Nc);
626 v2[ic_i + id3 + iv] += mult_uv_r(&Bz[ic_g], &w2[id3 + iv], Nc);
627 v2[ic_r + id4 + iv] += mult_uv_i(&Bz[ic_g], &w2[id4 + iv], Nc);
628 v2[ic_i + id4 + iv] -= mult_uv_r(&Bz[ic_g], &w2[id4 + iv], Nc);
631 v2[ic_r + id1 + iv] += mult_uv_i(&Ex[ic_g], &w2[id2 + iv], Nc);
632 v2[ic_i + id1 + iv] -= mult_uv_r(&Ex[ic_g], &w2[id2 + iv], Nc);
633 v2[ic_r + id2 + iv] += mult_uv_i(&Ex[ic_g], &w2[id1 + iv], Nc);
634 v2[ic_i + id2 + iv] -= mult_uv_r(&Ex[ic_g], &w2[id1 + iv], Nc);
636 v2[ic_r + id3 + iv] -= mult_uv_i(&Ex[ic_g], &w2[id4 + iv], Nc);
637 v2[ic_i + id3 + iv] += mult_uv_r(&Ex[ic_g], &w2[id4 + iv], Nc);
638 v2[ic_r + id4 + iv] -= mult_uv_i(&Ex[ic_g], &w2[id3 + iv], Nc);
639 v2[ic_i + id4 + iv] += mult_uv_r(&Ex[ic_g], &w2[id3 + iv], Nc);
642 v2[ic_r + id1 + iv] -= mult_uv_r(&Ey[ic_g], &w2[id2 + iv], Nc);
643 v2[ic_i + id1 + iv] -= mult_uv_i(&Ey[ic_g], &w2[id2 + iv], Nc);
644 v2[ic_r + id2 + iv] += mult_uv_r(&Ey[ic_g], &w2[id1 + iv], Nc);
645 v2[ic_i + id2 + iv] += mult_uv_i(&Ey[ic_g], &w2[id1 + iv], Nc);
647 v2[ic_r + id3 + iv] += mult_uv_r(&Ey[ic_g], &w2[id4 + iv], Nc);
648 v2[ic_i + id3 + iv] += mult_uv_i(&Ey[ic_g], &w2[id4 + iv], Nc);
649 v2[ic_r + id4 + iv] -= mult_uv_r(&Ey[ic_g], &w2[id3 + iv], Nc);
650 v2[ic_i + id4 + iv] -= mult_uv_i(&Ey[ic_g], &w2[id3 + iv], Nc);
653 v2[ic_r + id1 + iv] += mult_uv_i(&Ez[ic_g], &w2[id1 + iv], Nc);
654 v2[ic_i + id1 + iv] -= mult_uv_r(&Ez[ic_g], &w2[id1 + iv], Nc);
655 v2[ic_r + id2 + iv] -= mult_uv_i(&Ez[ic_g], &w2[id2 + iv], Nc);
656 v2[ic_i + id2 + iv] += mult_uv_r(&Ez[ic_g], &w2[id2 + iv], Nc);
658 v2[ic_r + id3 + iv] -= mult_uv_i(&Ez[ic_g], &w2[id3 + iv], Nc);
659 v2[ic_i + id3 + iv] += mult_uv_r(&Ez[ic_g], &w2[id3 + iv], Nc);
660 v2[ic_r + id4 + iv] += mult_uv_i(&Ez[ic_g], &w2[id4 + iv], Nc);
661 v2[ic_i + id4 + iv] -= mult_uv_r(&Ez[ic_g], &w2[id4 + iv], Nc);
664 v2[ic_r + id1 + iv] *= kappa_cSW;
665 v2[ic_i + id1 + iv] *= kappa_cSW;
666 v2[ic_r + id2 + iv] *= kappa_cSW;
667 v2[ic_i + id2 + iv] *= kappa_cSW;
669 v2[ic_r + id3 + iv] *= kappa_cSW;
670 v2[ic_i + id3 + iv] *= kappa_cSW;
671 v2[ic_r + id4 + iv] *= kappa_cSW;
672 v2[ic_i + id4 + iv] *= kappa_cSW;
694 const int mu,
const int nu)
733 const double gflop = flop_site * (Nvol * (NPE / 1.0e+9));