25 template<
typename FIELD,
typename FOPR>
27 class_name =
"AEigensolver_IRArnoldi";
30 template<
typename FIELD,
typename FOPR>
33 if (m_sorter)
delete m_sorter;
38 template<
typename FIELD,
typename FOPR>
48 string str_sortfield_type;
51 double Enorm_eigen, Vthreshold;
54 err += params.
fetch_string(
"eigensolver_mode", str_sortfield_type);
55 err += params.
fetch_int(
"number_of_wanted_eigenvectors", Nk);
56 err += params.
fetch_int(
"number_of_working_eigenvectors", Np);
57 err += params.
fetch_int(
"maximum_number_of_iteration", Niter_eigen);
58 err += params.
fetch_double(
"convergence_criterion_squared", Enorm_eigen);
59 err += params.
fetch_double(
"threshold_value", Vthreshold);
62 vout.
crucial(m_vl,
"Error at %s: input parameter not found.\n",
67 set_parameters(str_sortfield_type, Nk, Np, Niter_eigen, Enorm_eigen,
73 template<
typename FIELD,
typename FOPR>
76 params.
set_string(
"eigensolver_mode", m_sort_type);
77 params.
set_int(
"number_of_wanted_eigenvectors", m_Nk);
78 params.
set_int(
"number_of_working_eigenvectors", m_Np);
79 params.
set_int(
"maximum_number_of_iteration", m_Niter_eigen);
80 params.
set_double(
"convergence_criterion_squared", m_Enorm_eigen);
81 params.
set_double(
"threshold_value", m_Vthreshold);
88 template<
typename FIELD,
typename FOPR>
90 const std::string& sort_type,
96 if (m_sorter)
delete m_sorter;
98 m_sort_type = sort_type;
101 set_parameters(Nk, Np, Niter_eigen, Enorm_eigen, Vthreshold);
106 template<
typename FIELD,
typename FOPR>
122 vout.
crucial(m_vl,
"Error at %s: parameter range check failed.\n",
131 m_Niter_eigen = Niter_eigen;
132 m_Enorm_eigen =
real_t(Enorm_eigen);
133 m_Vthreshold =
real_t(Vthreshold);
139 vout.
general(m_vl,
" Niter_eigen = %d\n", m_Niter_eigen);
140 vout.
general(m_vl,
" Enorm_eigen = %16.8e\n", m_Enorm_eigen);
141 vout.
general(m_vl,
" Vthreshold = %16.8e\n", m_Vthreshold);
143 int Nm = m_Nk + m_Np;
147 m_Qt.resize(Nm * Nm);
150 m_Ht.resize((Nm + 1) * Nm);
151 m_Ht2.resize((Nm + 1) * Nm);
152 m_Yt.resize(Nm * Nm);
154 int Nin = m_fopr->field_nin();
155 int Nvol = m_fopr->field_nvol();
156 int Nex = m_fopr->field_nex();
159 for (
int k = 0; k < Nm; ++k) {
160 m_B[k].reset(Nin, Nvol, Nex);
163 m_f.reset(Nin, Nvol, Nex);
164 m_v.reset(Nin, Nvol, Nex);
169 template<
typename FIELD,
typename FOPR>
171 std::vector<complex_t>& TDa,
172 std::vector<FIELD>& vk,
173 int& Nsbt,
int& Nconv,
const FIELD& b)
177 int Niter_eigen = m_Niter_eigen;
178 real_t Enorm_eigen = m_Enorm_eigen;
179 real_t Vthreshold = m_Vthreshold;
181 real_t Enorm_almost = 1.e-12;
185 if (Nk + Np > TDa.size()) {
186 vout.
crucial(m_vl,
"Error at %s: std::vector TDa is too small.\n",
189 }
else if (Nk + Np > vk.size()) {
190 vout.
crucial(m_vl,
"Error at %s: std::vector vk is too small.\n",
195 vout.
general(m_vl,
"Implicitly Restarted Arnoldi algorithm start\n");
198 vout.
general(m_vl,
" size of TDa = %d\n", TDa.size());
199 vout.
general(m_vl,
" size of vk = %d\n", vk.size());
200 vout.
general(m_vl,
" Enorm_eigen = %e\n", Enorm_eigen);
201 vout.
general(m_vl,
" Enorm_almost = %e\n", Enorm_almost);
218 real_t bnorm2 = b.norm2();
219 if (bnorm2 > 1.e-12) {
225 real_t vnorm = vk[0].norm2();
226 vnorm = 1.0 / sqrt(vnorm);
231 for (
int j = 0; j < Nm; ++j) {
232 for (
int k = 0; k < Nm + 1; ++k) {
241 for (
int k = 0; k < k2; ++k) {
242 step(Nm, k, vk, m_f);
248 for (
int iter = 0; iter < Niter_eigen; ++iter) {
258 beta = m_Ht[index(k2, k2 - 1)];
259 m_Ht[index(k2, k2 - 1)] = cmplx(
real_t(0.0),
real_t(0.0));
262 setUnit_Qt(Nm, m_Qt);
264 tqri(m_Ht2, k1, Nk, Nm, m_Qt, nconv);
266 for (
int j = k1; j < Nk; ++j) {
267 TDa[j] = m_Ht2[index(j, j)];
271 std::vector<complex_t> Xt(Nm * Nm);
273 eigenvector_Ht(Xt, m_Ht2, Nk, Nm);
274 mult_Qt(m_Yt, m_Qt, Xt, Nk, Nm);
275 check_eigen_Ht(m_Ht, TDa, m_Yt, Nk, Nm);
288 for (
int j = 0; j < k1; ++j) {
289 m_fopr->mult(m_v, m_B[j]);
290 axpy(m_v, -TDa[j], m_B[j]);
291 real_t bnorm = m_B[j].norm2();
292 real_t vv = m_v.norm2() / bnorm;
293 vout.
general(m_vl,
" %4d* (%12.8f, %12.8f) %12.8f %12.4e\n",
294 j, real(TDa[j]), imag(TDa[j]), abs(TDa[j]), vv);
297 for (
int j = k1; j < Nk; ++j) {
299 for (
int k = 0; k < Nk; ++k) {
300 axpy(m_B[j], m_Yt[index(k, j)], vk[k]);
302 m_fopr->mult(m_v, m_B[j]);
303 axpy(m_v, -TDa[j], m_B[j]);
304 real_t bnorm = m_B[j].norm2();
305 real_t vv = m_v.norm2() / bnorm;
306 vout.
general(m_vl,
" %4d (%12.8f, %12.8f) %12.8f %12.4e\n",
307 j, real(TDa[j]), imag(TDa[j]), abs(TDa[j]), vv);
313 if (vv < Enorm_eigen) {
316 if (!m_sorter->comp(m_TDa2[j], Vthreshold)) {
319 }
else if ((vv < Enorm_almost) &&
320 m_sorter->comp(m_TDa2[j], Vthreshold)) {
327 vout.
detailed(m_vl,
" #modes already converged: %4d\n", k1);
328 vout.
detailed(m_vl,
" #modes newly converged: %4d\n", Kdis);
329 vout.
detailed(m_vl,
" #modes almost converged: %4d\n", Kalmost);
331 if ((Kthreshold > 0) && (Kalmost == 0)) {
337 for (
int i = k1; i < Kdis; ++i) {
338 TDa[i] = m_TDa2[m_Iconv[i]];
340 Nsbt = Kdis - Kthreshold;
360 m_Ht[index(k2, k2 - 1)] = beta;
367 for (
int k = k2 - 1; k < Nm; ++k) {
368 step(Nm, k, vk, m_f);
373 for (
int j = 0; j < Nm; ++j) {
374 for (
int k = 0; k < Nm; ++k) {
375 m_Ht2[index(k, j)] = m_Ht[index(k, j)];
380 setUnit_Qt(Nm, m_Qt);
384 tqri(m_Ht2, k1, Nm, Nm, m_Qt, nconv);
386 for (
int j = 0; j < Nm; ++j) {
387 m_TDa2[j] = m_Ht2[index(j, j)];
391 std::vector<complex_t> Xt(Nm * Nm);
393 eigenvector_Ht(Xt, m_Ht2, Nm, Nm);
394 mult_Qt(m_Yt, m_Qt, Xt, Nm, Nm);
395 check_eigen_Ht(m_Ht, m_TDa2, m_Yt, Nm, Nm);
399 m_sorter->sort(m_TDa2, Nm);
406 setUnit_Qt(Nm, m_Qt);
409 for (
int ip = k2; ip < Nm; ++ip) {
415 qrtrf(m_Ht, Nm, Nm, m_Qt, Dsh, kmin, kmax);
421 for (
int j = k1; j <= k2; ++j) {
424 for (
int k = k1; k < Nm; ++k) {
425 axpy(m_B[j], m_Qt[index(k, j)], vk[k]);
429 for (
int j = k1; j <= k2; ++j) {
441 scal(m_f, m_Ht[index(k2, k2 - 1)]);
443 real_t beta_k = m_f.norm2();
444 beta_k = sqrt(beta_k);
449 reconst_Ht(m_Ht2, m_Qt, beta, vk, Nk);
454 for (
int i = 0; i < Nk; ++i) {
455 for (
int j = 0; j < Nk; ++j) {
456 real_t diff1 = abs(m_Ht[index(i, j)] - m_Ht2[index(i, j)]);
457 diff += diff1 * diff1;
460 diff = diff /
real_t(Nk * Nk);
461 vout.
general(m_vl,
" difference of Hessenberg = %e\n", diff);
481 std::vector<int>
idx = m_sorter->sort_index(TDa, Kdis);
482 for (
int i = 0; i < Kdis; ++i) {
483 copy(vk[i], m_B[m_Iconv[
idx[i]]]);
495 vout.
crucial(m_vl,
"Error at %s: NOT converged.\n", class_name.c_str());
503 vout.
general(m_vl,
"%s: solve finished.\n\n", class_name.c_str());
509 template<
typename FIELD,
typename FOPR>
511 int k1,
int k2,
int Kdis,
512 std::vector<complex_t>& TDa,
513 std::vector<FIELD>& vk,
516 for (
int i = k2 - 1; i >= k1 + Kdis; --i) {
517 copy(vk[i], vk[i - Kdis]);
520 for (
int i = 0; i < Kdis; ++i) {
521 copy(vk[k1 + i], m_B[m_Iconv[i]]);
522 if ((k1 + i) != m_Iconv[i])
copy(m_B[k1 + i], m_B[m_Iconv[i]]);
525 TDa[k1 + i] = m_TDa2[m_Iconv[i]];
531 reconst_Ht(m_Ht, m_Qt, beta, vk, k2);
535 for (
int i = 0; i < k1 + Kdis; ++i) {
536 m_Ht[index(i + 1, i)] = cmplx(0.0, 0.0);
544 template<
typename FIELD,
typename FOPR>
546 std::vector<FIELD>& vk,
550 vout.
crucial(m_vl,
"Error at %s: k is larger than Nm.\n",
555 m_fopr->mult(w, vk[k]);
557 for (
int j = 0; j <= k; ++j) {
559 axpy(w, -alph, vk[j]);
561 m_Ht[index(j, k)] = alph;
569 real_t beta_r = 1.0 / beta;
571 scal(vk[k + 1], beta_r);
575 m_Ht[index(k + 1, k)] = cmplx(beta,
real_t(0.0));
581 template<
typename FIELD,
typename FOPR>
584 std::vector<FIELD>& vk)
586 for (
int j = 0; j < Nk; ++j) {
587 for (
int i = 0; i < j; ++i) {
589 axpy(vk[j], -vv, vk[i]);
591 real_t vv = vk[j].norm2();
599 template<
typename FIELD,
typename FOPR>
601 int Nm, std::vector<complex_t>& Qt)
603 for (
int i = 0; i < Qt.size(); ++i) {
604 Qt[i] = cmplx(0.0, 0.0);
607 for (
int k = 0; k < Nm; ++k) {
608 Qt[index(k, k)] = cmplx(1.0, 0.0);
614 template<
typename FIELD,
typename FOPR>
616 std::vector<complex_t>& Ht,
int k1,
617 int Nk,
int Nm, std::vector<complex_t>& Qt,
int& nconv)
619 int Niter = 100 * Nm;
627 std::vector<complex_t> Hr(Nm * Nm);
628 for (
int i = 0; i < Nm; ++i) {
629 for (
int j = 0; j < Nm; ++j) {
630 Hr[index(i, j)] = Ht[(index(i, j))];
634 for (
int iter = 0; iter < Niter; ++iter) {
637 shift_wilkinson(Dsh, Ht[index(kmax - 2, kmax - 2)],
638 Ht[index(kmax - 2, kmax - 1)],
639 Ht[index(kmax - 1, kmax - 2)],
640 Ht[index(kmax - 1, kmax - 1)]);
642 Dsh = Ht[index(kmax - 1, kmax - 1)];
647 qrtrf(Ht, Nk, Nm, Qt, Dsh, kmin, kmax);
649 check_Qt(Nk, Nm, Qt, Ht, Hr);
652 for (
int j = kmax - 1; j >= kmin; --j) {
653 real_t dds = abs(Ht[index(j - 1, j - 1)]) + abs(Ht[index(j, j)]);
654 if (abs(Ht[index(j, j - 1)]) + dds > dds) {
656 for (
int i = 0; i < kmax - 1; ++i) {
657 real_t dds = abs(Ht[index(j, j)]) + abs(Ht[index(j + 1, j + 1)]);
658 if (abs(Ht[index(i + 1, i)]) + dds > dds) {
668 vout.
detailed(m_vl,
" tqri: converged at iter = %d\n", nconv);
676 vout.
crucial(m_vl,
"Error at %s: QR method NOT converged, Niter = %d.\n",
677 class_name.c_str(), Niter);
684 template<
typename FIELD,
typename FOPR>
686 std::vector<complex_t>& Ht,
687 int Nk,
int Nm, std::vector<complex_t>& Qt,
692 real_t f1 = abs(Ht[index(k, k)] - Dsh);
693 real_t f2 = abs(Ht[index(k + 1, k)]);
694 real_t Fden = 1.0 / sqrt(f1 * f1 + f2 * f2);
697 complex_t beta = (Ht[index(k, k)] - Dsh) / f1;
699 for (
int j = 0; j < Nk; ++j) {
702 Ht[index(k, j)] = c * tmp1 - beta * s * tmp2;
703 Ht[index(k + 1, j)] = s * tmp1 + beta * c * tmp2;
705 for (
int j = 0; j < Nk; ++j) {
708 Ht[index(j, k)] = tmp1 * c - tmp2 * conj(beta) * s;
709 Ht[index(j, k + 1)] = tmp1 * s + tmp2 * conj(beta) * c;
712 for (
int j = 0; j < Nk; ++j) {
715 Qt[index(j, k)] = c * tmp1 - conj(beta) * s * tmp2;
716 Qt[index(j, k + 1)] = s * tmp1 + conj(beta) * c * tmp2;
720 for (
int k = kmin; k < kmax - 1; ++k) {
721 real_t f1 = abs(Ht[index(k, k - 1)]);
722 real_t f2 = abs(Ht[index(k + 1, k - 1)]);
723 real_t Fden = 1.0 / sqrt(f1 * f1 + f2 * f2);
724 complex_t alph = conj(Ht[index(k, k - 1)]) / f1;
725 complex_t beta = conj(Ht[index(k + 1, k - 1)]) / f2;
729 for (
int j = 0; j < Nk; ++j) {
732 Ht[index(k, j)] = alph * c * tmp1 - beta * s * tmp2;
733 Ht[index(k + 1, j)] = alph * s * tmp1 + beta * c * tmp2;
735 for (
int j = 0; j < Nk; ++j) {
740 Ht[index(j, k)] = tmp1 * alphc * c - tmp2 * betac * s;
741 Ht[index(j, k + 1)] = tmp1 * alphc * s + tmp2 * betac * c;
745 Ht[index(k + 1, k - 1)] = cmplx(
real_t(0.0),
real_t(0.0));
748 for (
int j = 0; j < Nk; ++j) {
751 Qt[index(j, k)] = conj(alph) * c * tmp1 - conj(beta) * s * tmp2;
752 Qt[index(j, k + 1)] = conj(alph) * s * tmp1 + conj(beta) * c * tmp2;
760 if (abs(tmp1) + abs(tmpd) != abs(tmpd)) {
764 for (
int j = 0; j < Nk; ++j) {
765 Ht[index(k, j)] *= alph;
768 for (
int j = 0; j < Nk; ++j) {
769 Ht[index(j, k)] *= conj(alph);
772 for (
int j = 0; j < Nk; ++j) {
773 Qt[index(j, k)] *= conj(alph);
778 for (
int j = 0; j < Nk; ++j) {
779 for (
int k = j + 2; k < Nk; ++k) {
780 Ht[index(k, j)] = cmplx(0.0, 0.0);
785 for (
int j = 0; j < Nk - 1; ++j) {
787 Ht[index(k, j)] = cmplx(real(Ht[index(k, j)]),
real_t(0.0));
796 template<
typename FIELD,
typename FOPR>
798 const int Nk,
const int Nm,
799 std::vector<complex_t>& Qt,
800 std::vector<complex_t>& Ht,
801 std::vector<complex_t>& At)
803 std::vector<complex_t> Bt(Nm * Nm);
804 std::vector<complex_t> Ct(Nm * Nm);
806 for (
int i = 0; i < Nk; ++i) {
807 for (
int j = 0; j < Nk; ++j) {
808 Bt[index(i, j)] = cmplx(0.0, 0.0);
809 for (
int k = 0; k < Nk; ++k) {
810 Bt[index(i, j)] += Ht[index(i, k)] * conj(Qt[index(j, k)]);
815 for (
int i = 0; i < Nk; ++i) {
816 for (
int j = 0; j < Nk; ++j) {
817 Ct[index(i, j)] = cmplx(0.0, 0.0);
818 for (
int k = 0; k < Nk; ++k) {
819 Ct[index(i, j)] += Qt[index(i, k)] * Bt[index(k, j)];
825 for (
int i = 0; i < Nk; ++i) {
826 for (
int j = 0; j < Nk; ++j) {
827 Ct[index(i, j)] -= At[index(i, j)];
828 res += abs(Ct[index(i, j)]) * abs(Ct[index(i, j)]);
834 vout.
crucial(m_vl,
"%s: check A = Q^d H Q: too large res = %e\n",
835 class_name.c_str(), res);
841 template<
typename FIELD,
typename FOPR>
847 real_t s = abs(a) + abs(b) + abs(c) + abs(d);
853 real_t abst1 = abs(p) * abs(p) + abs(r) * abs(r);
854 real_t abst2 = 2.0 * real(p * conj(r));
856 real_t absr1 = abst1 + abst2;
857 real_t absr2 = abst1 - abst2;
870 complex_t res1 = rmax * rmax - (a - d) * rmax - b * c;
871 complex_t res2 = rmin * rmin - (a - d) * rmin - b * c;
872 real_t res = (abs(res1) + abs(res2)) / s;
884 template<
typename FIELD,
typename FOPR>
886 std::vector<complex_t>& Yt,
887 std::vector<complex_t>& Ht,
891 Yt[index(0, i)] = cmplx(1.0, 0.0);
892 for (
int j = 1; j < Nm; ++j) {
893 Yt[index(j, i)] = cmplx(0.0, 0.0);
896 for (
int i = 1; i < km; ++i) {
899 Yt[index(i, i)] = cmplx(1.0, 0.0);
901 for (
int j = i + 1; j < Nm; ++j) {
902 Yt[index(j, i)] = cmplx(0.0, 0.0);
905 Yt[index(i - 1, i)] = -Ht[index(i - 1, i)] / (Ht[index(i - 1, i - 1)] - lambda);
907 for (
int j = i - 2; j >= 0; --j) {
908 Yt[index(j, i)] = -Ht[index(j, i)];
909 for (
int k = j + 1; k < i; ++k) {
910 Yt[index(j, i)] += -Ht[index(j, k)] * Yt[index(k, i)];
912 Yt[index(j, i)] *=
real_t(1.0) / (Ht[index(j, j)] - lambda);
919 template<
typename FIELD,
typename FOPR>
921 std::vector<complex_t>& Yt,
922 std::vector<complex_t>& Qt,
923 std::vector<complex_t>& Xt,
926 for (
int j = 0; j < km; ++j) {
927 for (
int i = 0; i < Nm; ++i) {
928 Yt[index(i, j)] = cmplx(0.0, 0.0);
929 for (
int k = 0; k < Nm; ++k) {
930 Yt[index(i, j)] += Qt[index(i, k)] * Xt[index(k, j)];
938 template<
typename FIELD,
typename FOPR>
940 std::vector<complex_t>& Ht,
941 std::vector<complex_t>& TDa,
942 std::vector<complex_t>& Xt,
948 for (
int i = 0; i < km; ++i) {
950 for (
int j = 0; j < Nm; ++j) {
951 complex_t diff2 = -TDa[i] * Xt[index(j, i)];
952 for (
int k = 0; k < km; ++k) {
953 diff2 += Ht[index(j, k)] * Xt[index(k, i)];
955 diff1 += abs(diff2) * abs(diff2);
961 diff = diff /
real_t(km * Nm);
963 vout.
detailed(m_vl,
" eigenrelation of Ht: diff2 = %e\n", diff);
971 template<
typename FIELD,
typename FOPR>
973 std::vector<complex_t>& Qt,
976 for (
int j = 0; j < Nk; ++j) {
977 for (
int i = 0; i < j; ++i) {
979 for (
int k = 0; k < Nk; ++k) {
981 qq += conj(Qt[index(j, k)]) * Qt[index(i, k)];
984 for (
int k = 0; k < Nk; ++k) {
986 Qt[index(i, k)] -= qq * Qt[index(j, k)];
991 for (
int k = 0; k < Nk; ++k) {
993 real_t qq1 = abs(Qt[index(j, k)]);
997 for (
int k = 0; k < Nk; ++k) {
999 Qt[index(j, k)] *= qq;
1006 template<
typename FIELD,
typename FOPR>
1008 std::vector<complex_t>& Ht,
1009 std::vector<complex_t>& Qt,
1011 std::vector<FIELD>& vk,
1014 std::vector<complex_t> u(Nk);
1015 std::vector<complex_t> x(Nk);
1017 for (
int j = 0; j < Nk; ++j) {
1018 m_fopr->mult(m_v, vk[j]);
1019 for (
int i = 0; i < Nk; ++i) {
1022 Ht[index(i, j)] = Htmp;
1028 for (
int i = 0; i < Nk; ++i) {
1029 axpy(m_v, -Ht[index(i, j)], vk[i]);
1037 for (
int j = 0; j < Nk; ++j) {
1038 for (
int k = j + 2; k < Nk; ++k) {
1043 for (
int j = 0; j < Nk - 1; ++j) {
1045 Ht[index(k, j)] = cmplx(real(Ht[index(k, j)]),
real_t(0.0));