25 template<
typename FIELD,
typename FOPR>
27 class_name =
"AEigensolver_IRLanczos";
30 template<
typename FIELD,
typename FOPR>
33 if (m_sorter)
delete m_sorter;
38 template<
typename FIELD,
typename FOPR>
48 string str_sortfield_type;
51 double Enorm_eigen, Vthreshold;
54 err += params.
fetch_string(
"eigensolver_mode", str_sortfield_type);
55 err += params.
fetch_int(
"number_of_wanted_eigenvectors", Nk);
56 err += params.
fetch_int(
"number_of_working_eigenvectors", Np);
57 err += params.
fetch_int(
"maximum_number_of_iteration", Niter_eigen);
58 err += params.
fetch_double(
"convergence_criterion_squared", Enorm_eigen);
59 err += params.
fetch_double(
"threshold_value", Vthreshold);
62 vout.
crucial(m_vl,
"Error at %s: input parameter not found.\n",
67 set_parameters(str_sortfield_type, Nk, Np, Niter_eigen, Enorm_eigen,
73 template<
typename FIELD,
typename FOPR>
76 params.
set_string(
"eigensolver_mode", m_sort_type);
77 params.
set_int(
"number_of_wanted_eigenvectors", m_Nk);
78 params.
set_int(
"number_of_working_eigenvectors", m_Np);
79 params.
set_int(
"maximum_number_of_iteration", m_Niter_eigen);
80 params.
set_double(
"convergence_criterion_squared", m_Enorm_eigen);
81 params.
set_double(
"threshold_value", m_Vthreshold);
88 template<
typename FIELD,
typename FOPR>
90 const std::string& sort_type,
96 if (m_sorter)
delete m_sorter;
98 m_sort_type = sort_type;
101 set_parameters(Nk, Np, Niter_eigen, Enorm_eigen, Vthreshold);
106 template<
typename FIELD,
typename FOPR>
122 vout.
crucial(m_vl,
"Error at %s: parameter range check failed.\n",
130 m_Niter_eigen = Niter_eigen;
131 m_Enorm_eigen =
real_t(Enorm_eigen);
132 m_Vthreshold =
real_t(Vthreshold);
138 vout.
general(m_vl,
" Niter_eigen = %d\n", m_Niter_eigen);
139 vout.
general(m_vl,
" Enorm_eigen = %16.8e\n", m_Enorm_eigen);
140 vout.
general(m_vl,
" Vthreshold = %16.8e\n", m_Vthreshold);
142 int Nm = m_Nk + m_Np;
146 m_Qt.resize(Nm * Nm);
149 int Nin = m_fopr->field_nin();
150 int Nvol = m_fopr->field_nvol();
151 int Nex = m_fopr->field_nex();
154 for (
int k = 0; k < Nm; ++k) {
155 m_B[k].reset(Nin, Nvol, Nex);
158 m_f.reset(Nin, Nvol, Nex);
159 m_v.reset(Nin, Nvol, Nex);
164 template<
typename FIELD,
typename FOPR>
166 std::vector<complex_t>& TDac,
167 std::vector<FIELD>& vk,
168 int& Nsbt,
int& Nconv,
const FIELD& b)
170 int size = TDac.size();
171 std::vector<real_t> TDa(size);
173 solve(TDa, vk, Nsbt, Nconv, b);
175 for (
int i = 0; i < size; ++i) {
176 TDac[i] = cmplx(TDa[i],
real_t(0.0));
182 template<
typename FIELD,
typename FOPR>
184 std::vector<real_t>& TDa,
185 std::vector<FIELD>& vk,
186 int& Nsbt,
int& Nconv,
const FIELD& b)
190 int Niter_eigen = m_Niter_eigen;
191 real_t Enorm_eigen = m_Enorm_eigen;
192 real_t Vthreshold = m_Vthreshold;
196 if (Nk + Np > TDa.size()) {
197 vout.
crucial(m_vl,
"Error at %s: std::vector TDa is too small.\n",
200 }
else if (Nk + Np > vk.size()) {
201 vout.
crucial(m_vl,
"Error at %s: std::vector vk is too small.\n",
208 vout.
general(m_vl,
" size of TDa = %d\n", TDa.size());
209 vout.
general(m_vl,
" size of vk = %d\n", vk.size());
224 real_t bnorm2 = b.norm2();
225 if (bnorm2 > 1.e-12) {
232 real_t vnorm = vk[0].norm2();
233 vnorm = 1.0 / sqrt(vnorm);
237 for (
int k = 0; k < k2; ++k) {
238 step(Nm, k, TDa, m_TDb, vk, m_f);
244 for (
int iter = 0; iter < Niter_eigen; ++iter) {
247 for (
int k = k2; k < Nm; ++k) {
248 step(Nm, k, TDa, m_TDb, vk, m_f);
251 scal(m_f, m_TDb[Nm - 1]);
255 for (
int k = 0; k < Nm; ++k) {
256 m_TDa2[k] = TDa[k + k1 - 1];
257 m_TDb2[k] = m_TDb[k + k1 - 1];
261 setUnit_Qt(Nm, m_Qt);
262 tqri(m_TDa2, m_TDb2, Nm, Nm, m_Qt, nconv);
267 vout.
crucial(m_vl,
"Error at %s: QR method NOT converged.\n",
271 vout.
paranoiac(m_vl,
" tqri: converged at iter = %d\n", nconv);
277 m_sorter->sort(m_TDa2, Nm);
280 setUnit_Qt(Nm, m_Qt);
281 for (
int ip = k2; ip < Nm; ++ip) {
285 qrtrf(TDa, m_TDb, Nm, Nm, m_Qt, Dsh, kmin, kmax);
290 for (
int i = 0; i < (Nk + 1); ++i) {
294 for (
int j = k1 - 1; j < k2 + 1; ++j) {
295 for (
int k = 0; k < Nm; ++k) {
296 axpy(m_B[j], m_Qt[k + Nm * j], vk[k]);
300 for (
int j = k1 - 1; j < k2 + 1; ++j) {
305 scal(m_f, m_Qt[Nm - 1 + Nm * (k2 - 1)]);
307 axpy(m_f, m_TDb[k2 - 1], vk[k2]);
310 beta_k = m_f.norm2();
311 beta_k = sqrt(beta_k);
315 real_t beta_r = 1.0 / beta_k;
317 scal(vk[k2], beta_r);
323 m_TDb[k2 - 1] = beta_k;
328 setUnit_Qt(Nm, m_Qt);
330 tqri(m_TDa2, m_TDb2, Nk, Nm, m_Qt, nconv);
335 vout.
crucial(m_vl,
"Error at %s: QR method NOT converged.\n",
339 vout.
paranoiac(m_vl,
" tqri: converged at iter = %d\n", nconv);
342 for (
int k = 0; k < Nk; ++k) {
346 for (
int j = 0; j < Nk; ++j) {
347 for (
int k = 0; k < Nk; ++k) {
348 axpy(m_B[j], m_Qt[k + j * Nm], vk[k]);
358 for (
int i = 0; i < Nk; ++i) {
359 m_fopr->mult(m_v, m_B[i]);
365 real_t TDa2_tmp = vnum / vden;
366 axpy(m_v, -TDa2_tmp, m_B[i]);
370 vout.
detailed(m_vl,
" %4d %18.14f %18.14e\n", i, TDa2_tmp, vv);
374 m_TDa2[i] = TDa2_tmp;
375 if (vv < Enorm_eigen) {
378 if (!m_sorter->comp(m_TDa2[i], Vthreshold)) {
388 if (Kthreshold > 0) {
393 for (
int i = 0; i < Kdis; ++i) {
394 TDa[i] = m_TDa2[m_Iconv[i]];
396 Nsbt = Kdis - Kthreshold;
415 std::vector<int>
idx = m_sorter->sort_index(TDa, Kdis);
416 for (
int i = 0; i < Kdis; ++i) {
417 copy(vk[i], m_B[m_Iconv[
idx[i]]]);
418 real_t vv = vk[i].norm2();
426 vout.
crucial(m_vl,
"Error at %s: NOT converged.\n", class_name.c_str());
439 template<
typename FIELD,
typename FOPR>
441 int Nm,
int k, std::vector<real_t>& TDa,
442 std::vector<real_t>& TDb, std::vector<FIELD>& vk,
446 vout.
crucial(m_vl,
"Error at %s: k is larger than Nm.\n",
450 m_fopr->mult(w, vk[k]);
454 axpy(w, -alph, vk[k]);
458 real_t beta_r = 1.0 / beta;
460 scal(vk[k + 1], beta_r);
470 m_fopr->mult(w, vk[k]);
472 axpy(w, -TDb[k - 1], vk[k - 1]);
476 axpy(w, -alph, vk[k]);
480 real_t beta_r = 1.0 / beta;
492 schmidt_orthogonalization(w, vk, k);
494 if (k < Nm - 1)
copy(vk[k + 1], w);
500 template<
typename FIELD,
typename FOPR>
503 std::vector<FIELD>& vk,
506 for (
int j = 0; j < k; ++j) {
509 prod *= cmplx(-1.0, 0.0);
510 axpy(w, prod, vk[j]);
519 template<
typename FIELD,
typename FOPR>
521 int Nm, std::vector<real_t>& Qt)
523 for (
int i = 0; i < Qt.size(); ++i) {
527 for (
int k = 0; k < Nm; ++k) {
528 Qt[k + k * Nm] = 1.0;
534 template<
typename FIELD,
typename FOPR>
536 std::vector<real_t>& TDa, std::vector<real_t>& TDb,
537 int Nk,
int Nm, std::vector<real_t>& Qt,
int& nconv)
539 int Niter = 100 * Nm;
546 for (
int iter = 0; iter < Niter; ++iter) {
548 real_t dsub = TDa[kmax - 1] - TDa[kmax - 2];
549 real_t dd = sqrt(dsub * dsub + 4.0 * TDb[kmax - 2] * TDb[kmax - 2]);
550 real_t Dsh = 0.5 * (TDa[kmax - 2] + TDa[kmax - 1]
551 + fabs(dd) * (dsub / fabs(dsub)));
555 qrtrf(TDa, TDb, Nk, Nm, Qt, Dsh, kmin, kmax);
558 for (
int j = kmax - 1; j >= kmin; --j) {
559 real_t dds = fabs(TDa[j - 1]) + fabs(TDa[j]);
560 if (fabs(TDb[j - 1]) + dds > dds) {
563 for (
int j = 0; j < kmax - 1; ++j) {
564 real_t dds = fabs(TDa[j]) + fabs(TDa[j + 1]);
566 if (fabs(TDb[j]) + dds > dds) {
595 template<
typename FIELD,
typename FOPR>
597 std::vector<real_t>& TDa,
598 std::vector<real_t>& TDb,
599 int Nk,
int Nm, std::vector<real_t>& Qt,
600 real_t Dsh,
int kmin,
int kmax)
605 real_t Fden = 1.0 / sqrt((TDa[k] - Dsh) * (TDa[k] - Dsh)
607 real_t c = (TDa[k] - Dsh) * Fden;
608 real_t s = -TDb[k] * Fden;
611 real_t tmpa2 = TDa[k + 1];
614 TDa[k] = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
615 TDa[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
616 TDb[k] = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
618 TDb[k + 1] = c * TDb[k + 1];
620 for (
int i = 0; i < Nk; ++i) {
621 real_t Qtmp1 = Qt[i + Nm * k];
622 real_t Qtmp2 = Qt[i + Nm * (k + 1)];
623 Qt[i + Nm * k] = c * Qtmp1 - s * Qtmp2;
624 Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
628 for (
int k = kmin; k < kmax - 1; ++k) {
629 real_t Fden = 1.0 / sqrt(x * x + TDb[k - 1] * TDb[k - 1]);
630 real_t c = TDb[k - 1] * Fden;
634 real_t tmpa2 = TDa[k + 1];
636 TDa[k] = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
637 TDa[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
638 TDb[k] = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
639 TDb[k - 1] = c * TDb[k - 1] - s * x;
642 TDb[k + 1] = c * TDb[k + 1];
645 for (
int i = 0; i < Nk; ++i) {
646 real_t Qtmp1 = Qt[i + Nm * k];
647 real_t Qtmp2 = Qt[i + Nm * (k + 1)];
648 Qt[i + Nm * k] = c * Qtmp1 - s * Qtmp2;
649 Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;