/*!
        @file    $Id:: eigensolver_IRLanczos.cpp #$

        @brief

        @author  <Hideo Matsufuru> hideo.matsufuru@kek.jp(matsufuru)
                 $LastChangedBy: aoym $

        @date    $LastChangedDate:: 2013-03-21 15:28:34 #$

        @version $LastChangedRevision: 854 $
*/

#include "eigensolver_IRLanczos.h"

using std::valarray;
using std::string;

#ifdef USE_PARAMETERS_FACTORY
#include "parameters_factory.h"
#endif

//- parameter entries
namespace {
  void append_entry(Parameters& param)
  {
    param.Register_string("eigensolver_mode", "NULL");
    param.Register_int("number_of_wanted_eigenvectors", 0);
    param.Register_int("number_of_working_eigenvectors", 0);
    param.Register_int("maximum_number_of_iteration", 0);
    param.Register_double("convergence_criterion_squared", 0.0);
    param.Register_double("threshold_value", 0.0);

    param.Register_string("verbose_level", "NULL");
  }


#ifdef USE_PARAMETERS_FACTORY
  bool init_param = ParametersFactory::Register("Eigensolver.IRLanczos", append_entry);
#endif
}
//- end

//- parameters class
Parameters_Eigensolver_IRLanczos::Parameters_Eigensolver_IRLanczos() { append_entry(*this); }
//- end

//====================================================================
void Eigensolver_IRLanczos::set_parameters(const Parameters& params)
{
  const string str_vlevel = params.get_string("verbose_level");

  m_vl = vout.set_verbose_level(str_vlevel);

  //- fetch and check input parameters
  string str_sortfield_type;
  int    Nk, Np;
  int    Niter_eigen;
  double Enorm_eigen, Vthreshold;

  int err = 0;
  err += params.fetch_string("eigensolver_mode", str_sortfield_type);
  err += params.fetch_int("number_of_wanted_eigenvectors", Nk);
  err += params.fetch_int("number_of_working_eigenvectors", Np);
  err += params.fetch_int("maximum_number_of_iteration", Niter_eigen);
  err += params.fetch_double("convergence_criterion_squared", Enorm_eigen);
  err += params.fetch_double("threshold_value", Vthreshold);

  if (err) {
    vout.crucial(m_vl, "Eigensolver_IRLanczos: fetch error, input parameter not found.\n");
    abort();
  }


  set_parameters(Nk, Np, Niter_eigen, Enorm_eigen, Vthreshold);
}


//====================================================================
void Eigensolver_IRLanczos::set_parameters(int Nk, int Np,
                                           int Niter_eigen, double Enorm_eigen,
                                           double Vthreshold)
{
  //- print input parameters
  vout.general(m_vl, "Parameters of Eigensolver_IRLanczos:\n");
  vout.general(m_vl, "  Nk          = %d\n", Nk);
  vout.general(m_vl, "  Np          = %d\n", Np);
  vout.general(m_vl, "  Niter_eigen = %d\n", Niter_eigen);
  vout.general(m_vl, "  Enorm_eigen = %16.8e\n", Enorm_eigen);
  vout.general(m_vl, "  Vthreshold  = %16.8e\n", Vthreshold);

  //- range check
  int err = 0;
  err += ParameterCheck::non_negative(Nk);
  err += ParameterCheck::non_negative(Np);
  err += ParameterCheck::non_negative(Niter_eigen);
  err += ParameterCheck::square_non_zero(Enorm_eigen);
  // NB. Vthreshold == 0 is allowed.

  if (err) {
    vout.crucial(m_vl, "Eigensolver_IRLanczos: parameter range check failed.\n");
    abort();
  }

  //- store values
  m_Nk          = Nk;
  m_Np          = Np;
  m_Niter_eigen = Niter_eigen;
  m_Enorm_eigen = Enorm_eigen;
  m_Vthreshold  = Vthreshold;
}


//====================================================================
void Eigensolver_IRLanczos::solve(valarray<double>& TDa, valarray<Field>& vk,
                                  int& Nsbt, int& Nconv, const Field& b)
{
  int    Nk          = m_Nk;
  int    Np          = m_Np;
  int    Niter_eigen = m_Niter_eigen;
  double Enorm_eigen = m_Enorm_eigen;
  double Vthreshold  = m_Vthreshold;


  Nconv = -1;
  Nsbt  = 0;

  int Nm = Nk + Np;

  if (Nk + Np > TDa.size()) {
    vout.crucial(m_vl, "Eigensolver_IRLanczos: valarray TDa is too small.\n");
    abort();
  } else if (Nk + Np > vk.size()) {
    vout.crucial(m_vl, "Eigensolver_IRLanczos: valarray vk is too small.\n");
    abort();
  }

  valarray<double> TDb(Nm);
  valarray<double> TDa2(Nm);
  valarray<double> TDb2(Nm);
  valarray<double> Qt(Nm * Nm);
  valarray<int>    Iconv(Nm);

  int             Nin  = vk[0].nin();
  int             Nvol = vk[0].nvol();
  int             Nex  = vk[0].nex();
  valarray<Field> B(Nm);
  for (int k = 0; k < Nm; ++k) {
    B[k].reset(Nin, Nvol, Nex);
  }

  Field f(vk[0]);
  Field v(vk[0]);

  vout.general(m_vl, "  Nk = %d  Np = %d\n", Nk, Np);
  vout.general(m_vl, "  Nm = %d\n", Nm);
  vout.general(m_vl, "  size of TDa = %d\n", TDa.size());
  vout.general(m_vl, "  size of vk  = %d\n", vk.size());

  int k1    = 1;
  int k2    = Nk;
  int kconv = 0;

  int    Kdis       = 0;
  int    Kthreshold = 0;
  double beta_k;

  //- Set initial vector
  vk[0] = 1.0;
  double vnorm = vk[0] * vk[0];
  vk[0] = 1.0 / sqrt(vnorm);
  // (uniform vector)

  //- Initial Nk steps
  for (int k = 0; k < k2; ++k) {
    step(Nm, k, TDa, TDb, vk, f);
  }

  //- Restarting loop begins
  for (int iter = 0; iter < Niter_eigen; ++iter) {
    vout.paranoiac(m_vl, "\n iter=%d\n", iter);

    int Nm2 = Nm - kconv;

    for (int k = k2; k < Nm; ++k) {
      step(Nm, k, TDa, TDb, vk, f);
    }

    f *= TDb[Nm - 1];

    //- getting eigenvalues
    for (int k = 0; k < Nm2; ++k) {
      TDa2[k] = TDa[k + k1 - 1];
      TDb2[k] = TDb[k + k1 - 1];
    }
    setUnit_Qt(Nm, Qt);
    tqri(TDa2, TDb2, Nm2, Nm, Qt);

    //- sorting
    m_sort->sort(Nm, TDa2);

    //- Implicitly shifted QR transformations
    setUnit_Qt(Nm, Qt);
    for (int ip = k2; ip < Nm; ++ip) {
      double Dsh  = TDa2[ip - kconv];
      int    kmin = k1;
      int    kmax = Nm;
      qrtrf(TDa, TDb, Nm, Nm, Qt, Dsh, kmin, kmax);
    }

    for (int i = 0; i < (Nk + 1); ++i) {
      B[i] = 0.0;
    }

    for (int j = k1 - 1; j < k2 + 1; ++j) {
      for (int k = 0; k < Nm; ++k) {
        B[j] += Qt[k + Nm * j] * vk[k];
      }
    }

    for (int j = k1 - 1; j < k2 + 1; ++j) {
      vk[j] = B[j];
    }

    //- Compressed vector f and beta(k2)
    f     *= Qt[Nm - 1 + Nm * (k2 - 1)];
    f     += TDb[k2 - 1] * vk[k2];
    beta_k = f * f;
    beta_k = sqrt(beta_k);

    vout.paranoiac(m_vl, " beta(k) = %20.14f\n", beta_k);


    double beta_r = 1.0 / beta_k;
    vk[k2]      = beta_r * f;
    TDb[k2 - 1] = beta_k;

    //- Convergence test
    TDa2 = TDa;
    TDb2 = TDb;
    setUnit_Qt(Nm, Qt);

    tqri(TDa2, TDb2, Nk, Nm, Qt);
    for (int k = 0; k < Nk; ++k) {
      B[k] = 0.0;
    }

    for (int j = 0; j < Nk; ++j) {
      for (int k = 0; k < Nk; ++k) {
        B[j] += Qt[k + j * Nm] * vk[k];
      }
    }

    Kdis       = 0;
    Kthreshold = 0;

    for (int i = 0; i < Nk; ++i) {
      v = m_fopr->mult(B[i]);
      double vnum = B[i] * v;
      double vden = B[i] * B[i];

      //      vout.paranoiac(m_vl, " vden = %20.14e\n",vden);

      TDa2[i] = vnum / vden;
      v      -= TDa2[i] * B[i];
      double vv = v * v;

      vout.paranoiac(m_vl, "  %4d  %18.14f  %18.14e\n", i, TDa2[i], vv);

      if (vv < Enorm_eigen) {
        Iconv[Kdis] = i;
        ++Kdis;
        //        if(fabs(TDa2[i]) > Vthreshold){
        if (m_sort->converged(TDa2[i], Vthreshold)) {
          ++Kthreshold;
        }
      }
    }  // i-loop end


    vout.paranoiac(m_vl, " #modes converged: %d\n", Kdis);


    if (Kthreshold > 0) {
      // (there is a converged eigenvalue larger than Vthreshold.)
      Nconv = iter;

      //- Sorting
      for (int i = 0; i < Kdis; ++i) {
        TDa[i] = TDa2[Iconv[i]];
        vk[i]  = B[Iconv[i]];
      }

      m_sort->sort(Kdis, TDa, vk);

      Nsbt = Kdis - Kthreshold;

      vout.general(m_vl, "\n Converged:\n");
      vout.general(m_vl, "  Nconv   = %d\n", Nconv);
      vout.general(m_vl, "  beta(k) = %20.14e\n", beta_k);
      vout.general(m_vl, "  Kdis    = %d\n", Kdis);
      vout.general(m_vl, "  Nsbt    = %d\n", Nsbt);

      return;
    }
  } // end of iter loop


  if (Nconv == -1) {
    vout.crucial(m_vl, "Eigensolver_IRLanczos: NOT converged.\n");
    abort();
  }
}


//====================================================================
void Eigensolver_IRLanczos::step(int Nm, int k, valarray<double>& TDa,
                                 valarray<double>& TDb, valarray<Field>& vk,
                                 Field& w)
{
  if (k >= Nm) {
    vout.crucial(m_vl, "Eigensolver_IRLanczos: k is larger than Nm.\n");
    abort();
  } else if (k == 0) {  // Initial step
    w = m_fopr->mult(vk[k]);
    double alph = vk[k] * w;

    w -= alph * vk[k];
    double beta = w * w;
    beta = sqrt(beta);
    double beta_r = 1.0 / beta;
    vk[k + 1] = beta_r * w;

    TDa[k] = alph;
    TDb[k] = beta;
  } else {   // Iteration step
    w  = m_fopr->mult(vk[k]);
    w -= TDb[k - 1] * vk[k - 1];
    double alph = vk[k] * w;

    w -= alph * vk[k];
    double beta = w * w;
    beta = sqrt(beta);
    double beta_r = 1.0 / beta;
    w *= beta_r;

    TDa[k] = alph;
    TDb[k] = beta;

    schmidt_orthogonalization(w, vk, k);

    if (k < Nm - 1) vk[k + 1] = w;
  }
}


//====================================================================
void Eigensolver_IRLanczos::schmidt_orthogonalization(Field& w,
                                                      valarray<Field>& vk, int k)
{
  for (int j = 0; j < k; ++j) {
    dcomplex prod = ddotc_complex(vk[j], w);
    w.daxpy(-prod, vk[j]);
  }
}


//====================================================================
void Eigensolver_IRLanczos::setUnit_Qt(int Nm, valarray<double>& Qt)
{
  for (int i = 0; i < Qt.size(); ++i) {
    Qt[i] = 0.0;
  }

  for (int k = 0; k < Nm; ++k) {
    Qt[k + k * Nm] = 1.0;
  }
}


//====================================================================
void Eigensolver_IRLanczos::tqri(valarray<double>& TDa,
                                 valarray<double>& TDb,
                                 int Nk, int Nm, valarray<double>& Qt)
{
  int Niter = 100 * Nm;
  int kmin  = 1;
  int kmax  = Nk;
  // (these parameters should be tuned)


  int Nconv = -1;

  for (int iter = 0; iter < Niter; ++iter) {
    //- determination of 2x2 leading submatrix
    double dsub = TDa[kmax - 1] - TDa[kmax - 2];
    double dd   = sqrt(dsub * dsub + 4.0 * TDb[kmax - 2] * TDb[kmax - 2]);
    double Dsh  = 0.5 * (TDa[kmax - 2] + TDa[kmax - 1]
                         + fabs(dd) * (dsub / fabs(dsub)));
    // (Dsh: shift)

    //- transformation
    qrtrf(TDa, TDb, Nk, Nm, Qt, Dsh, kmin, kmax);

    //- Convergence criterion (redef of kmin and kmax)
    for (int j = kmax - 1; j >= kmin; --j) {
      double dds = fabs(TDa[j - 1]) + fabs(TDa[j]);
      if (fabs(TDb[j - 1]) + dds > dds) {
        kmax = j + 1;

        for (int j = 0; j < kmax - 1; ++j) {
          double dds = fabs(TDa[j]) + fabs(TDa[j + 1]);

          if (fabs(TDb[j]) + dds > dds) {
            kmin = j + 1;

            break;
          }
        }

        break;
      }

      if (j == kmin) {
        Nconv = iter;
        vout.paranoiac(m_vl, "  converged at iter = %d\n", Nconv);

        return;
      }
    } // end of j loop
  }   // end of iter loop

  if (Nconv == -1) {
    vout.crucial(m_vl, "Eigensolver_IRLanczos: QL method NOT converged, Niter = %d.\n", Niter);
    abort();
  }
}


//====================================================================
void Eigensolver_IRLanczos::qrtrf(valarray<double>& TDa,
                                  valarray<double>& TDb,
                                  int Nk, int Nm, valarray<double>& Qt,
                                  double Dsh, int kmin, int kmax)
{
  int    k = kmin - 1;
  double x;

  double Fden = 1.0 / sqrt((TDa[k] - Dsh) * (TDa[k] - Dsh)
                           + TDb[k] * TDb[k]);
  double c = (TDa[k] - Dsh) * Fden;
  double s = -TDb[k] * Fden;

  double tmpa1 = TDa[k];
  double tmpa2 = TDa[k + 1];
  double tmpb  = TDb[k];

  TDa[k]     = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
  TDa[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
  TDb[k]     = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
  x          = -s * TDb[k + 1];
  TDb[k + 1] = c * TDb[k + 1];

  for (int i = 0; i < Nk; ++i) {
    double Qtmp1 = Qt[i + Nm * k];
    double Qtmp2 = Qt[i + Nm * (k + 1)];
    Qt[i + Nm * k]       = c * Qtmp1 - s * Qtmp2;
    Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
  }

  //- Givens transformations
  for (int k = kmin; k < kmax - 1; ++k) {
    double Fden = 1.0 / sqrt(x * x + TDb[k - 1] * TDb[k - 1]);
    double c    = TDb[k - 1] * Fden;
    double s    = -x * Fden;

    double tmpa1 = TDa[k];
    double tmpa2 = TDa[k + 1];
    double tmpb  = TDb[k];
    TDa[k]     = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
    TDa[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
    TDb[k]     = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
    TDb[k - 1] = c * TDb[k - 1] - s * x;
    if (k != kmax - 2) {
      x          = -s * TDb[k + 1];
      TDb[k + 1] = c * TDb[k + 1];
    }

    for (int i = 0; i < Nk; ++i) {
      double Qtmp1 = Qt[i + Nm * k];
      double Qtmp2 = Qt[i + Nm * (k + 1)];
      Qt[i + Nm * k]       = c * Qtmp1 - s * Qtmp2;
      Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
    }
  }
}


//====================================================================
//============================================================END=====
