Bridge++  Version 1.5.4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
solver_BiCGStab_IDS_L_Cmplx.cpp
Go to the documentation of this file.
1 
15 
16 #ifdef USE_FACTORY_AUTOREGISTER
17 namespace {
18  bool init = Solver_BiCGStab_IDS_L_Cmplx::register_factory();
19 }
20 #endif
21 
22 const std::string Solver_BiCGStab_IDS_L_Cmplx::class_name = "Solver_BiCGStab_IDS_L_Cmplx";
23 
24 //====================================================================
26 {
27  const string str_vlevel = params.get_string("verbose_level");
28 
29  m_vl = vout.set_verbose_level(str_vlevel);
30 
31  //- fetch and check input parameters
32  int Niter, Nrestart;
33  double Stop_cond;
34  bool use_init_guess;
35  double Omega_tolerance;
36  int N_L;
37  double Tol_L;
38 
39  int err = 0;
40  err += params.fetch_int("maximum_number_of_iteration", Niter);
41  err += params.fetch_int("maximum_number_of_restart", Nrestart);
42  err += params.fetch_double("convergence_criterion_squared", Stop_cond);
43  err += params.fetch_bool("use_initial_guess", use_init_guess);
44  err += params.fetch_double("Omega_tolerance", Omega_tolerance);
45  err += params.fetch_int("number_of_orthonormal_vectors", N_L);
46  err += params.fetch_double("tolerance_for_DynamicSelection_of_L", Tol_L);
47 
48  if (err) {
49  vout.crucial(m_vl, "Error at %s: input parameter not found.\n", class_name.c_str());
50  exit(EXIT_FAILURE);
51  }
52 
53  set_parameters(Niter, Nrestart, Stop_cond, use_init_guess);
54  set_parameters_BiCGStab_series(Omega_tolerance);
55  set_parameters_DS_L(N_L, Tol_L);
56 }
57 
58 
59 //====================================================================
60 void Solver_BiCGStab_IDS_L_Cmplx::set_parameters(const int Niter, const int Nrestart, const double Stop_cond)
61 {
63 
64  //- print input parameters
65  vout.general(m_vl, "%s: input parameters\n", class_name.c_str());
66  vout.general(m_vl, " Niter = %d\n", Niter);
67  vout.general(m_vl, " Nrestart = %d\n", Nrestart);
68  vout.general(m_vl, " Stop_cond = %8.2e\n", Stop_cond);
69 
70  //- range check
71  int err = 0;
72  err += ParameterCheck::non_negative(Niter);
73  err += ParameterCheck::non_negative(Nrestart);
74  err += ParameterCheck::square_non_zero(Stop_cond);
75 
76  if (err) {
77  vout.crucial(m_vl, "Error at %s: parameter range check failed.\n", class_name.c_str());
78  exit(EXIT_FAILURE);
79  }
80 
81  //- store values
82  m_Niter = Niter;
83  m_Nrestart = Nrestart;
84  m_Stop_cond = Stop_cond;
85 }
86 
87 
88 //====================================================================
89 void Solver_BiCGStab_IDS_L_Cmplx::set_parameters(const int Niter, const int Nrestart, const double Stop_cond, const bool use_init_guess)
90 {
92 
93  //- print input parameters
94  vout.general(m_vl, "%s: input parameters\n", class_name.c_str());
95  vout.general(m_vl, " Niter = %d\n", Niter);
96  vout.general(m_vl, " Nrestart = %d\n", Nrestart);
97  vout.general(m_vl, " Stop_cond = %8.2e\n", Stop_cond);
98  vout.general(m_vl, " use_init_guess = %s\n", use_init_guess ? "true" : "false");
99 
100  //- range check
101  int err = 0;
102  err += ParameterCheck::non_negative(Niter);
103  err += ParameterCheck::non_negative(Nrestart);
104  err += ParameterCheck::square_non_zero(Stop_cond);
105 
106  if (err) {
107  vout.crucial(m_vl, "Error at %s: parameter range check failed.\n", class_name.c_str());
108  exit(EXIT_FAILURE);
109  }
110 
111  //- store values
112  m_Niter = Niter;
113  m_Nrestart = Nrestart;
114  m_Stop_cond = Stop_cond;
115  m_use_init_guess = use_init_guess;
116 }
117 
118 
119 //====================================================================
121 {
123 
124  //- print input parameters
125  vout.general(m_vl, " Omega_tolerance = %8.2e\n", Omega_tolerance);
126 
127  //- range check
128  // NB. Omega_tolerance == 0.0 is allowed.
129 
130  //- store values
131  m_Omega_tolerance = Omega_tolerance;
132 }
133 
134 
135 //====================================================================
136 void Solver_BiCGStab_IDS_L_Cmplx::set_parameters_DS_L(const int N_L, const double Tol_L)
137 {
138  //- print input parameters
139  vout.general(m_vl, " N_L = %d\n", N_L);
140  vout.general(m_vl, " Tol_L = %16.8e\n", Tol_L);
141 
142  //- range check
143  int err = 0;
144  err += ParameterCheck::non_negative(N_L);
145  err += ParameterCheck::square_non_zero(Tol_L);
146 
147  if (err) {
148  vout.crucial(m_vl, "Error at %s: parameter range check failed.\n", class_name.c_str());
149  exit(EXIT_FAILURE);
150  }
151 
152  //- store values
153  m_N_L = N_L;
154  m_Tol_L = Tol_L;
155 }
156 
157 
158 //====================================================================
160  int& Nconv, double& diff)
161 {
162  const double bnorm2 = b.norm2();
163  const int bsize = b.size();
164 
165  vout.paranoiac(m_vl, "%s: starts\n", class_name.c_str());
166  vout.paranoiac(m_vl, " norm of b = %16.8e\n", bnorm2);
167  vout.paranoiac(m_vl, " size of b = %d\n", bsize);
168 
169  bool is_converged = false;
170  int Nconv2 = 0;
171  double diff2 = 1.0; // superficial initialization
172  double rr;
173 
174  int Nconv_unit = 1;
175  // if (m_fopr->get_mode() == "DdagD" || m_fopr->get_mode() == "DDdag") {
176  // Nconv_unit = 2;
177  // }
178 
179  reset_field(b);
180 
181  if (m_use_init_guess) {
182  copy(m_s, xq); // s = xq;
183  } else {
184  copy(m_s, b); // s = b;
185  }
186  solve_init(b, rr);
187  Nconv2 += Nconv_unit;
188 
189  vout.detailed(m_vl, " iter: %8d %22.15e\n", Nconv2, rr / bnorm2);
190 
191 
192  for (int i_restart = 0; i_restart < m_Nrestart; i_restart++) {
193  for (int iter = 0; iter < m_Niter; iter++) {
194  if (rr / bnorm2 < m_Stop_cond) break;
195 
196  solve_step(rr);
197  Nconv2 += 2 * Nconv_unit * m_N_L_prev;
198 
199  vout.paranoiac(m_vl, " iter,N_L: %8d %8d\n", Nconv2, m_N_L_prev);
200  vout.detailed(m_vl, " iter: %8d %22.15e\n", Nconv2, rr / bnorm2);
201  }
202 
203  //- calculate true residual
204  m_fopr->mult(m_s, m_x); // s = m_fopr->mult(x);
205  axpy(m_s, -1.0, b); // s -= b;
206  diff2 = m_s.norm2();
207 
208  if (diff2 / bnorm2 < m_Stop_cond) {
209  vout.detailed(m_vl, "%s: converged.\n", class_name.c_str());
210  vout.detailed(m_vl, " iter(final): %8d %22.15e\n", Nconv2, diff2 / bnorm2);
211 
212  is_converged = true;
213 
214  m_Nrestart_count = i_restart;
215  m_Nconv_count = Nconv2;
216 
217  break;
218  } else {
219  //- restart with new approximate solution
220  copy(m_s, m_x); // s = x;
221  solve_init(b, rr);
222 
223  vout.detailed(m_vl, "%s: restarted.\n", class_name.c_str());
224  }
225 
226  if (is_converged) break;
227  }
228 
229 
230  if (diff2 / bnorm2 > m_Stop_cond) {
231  vout.crucial(m_vl, "Error at %s: not converged.\n", class_name.c_str());
232  vout.crucial(m_vl, " iter(final): %8d %22.15e\n", Nconv2, diff2 / bnorm2);
233  exit(EXIT_FAILURE);
234  }
235 
236 
237  copy(xq, m_x); // xq = x;
238 
239 #pragma omp barrier
240 #pragma omp master
241  {
242  diff = sqrt(diff2 / bnorm2);
243  Nconv = Nconv2;
244  }
245 #pragma omp barrier
246 }
247 
248 
249 //====================================================================
251 {
252 #pragma omp barrier
253 #pragma omp master
254  {
255  const int Nin = b.nin();
256  const int Nvol = b.nvol();
257  const int Nex = b.nex();
258 
259  if ((m_s.nin() != Nin) || (m_s.nvol() != Nvol) || (m_s.nex() != Nex)) {
260  m_s.reset(Nin, Nvol, Nex);
261  m_x.reset(Nin, Nvol, Nex);
262  m_r_init.reset(Nin, Nvol, Nex);
263  m_v.reset(Nin, Nvol, Nex);
264  }
265 
266  m_u.resize(m_N_L + 1);
267  m_r.resize(m_N_L + 1);
268 
269  for (int i = 0; i < m_N_L + 1; ++i) {
270  m_u[i].reset(Nin, Nvol, Nex);
271  m_r[i].reset(Nin, Nvol, Nex);
272  }
273  }
274 #pragma omp barrier
275 
276  vout.paranoiac(m_vl, " %s: field size reset.\n", class_name.c_str());
277 }
278 
279 
280 //====================================================================
282 {
283  copy(m_x, m_s); // x = s;
284 
285  for (int i = 0; i < m_N_L + 1; ++i) {
286  m_r[i].set(0.0); // r[i] = 0.0;
287  m_u[i].set(0.0); // u[i] = 0.0;
288  }
289 
290  // r[0] = b - A x_0
291  m_fopr->mult(m_v, m_s); // m_v = m_fopr->mult(s);
292  copy(m_r[0], b); // r[0] = b;
293  axpy(m_r[0], -1.0, m_v); // r[0] -= m_v;
294 
295  copy(m_r_init, m_r[0]); // r_init = r[0];
296  rr = m_r[0].norm2(); // rr = r[0] * r[0];
297 
298 #pragma omp barrier
299 #pragma omp master
300  {
301  m_rho_prev = cmplx(-1.0, 0.0);
302 
303  // NB. m_alpha_prev = 0.0 \neq 1.0
304  m_alpha_prev = cmplx(0.0, 0.0);
305 
306  m_rr_prev = rr;
307  m_N_L_prev = m_N_L;
308  }
309 #pragma omp barrier
310 }
311 
312 
313 //====================================================================
315 {
316  dcomplex rho_prev2 = m_rho_prev;
317  dcomplex alpha_prev2 = m_alpha_prev;
318 
319  int N_L_tmp = 0; // superficial initialization
320  dcomplex c_Rayleigh_prev = cmplx(0.0, 0.0);
321 
322  bool is_converged_L = false;
323 
324 
325  for (int j = 0; j < m_N_L_prev; ++j) {
326  if (!is_converged_L) {
327  dcomplex rho = dotc(m_r[j], m_r_init); // rho = r[j] * r_init;
328  rho = conj(rho);
329 
330  dcomplex beta = alpha_prev2 * (rho / rho_prev2);
331 
332  rho_prev2 = rho;
333 
334  for (int i = 0; i < j + 1; ++i) {
335  aypx(-beta, m_u[i], m_r[i]); // u[i] = - beta * u[i] + r[i];
336  }
337 
338  m_fopr->mult(m_u[j + 1], m_u[j]); // u[j+1] = m_fopr->mult(u[j]);
339 
340  dcomplex gamma = dotc(m_u[j + 1], m_r_init);
341 
342  alpha_prev2 = rho_prev2 / conj(gamma);
343 
344  for (int i = 0; i < j + 1; ++i) {
345  axpy(m_r[i], -alpha_prev2, m_u[i + 1]); // r[i] -= alpha_prev * u[i+1];
346  }
347 
348  m_fopr->mult(m_r[j + 1], m_r[j]); // r[j+1] = m_fopr->mult(r[j]);
349 
350  axpy(m_x, alpha_prev2, m_u[0]); // x += alpha_prev * u[0];
351 
352 
353  //- calculate the Rayleigh quotient for N_L_tmp
354 
355  // dcomplex c_Rayleigh = (r[j] * r[j+1]) / (r[j] * r[j]);
356  double r_tmp = m_r[j].norm2(); // r_tmp = r[j] * r[j];
357  dcomplex c_Rayleigh = dotc(m_r[j], m_r[j + 1]) / r_tmp; // c_Rayleigh = r[j] * r[j+1] / r_tmp;
358 
359  dcomplex c_E = (c_Rayleigh - c_Rayleigh_prev) / c_Rayleigh;
360 
361  // #pragma omp master
362  c_Rayleigh_prev = c_Rayleigh;
363  // #pragma omp barrier
364 
365  N_L_tmp = j + 1;
366 
367  // vout.paranoiac(m_vl, "N_L_tmp,abs(c_E),m_Tol_L = %d %f %f\n",N_L_tmp,abs(c_E),m_Tol_L);
368 
369  if (abs(c_E) < m_Tol_L) {
370  // vout.paranoiac(m_vl, "N_L_tmp = %d\n",N_L_tmp);
371  // break;
372  is_converged_L = true;
373  }
374  }
375  }
376 
377 
378  std::vector<double> sigma(m_N_L + 1);
379  std::vector<dcomplex> gamma_prime(m_N_L + 1);
380 
381  // NB. tau(m_N_L,m_N_L+1), not (m_N_L+1,m_N_L+1)
382  std::vector<dcomplex> tau(m_N_L * (m_N_L + 1));
383 
384  const double sigma_0 = m_r[0].norm2();
385 
386  for (int j = 1; j < N_L_tmp + 1; ++j) {
387  for (int i = 1; i < j; ++i) {
388  int ij = index_ij(i, j);
389 
390  dcomplex r_ji = dotc(m_r[j], m_r[i]);
391  tau[ij] = conj(r_ji) / sigma[i]; // tau[ij] = (r[j] * r[i]) / sigma[i];
392  axpy(m_r[j], -tau[ij], m_r[i]); // r[j] -= tau[ij] * r[i];
393  }
394 
395  sigma[j] = m_r[j].norm2(); // sigma[j] = r[j] * r[j];
396 
397  dcomplex r_0j = dotc(m_r[0], m_r[j]);
398  gamma_prime[j] = conj(r_0j) / sigma[j]; // gamma_prime[j] = (r[0] * r[j]) / sigma[j];
399 
400  //- a prescription to improve stability of BiCGStab(L)
401  double abs_rho = abs(r_0j) / sqrt(sigma[j] * sigma_0);
402  if (abs_rho < m_Omega_tolerance) {
403  gamma_prime[j] *= m_Omega_tolerance / abs_rho;
404  }
405  }
406 
407 
408  std::vector<dcomplex> gamma(m_N_L + 1);
409  gamma[N_L_tmp] = gamma_prime[N_L_tmp];
410 
411  for (int j = N_L_tmp - 1; j > 0; --j) {
412  dcomplex c_tmp = cmplx(0.0, 0.0);
413 
414  for (int i = j + 1; i < N_L_tmp + 1; ++i) {
415  int ji = index_ij(j, i);
416  c_tmp += tau[ji] * gamma[i];
417  }
418 
419  gamma[j] = gamma_prime[j] - c_tmp;
420  }
421 
422 
423  // NB. gamma_double_prime(m_N_L), not (m_N_L+1)
424  std::vector<dcomplex> gamma_double_prime(m_N_L);
425 
426  for (int j = 1; j < N_L_tmp; ++j) {
427  dcomplex c_tmp = cmplx(0.0, 0.0);
428 
429  for (int i = j + 1; i < N_L_tmp; ++i) {
430  int ji = index_ij(j, i);
431  c_tmp += tau[ji] * gamma[i + 1];
432  }
433 
434  gamma_double_prime[j] = gamma[j + 1] + c_tmp;
435  }
436 
437 
438  axpy(m_x, gamma[1], m_r[0]); // x += gamma[ 1] * r[ 0];
439  axpy(m_r[0], -gamma_prime[N_L_tmp], m_r[N_L_tmp]); // r[0] -= gamma_prime[N_L_tmp] * r[N_L_tmp];
440  axpy(m_u[0], -gamma[N_L_tmp], m_u[N_L_tmp]); // u[0] -= gamma[ N_L_tmp] * u[N_L_tmp];
441 
442  for (int j = 1; j < N_L_tmp; ++j) {
443  axpy(m_x, gamma_double_prime[j], m_r[j]); // x += gamma_double_prime[j] * r[j];
444  axpy(m_r[0], -gamma_prime[j], m_r[j]); // r[0] -= gamma_prime[ j] * r[j];
445  axpy(m_u[0], -gamma[j], m_u[j]); // u[0] -= gamma[ j] * u[j];
446  }
447 
448  rr = m_r[0].norm2(); // rr = r[0] * r[0];
449 
450 
451  //- calculate the residual difference for N_L_prev
452  const double c_phi = abs(rr - m_rr_prev) / m_rr_prev;
453 
454  // NB. another criterion for c_phi is possible, m_Tol_phi /= m_Tol_L
455  if (c_phi < m_Tol_L) {
456  N_L_tmp = m_N_L;
457  }
458 
459 
460 #pragma omp barrier
461 #pragma omp master
462  {
463  m_rho_prev = rho_prev2;
464  m_alpha_prev = alpha_prev2;
465  m_N_L_prev = N_L_tmp;
466  m_rr_prev = rr;
467 
468  m_rho_prev *= -gamma_prime[N_L_tmp];
469  }
470 #pragma omp barrier
471 }
472 
473 
474 //====================================================================
476 {
477  const int NPE = CommonParameters::NPE();
478 
479  //- NB1 Nin = 2 * Nc * Nd, Nex = 1 for field_F
480  //- NB2 Nvol = CommonParameters::Nvol()/2 for eo
481  const int Nin = m_x.nin();
482  const int Nvol = m_x.nvol();
483  const int Nex = m_x.nex();
484 
485  const double gflop_fopr = m_fopr->flop_count();
486 
487  if (gflop_fopr < CommonParameters::epsilon_criterion()) {
488  vout.crucial(m_vl, "Warning at %s: no fopr->flop_count() is available, setting flop = 0\n", class_name.c_str());
489  return 0.0;
490  }
491 
492  const double gflop_axpy = (Nin * Nex * 2) * ((Nvol * NPE) / 1.0e+9);
493  const double gflop_dotc = (Nin * Nex * 4) * ((Nvol * NPE) / 1.0e+9);
494  const double gflop_norm = (Nin * Nex * 2) * ((Nvol * NPE) / 1.0e+9);
495 
496  const int N_L_prev_total = (m_Nconv_count - 1) / 2;
497 
498  const double gflop_init = gflop_fopr + gflop_axpy + gflop_norm;
499  const double gflop_step_BiCG_part = 2 * N_L_prev_total * gflop_fopr
500  + 3 * N_L_prev_total * gflop_dotc
501  + (N_L_prev_total + 2 * m_N_L_part_count) * gflop_axpy
502  + N_L_prev_total * gflop_norm;
503  const double gflop_step_L_part = (m_N_L_part_count + N_L_prev_total) * gflop_dotc
504  + (m_N_L_part_count + 3 * N_L_prev_total) * gflop_axpy
505  + (N_L_prev_total + m_Niter_count) * gflop_norm;
506  const double gflop_step = gflop_step_BiCG_part + gflop_step_L_part;
507  const double gflop_true_residual = gflop_fopr + gflop_axpy + gflop_norm;
508 
509  const double gflop = gflop_norm + gflop_init + gflop_step + gflop_true_residual * (m_Nrestart_count + 1)
510  + gflop_init * m_Nrestart_count;
511 
512 
513  return gflop;
514 }
515 
516 
517 //====================================================================
518 //============================================================END=====
BridgeIO vout
Definition: bridgeIO.cpp:503
int fetch_bool(const string &key, bool &value) const
Definition: parameters.cpp:391
void detailed(const char *format,...)
Definition: bridgeIO.cpp:216
static double epsilon_criterion()
double norm2() const
Definition: field.cpp:592
void set_parameters_DS_L(const int N_L, const double Tol_L)
void general(const char *format,...)
Definition: bridgeIO.cpp:197
Container of Field-type object.
Definition: field.h:45
int fetch_double(const string &key, double &value) const
Definition: parameters.cpp:327
int nvol() const
Definition: field.h:127
int index_ij(const int i, const int j)
Class for parameters.
Definition: parameters.h:46
void copy(Field &y, const Field &x)
copy(y, x): y = x
Definition: field.cpp:532
void solve_init(const Field &, double &)
int square_non_zero(const double v)
int nin() const
Definition: field.h:126
dcomplex dotc(const Field &y, const Field &x)
Definition: field.cpp:155
int fetch_int(const string &key, int &value) const
Definition: parameters.cpp:346
virtual double flop_count()
returns the flop in giga unit
Definition: fopr.h:120
void aypx(const double a, Field &y, const Field &x)
aypx(y, a, x): y := a * y + x
Definition: field.cpp:612
int nex() const
Definition: field.h:128
void paranoiac(const char *format,...)
Definition: bridgeIO.cpp:235
void reset(const int Nin, const int Nvol, const int Nex, const element_type cmpl=Element_type::COMPLEX)
Definition: field.h:95
void axpy(Field &y, const double a, const Field &x)
axpy(y, a, x): y := a * x + y
Definition: field.cpp:319
void crucial(const char *format,...)
Definition: bridgeIO.cpp:178
void set_parameters_BiCGStab_series(const double Omega_tolerance)
virtual void mult(Field &, const Field &)=0
multiplies fermion operator to a given field (2nd argument)
int non_negative(const int v)
void set_parameters(const Parameters &params)
static void assert_single_thread(const std::string &class_name)
assert currently running on single thread.
string get_string(const string &key) const
Definition: parameters.cpp:221
void solve(Field &solution, const Field &source, int &Nconv, double &diff)
Bridge::VerboseLevel m_vl
Definition: solver.h:63
static VerboseLevel set_verbose_level(const std::string &str)
Definition: bridgeIO.cpp:131
int size() const
Definition: field.h:132