Bridge++  Ver. 1.2.x
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
solver_BiCGStab_DS_L_Cmplx.cpp
Go to the documentation of this file.
1 
15 
16 using std::valarray;
17 
18 
19 #ifdef USE_FACTORY
20 namespace {
21  Solver *create_object(Fopr *fopr)
22  {
23  return new Solver_BiCGStab_DS_L_Cmplx(fopr);
24  }
25 
26 
27  bool init = Solver::Factory::Register("BiCGStab_DS_L_Cmplx", create_object);
28 }
29 #endif
30 
31 //- parameter entries
32 namespace {
33  void append_entry(Parameters& param)
34  {
35  param.Register_int("maximum_number_of_iteration", 0);
36  param.Register_double("convergence_criterion_squared", 0.0);
37 
38  param.Register_int("number_of_orthonormal_vectors", 0);
39  param.Register_double("tolerance_for_DynamicSelection_of_L", 0);
40 
41  param.Register_string("verbose_level", "NULL");
42  }
43 
44 
45 #ifdef USE_PARAMETERS_FACTORY
46  bool init_param = ParametersFactory::Register("Solver.BiCGStab_DS_L_Cmplx", append_entry);
47 #endif
48 }
49 //- end
50 
51 //- parameters class
53 //- end
54 
55 const std::string Solver_BiCGStab_DS_L_Cmplx::class_name = "Solver_BiCGStab_DS_L_Cmplx";
56 
57 //====================================================================
59 {
60  const string str_vlevel = params.get_string("verbose_level");
61 
62  m_vl = vout.set_verbose_level(str_vlevel);
63 
64  //- fetch and check input parameters
65  int Niter;
66  double Stop_cond;
67  int N_L;
68  double Tol_L;
69 
70  int err = 0;
71  err += params.fetch_int("maximum_number_of_iteration", Niter);
72  err += params.fetch_double("convergence_criterion_squared", Stop_cond);
73  err += params.fetch_int("number_of_orthonormal_vectors", N_L);
74  err += params.fetch_double("tolerance_for_DynamicSelection_of_L", Tol_L);
75 
76  if (err) {
77  vout.crucial(m_vl, "%s: fetch error, input parameter not found.\n", class_name.c_str());
78  abort();
79  }
80 
81 
82  set_parameters(Niter, Stop_cond);
83  set_parameters_DS_L(N_L, Tol_L);
84 }
85 
86 
87 //====================================================================
88 void Solver_BiCGStab_DS_L_Cmplx::set_parameters(const int Niter, const double Stop_cond)
89 {
91 
92  //- print input parameters
93  vout.general(m_vl, "%s: input parameters\n", class_name.c_str());
94  vout.general(m_vl, " Niter = %d\n", Niter);
95  vout.general(m_vl, " Stop_cond = %16.8e\n", Stop_cond);
96 
97  //- range check
98  int err = 0;
99  err += ParameterCheck::non_negative(Niter);
100  err += ParameterCheck::square_non_zero(Stop_cond);
101 
102  if (err) {
103  vout.crucial(m_vl, "%s: parameter range check failed.\n", class_name.c_str());
104  abort();
105  }
106 
107  //- store values
108  m_Niter = Niter;
109  m_Stop_cond = Stop_cond;
110 }
111 
112 
113 //====================================================================
114 void Solver_BiCGStab_DS_L_Cmplx::set_parameters_DS_L(const int N_L, const double Tol_L)
115 {
116  //- print input parameters
117  vout.general(m_vl, " N_L = %d\n", N_L);
118  vout.general(m_vl, " Tol_L = %16.8e\n", Tol_L);
119 
120  //- range check
121  int err = 0;
122  err += ParameterCheck::non_negative(N_L);
123  err += ParameterCheck::square_non_zero(Tol_L);
124 
125  if (err) {
126  vout.crucial(m_vl, "%s: parameter range check failed.\n", class_name.c_str());
127  abort();
128  }
129 
130  //- store values
131  m_N_L = N_L;
132  m_Tol_L = Tol_L;
133 }
134 
135 
136 //====================================================================
138  int& Nconv, double& diff)
139 {
140  //#pragma omp parallel
141  {
142  double bnorm2 = b.norm2();
143  double snorm = 1.0 / bnorm2;
144  int bsize = b.size();
145  double rr;
146 
149 
150 #pragma omp master
151  {
152  vout.paranoiac(m_vl, "%s: starts\n", class_name.c_str());
153  vout.paranoiac(m_vl, " norm of b = %16.8e\n", bnorm2);
154  vout.paranoiac(m_vl, " size of b = %d\n", bsize);
155  }
156 #pragma omp barrier
157 
158 
159  reset_field(b);
160 
161 
162  // Nconv = -1;
163  int Nconv2 = 0;
164  copy(s, b); // s = b;
165 
166  solve_init(b, rr);
167 
168  bool is_converged = false;
169 
170 #pragma omp master
171  vout.detailed(m_vl, " iter: %8d %22.15e\n", 0, rr * snorm);
172 #pragma omp barrier
173 
174 
175  for (int iter = 0; iter < m_Niter; iter++) {
176  if (!is_converged) {
177  solve_step(rr);
178 
179  Nconv2 += 2 * N_L_prev;
180 
181 #pragma omp master
182  {
183  vout.detailed(m_vl, " iter,N_L: %8d %8d\n", Nconv2, N_L_prev);
184  vout.detailed(m_vl, " iter: %8d %22.15e\n", Nconv2, rr * snorm);
185  }
186 #pragma omp barrier
187 
188  if (rr * snorm < m_Stop_cond) {
189  m_fopr->mult(s, x); // s = m_fopr->mult(x);
190  axpy(s, -1.0, b); // s -= b;
191 
192  double diff2 = s.norm2();
194 
196  if (ith == 0) vout.detailed(m_vl, " iter0: %8d %22.15e\n", nth, diff2 * snorm);
197 
198  if (diff2 * snorm < m_Stop_cond) {
199  // NB. Nconv is calculated above.
200  // break;
201  is_converged = true;
202  }
203 
204  copy(s, x); // s = x;
205  solve_init(b, rr);
206 
208  if (ith == 0) vout.detailed(m_vl, " iter1: %8d %22.15e\n", nth, rr * snorm);
209  }
210  }
211  }
212 
213  m_fopr->mult(s, x); // s = m_fopr->mult(x);
214  axpy(s, -1.0, b); // s -= b;
215 
216  copy(xq, x); // xq = x;
217 
218  double diff2 = s.norm2();
219 
220 #pragma omp master
221  {
222  diff = sqrt(diff2);
223  Nconv = Nconv2;
224  }
225 #pragma omp barrier
226 
227  if (diff2 * snorm > m_Stop_cond) {
228 #pragma omp master
229  vout.crucial(m_vl, "%s: not converged.\n", class_name.c_str());
230 #pragma omp barrier
231  abort();
232  }
233  } // end of parallel region
234 }
235 
236 
237 //====================================================================
239 {
240 #pragma omp master
241  {
242  int Nin = b.nin();
243  int Nvol = b.nvol();
244  int Nex = b.nex();
245 
246  if ((s.nin() != Nin) || (s.nvol() != Nvol) || (s.nex() != Nex)) {
247  s.reset(Nin, Nvol, Nex);
248  x.reset(Nin, Nvol, Nex);
249 
250  r_init.reset(Nin, Nvol, Nex);
251 
252  v_tmp.reset(Nin, Nvol, Nex);
253 
254  vout.paranoiac(m_vl, "%s: field size reset.\n", class_name.c_str());
255  }
256 
257  u.resize(m_N_L + 1);
258  r.resize(m_N_L + 1);
259 
260  for (int i = 0; i < m_N_L + 1; ++i) {
261  u[i].reset(Nin, Nvol, Nex);
262  r[i].reset(Nin, Nvol, Nex);
263  }
264  }
265 #pragma omp barrier
266 }
267 
268 
269 //====================================================================
271 {
272  copy(x, s); // x = s;
273 
274  //- r[0] = b - A x_0
275  m_fopr->mult(v_tmp, s); // v_tmp = m_fopr->mult(s);
276  copy(r[0], b); // r[0] = b;
277  axpy(r[0], -1.0, v_tmp); // r[0] -= v_tmp;
278 
279  copy(r_init, r[0]); // r_init = r[0];
280  rr = r[0].norm2(); // rr = r[0] * r[0];
281 
282  u[0].set(0.0); // u[0] = 0.0;
283 
284  // NB. alpha_prev = 0.0 \neq 1.0
285 #pragma omp master
286  {
287  rho_prev = cmplx(-1.0, 0.0);
288  alpha_prev = cmplx(0.0, 0.0);
289 
290  N_L_prev = m_N_L;
291  }
292 #pragma omp barrier
293 }
294 
295 
296 //====================================================================
298 {
299  dcomplex rho_prev2 = rho_prev;
300  dcomplex alpha_prev2 = alpha_prev;
301 
302  int N_L_tmp = 0; // superficial initialization
303  dcomplex c_Rayleigh_prev = cmplx(0.0, 0.0);
304 
305  bool is_converged_L = false;
306 
307 
308  for (int j = 0; j < N_L_prev; ++j) {
309  if (!is_converged_L) {
310  dcomplex rho = dotc(r[j], r_init); // rho = r[j] * r_init;
311  rho = conj(rho);
312 
313  dcomplex beta = alpha_prev2 * (rho / rho_prev2);
314 
315  rho_prev2 = rho;
316 
317  for (int i = 0; i < j + 1; ++i) {
318  aypx(-beta, u[i], r[i]); // u[i] = - beta * u[i] + r[i];
319  }
320 
321  m_fopr->mult(u[j + 1], u[j]); // u[j+1] = m_fopr->mult(u[j]);
322 
323  dcomplex gamma = dotc(u[j + 1], r_init);
324  alpha_prev2 = rho_prev2 / conj(gamma);
325 
326  for (int i = 0; i < j + 1; ++i) {
327  axpy(r[i], -alpha_prev2, u[i + 1]); // r[i] -= alpha_prev * u[i+1];
328  }
329 
330  m_fopr->mult(r[j + 1], r[j]); // r[j+1] = m_fopr->mult(r[j]);
331 
332  axpy(x, alpha_prev2, u[0]); // x += alpha_prev * u[0];
333 
334 
335  //- calculate the Rayleigh quotient for N_L_tmp
336 
337  // dcomplex c_Rayleigh = (r[j] * r[j+1]) / (r[j] * r[j]);
338  double r_tmp = r[j].norm2(); // r_tmp = r[j] * r[j];
339  dcomplex c_Rayleigh = dotc(r[j], r[j + 1]) / r_tmp; // c_Rayleigh = r[j] * r[j+1] / r_tmp;
340 
341  dcomplex c_E = (c_Rayleigh - c_Rayleigh_prev) / c_Rayleigh;
342 
343  c_Rayleigh_prev = c_Rayleigh;
344 
345  N_L_tmp = j + 1;
346 
347  // vout.paranoiac(m_vl, "N_L_tmp,abs(c_E),m_Tol_L = %d %f %f\n",N_L_tmp,abs(c_E),m_Tol_L);
348 
349  if (abs(c_E) < m_Tol_L) {
350  // vout.paranoiac(m_vl, "N_L_tmp = %d\n",N_L_tmp);
351 
352  // break;
353  is_converged_L = true;
354  }
355  }
356  }
357 
358 
359  valarray<double> sigma(m_N_L + 1);
360  valarray<dcomplex> gamma_prime(m_N_L + 1);
361 
362  // NB. tau(m_N_L,m_N_L+1), not (m_N_L+1,m_N_L+1)
363  valarray<dcomplex> tau(m_N_L * (m_N_L + 1));
364  int ij, ji;
365 
366  for (int j = 1; j < N_L_tmp + 1; ++j) {
367  for (int i = 1; i < j; ++i) {
368  ij = index_ij(i, j);
369 
370  dcomplex r_ji = dotc(r[j], r[i]);
371  tau[ij] = conj(r_ji) / sigma[i]; // tau[ij] = (r[j] * r[i]) / sigma[i];
372  axpy(r[j], -tau[ij], r[i]); // r[j] -= tau[ij] * r[i];
373  }
374 
375  sigma[j] = r[j].norm2(); // sigma[j] = r[j] * r[j];
376 
377  dcomplex r_0j = dotc(r[0], r[j]);
378  gamma_prime[j] = conj(r_0j) / sigma[j]; // gamma_prime[j] = (r[0] * r[j]) / sigma[j];
379  }
380 
381 
382  valarray<dcomplex> gamma(m_N_L + 1);
383  dcomplex c_tmp;
384 
385  gamma[N_L_tmp] = gamma_prime[N_L_tmp];
386 
387  for (int j = N_L_tmp - 1; j > 0; --j) {
388  c_tmp = cmplx(0.0, 0.0);
389 
390  for (int i = j + 1; i < N_L_tmp + 1; ++i) {
391  ji = index_ij(j, i);
392  c_tmp += tau[ji] * gamma[i];
393  }
394 
395  gamma[j] = gamma_prime[j] - c_tmp;
396  }
397 
398 
399  // NB. gamma_double_prime(m_N_L), not (m_N_L+1)
400  valarray<dcomplex> gamma_double_prime(m_N_L);
401 
402  for (int j = 1; j < N_L_tmp; ++j) {
403  c_tmp = cmplx(0.0, 0.0);
404 
405  for (int i = j + 1; i < N_L_tmp; ++i) {
406  ji = index_ij(j, i);
407  c_tmp += tau[ji] * gamma[i + 1];
408  }
409 
410  gamma_double_prime[j] = gamma[j + 1] + c_tmp;
411  }
412 
413 
414  axpy(x, gamma[1], r[0]); // x += gamma[ 1] * r[ 0];
415  axpy(r[0], -gamma_prime[N_L_tmp], r[N_L_tmp]); // r[0] -= gamma_prime[N_L_tmp] * r[N_L_tmp];
416  axpy(u[0], -gamma[N_L_tmp], u[N_L_tmp]); // u[0] -= gamma[ N_L_tmp] * u[N_L_tmp];
417 
418  for (int j = 1; j < N_L_tmp; ++j) {
419  axpy(x, gamma_double_prime[j], r[j]); // x += gamma_double_prime[j] * r[j];
420  axpy(r[0], -gamma_prime[j], r[j]); // r[0] -= gamma_prime[ j] * r[j];
421  axpy(u[0], -gamma[j], u[j]); // u[0] -= gamma[ j] * u[j];
422  }
423 
424  rr = r[0].norm2(); // rr = r[0] * r[0];
425 
426  // #pragma omp master
428  if (ith == 0)
429  {
430  rho_prev = rho_prev2;
431  alpha_prev = alpha_prev2;
432  N_L_prev = N_L_tmp;
433  rho_prev *= -gamma_prime[N_L_tmp];
434  }
435 #pragma omp barrier
436 }
437 
438 
439 //====================================================================
440 //============================================================END=====
void set_parameters_DS_L(const int N_L, const double Tol_L)
BridgeIO vout
Definition: bridgeIO.cpp:207
void detailed(const char *format,...)
Definition: bridgeIO.cpp:50
static int get_num_threads()
returns available number of threads.
void Register_string(const string &, const string &)
Definition: parameters.cpp:352
void solve_init(const Field &, double &)
double norm2() const
Definition: field.cpp:469
virtual const Field mult(const Field &)=0
multiplies fermion operator to a given field and returns the resultant field.
BiCGStab(DS_L) algorithm.
void general(const char *format,...)
Definition: bridgeIO.cpp:38
void Register_int(const string &, const int)
Definition: parameters.cpp:331
Container of Field-type object.
Definition: field.h:37
int nvol() const
Definition: field.h:101
Class for parameters.
Definition: parameters.h:40
void copy(Field &y, const Field &x)
copy(y, x): y = x
Definition: field.cpp:409
static int get_thread_id()
returns thread id.
void solve(Field &solution, const Field &source, int &Nconv, double &diff)
int square_non_zero(const double v)
Definition: checker.cpp:41
int nin() const
Definition: field.h:100
dcomplex dotc(const Field &y, const Field &x)
Definition: field.cpp:98
void reset(const int Nin, const int Nvol, const int Nex, const element_type cmpl=COMPLEX)
Definition: field.h:82
static void sync_barrier_all()
barrier among all the threads and nodes.
void aypx(const double a, Field &y, const Field &x)
aypx(y, a, x): y := a * y + x
Definition: field.cpp:489
int nex() const
Definition: field.h:102
void paranoiac(const char *format,...)
Definition: bridgeIO.cpp:62
void axpy(Field &y, const double a, const Field &x)
axpy(y, a, x): y := a * x + y
Definition: field.cpp:193
void crucial(const char *format,...)
Definition: bridgeIO.cpp:26
static const std::string class_name
Base class for linear solver class family.
Definition: solver.h:37
static bool Register(const std::string &realm, const creator_callback &cb)
int non_negative(const int v)
Definition: checker.cpp:21
void Register_double(const string &, const double)
Definition: parameters.cpp:324
void set_parameters(const Parameters &params)
Base class of fermion operator family.
Definition: fopr.h:39
int fetch_double(const string &key, double &val) const
Definition: parameters.cpp:124
string get_string(const string &key) const
Definition: parameters.cpp:85
int fetch_int(const string &key, int &val) const
Definition: parameters.cpp:141
Bridge::VerboseLevel m_vl
Definition: solver.h:56
static VerboseLevel set_verbose_level(const std::string &str)
Definition: bridgeIO.cpp:191
static void assert_single_thread(const std::string &classname)
assert currently running on single thread.
int size() const
Definition: field.h:106