Bridge++  Ver. 2.0.2
fprop_alt_Standard_eo_Richardson-tmpl.h
Go to the documentation of this file.
1 
10 template<typename AFIELD_d, typename AFIELD_f>
12  = "Fprop_alt_Standard_eo_Richardson";
13 //====================================================================
14 template<typename AFIELD_d, typename AFIELD_f>
16  const Parameters& params_solver)
17 {
18  // this constructor assumes that the factories are available.
19  vout.general(m_vl, "\n");
20  vout.general(m_vl, "%s: being setup.\n", class_name.c_str());
21 
22  string fopr_type = params_fopr.get_string("fermion_type");
23  fopr_type += "_eo";
24 
25  m_kernel_d = 0;
26  m_fopr_d = AFopr<AFIELD_d>::New(fopr_type, params_fopr);
27 
28  m_kernel_f = 0;
29  m_fopr_f = AFopr<AFIELD_f>::New(fopr_type, params_fopr);
30 
31  m_dr_smear = 0;
32 
33  // solver parameters for preconditioner solver
34  Parameters params_solver2;
35  int max_iter = params_solver.get_int("maximum_number_of_iteration");
36  int max_restart = params_solver.get_int("maximum_number_of_restart");
37  params_solver2.set_int("maximum_number_of_iteration", max_iter * max_restart);
38  params_solver2.set_int("maximum_number_of_restart", 1);
39  params_solver2.set_double("convergence_criterion_squared", 1.0e-8);
40  string str_vlevel = params_solver.get_string("verbose_level");
41  params_solver2.set_string("verbose_level", str_vlevel);
42  params_solver2.set_string("initial_guess_mode", "ZERO");
43 
44  string solver_type = params_solver.get_string("solver_type");
45  m_solver_prec = ASolver<AFIELD_f>::New(solver_type, m_fopr_f);
46  m_solver_prec->set_parameters(params_solver2);
47 
48  m_precond
49  = new APrecond_Mixedprec<AFIELD_d, AFIELD_f>(m_solver_prec);
50 
51  m_solver
52  = new ASolver_Richardson<AFIELD_d>(m_fopr_d, m_precond);
53  m_solver->set_parameters(params_solver);
54 
55  m_use_DdagD = false;
56  if (solver_type == "CGNR") {
57  m_use_DdagD = true;
58  }
59  reset_performance();
60 
61 
62  int nin = m_fopr_d->field_nin();
63  int nvol2 = m_fopr_d->field_nvol();
64  int nex = m_fopr_d->field_nex();
65  int nvol = 2 * nvol2;
66 
67  m_axq.reset(nin, nvol, nex);
68  m_abq.reset(nin, nvol, nex);
69 
70  m_be.reset(nin, nvol2, nex);
71  m_bo.reset(nin, nvol2, nex);
72  m_xe.reset(nin, nvol2, nex);
73  m_xo.reset(nin, nvol2, nex);
74  m_y1.reset(nin, nvol2, nex);
75  m_y2.reset(nin, nvol2, nex);
76  m_y1f.reset(nin, nvol2, nex);
77  m_y2f.reset(nin, nvol2, nex);
78 
79 
80  vout.general(m_vl, "%s: setup finished.\n", class_name.c_str());
81 }
82 
83 
84 //====================================================================
85 template<typename AFIELD_d, typename AFIELD_f>
87  const Parameters& params_solver,
88  Director_Smear *dr_smear)
89 {
90  // this constructor assumes that the factories are available.
91  vout.general(m_vl, "\n");
92  vout.general(m_vl, "%s: being setup.\n", class_name.c_str());
93 
94  m_dr_smear = dr_smear;
95 
96  string fopr_type = params_fopr.get_string("fermion_type");
97  fopr_type += "_eo";
98 
99  m_kernel_d = AFopr<AFIELD_d>::New(fopr_type, params_fopr);
100  m_fopr_d = new AFopr_Smeared<AFIELD_d>(m_kernel_d, m_dr_smear);
101 
102  m_kernel_f = AFopr<AFIELD_f>::New(fopr_type, params_fopr);
103  m_fopr_f = new AFopr_Smeared<AFIELD_f>(m_kernel_f, m_dr_smear);
104 
105  // solver parameters for preconditioner solver
106  Parameters params_solver2;
107  params_solver2.set_int("maximum_number_of_iteration", 200);
108  params_solver2.set_int("maximum_number_of_restart", 1);
109  params_solver2.set_double("convergence_criterion_squared", 2.5e-8);
110  string str_vlevel = params_solver.get_string("verbose_level");
111  params_solver2.set_string("verbose_level", str_vlevel);
112  params_solver2.set_string("initial_guess_mode", "ZERO");
113 
114  string solver_type = params_solver.get_string("solver_type");
115  m_solver_prec = ASolver<AFIELD_f>::New(solver_type, m_fopr_f);
116  m_solver_prec->set_parameters(params_solver2);
117 
118  m_precond
119  = new APrecond_Mixedprec<AFIELD_d, AFIELD_f>(m_solver_prec);
120 
121  m_solver = new
122  ASolver_Richardson<AFIELD_d>(m_fopr_d, m_precond);
123  m_solver->set_parameters(params_solver);
124 
125  m_use_DdagD = false;
126  if (solver_type == "CGNR") {
127  m_use_DdagD = true;
128  }
129  reset_performance();
130 
131  int nin = m_fopr_d->field_nin();
132  int nvol2 = m_fopr_d->field_nvol();
133  int nex = m_fopr_d->field_nex();
134  int nvol = 2 * nvol2;
135 
136  m_axq.reset(nin, nvol, nex);
137  m_abq.reset(nin, nvol, nex);
138 
139  m_be.reset(nin, nvol2, nex);
140  m_bo.reset(nin, nvol2, nex);
141  m_xe.reset(nin, nvol2, nex);
142  m_xo.reset(nin, nvol2, nex);
143  m_y1.reset(nin, nvol2, nex);
144  m_y2.reset(nin, nvol2, nex);
145  m_y1f.reset(nin, nvol2, nex);
146  m_y2f.reset(nin, nvol2, nex);
147 
148  vout.general(m_vl, "%s: setup finished.\n", class_name.c_str());
149  fflush(0);
150  exit(EXIT_SUCCESS);
151 }
152 
153 
154 //====================================================================
155 template<typename AFIELD_d, typename AFIELD_f>
157 {
158  delete m_solver;
159  delete m_precond;
160  delete m_solver_prec;
161  delete m_fopr_f;
162  delete m_fopr_d;
163  if (m_kernel_d != 0) delete m_kernel_d;
164  if (m_kernel_f != 0) delete m_kernel_f;
165 }
166 
167 
168 //====================================================================
169 template<typename AFIELD_d, typename AFIELD_f>
171 {
172  m_fopr_d->set_config(U);
173  m_fopr_f->set_config(U);
174 }
175 
176 
177 //====================================================================
178 template<typename AFIELD_d, typename AFIELD_f>
180  int& nconv, double& diff)
181 {
182  vout.paranoiac(m_vl, "%s: invert is called.\n", class_name.c_str());
183  vout.paranoiac(m_vl, "mode = %s.\n", m_mode.c_str());
184 
185  if (m_mode == "D") {
186  invert_D(xq, b, nconv, diff);
187  } else if (m_mode == "DdagD") {
188  invert_DdagD(xq, b, nconv, diff);
189  } else {
190  vout.crucial(m_vl, "%s: unsupported mode: %s\n",
191  class_name.c_str(), m_mode.c_str());
192  exit(EXIT_FAILURE);
193  }
194 }
195 
196 
197 //====================================================================
198 template<typename AFIELD_d, typename AFIELD_f>
200  int& nconv, double& diff)
201 {
202  vout.paranoiac(m_vl, "%s: invert is called.\n", class_name.c_str());
203  vout.paranoiac(m_vl, "mode = %s.\n", m_mode.c_str());
204 
205  if (m_mode == "D") {
206  invert_D(xq, b, nconv, diff);
207  } else if (m_mode == "DdagD") {
208  invert_DdagD(xq, b, nconv, diff);
209  } else {
210  vout.crucial(m_vl, "%s: unsupported mode: %s\n",
211  class_name.c_str(), m_mode.c_str());
212  exit(EXIT_FAILURE);
213  }
214 }
215 
216 
217 //====================================================================
218 template<typename AFIELD_d, typename AFIELD_f>
220  Field& xq, const Field& b,
221  int& nconv, double& diff)
222 {
224  vout.paranoiac("invert_D start.\n");
225 #pragma omp parallel
226  {
227  Timer timer;
228  timer.reset();
229  timer.start();
230 
232 
233  if (m_fopr_d->needs_convert()) {
234  vout.detailed(m_vl, "convert required.\n");
235  m_fopr_d->convert(m_abq, b);
236  } else {
237  vout.detailed(m_vl, "convert not required.\n");
238  convert(index_alt, m_abq, b);
239  }
240 
241 #pragma omp barrier
242  timer.stop();
243  invert_D(m_axq, m_abq, nconv, diff);
244  timer.start();
245 #pragma omp barrier
246  if (m_fopr_d->needs_convert()) {
247  m_fopr_d->reverse(xq, m_axq);
248  } else {
249  reverse(index_alt, xq, m_axq);
250  }
251  timer.stop();
252 #pragma omp master
253  {
254  m_elapsed_time += timer.elapsed_sec();
255  }
256  } // omp parallel
257 }
258 
259 
260 //====================================================================
261 template<typename AFIELD_d, typename AFIELD_f>
263  Field& xq, const Field& b,
264  int& nconv, double& diff)
265 {
266 #pragma omp parallel
267  {
268  Timer timer;
269  timer.reset();
270  timer.start();
271 
273  if (m_fopr_d->needs_convert()) {
274  vout.detailed(m_vl, "convert required.\n");
275  m_fopr_d->convert(m_abq, b);
276  } else {
277  vout.detailed(m_vl, "convert not required.\n");
278  convert(index_alt, m_abq, b);
279  }
280 
281 #pragma omp barrier
282  timer.stop();
283  invert_DdagD(m_axq, m_abq, nconv, diff);
284  timer.start();
285 #pragma omp barrier
286  if (m_fopr_d->needs_convert()) {
287  m_fopr_d->reverse(xq, m_axq);
288  } else {
289  reverse(index_alt, xq, m_axq);
290  }
291 #pragma omp master
292  {
293  m_elapsed_time += timer.elapsed_sec();
294  }
295  } // omp parallel
296 }
297 
298 
299 //====================================================================
300 template<typename AFIELD_d, typename AFIELD_f>
302  AFIELD_d& xq, const AFIELD_d& b,
303  int& nconv, double& diff)
304 {
305  // ThreadManager::assert_single_thread(class_name);
306  vout.paranoiac("invert_D start.\n");
307 
308 #pragma omp master
309  {
310  m_timer.reset();
311  m_timer.start();
312  }
313 
316 
317  index_eo.split(m_be, m_bo, b);
318 #pragma omp barrier
319 
320  invert_De(m_xe, m_xo, m_be, m_bo, nconv, diff);
321 
322 #pragma omp barrier
323  index_eo.merge(xq, m_xe, m_xo);
324 
325 #pragma omp master
326  {
327  m_timer.stop();
328  m_elapsed_time += m_timer.elapsed_sec();
329  m_flop_count += m_solver->flop_count();
330  }
331 }
332 
333 
334 //====================================================================
335 template<typename AFIELD_d, typename AFIELD_f>
337  AFIELD_d& xq, const AFIELD_d& b,
338  int& nconv, double& diff)
339 {
341  vout.paranoiac("invert_DdagD start.\n");
342 
343 #pragma omp master
344  {
345  m_timer.reset();
346  m_timer.start();
347  }
348 
350  index_eo.split(m_be, m_bo, b);
351 #pragma omp barrier
352 
353  m_fopr_d->mult_gm5(m_y1, m_be);
354  m_fopr_d->mult_gm5(m_y2, m_bo);
355 #pragma omp barrier
356  int nconv1;
357  double diff1;
358  invert_De(m_xe, m_xo, m_y1, m_y2, nconv1, diff1);
359 #pragma omp barrier
360 
361  nconv = nconv1;
362  diff = diff1;
363 
364  m_fopr_d->mult_gm5(m_y1, m_xe);
365  m_fopr_d->mult_gm5(m_y2, m_xo);
366 
367  invert_De(m_xe, m_xo, m_y1, m_y2, nconv1, diff1);
368 #pragma omp barrier
369  nconv += nconv1;
370  diff += diff1;
371 
372  index_eo.merge(xq, m_xe, m_xo);
373 
374 #pragma omp master
375  {
376  m_timer.stop();
377  m_elapsed_time += m_timer.elapsed_sec();
378  m_flop_count += m_solver->flop_count();
379  }
380 }
381 
382 
383 //====================================================================
384 template<typename AFIELD_d, typename AFIELD_f>
386  AFIELD_d& xe, AFIELD_d& xo,
387  AFIELD_d& be, AFIELD_d& bo,
388  int& nconv, double& diff)
389 {
390  // set even source vector.
391 #pragma omp barrier
392 
393  m_fopr_d->mult(m_y1, bo, "Doo_inv");
394 #pragma omp barrier
395 
396  m_fopr_d->mult(m_y2, m_y1, "Deo");
397 #pragma omp barrier
398 
399  axpy(be, real_t(-1.0), m_y2);
400 #pragma omp barrier
401 
402  m_fopr_d->mult(m_y1, be, "Dee_inv");
403 
404  if (m_use_DdagD) {
405  m_fopr_d->set_mode("Ddag");
406  m_fopr_d->mult(m_y2, m_y1);
407  m_fopr_d->set_mode("DdagD");
408  m_fopr_f->set_mode("DdagD");
409  m_solver->solve(xe, m_y2, nconv, diff);
410  } else {
411  m_fopr_d->set_mode("D");
412  m_fopr_f->set_mode("D");
413  m_solver->solve(xe, m_y1, nconv, diff);
414  }
415 #pragma omp barrier
416 
417  m_fopr_d->mult(m_y1, xe, "Doe");
418 #pragma omp barrier
419 
420  aypx(real_t(-1.0), m_y1, bo);
421 #pragma omp barrier
422 
423  m_fopr_d->mult(xo, m_y1, "Doo_inv");
424 #pragma omp barrier
425 }
426 
427 
428 //====================================================================
429 template<typename AFIELD_d, typename AFIELD_f>
431 {
432  return m_solver->flop_count();
433 }
434 
435 
436 //====================================================================
437 template<typename AFIELD_d, typename AFIELD_f>
439 {
440  m_flop_count = 0.0;
441  m_elapsed_time = 0.0;
442 }
443 
444 
445 //====================================================================
446 template<typename AFIELD_d, typename AFIELD_f>
448 {
449  double flops = m_flop_count / m_elapsed_time;
450  double gflops = flops * 1.0e-9;
451 
452  vout.general(m_vl, "\n");
453  vout.general(m_vl, "%s: solver performance:\n", class_name.c_str());
454  vout.general(m_vl, " Elapsed time = %14.6f sec\n", m_elapsed_time);
455  vout.general(m_vl, " Flop(total) = %18.0f\n", m_flop_count);
456  vout.general(m_vl, " Performance = %11.3f GFlops\n", gflops);
457 }
458 
459 
460 //====================================================================
461 template<typename AFIELD_d, typename AFIELD_f>
463  const std::string mode,
464  const int Nrepeat)
465 {
466  int nin = m_fopr_d->field_nin();
467  int nvol = m_fopr_d->field_nvol();
468  int nex = m_fopr_d->field_nex();
469 
470  unique_ptr<Timer> timer(new Timer);
471 
472  std::string mode_prev_d = m_fopr_d->get_mode();
473  std::string mode_prev_f = m_fopr_f->get_mode();
474 
475  m_fopr_d->set_mode(mode);
476  m_fopr_f->set_mode(mode);
477 
478  {
479  // AFIELD_d axq(nin, nvol, nex), abq(nin, nvol, nex);
480 #pragma omp parallel
481  {
482  m_y1.set(0.0);
483  }
484  m_y1.set(0, 1.0);
485 
486  timer->start();
487 
488 #pragma omp parallel
489  {
490  for (int i = 0; i < Nrepeat; ++i) {
491  m_fopr_d->mult(m_y2, m_y1);
492  m_fopr_d->mult(m_y1, m_y2);
493  }
494  }
495 
496  timer->stop();
497  }
498 
499  double flop_fopr = m_fopr_d->flop_count();
500  double flop_total = flop_fopr * double(2 * Nrepeat);
501 
502  double elapsed_time = timer->elapsed_sec();
503  double flops = flop_total / elapsed_time;
504  double gflops = flops * 1.0e-9;
505 
506  vout.general(m_vl, "\n");
507  vout.general(m_vl, "%s: mult performance:\n", class_name.c_str());
508  vout.general(m_vl, " mult mode = %s\n", mode.c_str());
509  vout.general(m_vl, " Double precision:\n");
510  vout.general(m_vl, " Elapsed time = %14.6f sec\n", elapsed_time);
511  vout.general(m_vl, " Flop(Fopr) = %18.0f\n", flop_fopr);
512  vout.general(m_vl, " Flop(total) = %18.0f\n", flop_total);
513  vout.general(m_vl, " Performance = %11.3f GFlops\n", gflops);
514 
515  {
516  // AFIELD_f axq(nin, nvol, nex), abq(nin, nvol, nex);
517 #pragma omp parallel
518  {
519  m_y1f.set(0.0);
520  }
521  m_y2f.set(0, 1.0);
522 
523  timer->reset();
524  timer->start();
525 
526 #pragma omp parallel
527  {
528  for (int i = 0; i < Nrepeat; ++i) {
529  m_fopr_f->mult(m_y2f, m_y1f);
530  m_fopr_f->mult(m_y1f, m_y2f);
531  }
532  }
533 
534  timer->stop();
535  }
536 
537  flop_fopr = m_fopr_f->flop_count();
538  flop_total = flop_fopr * double(2 * Nrepeat);
539 
540  elapsed_time = timer->elapsed_sec();
541  flops = flop_total / elapsed_time;
542  gflops = flops * 1.0e-9;
543 
544  vout.general(m_vl, " Single precision:\n");
545  vout.general(m_vl, " Elapsed time = %14.6f sec\n", elapsed_time);
546  vout.general(m_vl, " Flop(Fopr) = %18.0f\n", flop_fopr);
547  vout.general(m_vl, " Flop(total) = %18.0f\n", flop_total);
548  vout.general(m_vl, " Performance = %11.3f GFlops\n", gflops);
549 
550  m_fopr_d->set_mode(mode_prev_d);
551  m_fopr_f->set_mode(mode_prev_f);
552 }
553 
554 
555 //============================================================END=====
Parameters::set_string
void set_string(const string &key, const string &value)
Definition: parameters.cpp:39
AFopr
Definition: afopr.h:48
ASolver
Definition: asolver.h:23
Fprop_alt_Standard_eo_Richardson::invert_De
void invert_De(AFIELD_d &, AFIELD_d &, AFIELD_d &, AFIELD_d &, int &, double &)
Definition: fprop_alt_Standard_eo_Richardson-tmpl.h:385
Fprop_alt_Standard_eo_Richardson::set_config
void set_config(Field *)
Definition: fprop_alt_Standard_eo_Richardson-tmpl.h:170
Parameters
Class for parameters.
Definition: parameters.h:46
AIndex_lex
Definition: aindex_lex_base.h:17
Fprop_alt_Standard_eo_Richardson
Get quark propagator for Fopr with lexical site index: alternative version.
Definition: fprop_alt_Standard_eo_Richardson.h:31
Parameters::set_double
void set_double(const string &key, const double value)
Definition: parameters.cpp:33
convert
void convert(INDEX &index, AFIELD &v, const Field &w)
Definition: afield-inc.h:129
AField
Definition: afield_base.h:16
Parameters::get_int
int get_int(const string &key) const
Definition: parameters.cpp:192
Bridge::BridgeIO::detailed
void detailed(const char *format,...)
Definition: bridgeIO.cpp:219
Fprop_alt_Standard_eo_Richardson::tidyup
void tidyup()
Definition: fprop_alt_Standard_eo_Richardson-tmpl.h:156
aypx
void aypx(const double a, Field &y, const Field &x)
aypx(y, a, x): y := a * y + x
Definition: field.cpp:509
Fprop_alt_Standard_eo_Richardson::init
void init(const Parameters &params_fopr, const Parameters &params_solver)
Definition: fprop_alt_Standard_eo_Richardson-tmpl.h:15
axpy
void axpy(Field &y, const double a, const Field &x)
axpy(y, a, x): y := a * x + y
Definition: field.cpp:380
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
Fprop_alt_Standard_eo_Richardson::report_performance
void report_performance()
Definition: fprop_alt_Standard_eo_Richardson-tmpl.h:447
Timer
Definition: timer.h:31
Bridge::BridgeIO::paranoiac
void paranoiac(const char *format,...)
Definition: bridgeIO.cpp:238
Timer::start
void start()
Definition: timer.cpp:44
Fprop_alt_Standard_eo_Richardson::invert_DdagD
void invert_DdagD(Field &, const Field &, int &, double &)
Definition: fprop_alt_Standard_eo_Richardson-tmpl.h:262
Fprop_alt_Standard_eo_Richardson::mult_performance
void mult_performance(const std::string mode, const int Nrepeat)
Definition: fprop_alt_Standard_eo_Richardson-tmpl.h:462
AFopr_Smeared
smeared fermion operator: alternative version.
Definition: afopr_Smeared.h:41
Director_Smear
Manager of smeared configurations.
Definition: director_Smear.h:39
Fprop_alt_Standard_eo_Richardson::reset_performance
void reset_performance()
Definition: fprop_alt_Standard_eo_Richardson-tmpl.h:438
Fprop_alt_Standard_eo_Richardson::invert_D
void invert_D(Field &, const Field &, int &, double &)
Definition: fprop_alt_Standard_eo_Richardson-tmpl.h:219
APrecond_Mixedprec
Definition: aprecond_Mixedprec.h:28
reverse
void reverse(INDEX &index, Field &v, const AFIELD &w)
Definition: afield-inc.h:159
Fprop_alt_Standard_eo_Richardson::invert
void invert(Field &, const Field &, int &, double &)
invert accordingly to the mode. [22 Sep 2018 H.Matsufuru]
Definition: fprop_alt_Standard_eo_Richardson-tmpl.h:179
Parameters::set_int
void set_int(const string &key, const int value)
Definition: parameters.cpp:36
Parameters::get_string
string get_string(const string &key) const
Definition: parameters.cpp:221
Bridge::BridgeIO::crucial
void crucial(const char *format,...)
Definition: bridgeIO.cpp:180
Fprop_alt_Standard_eo_Richardson::flop_count
double flop_count()
Definition: fprop_alt_Standard_eo_Richardson-tmpl.h:430
Field
Container of Field-type object.
Definition: field.h:46
Timer::elapsed_sec
double elapsed_sec() const
Definition: timer.cpp:107
ASolver_Richardson
Definition: asolver_Richardson.h:30
Bridge::BridgeIO::general
void general(const char *format,...)
Definition: bridgeIO.cpp:200
Timer::stop
void stop()
Definition: timer.cpp:69
ThreadManager::assert_single_thread
static void assert_single_thread(const std::string &class_name)
assert currently running on single thread.
Definition: threadManager.cpp:372
Bridge::vout
BridgeIO vout
Definition: bridgeIO.cpp:512
AIndex_eo
Definition: aindex_eo_base.h:17
Timer::reset
void reset()
Definition: timer.cpp:97