Bridge++  Ver. 2.0.3
fprop_alt_Standard_eo_Mixedprec-tmpl.h
Go to the documentation of this file.
1 
10 template<typename AFIELD_d, typename AFIELD_f>
12  = "Fprop_alt_Standard_eo_Mixedprec";
13 //====================================================================
14 template<typename AFIELD_d, typename AFIELD_f>
16  const Parameters& params_solver)
17 {
18  // this constructor assumes that the factories are available.
19  vout.general(m_vl, "\n");
20  vout.general(m_vl, "%s: being setup.\n", class_name.c_str());
21 
22  string fopr_type = params_fopr.get_string("fermion_type");
23  fopr_type += "_eo";
24 
25  m_kernel_d = 0;
26  m_fopr_d = AFopr<AFIELD_d>::New(fopr_type, params_fopr);
27 
28  m_kernel_f = 0;
29  m_fopr_f = AFopr<AFIELD_f>::New(fopr_type, params_fopr);
30 
31  // solver parameters for preconditioner solver
32  Parameters params_solver2;
33  params_solver2.set_int("maximum_number_of_iteration", 200);
34  params_solver2.set_int("maximum_number_of_restart", 1);
35  params_solver2.set_double("convergence_criterion_squared", 1.0e-8);
36  //params_solver2.set_double("convergence_criterion_squared", 1.0e-14);
37  string str_vlevel = params_solver.get_string("verbose_level");
38  params_solver2.set_string("verbose_level", str_vlevel);
39 
40  string solver_type = params_solver.get_string("solver_type");
41  m_solver_prec = ASolver<AFIELD_f>::New(solver_type, m_fopr_f);
42  m_solver_prec->set_parameters(params_solver2);
43 
44  m_precond
45  = new APrecond_Mixedprec<AFIELD_d, AFIELD_f>(m_solver_prec);
46 
47  m_solver
48  = new ASolver_FBiCGStab<AFIELD_d>(m_fopr_d, m_precond);
49  // ASolver_BiCGStab_Precond<AFIELD_d>(m_fopr_d, m_precond);
50 
51  m_solver->set_parameters(params_solver);
52 
53  reset_performance();
54 
55  vout.general(m_vl, "%s: setup finished.\n", class_name.c_str());
56 }
57 
58 
59 //====================================================================
60 template<typename AFIELD_d, typename AFIELD_f>
62  const Parameters& params_solver,
63  Director_Smear *dr_smear)
64 {
65  // this constructor assumes that the factories are available.
66  vout.general(m_vl, "\n");
67  vout.general(m_vl, "%s: being setup.\n", class_name.c_str());
68 
69  m_dr_smear = dr_smear;
70 
71  string fopr_type = params_fopr.get_string("fermion_type");
72  fopr_type += "_eo";
73 
74  m_kernel_d = AFopr<AFIELD_d>::New(fopr_type, params_fopr);
75  m_fopr_d = new AFopr_Smeared<AFIELD_d>(m_kernel_d, m_dr_smear);
76 
77  m_kernel_f = AFopr<AFIELD_f>::New(fopr_type, params_fopr);
78  m_fopr_f = new AFopr_Smeared<AFIELD_f>(m_kernel_f, m_dr_smear);
79 
80  // solver parameters for preconditioner solver
81  Parameters params_solver2;
82  params_solver2.set_int("maximum_number_of_iteration", 200);
83  params_solver2.set_int("maximum_number_of_restart", 1);
84  params_solver2.set_double("convergence_criterion_squared", 1.0e-8);
85  string str_vlevel = params_solver.get_string("verbose_level");
86  params_solver2.set_string("verbose_level", str_vlevel);
87 
88  string solver_type = params_solver.get_string("solver_type");
89  m_solver_prec = ASolver<AFIELD_f>::New(solver_type, m_fopr_f);
90  m_solver_prec->set_parameters(params_solver2);
91 
92  m_precond
93  = new APrecond_Mixedprec<AFIELD_d, AFIELD_f>(m_solver_prec);
94 
95  m_solver = new
96  ASolver_BiCGStab_Precond<AFIELD_d>(m_fopr_d, m_precond);
97  m_solver->set_parameters(params_solver);
98 
99  reset_performance();
100 
101  vout.general(m_vl, "%s: setup finished.\n", class_name.c_str());
102 }
103 
104 
105 //====================================================================
106 template<typename AFIELD_d, typename AFIELD_f>
108 {
109  delete m_solver;
110  delete m_precond;
111  delete m_solver_prec;
112  delete m_fopr_f;
113  delete m_fopr_d;
114  if (m_kernel_d != 0) delete m_kernel_d;
115  if (m_kernel_f != 0) delete m_kernel_f;
116 }
117 
118 
119 //====================================================================
120 template<typename AFIELD_d, typename AFIELD_f>
122 {
123  m_fopr_d->set_config(U);
124  m_fopr_f->set_config(U);
125 }
126 
127 
128 //====================================================================
129 template<typename AFIELD_d, typename AFIELD_f>
131  int& nconv, double& diff)
132 {
133  vout.paranoiac(m_vl, "%s: invert is called.\n", class_name.c_str());
134  vout.paranoiac(m_vl, "mode = %s.\n", m_mode.c_str());
135 
136  if (m_mode == "D") {
137  invert_D(xq, b, nconv, diff);
138  } else if (m_mode == "DdagD") {
139  invert_DdagD(xq, b, nconv, diff);
140  } else {
141  vout.crucial(m_vl, "%s: unsupported mode: %s\n",
142  class_name.c_str(), m_mode.c_str());
143  exit(EXIT_FAILURE);
144  }
145 }
146 
147 
148 //====================================================================
149 template<typename AFIELD_d, typename AFIELD_f>
151  int& nconv, double& diff)
152 {
153  vout.paranoiac(m_vl, "%s: invert is called.\n", class_name.c_str());
154  vout.paranoiac(m_vl, "mode = %s.\n", m_mode.c_str());
155 
156  if (m_mode == "D") {
157  invert_D(xq, b, nconv, diff);
158  } else if (m_mode == "DdagD") {
159  invert_DdagD(xq, b, nconv, diff);
160  } else {
161  vout.crucial(m_vl, "%s: unsupported mode: %s\n",
162  class_name.c_str(), m_mode.c_str());
163  exit(EXIT_FAILURE);
164  }
165 }
166 
167 
168 //====================================================================
169 template<typename AFIELD_d, typename AFIELD_f>
171  Field& xq, const Field& b,
172  int& nconv, double& diff)
173 {
175  vout.paranoiac("invert_D start.\n");
176 
177  Timer timer;
178  timer.reset();
179  timer.start();
180 
181  int nin = m_fopr_d->field_nin();
182  int nvol2 = m_fopr_d->field_nvol();
183  int nex = m_fopr_d->field_nex();
184  int nvol = 2 * nvol2;
185 
186  AFIELD_d axq(nin, nvol, nex);
187  AFIELD_d abq(nin, nvol, nex);
188 
190 
191 #pragma omp parallel
192  {
193  if (m_fopr_d->needs_convert()) {
194  m_fopr_d->convert(abq, b);
195  } else {
196  convert(index_alt, abq, b);
197  }
198  }
199 
200  timer.stop();
201  invert_D(axq, abq, nconv, diff);
202  timer.start();
203 #pragma omp parallel
204  {
205  if (m_fopr_d->needs_convert()) {
206  m_fopr_d->reverse(xq, axq);
207  } else {
208  reverse(index_alt, xq, axq);
209  }
210  }
211 
212 
213  timer.stop();
214 
215  m_elapsed_time += timer.elapsed_sec();
216 }
217 
218 
219 //====================================================================
220 template<typename AFIELD_d, typename AFIELD_f>
222  Field& xq, const Field& b,
223  int& nconv, double& diff)
224 {
225  int nin = m_fopr_d->field_nin();
226  int nvol2 = m_fopr_d->field_nvol();
227  int nex = m_fopr_d->field_nex();
228  int nvol = 2 * nvol2;
229 
230  AFIELD_d axq(nin, nvol, nex);
231  AFIELD_d abq(nin, nvol, nex);
232 
234 
235 #pragma omp parallel
236  {
237  if (m_fopr_d->needs_convert()) {
238  m_fopr_d->convert(abq, b);
239  } else {
240  convert(index_alt, abq, b);
241  }
242  }
243 
244 
245  invert_DdagD(axq, abq, nconv, diff);
246 
247 #pragma omp parallel
248  {
249  if (m_fopr_d->needs_convert()) {
250  m_fopr_d->reverse(xq, axq);
251  } else {
252  reverse(index_alt, xq, axq);
253  }
254  }
255 }
256 
257 
258 //====================================================================
259 template<typename AFIELD_d, typename AFIELD_f>
261  AFIELD_d& xq, const AFIELD_d& b,
262  int& nconv, double& diff)
263 {
265  vout.paranoiac("invert_D start.\n");
266 
267  m_timer.reset();
268  m_timer.start();
269 
270  int nin = m_fopr_d->field_nin();
271  int nvol2 = m_fopr_d->field_nvol();
272  int nex = m_fopr_d->field_nex();
273  int nvol = 2 * nvol2;
274 
275  AFIELD_d be(nin, nvol2, nex), bo(nin, nvol2, nex);
276  AFIELD_d xe(nin, nvol2, nex), xo(nin, nvol2, nex);
277  AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
278 
281 #pragma omp parallel
282  {
283  index_eo.split(be, bo, b);
284  }
285 
286  invert_De(xe, xo, be, bo, nconv, diff);
287 
288 #pragma omp parallel
289  {
290  index_eo.merge(xq, xe, xo);
291  }
292 
293  m_timer.stop();
294  m_elapsed_time += m_timer.elapsed_sec();
295  m_flop_count += m_solver->flop_count();
296 }
297 
298 
299 //====================================================================
300 template<typename AFIELD_d, typename AFIELD_f>
302  AFIELD_d& xq, const AFIELD_d& b,
303  int& nconv, double& diff)
304 {
306  vout.paranoiac("invert_DdagD start.\n");
307 
308  m_timer.reset();
309  m_timer.start();
310 
311  int nin = m_fopr_d->field_nin();
312  int nvol2 = m_fopr_d->field_nvol();
313  int nex = m_fopr_d->field_nex();
314 
315  AFIELD_d be(nin, nvol2, nex), bo(nin, nvol2, nex);
316  AFIELD_d xe(nin, nvol2, nex), xo(nin, nvol2, nex);
317  AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
318 
320 
321 #pragma omp parallel
322  {
323  index_eo.split(be, bo, b);
324  }
325 
326  int nconv1;
327  double diff1;
328  invert_De_dag(y1, y2, be, bo, nconv1, diff1);
329 
330  nconv = nconv1;
331  diff = diff1;
332 
333  invert_De(xe, xo, y1, y2, nconv1, diff1);
334 
335  nconv += nconv1;
336  diff += diff1;
337 
338 #pragma omp parallel
339  {
340  index_eo.merge(xq, xe, xo);
341  }
342 
343  m_timer.stop();
344  m_elapsed_time += m_timer.elapsed_sec();
345  m_flop_count += m_solver->flop_count();
346 }
347 
348 
349 //====================================================================
350 template<typename AFIELD_d, typename AFIELD_f>
352  AFIELD_d& xe, AFIELD_d& xo,
353  AFIELD_d& be, AFIELD_d& bo,
354  int& nconv, double& diff)
355 {
356  int nin = m_fopr_d->field_nin();
357  int nvol2 = m_fopr_d->field_nvol();
358  int nex = m_fopr_d->field_nex();
359  int nvol = 2 * nvol2;
360 
361  AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
362 
363  // set even source vector.
364 #pragma omp parallel
365  {
366  m_fopr_d->mult(y1, bo, "Doo_inv");
367 #pragma omp barrier
368 
369  m_fopr_d->mult(y2, y1, "Deo");
370 #pragma omp barrier
371 
372  axpy(be, real_t(-1.0), y2);
373 #pragma omp barrier
374 
375  m_fopr_d->mult(y1, be, "Dee_inv");
376  }
377 
378  m_fopr_d->set_mode("D");
379  m_fopr_f->set_mode("D");
380 
381 #pragma omp parallel
382  {
383  m_solver->solve(xe, y1, nconv, diff);
384 #pragma omp barrier
385 
386  m_fopr_d->mult(y1, xe, "Doe");
387 #pragma omp barrier
388 
389  aypx(real_t(-1.0), y1, bo);
390 #pragma omp barrier
391 
392  m_fopr_d->mult(xo, y1, "Doo_inv");
393 #pragma omp barrier
394  }
395 }
396 
397 
398 //====================================================================
399 template<typename AFIELD_d, typename AFIELD_f>
401  AFIELD_d& xe, AFIELD_d& xo,
402  AFIELD_d& be, AFIELD_d& bo,
403  int& nconv, double& diff)
404 {
405  int nin = m_fopr_d->field_nin();
406  int nvol2 = m_fopr_d->field_nvol();
407  int nex = m_fopr_d->field_nex();
408  int nvol = 2 * nvol2;
409 
410  AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
411 
412  vout.detailed(m_vl, "invert_De_dag(AFIELD)(6arg) start.\n");
413 
414 #pragma omp parallel
415  {
416  // set even source vector.
417  m_fopr_d->mult_dag(y1, bo, "Doo_inv");
418 #pragma omp barrier
419  m_fopr_d->mult_dag(y2, y1, "Deo");
420 
421 #pragma omp barrier
422  axpy(be, real_t(-1.0), y2);
423 
424  real_t diff2;
425  m_fopr_d->set_mode("Ddag");
426  m_fopr_f->set_mode("Ddag");
427  m_solver->solve(y2, be, nconv, diff2);
428 #pragma omp barrier
429 
430  m_fopr_d->mult_dag(xe, y2, "Dee_inv");
431 #pragma omp barrier
432 
433  m_fopr_d->mult_dag(y1, xe, "Doe");
434 #pragma omp barrier
435 
436  aypx(real_t(-1.0), y1, bo);
437 #pragma omp barrier
438 
439  m_fopr_d->mult_dag(xo, y1, "Doo_inv");
440 
441 #pragma omp master
442  {
443  diff = double(diff2);
444  }
445  }
446 
447  vout.detailed(m_vl, "diff(invert_De_dag) = %e\n", diff);
448 
449 }
450 
451 
452 //====================================================================
453 template<typename AFIELD_d, typename AFIELD_f>
455 {
456  return m_solver->flop_count();
457 }
458 
459 
460 //====================================================================
461 template<typename AFIELD_d, typename AFIELD_f>
463 {
464  m_flop_count = 0.0;
465  m_elapsed_time = 0.0;
466 }
467 
468 
469 //====================================================================
470 template<typename AFIELD_d, typename AFIELD_f>
472 {
473  double flops = m_flop_count / m_elapsed_time;
474  double gflops = flops * 1.0e-9;
475 
476  vout.general(m_vl, "\n");
477  vout.general(m_vl, "%s: solver performance:\n", class_name.c_str());
478  vout.general(m_vl, " Elapsed time = %14.6f sec\n", m_elapsed_time);
479  vout.general(m_vl, " Flop(total) = %18.0f\n", m_flop_count);
480  vout.general(m_vl, " Performance = %11.3f GFlops\n", gflops);
481 }
482 
483 
484 //====================================================================
485 template<typename AFIELD_d, typename AFIELD_f>
487  const std::string mode,
488  const int Nrepeat)
489 {
490  int nin = m_fopr_d->field_nin();
491  int nvol = m_fopr_d->field_nvol();
492  int nex = m_fopr_d->field_nex();
493 
494  unique_ptr<Timer> timer(new Timer);
495 
496  std::string mode_prev_d = m_fopr_d->get_mode();
497  std::string mode_prev_f = m_fopr_f->get_mode();
498 
499  m_fopr_d->set_mode(mode);
500  m_fopr_f->set_mode(mode);
501 
502  {
503  AFIELD_d axq(nin, nvol, nex), abq(nin, nvol, nex);
504  abq.set(0.0);
505  abq.set(0, 1.0);
506 
507  timer->start();
508 
509 #pragma omp parallel
510  {
511  for (int i = 0; i < Nrepeat; ++i) {
512  m_fopr_d->mult(axq, abq);
513  m_fopr_d->mult(abq, axq);
514  }
515  }
516 
517  timer->stop();
518  }
519 
520  double flop_fopr = m_fopr_d->flop_count();
521  double flop_total = flop_fopr * double(2 * Nrepeat);
522 
523  double elapsed_time = timer->elapsed_sec();
524  double flops = flop_total / elapsed_time;
525  double gflops = flops * 1.0e-9;
526 
527  vout.general(m_vl, "\n");
528  vout.general(m_vl, "%s: mult performance:\n", class_name.c_str());
529  vout.general(m_vl, " mult mode = %s\n", mode.c_str());
530  vout.general(m_vl, " Double precision:\n");
531  vout.general(m_vl, " Elapsed time = %14.6f sec\n", elapsed_time);
532  vout.general(m_vl, " Flop(Fopr) = %18.0f\n", flop_fopr);
533  vout.general(m_vl, " Flop(total) = %18.0f\n", flop_total);
534  vout.general(m_vl, " Performance = %11.3f GFlops\n", gflops);
535 
536  {
537  AFIELD_f axq(nin, nvol, nex), abq(nin, nvol, nex);
538  abq.set(0.0);
539  abq.set(0, 1.0);
540 
541  timer->reset();
542  timer->start();
543 
544 #pragma omp parallel
545  {
546  for (int i = 0; i < Nrepeat; ++i) {
547  m_fopr_f->mult(axq, abq);
548  m_fopr_f->mult(abq, axq);
549  }
550  }
551 
552  timer->stop();
553  }
554 
555  flop_fopr = m_fopr_f->flop_count();
556  flop_total = flop_fopr * double(2 * Nrepeat);
557 
558  elapsed_time = timer->elapsed_sec();
559  flops = flop_total / elapsed_time;
560  gflops = flops * 1.0e-9;
561 
562  vout.general(m_vl, " Single precision:\n");
563  vout.general(m_vl, " Elapsed time = %14.6f sec\n", elapsed_time);
564  vout.general(m_vl, " Flop(Fopr) = %18.0f\n", flop_fopr);
565  vout.general(m_vl, " Flop(total) = %18.0f\n", flop_total);
566  vout.general(m_vl, " Performance = %11.3f GFlops\n", gflops);
567 
568  m_fopr_d->set_mode(mode_prev_d);
569  m_fopr_f->set_mode(mode_prev_f);
570 }
571 
572 
573 //============================================================END=====
Fprop_alt_Standard_eo_Mixedprec::invert_De
void invert_De(AFIELD_d &, AFIELD_d &, AFIELD_d &, AFIELD_d &, int &, double &)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:351
Parameters::set_string
void set_string(const string &key, const string &value)
Definition: parameters.cpp:39
AFopr
Definition: afopr.h:48
ASolver
Definition: asolver.h:23
ASolver_FBiCGStab
Definition: asolver_FBiCGStab.h:32
Fprop_alt_Standard_eo_Mixedprec::invert_DdagD
void invert_DdagD(Field &, const Field &, int &, double &)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:221
Fprop_alt_Standard_eo_Mixedprec::init
void init(const Parameters &params_fopr, const Parameters &params_solver)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:15
Parameters
Class for parameters.
Definition: parameters.h:46
AIndex_lex
Definition: aindex_lex_base.h:17
Parameters::set_double
void set_double(const string &key, const double value)
Definition: parameters.cpp:33
convert
void convert(INDEX &index, AFIELD &v, const Field &w)
Definition: afield-inc.h:129
AField< double, QXS >
Bridge::BridgeIO::detailed
void detailed(const char *format,...)
Definition: bridgeIO.cpp:219
aypx
void aypx(const double a, Field &y, const Field &x)
aypx(y, a, x): y := a * y + x
Definition: field.cpp:509
axpy
void axpy(Field &y, const double a, const Field &x)
axpy(y, a, x): y := a * x + y
Definition: field.cpp:380
Fprop_alt_Standard_eo_Mixedprec::invert_De_dag
void invert_De_dag(AFIELD_d &, AFIELD_d &, AFIELD_d &, AFIELD_d &, int &, double &)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:400
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
Fprop_alt_Standard_eo_Mixedprec
Get quark propagator for Fopr with lexical site index: alternative version.
Definition: fprop_alt_Standard_eo_Mixedprec.h:31
Timer
Definition: timer.h:31
Bridge::BridgeIO::paranoiac
void paranoiac(const char *format,...)
Definition: bridgeIO.cpp:238
Fprop_alt_Standard_eo_Mixedprec::reset_performance
void reset_performance()
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:462
Fprop_alt_Standard_eo_Mixedprec::invert
void invert(Field &, const Field &, int &, double &)
invert accordingly to the mode. [22 Sep 2018 H.Matsufuru]
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:130
Timer::start
void start()
Definition: timer.cpp:44
Fprop_alt_Standard_eo_Mixedprec::real_t
AFIELD_d::real_t real_t
Definition: fprop_alt_Standard_eo_Mixedprec.h:35
Fprop_alt_Standard_eo_Mixedprec::report_performance
void report_performance()
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:471
AFopr_Smeared
smeared fermion operator: alternative version.
Definition: afopr_Smeared.h:41
Director_Smear
Manager of smeared configurations.
Definition: director_Smear.h:39
APrecond_Mixedprec
Definition: aprecond_Mixedprec.h:28
reverse
void reverse(INDEX &index, Field &v, const AFIELD &w)
Definition: afield-inc.h:159
Fprop_alt_Standard_eo_Mixedprec::set_config
void set_config(Field *)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:121
ASolver_BiCGStab_Precond
Definition: asolver_BiCGStab_Precond.h:24
Fprop_alt_Standard_eo_Mixedprec::tidyup
void tidyup()
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:107
Parameters::set_int
void set_int(const string &key, const int value)
Definition: parameters.cpp:36
Fprop_alt_Standard_eo_Mixedprec::mult_performance
void mult_performance(const std::string mode, const int Nrepeat)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:486
Parameters::get_string
string get_string(const string &key) const
Definition: parameters.cpp:221
Bridge::BridgeIO::crucial
void crucial(const char *format,...)
Definition: bridgeIO.cpp:180
Field
Container of Field-type object.
Definition: field.h:46
Timer::elapsed_sec
double elapsed_sec() const
Definition: timer.cpp:107
Fprop_alt_Standard_eo_Mixedprec::invert_D
void invert_D(Field &, const Field &, int &, double &)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:170
Bridge::BridgeIO::general
void general(const char *format,...)
Definition: bridgeIO.cpp:200
Fprop_alt_Standard_eo_Mixedprec::flop_count
double flop_count()
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:454
Timer::stop
void stop()
Definition: timer.cpp:69
ThreadManager::assert_single_thread
static void assert_single_thread(const std::string &class_name)
assert currently running on single thread.
Definition: threadManager.cpp:372
Bridge::vout
BridgeIO vout
Definition: bridgeIO.cpp:512
AIndex_eo
Definition: aindex_eo_base.h:17
Timer::reset
void reset()
Definition: timer.cpp:97