Bridge++  Ver. 2.0.2
fprop_alt_Standard_eo_Mixedprec-tmpl.h
Go to the documentation of this file.
1 
10 template<typename AFIELD_d, typename AFIELD_f>
12  = "Fprop_alt_Standard_eo_Mixedprec";
13 //====================================================================
14 template<typename AFIELD_d, typename AFIELD_f>
16  const Parameters& params_solver)
17 {
18  // this constructor assumes that the factories are available.
19  vout.general(m_vl, "\n");
20  vout.general(m_vl, "%s: being setup.\n", class_name.c_str());
21 
22  string fopr_type = params_fopr.get_string("fermion_type");
23  fopr_type += "_eo";
24 
25  m_kernel_d = 0;
26  m_fopr_d = AFopr<AFIELD_d>::New(fopr_type, params_fopr);
27 
28  m_kernel_f = 0;
29  m_fopr_f = AFopr<AFIELD_f>::New(fopr_type, params_fopr);
30 
31  // solver parameters for preconditioner solver
32  Parameters params_solver2;
33  params_solver2.set_int("maximum_number_of_iteration", 200);
34  params_solver2.set_int("maximum_number_of_restart", 1);
35  params_solver2.set_double("convergence_criterion_squared", 1.0e-8);
36  //params_solver2.set_double("convergence_criterion_squared", 1.0e-14);
37  string str_vlevel = params_solver.get_string("verbose_level");
38  params_solver2.set_string("verbose_level", str_vlevel);
39 
40  string solver_type = params_solver.get_string("solver_type");
41  m_solver_prec = ASolver<AFIELD_f>::New(solver_type, m_fopr_f);
42  m_solver_prec->set_parameters(params_solver2);
43 
44  m_precond
45  = new APrecond_Mixedprec<AFIELD_d, AFIELD_f>(m_solver_prec);
46 
47  m_solver
48  = new ASolver_FBiCGStab<AFIELD_d>(m_fopr_d, m_precond);
49  // ASolver_BiCGStab_Precond<AFIELD_d>(m_fopr_d, m_precond);
50 
51  m_solver->set_parameters(params_solver);
52 
53  reset_performance();
54 
55  vout.general(m_vl, "%s: setup finished.\n", class_name.c_str());
56 }
57 
58 
59 //====================================================================
60 template<typename AFIELD_d, typename AFIELD_f>
62  const Parameters& params_solver,
63  Director_Smear *dr_smear)
64 {
65  // this constructor assumes that the factories are available.
66  vout.general(m_vl, "\n");
67  vout.general(m_vl, "%s: being setup.\n", class_name.c_str());
68 
69  m_dr_smear = dr_smear;
70 
71  string fopr_type = params_fopr.get_string("fermion_type");
72  fopr_type += "_eo";
73 
74  m_kernel_d = AFopr<AFIELD_d>::New(fopr_type, params_fopr);
75  m_fopr_d = new AFopr_Smeared<AFIELD_d>(m_kernel_d, m_dr_smear);
76 
77  m_kernel_f = AFopr<AFIELD_f>::New(fopr_type, params_fopr);
78  m_fopr_f = new AFopr_Smeared<AFIELD_f>(m_kernel_f, m_dr_smear);
79 
80  // solver parameters for preconditioner solver
81  Parameters params_solver2;
82  params_solver2.set_int("maximum_number_of_iteration", 200);
83  params_solver2.set_int("maximum_number_of_restart", 1);
84  params_solver2.set_double("convergence_criterion_squared", 1.0e-8);
85  string str_vlevel = params_solver.get_string("verbose_level");
86  params_solver2.set_string("verbose_level", str_vlevel);
87 
88  string solver_type = params_solver.get_string("solver_type");
89  m_solver_prec = ASolver<AFIELD_f>::New(solver_type, m_fopr_f);
90  m_solver_prec->set_parameters(params_solver2);
91 
92  m_precond
93  = new APrecond_Mixedprec<AFIELD_d, AFIELD_f>(m_solver_prec);
94 
95  m_solver = new
96  ASolver_BiCGStab_Precond<AFIELD_d>(m_fopr_d, m_precond);
97  m_solver->set_parameters(params_solver);
98 
99  reset_performance();
100 
101  vout.general(m_vl, "%s: setup finished.\n", class_name.c_str());
102 }
103 
104 
105 //====================================================================
106 template<typename AFIELD_d, typename AFIELD_f>
108 {
109  delete m_solver;
110  delete m_precond;
111  delete m_solver_prec;
112  delete m_fopr_f;
113  delete m_fopr_d;
114  if (m_kernel_d != 0) delete m_kernel_d;
115  if (m_kernel_f != 0) delete m_kernel_f;
116 }
117 
118 
119 //====================================================================
120 template<typename AFIELD_d, typename AFIELD_f>
122 {
123  m_fopr_d->set_config(U);
124  m_fopr_f->set_config(U);
125 }
126 
127 
128 //====================================================================
129 template<typename AFIELD_d, typename AFIELD_f>
131  int& nconv, double& diff)
132 {
133  vout.paranoiac(m_vl, "%s: invert is called.\n", class_name.c_str());
134  vout.paranoiac(m_vl, "mode = %s.\n", m_mode.c_str());
135 
136  if (m_mode == "D") {
137  invert_D(xq, b, nconv, diff);
138  } else if (m_mode == "DdagD") {
139  invert_DdagD(xq, b, nconv, diff);
140  } else {
141  vout.crucial(m_vl, "%s: unsupported mode: %s\n",
142  class_name.c_str(), m_mode.c_str());
143  exit(EXIT_FAILURE);
144  }
145 }
146 
147 
148 //====================================================================
149 template<typename AFIELD_d, typename AFIELD_f>
151  int& nconv, double& diff)
152 {
153  vout.paranoiac(m_vl, "%s: invert is called.\n", class_name.c_str());
154  vout.paranoiac(m_vl, "mode = %s.\n", m_mode.c_str());
155 
156  if (m_mode == "D") {
157  invert_D(xq, b, nconv, diff);
158  } else if (m_mode == "DdagD") {
159  invert_DdagD(xq, b, nconv, diff);
160  } else {
161  vout.crucial(m_vl, "%s: unsupported mode: %s\n",
162  class_name.c_str(), m_mode.c_str());
163  exit(EXIT_FAILURE);
164  }
165 }
166 
167 
168 //====================================================================
169 template<typename AFIELD_d, typename AFIELD_f>
171  Field& xq, const Field& b,
172  int& nconv, double& diff)
173 {
175  vout.paranoiac("invert_D start.\n");
176 
177  Timer timer;
178  timer.reset();
179  timer.start();
180 
181  int nin = m_fopr_d->field_nin();
182  int nvol2 = m_fopr_d->field_nvol();
183  int nex = m_fopr_d->field_nex();
184  int nvol = 2 * nvol2;
185 
186  AFIELD_d axq(nin, nvol, nex);
187  AFIELD_d abq(nin, nvol, nex);
188 
190 
191 #pragma omp parallel
192  {
193  if (m_fopr_d->needs_convert()) {
194  m_fopr_d->convert(abq, b);
195  } else {
196  convert(index_alt, abq, b);
197  }
198  }
199 
200  timer.stop();
201  invert_D(axq, abq, nconv, diff);
202  timer.start();
203 #pragma omp parallel
204  {
205  if (m_fopr_d->needs_convert()) {
206  m_fopr_d->reverse(xq, axq);
207  } else {
208  reverse(index_alt, xq, axq);
209  }
210  }
211 
212 
213  timer.stop();
214 
215  m_elapsed_time += timer.elapsed_sec();
216 }
217 
218 
219 //====================================================================
220 template<typename AFIELD_d, typename AFIELD_f>
222  Field& xq, const Field& b,
223  int& nconv, double& diff)
224 {
225  int nin = m_fopr_d->field_nin();
226  int nvol2 = m_fopr_d->field_nvol();
227  int nex = m_fopr_d->field_nex();
228  int nvol = 2 * nvol2;
229 
230  AFIELD_d axq(nin, nvol, nex);
231  AFIELD_d abq(nin, nvol, nex);
232 
234 
235 #pragma omp parallel
236  {
237  if (m_fopr_d->needs_convert()) {
238  m_fopr_d->convert(abq, b);
239  } else {
240  convert(index_alt, abq, b);
241  }
242  convert(index_alt, abq, b);
243  }
244 
245 
246  invert_DdagD(axq, abq, nconv, diff);
247 
248 #pragma omp parallel
249  {
250  if (m_fopr_d->needs_convert()) {
251  m_fopr_d->reverse(xq, axq);
252  } else {
253  reverse(index_alt, xq, axq);
254  }
255  }
256 }
257 
258 
259 //====================================================================
260 template<typename AFIELD_d, typename AFIELD_f>
262  AFIELD_d& xq, const AFIELD_d& b,
263  int& nconv, double& diff)
264 {
266  vout.paranoiac("invert_D start.\n");
267 
268  m_timer.reset();
269  m_timer.start();
270 
271  int nin = m_fopr_d->field_nin();
272  int nvol2 = m_fopr_d->field_nvol();
273  int nex = m_fopr_d->field_nex();
274  int nvol = 2 * nvol2;
275 
276  AFIELD_d be(nin, nvol2, nex), bo(nin, nvol2, nex);
277  AFIELD_d xe(nin, nvol2, nex), xo(nin, nvol2, nex);
278  AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
279 
282 #pragma omp parallel
283  {
284  index_eo.split(be, bo, b);
285  }
286 
287  invert_De(xe, xo, be, bo, nconv, diff);
288 
289 #pragma omp parallel
290  {
291  index_eo.merge(xq, xe, xo);
292  }
293 
294  m_timer.stop();
295  m_elapsed_time += m_timer.elapsed_sec();
296  m_flop_count += m_solver->flop_count();
297 }
298 
299 
300 //====================================================================
301 template<typename AFIELD_d, typename AFIELD_f>
303  AFIELD_d& xq, const AFIELD_d& b,
304  int& nconv, double& diff)
305 {
307  vout.paranoiac("invert_DdagD start.\n");
308 
309  m_timer.reset();
310  m_timer.start();
311 
312  int nin = m_fopr_d->field_nin();
313  int nvol2 = m_fopr_d->field_nvol();
314  int nex = m_fopr_d->field_nex();
315 
316  AFIELD_d be(nin, nvol2, nex), bo(nin, nvol2, nex);
317  AFIELD_d xe(nin, nvol2, nex), xo(nin, nvol2, nex);
318  AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
319 
321 
322 #pragma omp parallel
323  {
324  index_eo.split(be, bo, b);
325 #pragma omp barrier
326 
327  m_fopr_d->mult_gm5(y1, be);
328  m_fopr_d->mult_gm5(y2, bo);
329  }
330 
331  int nconv1;
332  double diff1;
333  invert_De(xe, xo, y1, y2, nconv1, diff1);
334 
335  nconv = nconv1;
336  diff = diff1;
337 
338 #pragma omp parallel
339  {
340  m_fopr_d->mult_gm5(y1, xe);
341  m_fopr_d->mult_gm5(y2, xo);
342  }
343 
344  invert_De(xe, xo, y1, y2, nconv1, diff1);
345 
346  nconv += nconv1;
347  diff += diff1;
348 
349 #pragma omp parallel
350  {
351  index_eo.merge(xq, xe, xo);
352  }
353 
354  m_timer.stop();
355  m_elapsed_time += m_timer.elapsed_sec();
356  m_flop_count += m_solver->flop_count();
357 }
358 
359 
360 //====================================================================
361 template<typename AFIELD_d, typename AFIELD_f>
363  AFIELD_d& xe, AFIELD_d& xo,
364  AFIELD_d& be, AFIELD_d& bo,
365  int& nconv, double& diff)
366 {
367  int nin = m_fopr_d->field_nin();
368  int nvol2 = m_fopr_d->field_nvol();
369  int nex = m_fopr_d->field_nex();
370  int nvol = 2 * nvol2;
371 
372  AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
373 
374  // set even source vector.
375 #pragma omp parallel
376  {
377  m_fopr_d->mult(y1, bo, "Doo_inv");
378 #pragma omp barrier
379 
380  m_fopr_d->mult(y2, y1, "Deo");
381 #pragma omp barrier
382 
383  axpy(be, real_t(-1.0), y2);
384 #pragma omp barrier
385 
386  m_fopr_d->mult(y1, be, "Dee_inv");
387  }
388 
389  m_fopr_d->set_mode("D");
390  m_fopr_f->set_mode("D");
391 
392 #pragma omp parallel
393  {
394  m_solver->solve(xe, y1, nconv, diff);
395 #pragma omp barrier
396 
397  m_fopr_d->mult(y1, xe, "Doe");
398 #pragma omp barrier
399 
400  aypx(real_t(-1.0), y1, bo);
401 #pragma omp barrier
402 
403  m_fopr_d->mult(xo, y1, "Doo_inv");
404 #pragma omp barrier
405  }
406 }
407 
408 
409 //====================================================================
410 template<typename AFIELD_d, typename AFIELD_f>
412 {
413  return m_solver->flop_count();
414 }
415 
416 
417 //====================================================================
418 template<typename AFIELD_d, typename AFIELD_f>
420 {
421  m_flop_count = 0.0;
422  m_elapsed_time = 0.0;
423 }
424 
425 
426 //====================================================================
427 template<typename AFIELD_d, typename AFIELD_f>
429 {
430  double flops = m_flop_count / m_elapsed_time;
431  double gflops = flops * 1.0e-9;
432 
433  vout.general(m_vl, "\n");
434  vout.general(m_vl, "%s: solver performance:\n", class_name.c_str());
435  vout.general(m_vl, " Elapsed time = %14.6f sec\n", m_elapsed_time);
436  vout.general(m_vl, " Flop(total) = %18.0f\n", m_flop_count);
437  vout.general(m_vl, " Performance = %11.3f GFlops\n", gflops);
438 }
439 
440 
441 //====================================================================
442 template<typename AFIELD_d, typename AFIELD_f>
444  const std::string mode,
445  const int Nrepeat)
446 {
447  int nin = m_fopr_d->field_nin();
448  int nvol = m_fopr_d->field_nvol();
449  int nex = m_fopr_d->field_nex();
450 
451  unique_ptr<Timer> timer(new Timer);
452 
453  std::string mode_prev_d = m_fopr_d->get_mode();
454  std::string mode_prev_f = m_fopr_f->get_mode();
455 
456  m_fopr_d->set_mode(mode);
457  m_fopr_f->set_mode(mode);
458 
459  {
460  AFIELD_d axq(nin, nvol, nex), abq(nin, nvol, nex);
461  abq.set(0.0);
462  abq.set(0, 1.0);
463 
464  timer->start();
465 
466 #pragma omp parallel
467  {
468  for (int i = 0; i < Nrepeat; ++i) {
469  m_fopr_d->mult(axq, abq);
470  m_fopr_d->mult(abq, axq);
471  }
472  }
473 
474  timer->stop();
475  }
476 
477  double flop_fopr = m_fopr_d->flop_count();
478  double flop_total = flop_fopr * double(2 * Nrepeat);
479 
480  double elapsed_time = timer->elapsed_sec();
481  double flops = flop_total / elapsed_time;
482  double gflops = flops * 1.0e-9;
483 
484  vout.general(m_vl, "\n");
485  vout.general(m_vl, "%s: mult performance:\n", class_name.c_str());
486  vout.general(m_vl, " mult mode = %s\n", mode.c_str());
487  vout.general(m_vl, " Double precision:\n");
488  vout.general(m_vl, " Elapsed time = %14.6f sec\n", elapsed_time);
489  vout.general(m_vl, " Flop(Fopr) = %18.0f\n", flop_fopr);
490  vout.general(m_vl, " Flop(total) = %18.0f\n", flop_total);
491  vout.general(m_vl, " Performance = %11.3f GFlops\n", gflops);
492 
493  {
494  AFIELD_f axq(nin, nvol, nex), abq(nin, nvol, nex);
495  abq.set(0.0);
496  abq.set(0, 1.0);
497 
498  timer->reset();
499  timer->start();
500 
501 #pragma omp parallel
502  {
503  for (int i = 0; i < Nrepeat; ++i) {
504  m_fopr_f->mult(axq, abq);
505  m_fopr_f->mult(abq, axq);
506  }
507  }
508 
509  timer->stop();
510  }
511 
512  flop_fopr = m_fopr_f->flop_count();
513  flop_total = flop_fopr * double(2 * Nrepeat);
514 
515  elapsed_time = timer->elapsed_sec();
516  flops = flop_total / elapsed_time;
517  gflops = flops * 1.0e-9;
518 
519  vout.general(m_vl, " Single precision:\n");
520  vout.general(m_vl, " Elapsed time = %14.6f sec\n", elapsed_time);
521  vout.general(m_vl, " Flop(Fopr) = %18.0f\n", flop_fopr);
522  vout.general(m_vl, " Flop(total) = %18.0f\n", flop_total);
523  vout.general(m_vl, " Performance = %11.3f GFlops\n", gflops);
524 
525  m_fopr_d->set_mode(mode_prev_d);
526  m_fopr_f->set_mode(mode_prev_f);
527 }
528 
529 
530 //============================================================END=====
Fprop_alt_Standard_eo_Mixedprec::invert_De
void invert_De(AFIELD_d &, AFIELD_d &, AFIELD_d &, AFIELD_d &, int &, double &)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:362
Parameters::set_string
void set_string(const string &key, const string &value)
Definition: parameters.cpp:39
AFopr
Definition: afopr.h:48
ASolver
Definition: asolver.h:23
ASolver_FBiCGStab
Definition: asolver_FBiCGStab.h:32
Fprop_alt_Standard_eo_Mixedprec::invert_DdagD
void invert_DdagD(Field &, const Field &, int &, double &)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:221
Fprop_alt_Standard_eo_Mixedprec::init
void init(const Parameters &params_fopr, const Parameters &params_solver)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:15
Parameters
Class for parameters.
Definition: parameters.h:46
AIndex_lex
Definition: aindex_lex_base.h:17
Parameters::set_double
void set_double(const string &key, const double value)
Definition: parameters.cpp:33
convert
void convert(INDEX &index, AFIELD &v, const Field &w)
Definition: afield-inc.h:129
AField< double, QXS >
aypx
void aypx(const double a, Field &y, const Field &x)
aypx(y, a, x): y := a * y + x
Definition: field.cpp:509
axpy
void axpy(Field &y, const double a, const Field &x)
axpy(y, a, x): y := a * x + y
Definition: field.cpp:380
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
Fprop_alt_Standard_eo_Mixedprec
Get quark propagator for Fopr with lexical site index: alternative version.
Definition: fprop_alt_Standard_eo_Mixedprec.h:31
Timer
Definition: timer.h:31
Bridge::BridgeIO::paranoiac
void paranoiac(const char *format,...)
Definition: bridgeIO.cpp:238
Fprop_alt_Standard_eo_Mixedprec::reset_performance
void reset_performance()
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:419
Fprop_alt_Standard_eo_Mixedprec::invert
void invert(Field &, const Field &, int &, double &)
invert accordingly to the mode. [22 Sep 2018 H.Matsufuru]
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:130
Timer::start
void start()
Definition: timer.cpp:44
Fprop_alt_Standard_eo_Mixedprec::report_performance
void report_performance()
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:428
AFopr_Smeared
smeared fermion operator: alternative version.
Definition: afopr_Smeared.h:41
Director_Smear
Manager of smeared configurations.
Definition: director_Smear.h:39
APrecond_Mixedprec
Definition: aprecond_Mixedprec.h:28
reverse
void reverse(INDEX &index, Field &v, const AFIELD &w)
Definition: afield-inc.h:159
Fprop_alt_Standard_eo_Mixedprec::set_config
void set_config(Field *)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:121
ASolver_BiCGStab_Precond
Definition: asolver_BiCGStab_Precond.h:24
Fprop_alt_Standard_eo_Mixedprec::tidyup
void tidyup()
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:107
Parameters::set_int
void set_int(const string &key, const int value)
Definition: parameters.cpp:36
Fprop_alt_Standard_eo_Mixedprec::mult_performance
void mult_performance(const std::string mode, const int Nrepeat)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:443
Parameters::get_string
string get_string(const string &key) const
Definition: parameters.cpp:221
Bridge::BridgeIO::crucial
void crucial(const char *format,...)
Definition: bridgeIO.cpp:180
Field
Container of Field-type object.
Definition: field.h:46
Timer::elapsed_sec
double elapsed_sec() const
Definition: timer.cpp:107
Fprop_alt_Standard_eo_Mixedprec::invert_D
void invert_D(Field &, const Field &, int &, double &)
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:170
Bridge::BridgeIO::general
void general(const char *format,...)
Definition: bridgeIO.cpp:200
Fprop_alt_Standard_eo_Mixedprec::flop_count
double flop_count()
Definition: fprop_alt_Standard_eo_Mixedprec-tmpl.h:411
Timer::stop
void stop()
Definition: timer.cpp:69
ThreadManager::assert_single_thread
static void assert_single_thread(const std::string &class_name)
assert currently running on single thread.
Definition: threadManager.cpp:372
Bridge::vout
BridgeIO vout
Definition: bridgeIO.cpp:512
AIndex_eo
Definition: aindex_eo_base.h:17
Timer::reset
void reset()
Definition: timer.cpp:97