22 namespace ThreadManager_OpenMP_Reduce {
 
   23   template<
typename REALTYPE>
 
   26                   std::vector<REALTYPE>& array_reduction,
 
   27                   const int each_buf_size,
 
   28                   const int i_thread, 
const int Nthread)
 
   30     typedef REALTYPE real_t;
 
   32     std::vector<real_t> sum;
 
   34     real_t *psum = 
nullptr;
 
   42       for (
int i = 0; i < num; i++) {
 
   51       const int n = (remaining < each_buf_size)?num:each_buf_size;
 
   52       for (
int i = 0; i < n; i++) {
 
   53         array_reduction[i_thread * each_buf_size + i] = pa[i];
 
   58         for (
int i = 0; i < Nthread; i++) {
 
   59           for (
int j = 0; j < n; j++) {
 
   60             psum[j] += array_reduction[i * each_buf_size + j];
 
   63         psum += each_buf_size;
 
   66       remaining -= each_buf_size;
 
   77     const int total_buf_size = each_buf_size * Nthread;
 
   80       const int n = (remaining < total_buf_size)?num:total_buf_size;
 
   83         for (
int i = 0; i < n; i++) { 
 
   84           array_reduction[i] = pa[i];
 
   99       for (
int i = 0; i < n; i++) { 
 
  100         pa[i] = array_reduction[i];
 
  102       pa        += total_buf_size;
 
  103       remaining -= total_buf_size;
 
  131     if (omp_get_thread_num() == 0) {
 
  132       Nthread_env = omp_get_num_threads();
 
  137   if ((Nthread == Nthread_env) || (Nthread == 0)) {
 
  145     omp_set_num_threads(Nthread);
 
  167   return omp_get_num_threads();
 
  174   return omp_get_thread_num();
 
  208                                              const int i_thread, 
const int Nthread)
 
  219                                              const int i_thread, 
const int Nthread)
 
  229                                              const int i_thread, 
const int Nthread)
 
  240                                              const int i_thread, 
const int Nthread)
 
  250                                              const int i_thread, 
const int Nthread)
 
  261                                              const int i_thread, 
const int Nthread)
 
  277     vout.
crucial(
m_vl, 
"Single-thread %s is called in parallel region.\n", name.c_str());
 
static int m_Nthread
number of threads. 
static const int each_buf_size
reduction buffer size for each thread (double) 
static int get_num_threads()
returns available number of threads. 
static const int each_buf_sizeDC
reduction buffer size for each thread (dcomplex) 
void general(const char *format,...)
static std::vector< float > m_darray_reductionF
static Bridge::VerboseLevel Vlevel()
static int reduce_sum(int count, dcomplex *recv_buf, dcomplex *send_buf, int pattern=0)
make a global sum of an array of dcomplex over the communicator. pattern specifies the dimensions to be reduced.
static void wait()
barrier among threads inside a node. 
static int get_thread_id()
returns thread id. 
static void init(int Nthread)
setup: called in main only once. 
static std::vector< dcomplex > m_darray_reductionDC
static const std::string class_name
static void barrier(const int Nthread)
barrier among threads inside a node. 
static void sync_barrier_all()
barrier among all the threads and nodes. 
static void finalize()
finalization. 
void paranoiac(const char *format,...)
static const int each_buf_sizeF
reduction buffer size for each thread (float) 
void crucial(const char *format,...)
static void reduce_sum_global(dcomplex &value, const int i_thread, const int Nthread)
global reduction with summation: dcomplex values are assumed thread local. 
static Bridge::VerboseLevel m_vl
verbose level. 
void sum_global(REALTYPE *a, const int num, std::vector< REALTYPE > &array_reduction, const int each_buf_size, const int i_thread, const int Nthread)
static std::vector< double > m_darray_reduction
static int sync()
synchronize within small world. 
static void assert_single_thread(const std::string &class_name)
assert currently running on single thread.