22 namespace ThreadManager_OpenMP_Reduce{
24 template<
typename REALTYPE>
27 std::vector<REALTYPE> &array_reduction,
28 const int each_buf_size,
29 const int i_thread,
const int Nthread)
31 typedef REALTYPE real_t;
33 std::vector<real_t> sum;
38 for(
int i=0; i<num; i++){
45 const int n=(remaining<each_buf_size)?num:each_buf_size;
46 for(
int i=0; i<n; i++){
47 array_reduction[i_thread*each_buf_size+i] = pa[i];
52 for (
int i = 0; i < Nthread; i++){
53 for(
int j=0; j<n; j++){
54 psum[j] += array_reduction[i*each_buf_size+j];
60 remaining-=each_buf_size;
71 const int total_buf_size=each_buf_size*Nthread;
73 const int n=(remaining<total_buf_size)?num:total_buf_size;
76 for(
int i=0; i<n; i++){
77 array_reduction[i] = pa[i];
92 for(
int i=0; i<n; i++){
93 pa[i]=array_reduction[i];
96 remaining-=total_buf_size;
124 if (omp_get_thread_num() == 0) {
125 Nthread_env = omp_get_num_threads();
130 if ((Nthread == Nthread_env) || (Nthread == 0)) {
136 omp_set_num_threads(Nthread);
157 return omp_get_num_threads();
164 return omp_get_thread_num();
198 const int i_thread,
const int Nthread)
208 const int i_thread,
const int Nthread)
218 const int i_thread,
const int Nthread)
229 const int i_thread,
const int Nthread)
245 vout.
crucial(
m_vl,
"Single-thread %s is called in parallel region.\n", name.c_str());
static int m_Nthread
number of threads.
static const int each_buf_size
reduction buffer size for each thread (double)
static int get_num_threads()
returns available number of threads.
void general(const char *format,...)
static std::vector< float > m_darray_reductionF
static Bridge::VerboseLevel Vlevel()
static void wait()
barrier among threads inside a node.
static int get_thread_id()
returns thread id.
static void init(int Nthread)
setup: called in main only once.
static const std::string class_name
static void barrier(const int Nthread)
barrier among threads inside a node.
static void reduce_sum_global(double &value, const int i_thread, const int Nthread)
global reduction with summation: value is assumed to be thread-local.
static void sync_barrier_all()
barrier among all the threads and nodes.
static void finalize()
finalization.
void paranoiac(const char *format,...)
static const int each_buf_sizeF
reduction buffer size for each thread (float)
void crucial(const char *format,...)
static Bridge::VerboseLevel m_vl
verbose level.
void sum_global(REALTYPE *a, const int num, std::vector< REALTYPE > &array_reduction, const int each_buf_size, const int i_thread, const int Nthread)
static std::vector< double > m_darray_reduction
static int reduce_sum(int count, double *recv_buf, double *send_buf, int pattern=0)
make a global sum of an array of double over the communicator. pattern specifies the dimensions to be...
static int sync()
synchronize within small world.
static void assert_single_thread(const std::string &class_name)
asserts that the code is currently running on a single thread.