9 #include <stk_util/parallel/ParallelReduce.hpp> 18 #if defined( STK_HAS_MPI ) 24 std::ostream & arg_root_os ,
25 const std::string & arg_msg )
// MPI variant: gathers each rank's message and writes the non-empty ones to
// arg_root_os on the rank equal to p_root.
// NOTE(review): this listing omits several original lines (the function
// header and the declarations of p_size, p_rank and result) -- confirm
// against the full file.
27 const int i_zero = 0 ;
28 const int p_root = 0 ;
// Number of characters this rank contributes.
36 int send_count = arg_msg.size();
// One length slot per rank, zero-initialized.
38 std::vector<int> recv_count( p_size , i_zero );
40 int *
const recv_count_ptr = & recv_count[0] ;
// Step 1: collect one int (the message length) from every rank.
// The root and communicator arguments are on a line not shown here.
42 result = MPI_Gather( & send_count , 1 , MPI_INT ,
43 recv_count_ptr , 1 , MPI_INT ,
// A non-success return code is reported as std::runtime_error.
46 if ( MPI_SUCCESS != result ) {
47 std::ostringstream msg ;
48 msg <<
"stk_classic::all_write FAILED: MPI_Gather = " << result ;
49 throw std::runtime_error( msg.str() );
// Step 2: running sum of the lengths; recv_displ[i] is the offset of rank
// i's text in the gather buffer and recv_displ[p_size] is the total length.
54 std::vector<int> recv_displ( p_size + 1 , i_zero );
56 for (
unsigned i = 0 ; i < p_size ; ++i ) {
57 recv_displ[i+1] = recv_displ[i] + recv_count[i] ;
60 const unsigned recv_size = (unsigned) recv_displ[ p_size ] ;
62 std::vector<char> buffer( recv_size );
65 const char *
const send_ptr = arg_msg.c_str();
// Avoid taking & buffer[0] of an empty vector; pass NULL when nothing is received.
66 char *
const recv_ptr = recv_size ? & buffer[0] : (
char *) NULL ;
67 int *
const recv_displ_ptr = & recv_displ[0] ;
// Step 3: variable-length gather of the message characters.
// The cast drops const from send_ptr for the MPI call.
69 result = MPI_Gatherv( (
void*) send_ptr, send_count, MPI_CHAR ,
70 recv_ptr, recv_count_ptr, recv_displ_ptr, MPI_CHAR,
74 if ( MPI_SUCCESS != result ) {
75 std::ostringstream msg ;
76 msg <<
"stk_classic::all_write FAILED: MPI_Gatherv = " << result ;
77 throw std::runtime_error( msg.str() );
// Step 4: only the rank equal to p_root writes output.
80 if ( p_root == (
int) p_rank ) {
82 for (
unsigned i = 0 ; i < p_size ; ++i ) {
// Ranks that sent an empty string are skipped.
83 if ( recv_count[i] ) {
84 char *
const ptr = & buffer[ recv_displ[i] ];
85 arg_root_os.write( ptr , recv_count[i] );
// Each written message is followed by std::endl (newline + flush).
86 arg_root_os << std::endl ;
// Reduction with a caller-supplied operator arg_op over arg_len bytes,
// implemented as MPI_Reduce to rank 0 followed by MPI_Bcast from rank 0.
// NOTE(review): the function header and remaining parameters are not shown
// in this listing -- confirm the name and signature against the full file.
97 ParallelReduceOp arg_op ,
102 MPI_Op mpi_op = MPI_OP_NULL ;
// Wrap arg_op in an MPI_Op; the second argument (commute flag) is 0.
104 MPI_Op_create( arg_op , 0 , & mpi_op );
// Data is passed as raw bytes (MPI_BYTE) and reduced onto rank 0.
114 const int result_reduce =
115 MPI_Reduce(arg_in,arg_out,arg_len,MPI_BYTE,mpi_op,0,arg_comm);
// Rank 0's arg_out is then broadcast; this runs whatever result_reduce was.
117 const int result_bcast =
118 MPI_Bcast(arg_out,arg_len,MPI_BYTE,0,arg_comm);
// The operator handle is freed before the return codes are checked.
120 MPI_Op_free( & mpi_op );
// Either call failing raises std::runtime_error carrying both return codes.
122 if ( MPI_SUCCESS != result_reduce || MPI_SUCCESS != result_bcast ) {
123 std::ostringstream msg ;
124 msg <<
"stk_classic::all_reduce FAILED: MPI_Reduce = " << result_reduce
125 <<
" MPI_Bcast = " << result_bcast ;
126 throw std::runtime_error( msg.str() );
// Sum of `count` doubles across the communicator `comm`, result in `global`.
// NOTE(review): the function header line is not shown in this listing.
134 const double * local ,
double * global ,
unsigned count )
// const is cast away only to pass `local` as the MPI send buffer.
136 double * tmp =
const_cast<double*
>( local );
// The MPI return code is not checked here.
137 MPI_Allreduce( tmp , global , count , MPI_DOUBLE , MPI_SUM , comm );
// Sum of `count` floats across the communicator `comm`, result in `global`.
// NOTE(review): the function header line is not shown in this listing.
141 const float * local ,
float * global ,
unsigned count )
// const is cast away only to pass `local` as the MPI send buffer.
143 float * tmp =
const_cast<float*
>( local );
// The MPI return code is not checked here.
144 MPI_Allreduce( tmp , global , count , MPI_FLOAT , MPI_SUM , comm );
// Sum of `count` ints across the communicator `comm`, result in `global`.
// NOTE(review): the function header line is not shown in this listing.
148 const int * local ,
int * global ,
unsigned count )
// const is cast away only to pass `local` as the MPI send buffer.
150 int * tmp =
const_cast<int*
>( local );
// The MPI return code is not checked here.
151 MPI_Allreduce( tmp , global , count , MPI_INT , MPI_SUM , comm );
// Sum of `count` size_t values across the communicator `comm`, result in
// `global`. The MPI datatype is chosen by comparing sizeof(size_t) with the
// sizes of unsigned and unsigned long.
// NOTE(review): the function header, the final `else` line and the closing
// lines are not shown in this listing.
155 const size_t * local ,
size_t * global ,
unsigned count )
// const is cast away only to pass `local` as the MPI send buffer.
157 size_t * tmp =
const_cast<size_t*
>( local );
// Case 1: size_t has the same size as unsigned.
159 if (
sizeof(
size_t) ==
sizeof(unsigned) ) {
160 MPI_Allreduce( tmp , global , count , MPI_UNSIGNED , MPI_SUM , comm );
// Case 2: size_t has the same size as unsigned long.
162 else if (
sizeof(
size_t) ==
sizeof(
unsigned long) ) {
163 MPI_Allreduce( tmp , global , count , MPI_UNSIGNED_LONG , MPI_SUM , comm );
// Remaining case: copy into temporary unsigned long arrays, reduce those,
// then copy the sums back into `global`.
// NOTE(review): the release of `in` and `out` is not visible in this
// listing -- confirm both arrays are deleted in the full file.
166 unsigned long *
const in =
new unsigned long[ count ];
167 unsigned long *
const out =
new unsigned long[ count ];
// Each size_t value is converted to unsigned long on assignment.
169 for (
unsigned i = 0 ; i < count ; ++i ) { in[i] = local[i] ; }
170 MPI_Allreduce( in , out , count , MPI_UNSIGNED_LONG , MPI_SUM , comm );
171 for (
unsigned i = 0 ; i < count ; ++i ) { global[i] = out[i] ; }
// Bitwise OR of `count` unsigned values across the communicator `comm`,
// result in `global`.
// NOTE(review): the function header line is not shown in this listing.
179 const unsigned * local ,
180 unsigned * global ,
unsigned count )
// const is cast away only to pass `local` as the MPI send buffer.
182 unsigned * tmp =
const_cast<unsigned*
>( local );
// The MPI return code is not checked here.
183 MPI_Allreduce( tmp , global , count , MPI_UNSIGNED , MPI_BOR , comm );
// Writes arg_msg straight to arg_root_os with no communication.
// NOTE(review): presumably the non-MPI (#else) counterpart of the gather
// version; the preprocessor line and function header are not shown here.
192 std::ostream & arg_root_os ,
193 const std::string & arg_msg )
// No trailing newline is appended, unlike the gather version's std::endl.
195 arg_root_os << arg_msg ;
// Copies `count` doubles from `local` to `global` element by element.
// NOTE(review): presumably the non-MPI all_reduce_sum, where the sum over a
// single process is the input itself; the header line is not shown here.
199 const double * local ,
double * global ,
unsigned count )
201 for (
unsigned i = 0 ; i <
count ; ++i ) { global[i] = local[i] ; }
// Copies `count` floats from `local` to `global` element by element.
// NOTE(review): presumably the non-MPI all_reduce_sum, where the sum over a
// single process is the input itself; the header line is not shown here.
205 const float * local ,
float * global ,
unsigned count )
207 for (
unsigned i = 0 ; i <
count ; ++i ) { global[i] = local[i] ; }
// Copies `count` ints from `local` to `global` element by element.
// NOTE(review): presumably the non-MPI all_reduce_sum, where the sum over a
// single process is the input itself; the header line is not shown here.
211 const int * local ,
int * global ,
unsigned count )
213 for (
unsigned i = 0 ; i <
count ; ++i ) { global[i] = local[i] ; }
// Copies `count` size_t values from `local` to `global` element by element.
// NOTE(review): presumably the non-MPI all_reduce_sum, where the sum over a
// single process is the input itself; the header line is not shown here.
217 const size_t * local ,
size_t * global ,
unsigned count )
219 for (
unsigned i = 0 ; i <
count ; ++i ) { global[i] = local[i] ; }
// Copies `count` unsigned values from `local` to `global` element by element.
// NOTE(review): presumably the non-MPI all_reduce_bor, where the bitwise OR
// over a single process is the input itself; the header line is not shown here.
223 const unsigned * local ,
224 unsigned * global ,
unsigned count )
226 for (
unsigned i = 0 ; i <
count ; ++i ) { global[i] = local[i] ; }
// Views arg_in and arg_out as byte arrays and walks both in lock-step for
// arg_len bytes.
// NOTE(review): the enclosing function header and the loop body are not
// shown in this listing -- confirm what is done per byte in the full file.
237 unsigned char * i =
reinterpret_cast<unsigned char *
>( arg_in );
238 unsigned char * o =
reinterpret_cast<unsigned char *
>( arg_out );
// `e` is the one-past-the-end input pointer; `i` and `o` advance together.
239 for (
unsigned char *
const e = i + arg_len ; e != i ; ++i , ++o ) {
void all_reduce_bor(ParallelMachine comm, const unsigned *local, unsigned *global, unsigned count)
Parallel bitwise-or to all processors.
void all_reduce_sum(ParallelMachine comm, const double *local, double *global, unsigned count)
Parallel summation to all processors.
unsigned parallel_machine_rank(ParallelMachine parallel_machine)
Member function parallel_machine_rank ...
unsigned parallel_machine_size(ParallelMachine parallel_machine)
Member function parallel_machine_size ...
void all_write_string(ParallelMachine arg_comm, std::ostream &arg_root_os, const std::string &arg_msg)
Write string from any or all processors to the ostream on the root processor.
eastl::iterator_traits< InputIterator >::difference_type count(InputIterator first, InputIterator last, const T &value)