// Required headers, followed by the table of human-readable names for the
// SG mat-vec algorithms. The table is indexed by the selected algorithm
// (sg_alg_names[sg_alg]) when the CSV column header is printed.
// NOTE(review): entry order presumably matches the SG_Alg enumerator order,
// which is not visible in this portion of the file -- confirm.
46 #include "Kokkos_Core.hpp" 48 #include "Teuchos_CommandLineProcessor.hpp" 49 #include "Teuchos_StandardCatchMacros.hpp" 59 const char *
sg_alg_names[] = {
"Original Matrix-Free",
"Product CRS" };
// Runs one benchmark configuration on the Kokkos Threads device and prints
// one comma-separated result row to std::cout.
//
// Parameters:
//   num_cpu, num_core_per_cpu  - multiplied together to give the team count.
//   num_threads_per_core       - threads per team.
//   p, d                       - used to build var_degree: d entries, each p.
//   nGrid, nIter, symmetric    - forwarded unchanged to the unit_test kernels.
//   sg_alg                     - algorithm selector.
//   perf1                      - timings of a reference run used as the
//                                numerator of the speed-up; defaults to an
//                                empty vector.
//
// NOTE(review): the return type, the braces, the dispatch on sg_alg, the
// assignment of the kernel result into perf, the declaration of speed_up and
// the return statement are not visible in this portion of the file.
62 run_test(
const size_t num_cpu,
const size_t num_core_per_cpu,
63 const size_t num_threads_per_core,
64 const size_t p,
const size_t d,
const size_t nGrid,
const size_t nIter,
65 const bool symmetric,
SG_Alg sg_alg,
66 const std::vector<double>& perf1 = std::vector<double>())
// All kernels below are instantiated for the Kokkos Threads device.
69 typedef Kokkos::Threads Device;
// Total thread count handed to Kokkos is team_count * threads_per_team.
70 const size_t team_count = num_cpu * num_core_per_cpu;
71 const size_t threads_per_team = num_threads_per_core;
72 Kokkos::Threads::initialize( team_count * threads_per_team );
// d entries, every one equal to p.
74 std::vector<int> var_degree( d , p );
// Timing results of the selected kernel.
// NOTE(review): the meaning of each element is defined by the unit_test
// kernels, which are not shown here; only element 1 is read below.
76 std::vector<double> perf;
// Kernel using the Stokhos::CrsProductTensor storage.
// NOTE(review): presumably selected when sg_alg requests the product-CRS
// algorithm; the selecting condition is not visible -- confirm.
79 unit_test::test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(var_degree , nGrid , nIter , symmetric );
// Kernel for the original matrix-free algorithm with Stokhos::DefaultMultiply.
// NOTE(review): the meaning of the literal true argument is not visible here.
82 unit_test::test_original_matrix_free_vec<Scalar,Device,Stokhos::DefaultMultiply>(
83 var_degree , nGrid , nIter ,
true , symmetric );
// Shut the thread pool down so the next call can initialize a different size.
85 Kokkos::Threads::finalize();
// Speed-up of this run relative to the reference timings in perf1.
89 speed_up = perf1[1] / perf[1];
// NOTE(review): this divides perf[1] by itself, i.e. yields 1 for finite
// non-zero perf[1]. Presumably this is the branch taken when no reference
// timings were supplied (perf1 empty); the guarding condition is not visible
// here -- confirm.
91 speed_up = perf[1] / perf[1];
// Parallel efficiency: speed-up divided by the number of teams.
92 double efficiency = speed_up / team_count;
// Emit the result row; efficiency is printed as a percentage.
// NOTE(review): the remaining columns of the row are not visible here.
94 std::cout << team_count <<
" , " 101 << 100.0 * efficiency <<
// Driver body: parses command-line options, queries the hardware topology
// through Kokkos::hwloc, prints a CSV header and then calls run_test for a
// sequence of thread configurations.
// NOTE(review): the first string literal on the next line is the tail of the
// preceding output statement. The function header, the declarations of p, d,
// nGrid, nIter, sg_alg and success, the try block opening and the closing
// braces are not visible in this portion of the file.
" , " 113 Teuchos::CommandLineProcessor
CLP;
// Register the problem-size options.
115 CLP.setOption(
"p", &p,
"Polynomial order");
117 CLP.setOption(
"d", &d,
"Stochastic dimension");
119 CLP.setOption(
"n", &nGrid,
"Number of spatial grid points in each dimension");
121 CLP.setOption(
"niter", &nIter,
"Number of iterations");
// Threads per core used in the final run; defaults to 1 and is clamped to
// the hardware capacity further down.
122 int n_thread_per_core = 1;
123 CLP.setOption(
"nthread", &n_thread_per_core,
"Number of threads per core to use");
// Defaults to 2.
// NOTE(review): n_hyperthreads is not referenced again in the visible portion
// of this file -- confirm whether it is still used.
124 int n_hyperthreads = 2;
125 CLP.setOption(
"nht", &n_hyperthreads,
"Number of hyperthreads per core available");
// Tail of the option registration for the algorithm choice; the beginning of
// this call is not visible here.
128 "SG Mat-Vec Algorithm");
// Boolean option pair: symmetric sets the flag, asymmetric clears it;
// the default is true.
129 bool symmetric =
true;
130 CLP.setOption(
"symmetric",
"asymmetric", &symmetric,
"Use symmetric PDF");
// Hardware topology as reported by hwloc.
134 const size_t num_cpu = Kokkos::hwloc::get_available_numa_count();
135 const size_t num_core_per_cpu = Kokkos::hwloc::get_available_cores_per_numa();
136 const size_t core_capacity = Kokkos::hwloc::get_available_threads_per_core();
// Never request more threads per core than the hardware reports.
137 if (static_cast<size_t>(n_thread_per_core) > core_capacity )
138 n_thread_per_core = core_capacity;
// CSV header row; the efficiency column is labelled with the name of the
// selected algorithm.
// NOTE(review): several header columns are not visible here.
141 std::cout << std::endl
144 <<
"\"#Variable\" , " 145 <<
"\"PolyDegree\" , " 149 <<
"\"" <<
sg_alg_names[sg_alg] <<
// Reference run with 1 cpu, 1 core per cpu and 1 thread per core; its timings
// are passed to every later run_test call as perf1.
" MXV Efficiency\" , " 153 const std::vector<double> perf1 =
154 run_test(1, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg);
// Vary the first argument from 2 up to num_cpu, one core per cpu, one thread
// per core.
157 for (
size_t n=2; n<=num_cpu; ++n) {
158 const std::vector<double> perf =
159 run_test(n, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
// Use all cpus and vary the cores per cpu from 2 up to num_core_per_cpu.
163 for (
size_t n=2; n<=num_core_per_cpu; ++n) {
164 const std::vector<double> perf =
165 run_test(num_cpu, n, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
// Final run: all cpus, all cores, and the (clamped) requested threads per core.
169 const std::vector<double> perf =
170 run_test(num_cpu, num_core_per_cpu, n_thread_per_core, p, d, nGrid,
171 nIter, symmetric, sg_alg, perf1);
// Teuchos standard catch macro, invoked with verbose = true, reporting to
// std::cerr and using the success flag.
175 TEUCHOS_STANDARD_CATCH_STATEMENTS(
true, std::cerr, success);
const SG_Alg sg_alg_values[]
const SG_Alg sg_alg_values[]
std::vector< double > run_test(const size_t num_cpu, const size_t num_core_per_cpu, const size_t num_threads_per_core, const size_t p, const size_t d, const size_t nGrid, const size_t nIter, const bool symmetric, SG_Alg sg_alg, const std::vector< double > &perf1=std::vector< double >())
const char * sg_alg_names[]
int main(int argc, char *argv[])