42 #ifndef KOKKOSBLAS2_GEMV_MP_VECTOR_HPP 43 #define KOKKOSBLAS2_GEMV_MP_VECTOR_HPP 45 #include <type_traits> 46 #include "Sacado_ConfigDefs.h" 52 #include "KokkosBlas.hpp" 55 #include "Kokkos_Core.hpp" 57 #include "Stokhos_config.h" 59 #define Sacado_MP_Vector_GEMV_Tile_Size(size) (STOKHOS_GEMV_CACHE_SIZE / size) 62 template <
class AViewType,
65 class IndexType =
typename AViewType::size_type>
82 KOKKOS_INLINE_FUNCTION
void 85 const IndexType m =
y_.extent(0);
86 const IndexType n =
x_.extent(0);
88 IndexType i_min =
m_c_ * i_tile;
89 bool last_tile = (i_min +
m_c_ >= m);
90 IndexType i_max = (last_tile) ? m : (i_min +
m_c_);
92 #ifdef STOKHOS_HAVE_PRAGMA_UNROLL 96 for (IndexType i = i_min; i < i_max; ++i)
99 for (IndexType i = i_min; i < i_max; ++i)
102 for (IndexType
j = 0;
j < n; ++
j)
106 for (IndexType i = i_min; i < i_max; ++i)
107 y_(i) += alphab *
A_(i,
j);
113 typename AViewType::const_type
A_;
114 typename XViewType::const_type
x_;
121 template <
class AViewType,
124 class IndexType =
typename AViewType::size_type>
143 KOKKOS_INLINE_FUNCTION
void 146 const IndexType m =
y_.extent(0);
147 const IndexType n =
x_.extent(0);
149 const int j = team.league_rank();
150 const IndexType j_min =
n_c_ *
j;
151 const IndexType nj = (j_min +
n_c_ > n) ? (n - j_min) :
n_c_;
152 const IndexType i_min =
j % m;
154 for (IndexType i = i_min; i < m; ++i)
157 Kokkos::parallel_reduce(
158 Kokkos::TeamThreadRange(team, nj), [=](
int jj,
Scalar &tmp_sum) {
159 tmp_sum +=
A_(jj + j_min, i) *
x_(jj + j_min);
162 if (team.team_rank() == 0)
165 Kokkos::atomic_add<Scalar>(&
y_(i), tmp);
168 for (IndexType i = 0; i < i_min; ++i)
171 Kokkos::parallel_reduce(
172 Kokkos::TeamThreadRange(team, nj), [=](
int jj,
Scalar &tmp_sum) {
173 tmp_sum +=
A_(jj + j_min, i) *
x_(jj + j_min);
176 if (team.team_rank() == 0)
179 Kokkos::atomic_add<Scalar>(&
y_(i), tmp);
186 typename AViewType::const_type
A_;
187 typename XViewType::const_type
x_;
197 typename VA::const_value_type &alpha,
200 typename VY::const_value_type &beta,
204 using IndexType =
typename VA::size_type;
205 using policy_type = Kokkos::RangePolicy<execution_space, IndexType>;
208 const size_t m =
y.extent(0);
210 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE 211 const size_t N = execution_space::thread_pool_size();
213 const size_t N = execution_space::impl_thread_pool_size();
216 const size_t n_tiles_per_thread =
ceil(((
double)m) / (N * m_c_star));
217 const size_t m_c =
ceil(((
double)m) / (N * n_tiles_per_thread));
218 const size_t n_tiles = N * n_tiles_per_thread;
220 policy_type range(0, n_tiles);
223 functor_type functor(alpha, A,
x, beta,
y, m_c);
225 Kokkos::parallel_for(
"KokkosBlas::gemv[Update]", range, functor);
233 typename VA::const_value_type &alpha,
236 typename VY::const_value_type &beta,
240 using IndexType =
typename VA::size_type;
241 using team_policy_type = Kokkos::TeamPolicy<execution_space>;
244 const size_t m =
y.extent(0);
245 const size_t n =
x.extent(0);
247 const size_t team_size = STOKHOS_GEMV_TEAM_SIZE;
249 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE 250 const size_t N = execution_space::thread_pool_size();
252 const size_t N = execution_space::impl_thread_pool_size();
255 const size_t n_tiles_per_thread =
ceil(((
double)n) / (N * m_c_star));
256 const size_t m_c =
ceil(((
double)n) / (N * n_tiles_per_thread));
257 const size_t n_per_tile2 = m_c * team_size;
259 const size_t n_i2 =
ceil(((
double)n) / n_per_tile2);
261 team_policy_type team(n_i2, team_size);
264 Kokkos::parallel_for(
265 m, KOKKOS_LAMBDA(
const int i) {
269 Kokkos::parallel_for(
270 m, KOKKOS_LAMBDA(
const int i) {
275 functor_type functor(alpha, A,
x,
y, n_per_tile2);
277 Kokkos::parallel_for(
"KokkosBlas::gemv[InnerProducts]", team, functor);
282 template <
typename DA,
typename... PA,
283 typename DX,
typename... PX,
284 typename DY,
typename... PY>
289 typename Kokkos::View<DA, PA...>::const_value_type &alpha,
290 const Kokkos::View<DA, PA...> &A,
291 const Kokkos::View<DX, PX...> &
x,
292 typename Kokkos::View<DY, PY...>::const_value_type &beta,
293 const Kokkos::View<DY, PY...> &
y)
296 typedef Kokkos::View<DA, PA...> VA;
297 typedef Kokkos::View<DX, PX...> VX;
298 typedef Kokkos::View<DY, PY...> VY;
300 static_assert(VA::rank == 2,
"GEMM: A must have rank 2 (be a matrix).");
301 static_assert(VX::rank == 1,
"GEMM: x must have rank 1 (be a vector).");
302 static_assert(VY::rank == 1,
"GEMM: y must have rank 1 (be a vector).");
304 if (trans[0] ==
'n' || trans[0] ==
'N')
305 update_MP<Scalar, VA, VX, VY>(alpha, A,
x, beta,
y);
307 inner_products_MP<Scalar, VA, VX, VY>(alpha, A,
x, beta,
y);
Kokkos::DefaultExecutionSpace execution_space
#define Sacado_MP_Vector_GEMV_Tile_Size(size)
Kokkos::DefaultExecutionSpace execution_space
Kokkos::TeamPolicy< execution_space > policy_type
void update_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)
KOKKOS_INLINE_FUNCTION void operator()(const IndexType &i_tile) const
typename AViewType::execution_space execution_space
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
typename AViewType::non_const_value_type AlphaCoeffType
KOKKOS_INLINE_FUNCTION PCE< Storage > ceil(const PCE< Storage > &a)
typename YViewType::non_const_value_type BetaCoeffType
typename policy_type::member_type member_type
KOKKOS_INLINE_FUNCTION void operator()(const member_type &team) const
updateF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const BetaCoeffType &beta, const YViewType &y, const IndexType m_c)
innerF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const YViewType &y, const IndexType n_c)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
typename AViewType::non_const_value_type AlphaCoeffType
std::enable_if< Kokkos::is_view_mp_vector< Kokkos::View< DA, PA... > >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DX, PX... > >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DY, PY... > >::value >::type gemv(const char trans[], typename Kokkos::View< DA, PA... >::const_value_type &alpha, const Kokkos::View< DA, PA... > &A, const Kokkos::View< DX, PX... > &x, typename Kokkos::View< DY, PY... >::const_value_type &beta, const Kokkos::View< DY, PY... > &y)
void inner_products_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)