45 #ifndef KOKKOS_CUDA_HPP 46 #define KOKKOS_CUDA_HPP 48 #include <Kokkos_Macros.hpp> 49 #if defined(KOKKOS_ENABLE_CUDA) 51 #include <Kokkos_Core_fwd.hpp> 56 #include <impl/Kokkos_AnalyzePolicy.hpp> 57 #include <Kokkos_CudaSpace.hpp> 60 #include <Kokkos_TaskScheduler.hpp> 62 #include <Kokkos_ScratchSpace.hpp> 63 #include <Kokkos_MemoryTraits.hpp> 64 #include <impl/Kokkos_Tags.hpp> 65 #include <impl/Kokkos_ExecSpaceInitializer.hpp> 66 #include <impl/Kokkos_HostSharedPtr.hpp> 83 enum class CudaLaunchMechanism : unsigned {
90 constexpr
inline CudaLaunchMechanism operator|(CudaLaunchMechanism p1,
91 CudaLaunchMechanism p2) {
92 return static_cast<CudaLaunchMechanism
>(
static_cast<unsigned>(p1) |
93 static_cast<unsigned>(p2));
95 constexpr
inline CudaLaunchMechanism operator&(CudaLaunchMechanism p1,
96 CudaLaunchMechanism p2) {
97 return static_cast<CudaLaunchMechanism
>(
static_cast<unsigned>(p1) &
98 static_cast<unsigned>(p2));
101 template <CudaLaunchMechanism l>
102 struct CudaDispatchProperties {
103 CudaLaunchMechanism launch_mechanism = l;
123 using execution_space = Cuda;
125 #if defined(KOKKOS_ENABLE_CUDA_UVM) 126 using memory_space = CudaUVMSpace;
129 using memory_space = CudaSpace;
134 using device_type = Kokkos::Device<execution_space, memory_space>;
137 using size_type = memory_space::size_type;
140 using array_layout = LayoutLeft;
143 using scratch_memory_space = ScratchMemorySpace<Cuda>;
152 KOKKOS_INLINE_FUNCTION
static int in_parallel() {
153 #if defined(__CUDA_ARCH__) 186 static void impl_static_fence();
191 static int concurrency();
194 static void print_configuration(std::ostream&,
const bool detail =
false);
202 Cuda(cudaStream_t stream);
/// Holds the integer id of a CUDA device.
struct SelectDevice {
  /// Device id; the constructors below initialize it, so it must be declared
  /// here. Typed `int` to match the explicit constructor's parameter.
  int cuda_device_id;

  /// Default selection: device id 0.
  SelectDevice() : cuda_device_id(0) {}

  /// Selects the device with the given id.
  /// @param id Device id stored in `cuda_device_id`.
  explicit SelectDevice(int id) : cuda_device_id(id) {}
};
215 static void impl_finalize();
218 static int impl_is_initialized();
221 static void impl_initialize(
const SelectDevice = SelectDevice(),
222 const size_t num_instances = 1);
227 static size_type device_arch();
230 static size_type detect_device_count();
235 static std::vector<unsigned> detect_device_arch();
237 cudaStream_t cuda_stream()
const;
238 int cuda_device()
const;
239 const cudaDeviceProp& cuda_device_prop()
const;
244 static const char* name();
246 inline Impl::CudaInternal* impl_internal_space_instance()
const {
247 return m_space_instance.get();
249 uint32_t impl_instance_id() const noexcept {
return 0; }
252 Kokkos::Impl::HostSharedPtr<Impl::CudaInternal> m_space_instance;
258 struct DeviceTypeTraits<Cuda> {
260 static constexpr DeviceType
id = DeviceType::Cuda;
267 class CudaSpaceInitializer :
public ExecSpaceInitializerBase {
269 CudaSpaceInitializer() =
default;
270 ~CudaSpaceInitializer() =
default;
271 void initialize(
const InitArguments& args)
final;
272 void finalize(
const bool all_spaces)
final;
274 void print_configuration(std::ostream& msg, const
bool detail) final;
287 struct MemorySpaceAccess<
Kokkos::CudaSpace,
288 Kokkos::Cuda::scratch_memory_space> {
289 enum :
bool { assignable =
false };
290 enum :
bool { accessible =
true };
291 enum :
bool { deepcopy =
false };
#if defined(KOKKOS_ENABLE_CUDA_UVM)

/// Access traits from `Kokkos::CudaUVMSpace` to the CUDA scratch memory
/// space: not assignable, accessible, no deep copy.
/// Only compiled when `KOKKOS_ENABLE_CUDA_UVM` is defined.
template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace,
                         Kokkos::Cuda::scratch_memory_space> {
  enum : bool { assignable = false };
  enum : bool { accessible = true };
  enum : bool { deepcopy = false };
};

#endif  // defined(KOKKOS_ENABLE_CUDA_UVM)
// Declaration of various MemoryLayout options.
// Declaration of parallel operators.