19 #ifndef OPM_GPU_THREAD_UTILS_HPP 20 #define OPM_GPU_THREAD_UTILS_HPP 23 #include <cuda_runtime.h> 24 #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp> 31 constexpr
inline size_t 32 getThreads([[maybe_unused]]
size_t numberOfRows)
38 getBlocks(
size_t numberOfRows)
40 const auto threads = getThreads(numberOfRows);
41 return (numberOfRows + threads - 1) / threads;
45 template <
class Kernel>
47 getCudaRecomendedThreadBlockSize(Kernel k,
int suggestedThrBlockSize = -1)
49 if (suggestedThrBlockSize != -1) {
50 return suggestedThrBlockSize;
54 OPM_GPU_SAFE_CALL(cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0));
59 getNumberOfBlocks(
int wantedThreads,
int threadBlockSize)
61 return (wantedThreads + threadBlockSize - 1) / threadBlockSize;
Contains wrappers to make the CuBLAS library behave as a modern C++ library with function overloading...
Definition: autotuner.hpp:29