gpuThreadUtils.hpp
Go to the documentation of this file.
1/*
2 Copyright 2024 SINTEF AS
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef OPM_GPU_THREAD_UTILS_HPP
20#define OPM_GPU_THREAD_UTILS_HPP
21#include <cstddef>
22#include <cuda.h>
23#include <cuda_runtime.h>
25
26/*
27 This file provides helpers for choosing the thread-block size, and the matching number of blocks, for GPU kernel launches.
28*/
30{
// Thread-block size to use for a launch covering numberOfRows rows.
// The row count is accepted but currently ignored: the result is always 1024.
inline constexpr size_t
getThreads([[maybe_unused]] size_t numberOfRows)
{
    constexpr size_t threadsPerBlock = 1024;
    return threadsPerBlock;
}
36
37inline size_t
38getBlocks(size_t numberOfRows)
39{
40 const auto threads = getThreads(numberOfRows);
41 return (numberOfRows + threads - 1) / threads;
42}
43
// Chooses the thread-block size for launching the kernel k, where k is the function
// object of the CUDA kernel.
//
// Any suggestedThrBlockSize other than -1 is returned unchanged and the CUDA runtime
// is not queried. With the default of -1, the block size reported by
// cudaOccupancyMaxPotentialBlockSize (queried with 0 for both trailing arguments)
// is returned; that call is wrapped in OPM_GPU_SAFE_CALL.
template <class Kernel>
inline int
getCudaRecomendedThreadBlockSize(Kernel k, int suggestedThrBlockSize = -1)
{
    const bool callerChoseSize = (suggestedThrBlockSize != -1);
    if (callerChoseSize) {
        return suggestedThrBlockSize;
    }

    int minGridSize; // out-parameter required by the query; its value is not used
    int recommendedBlockSize;
    OPM_GPU_SAFE_CALL(cudaOccupancyMaxPotentialBlockSize(&minGridSize, &recommendedBlockSize, k, 0, 0));
    return recommendedBlockSize;
}
57
// Number of blocks of threadBlockSize threads needed to provide at least wantedThreads
// threads, i.e. wantedThreads / threadBlockSize rounded up.
//
// Expects wantedThreads >= 0 and threadBlockSize > 0; a block size of 0 divides by zero.
// Returns 0 when wantedThreads is 0.
inline int
getNumberOfBlocks(int wantedThreads, int threadBlockSize)
{
    // Round up via quotient + remainder instead of
    // (wantedThreads + threadBlockSize - 1) / threadBlockSize: that sum is signed
    // arithmetic and overflows (undefined behaviour) for large thread counts.
    const int fullBlocks = wantedThreads / threadBlockSize;
    const int partialBlock = (wantedThreads % threadBlockSize != 0) ? 1 : 0;
    return fullBlocks + partialBlock;
}
63
64} // namespace Opm::gpuistl::detail
65
66#endif
#define OPM_GPU_SAFE_CALL(expression)
OPM_GPU_SAFE_CALL checks the return type of the GPU expression (function call) and throws an exceptio...
Definition: gpu_safe_call.hpp:150
Definition: autotuner.hpp:29
int getCudaRecomendedThreadBlockSize(Kernel k, int suggestedThrBlockSize=-1)
Definition: gpuThreadUtils.hpp:47
int getNumberOfBlocks(int wantedThreads, int threadBlockSize)
Definition: gpuThreadUtils.hpp:59
size_t getBlocks(size_t numberOfRows)
Definition: gpuThreadUtils.hpp:38
constexpr size_t getThreads(size_t numberOfRows)
Definition: gpuThreadUtils.hpp:32