opm-simulators
gpuThreadUtils.hpp
1 /*
2  Copyright 2024 SINTEF AS
3 
4  This file is part of the Open Porous Media project (OPM).
5 
6  OPM is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OPM is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OPM. If not, see <http://www.gnu.org/licenses/>.
18 */
19 #ifndef OPM_GPU_THREAD_UTILS_HPP
20 #define OPM_GPU_THREAD_UTILS_HPP
21 #include <cstddef>
22 #include <cuda.h>
23 #include <cuda_runtime.h>
24 #include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
25 
26 /*
27  This file provides some logic for handling how to choose the correct thread-block size
28 */
29 namespace Opm::gpuistl::detail
30 {
/**
 * \brief Number of threads per block to use for a launch covering the given rows.
 *
 * The row count is accepted but not used: the same fixed value is returned
 * for every input.
 *
 * \param numberOfRows number of rows to process (currently ignored)
 * \return always 1024
 */
constexpr inline size_t
getThreads([[maybe_unused]] size_t numberOfRows)
{
    // Single fixed block size, independent of the problem size.
    constexpr size_t fixedThreadsPerBlock = 1024;
    return fixedThreadsPerBlock;
}
36 
37 inline size_t
38 getBlocks(size_t numberOfRows)
39 {
40  const auto threads = getThreads(numberOfRows);
41  return (numberOfRows + threads - 1) / threads;
42 }
43 
/**
 * \brief Pick the thread-block size to use when launching a kernel.
 *
 * If the caller passes a suggestion (anything other than -1) it is returned
 * unchanged and no query is made. Otherwise the block size reported by
 * cudaOccupancyMaxPotentialBlockSize for the kernel is returned.
 *
 * \tparam Kernel type of the function object of the cuda kernel
 * \param k the kernel to query
 * \param suggestedThrBlockSize block size chosen by the caller, or -1 for "no suggestion"
 * \return suggestedThrBlockSize if it is not -1, otherwise the queried block size
 */
template <class Kernel>
inline int
getCudaRecomendedThreadBlockSize(Kernel k, int suggestedThrBlockSize = -1)
{
    const bool hasSuggestion = (suggestedThrBlockSize != -1);
    if (!hasSuggestion) {
        // The query also reports a grid size, which is not used here.
        int unusedMinGridSize;
        int recommendedBlockSize;
        // Queried with 0 bytes of dynamic shared memory; the trailing 0 is
        // presumably "no block size limit" -- confirm against the CUDA docs.
        OPM_GPU_SAFE_CALL(cudaOccupancyMaxPotentialBlockSize(&unusedMinGridSize, &recommendedBlockSize, k, 0, 0));
        return recommendedBlockSize;
    }
    return suggestedThrBlockSize;
}
57 
/**
 * \brief Number of blocks of threadBlockSize threads needed to provide wantedThreads threads.
 *
 * This is the ceiling of wantedThreads / threadBlockSize for non-negative
 * wantedThreads and positive threadBlockSize.
 *
 * \param wantedThreads total number of threads required
 * \param threadBlockSize number of threads in each block; must not be 0
 * \return the number of blocks; 0 when wantedThreads is 0
 */
inline int
getNumberOfBlocks(int wantedThreads, int threadBlockSize)
{
    // Do the rounding sum in a wider type: in plain int,
    // wantedThreads + threadBlockSize - 1 overflows (undefined behaviour)
    // when wantedThreads is close to the largest int.
    const long long roundedUp = static_cast<long long>(wantedThreads) + threadBlockSize - 1;
    return static_cast<int>(roundedUp / threadBlockSize);
}
63 
64 } // namespace Opm::gpuistl::detail
65 
66 #endif
Contains wrappers to make the CuBLAS library behave as a modern C++ library with function overloading...
Definition: autotuner.hpp:29