15#ifndef OPM_AUTOTUNER_HPP
16#define OPM_AUTOTUNER_HPP
19#include <cuda_runtime.h>
22#include <opm/common/ErrorMacros.hpp>
23#include <opm/common/OpmLog/OpmLog.hpp>
34template <
typename func>
45 constexpr const int runs = 2;
46 cudaEvent_t events[runs + 1];
49 for (
int i = 0; i < runs + 1; ++i) {
54 float bestTime = std::numeric_limits<float>::max();
55 int bestBlockSize = -1;
59 for (
int thrBlockSize = interval; thrBlockSize <= 1024; thrBlockSize += interval) {
63 for (
int i = 0; i < runs; ++i) {
72 if (cudaSuccess == cudaGetLastError()) {
74 for (
int i = 0; i < runs; ++i) {
75 float candidateBlockSizeTime;
76 OPM_GPU_SAFE_CALL(cudaEventElapsedTime(&candidateBlockSizeTime, events[i], events[i + 1]));
77 if (candidateBlockSizeTime < bestTime) {
78 bestTime = candidateBlockSizeTime;
79 bestBlockSize = thrBlockSize;
86 fmt::format(
"{}: Tuned Blocksize: {} (fastest runtime: {}).", descriptionOfFunction, bestBlockSize, bestTime));
#define OPM_GPU_SAFE_CALL(expression)
OPM_GPU_SAFE_CALL checks the return value of the GPU expression (function call) and throws an exception if it does not equal cudaSuccess.
Definition: gpu_safe_call.hpp:150
Definition: autotuner.hpp:29
int tuneThreadBlockSize(func &f, std::string descriptionOfFunction)
Function that tests the best thread block size, assumes the provided function depends on threadblock-size.
Definition: autotuner.hpp:36