GpuBuffer.hpp
Go to the documentation of this file.
1/*
2 Copyright 2024 SINTEF AS
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef OPM_GPUBUFFER_HEADER_HPP
20#define OPM_GPUBUFFER_HEADER_HPP
21#include <dune/common/fvector.hh>
22#include <dune/istl/bvector.hh>
23#include <exception>
24#include <fmt/core.h>
25#include <opm/common/ErrorMacros.hpp>
29#include <vector>
30#include <string>
31#include <cuda_runtime.h>
32
33
34namespace Opm::gpuistl
35{
36
56template <typename T>
57class GpuBuffer
58{
59public:
60 using field_type = T;
61 using size_type = size_t;
62 using value_type = T;
63
67 GpuBuffer() = default;
68
77 GpuBuffer(const GpuBuffer<T>& other)
78 : GpuBuffer(other.m_numberOfElements)
79 {
80 assertSameSize(other);
81 if (m_numberOfElements == 0) {
82 return;
83 }
84 OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
85 other.m_dataOnDevice,
86 m_numberOfElements * sizeof(T),
87 cudaMemcpyDeviceToDevice));
88 }
89
98 GpuBuffer(GpuBuffer<T>&& other) noexcept
99 : m_dataOnDevice(other.m_dataOnDevice)
100 , m_numberOfElements(other.m_numberOfElements)
101 {
102 other.m_dataOnDevice = nullptr;
103 other.m_numberOfElements = 0;
104 }
105
114 GpuBuffer<T>& operator=(GpuBuffer<T>&& other) noexcept
115 {
116 if (this != &other) {
117 OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
118 m_dataOnDevice = other.m_dataOnDevice;
119 m_numberOfElements = other.m_numberOfElements;
120 other.m_dataOnDevice = nullptr;
121 other.m_numberOfElements = 0;
122 }
123 return *this;
124 }
125
135 explicit GpuBuffer(const std::vector<T>& data)
136 : GpuBuffer(data.size())
137 {
138 copyFromHost(data);
139 }
140
146 explicit GpuBuffer(const size_t numberOfElements)
147 : m_numberOfElements(numberOfElements)
148 {
149 OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
150 }
151
152
162 GpuBuffer(const T* dataOnHost, const size_t numberOfElements)
163 : GpuBuffer(numberOfElements)
164 {
165 OPM_GPU_SAFE_CALL(cudaMemcpy(
166 m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
167 }
168
169
173 virtual ~GpuBuffer()
174 {
175 OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
176 }
177
181 T* data()
182 {
183 return m_dataOnDevice;
184 }
185
189 const T* data() const
190 {
191 return m_dataOnDevice;
192 }
193
201 template <int BlockDimension>
202 void copyFromHost(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
203 {
204 // TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
205 if (m_numberOfElements != bvector.size()) {
206 OPM_THROW(std::runtime_error,
207 fmt::format("Given incompatible vector size. GpuBuffer has size {}, \n"
208 "however, BlockVector has N() = {}, and size = {}.",
209 m_numberOfElements,
210 bvector.N(),
211 bvector.size()));
212 }
213 const auto dataPointer = static_cast<const T*>(&(bvector[0][0]));
214 copyFromHost(dataPointer, m_numberOfElements);
215 }
216
224 template <int BlockDimension>
225 void copyToHost(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector) const
226 {
227 // TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
228 if (m_numberOfElements != bvector.size()) {
229 OPM_THROW(std::runtime_error,
230 fmt::format("Given incompatible vector size. GpuBuffer has size {},\n however, the BlockVector "
231 "has has N() = {}, and size() = {}.",
232 m_numberOfElements,
233 bvector.N(),
234 bvector.size()));
235 }
236 const auto dataPointer = static_cast<T*>(&(bvector[0][0]));
237 copyToHost(dataPointer, m_numberOfElements);
238 }
239
247 void copyFromHost(const T* dataPointer, size_t numberOfElements)
248 {
249 if (numberOfElements > size()) {
250 OPM_THROW(std::runtime_error,
251 fmt::format(fmt::runtime("Requesting to copy too many elements. "
252 "buffer has {} elements, while {} was requested."),
253 size(),
254 numberOfElements));
255 }
256 OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
257 }
258
266 void copyToHost(T* dataPointer, size_t numberOfElements) const
267 {
268 assertSameSize(numberOfElements);
269 OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
270 }
271
279 void copyFromHost(const std::vector<T>& data)
280 {
281 assertSameSize(data.size());
282
283 if (data.empty()) {
284 return;
285 }
286
287 if constexpr (std::is_same_v<T, bool>)
288 {
289 auto tmp = std::make_unique<bool[]>(data.size());
290 for (size_t i = 0; i < data.size(); ++i) {
291 tmp[i] = static_cast<bool>(data[i]);
292 }
293 copyFromHost(tmp.get(), data.size());
294 }
295 else {
296 copyFromHost(data.data(), data.size());
297 }
298 }
299
307 void copyToHost(std::vector<T>& data) const
308 {
309 assertSameSize(data.size());
310
311 if (data.empty()) {
312 return;
313 }
314
315 if constexpr (std::is_same_v<T, bool>)
316 {
317 auto tmp = std::make_unique<bool[]>(data.size());
318 copyToHost(tmp.get(), data.size());
319 for (size_t i = 0; i < data.size(); ++i) {
320 data[i] = static_cast<bool>(tmp[i]);
321 }
322 return;
323 }
324 else {
325 copyToHost(data.data(), data.size());
326 }
327 }
328
333 size_type size() const
334 {
335 return m_numberOfElements;
336 }
337
342 void resize(size_t newSize)
343 {
344 if (newSize < 1) {
345 OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
346 }
347
348 if (m_numberOfElements == 0) {
349 // We have no data, so we can just allocate new memory
350 OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * newSize));
351 }
352 else {
353 // Allocate memory for temporary buffer
354 T* tmpBuffer = nullptr;
355 OPM_GPU_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * m_numberOfElements));
356
357 // Move the data from the old to the new buffer with truncation
358 size_t sizeOfMove = std::min({m_numberOfElements, newSize});
359 OPM_GPU_SAFE_CALL(cudaMemcpy(tmpBuffer,
360 m_dataOnDevice,
361 sizeOfMove * sizeof(T),
362 cudaMemcpyDeviceToDevice));
363
364 // free the old buffer
365 OPM_GPU_SAFE_CALL(cudaFree(m_dataOnDevice));
366
367 // swap the buffers
368 m_dataOnDevice = tmpBuffer;
369 }
370
371 // update size
372 m_numberOfElements = newSize;
373 }
374
379 std::vector<T> asStdVector() const
380 {
381 std::vector<T> temporary(m_numberOfElements);
382 copyToHost(temporary);
383 return temporary;
384 }
385
386private:
387 T* m_dataOnDevice = nullptr;
388 size_t m_numberOfElements = 0;
389
390 void assertSameSize(const GpuBuffer<T>& other) const
391 {
392 assertSameSize(other.m_numberOfElements);
393 }
394
395 void assertSameSize(size_t size) const
396 {
397 if (size != m_numberOfElements) {
398 OPM_THROW(std::invalid_argument,
399 fmt::format(fmt::runtime("Given buffer has {}, while we have {}."),
400 size, m_numberOfElements));
401 }
402 }
403
404 void assertHasElements() const
405 {
406 if (m_numberOfElements <= 0) {
407 OPM_THROW(std::invalid_argument, "We have 0 elements");
408 }
409 }
410};
411
412template <class T>
413GpuView<T> make_view(GpuBuffer<T>& buf) {
414 return GpuView<T>(buf.data(), buf.size());
415}
416
417template <class T>
418GpuView<const T> make_view(const GpuBuffer<T>& buf) {
419 return GpuView<const T>(buf.data(), buf.size());
420}
421
422} // namespace Opm::gpuistl
423#endif
#define OPM_GPU_SAFE_CALL(expression)
OPM_GPU_SAFE_CALL checks the return type of the GPU expression (function call) and throws an exceptio...
Definition: gpu_safe_call.hpp:164
#define OPM_GPU_WARN_IF_ERROR(expression)
OPM_GPU_WARN_IF_ERROR checks the return type of the GPU expression (function call) and issues a warni...
Definition: gpu_safe_call.hpp:185
A small, fixed‑dimension MiniVector class backed by std::array that can be used in both host and CUDA...
Definition: ThermalGasWaterFlowProblem.hpp:95
ThermalGasWaterFlowProblem< Scalar, GpuView > make_view(ThermalGasWaterFlowProblem< Scalar, GpuBuffer > &buffer)
Definition: ThermalGasWaterFlowProblem.hpp:111