GpuBuffer.hpp
Go to the documentation of this file.
1/*
2 Copyright 2024 SINTEF AS
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef OPM_GPUBUFFER_HEADER_HPP
20#define OPM_GPUBUFFER_HEADER_HPP
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>

#include <fmt/core.h>

#include <opm/common/ErrorMacros.hpp>

#include <cuda_runtime.h>

#include <algorithm>
#include <cstddef>
#include <exception>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
32
33
34namespace Opm::gpuistl
35{
36
56template <typename T>
57class GpuBuffer
58{
59public:
60 using field_type = T;
61 using size_type = size_t;
62 using value_type = T;
63
67 GpuBuffer() = default;
68
77 GpuBuffer(const GpuBuffer<T>& other)
78 : GpuBuffer(other.m_numberOfElements)
79 {
80 assertSameSize(other);
81 if (m_numberOfElements == 0) {
82 return;
83 }
84 OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
85 other.m_dataOnDevice,
86 m_numberOfElements * sizeof(T),
87 cudaMemcpyDeviceToDevice));
88 }
89
99 explicit GpuBuffer(const std::vector<T>& data)
100 : GpuBuffer(data.size())
101 {
102 copyFromHost(data);
103 }
104
110 explicit GpuBuffer(const size_t numberOfElements)
111 : m_numberOfElements(numberOfElements)
112 {
113 OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
114 }
115
116
126 GpuBuffer(const T* dataOnHost, const size_t numberOfElements)
127 : GpuBuffer(numberOfElements)
128 {
129 OPM_GPU_SAFE_CALL(cudaMemcpy(
130 m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
131 }
132
133
137 virtual ~GpuBuffer()
138 {
139 OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
140 }
141
145 T* data()
146 {
147 return m_dataOnDevice;
148 }
149
153 const T* data() const
154 {
155 return m_dataOnDevice;
156 }
157
165 template <int BlockDimension>
166 void copyFromHost(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
167 {
168 // TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
169 if (m_numberOfElements != bvector.size()) {
170 OPM_THROW(std::runtime_error,
171 fmt::format("Given incompatible vector size. GpuBuffer has size {}, \n"
172 "however, BlockVector has N() = {}, and size = {}.",
173 m_numberOfElements,
174 bvector.N(),
175 bvector.size()));
176 }
177 const auto dataPointer = static_cast<const T*>(&(bvector[0][0]));
178 copyFromHost(dataPointer, m_numberOfElements);
179 }
180
188 template <int BlockDimension>
189 void copyToHost(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector) const
190 {
191 // TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
192 if (m_numberOfElements != bvector.size()) {
193 OPM_THROW(std::runtime_error,
194 fmt::format("Given incompatible vector size. GpuBuffer has size {},\n however, the BlockVector "
195 "has has N() = {}, and size() = {}.",
196 m_numberOfElements,
197 bvector.N(),
198 bvector.size()));
199 }
200 const auto dataPointer = static_cast<T*>(&(bvector[0][0]));
201 copyToHost(dataPointer, m_numberOfElements);
202 }
203
211 void copyFromHost(const T* dataPointer, size_t numberOfElements)
212 {
213 if (numberOfElements > size()) {
214 OPM_THROW(std::runtime_error,
215 fmt::format(fmt::runtime("Requesting to copy too many elements. "
216 "buffer has {} elements, while {} was requested."),
217 size(),
218 numberOfElements));
219 }
220 OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
221 }
222
230 void copyToHost(T* dataPointer, size_t numberOfElements) const
231 {
232 assertSameSize(numberOfElements);
233 OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
234 }
235
243 void copyFromHost(const std::vector<T>& data)
244 {
245 assertSameSize(data.size());
246
247 if (data.empty()) {
248 return;
249 }
250
251 if constexpr (std::is_same_v<T, bool>)
252 {
253 auto tmp = std::make_unique<bool[]>(data.size());
254 for (size_t i = 0; i < data.size(); ++i) {
255 tmp[i] = static_cast<bool>(data[i]);
256 }
257 copyFromHost(tmp.get(), data.size());
258 }
259 else {
260 copyFromHost(data.data(), data.size());
261 }
262 }
263
271 void copyToHost(std::vector<T>& data) const
272 {
273 assertSameSize(data.size());
274
275 if (data.empty()) {
276 return;
277 }
278
279 if constexpr (std::is_same_v<T, bool>)
280 {
281 auto tmp = std::make_unique<bool[]>(data.size());
282 copyToHost(tmp.get(), data.size());
283 for (size_t i = 0; i < data.size(); ++i) {
284 data[i] = static_cast<bool>(tmp[i]);
285 }
286 return;
287 }
288 else {
289 copyToHost(data.data(), data.size());
290 }
291 }
292
297 size_type size() const
298 {
299 return m_numberOfElements;
300 }
301
306 void resize(size_t newSize)
307 {
308 if (newSize < 1) {
309 OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
310 }
311
312 if (m_numberOfElements == 0) {
313 // We have no data, so we can just allocate new memory
314 OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * newSize));
315 }
316 else {
317 // Allocate memory for temporary buffer
318 T* tmpBuffer = nullptr;
319 OPM_GPU_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * m_numberOfElements));
320
321 // Move the data from the old to the new buffer with truncation
322 size_t sizeOfMove = std::min({m_numberOfElements, newSize});
323 OPM_GPU_SAFE_CALL(cudaMemcpy(tmpBuffer,
324 m_dataOnDevice,
325 sizeOfMove * sizeof(T),
326 cudaMemcpyDeviceToDevice));
327
328 // free the old buffer
329 OPM_GPU_SAFE_CALL(cudaFree(m_dataOnDevice));
330
331 // swap the buffers
332 m_dataOnDevice = tmpBuffer;
333 }
334
335 // update size
336 m_numberOfElements = newSize;
337 }
338
343 std::vector<T> asStdVector() const
344 {
345 std::vector<T> temporary(m_numberOfElements);
346 copyToHost(temporary);
347 return temporary;
348 }
349
350private:
351 T* m_dataOnDevice = nullptr;
352 size_t m_numberOfElements = 0;
353
354 void assertSameSize(const GpuBuffer<T>& other) const
355 {
356 assertSameSize(other.m_numberOfElements);
357 }
358
359 void assertSameSize(size_t size) const
360 {
361 if (size != m_numberOfElements) {
362 OPM_THROW(std::invalid_argument,
363 fmt::format(fmt::runtime("Given buffer has {}, while we have {}."),
364 size, m_numberOfElements));
365 }
366 }
367
368 void assertHasElements() const
369 {
370 if (m_numberOfElements <= 0) {
371 OPM_THROW(std::invalid_argument, "We have 0 elements");
372 }
373 }
374};
375
376template <class T>
377GpuView<T> make_view(GpuBuffer<T>& buf) {
378 return GpuView<T>(buf.data(), buf.size());
379}
380
381template <class T>
382GpuView<const T> make_view(const GpuBuffer<T>& buf) {
383 return GpuView<const T>(buf.data(), buf.size());
384}
385
386} // namespace Opm::gpuistl
387#endif
#define OPM_GPU_SAFE_CALL(expression)
OPM_GPU_SAFE_CALL checks the return type of the GPU expression (function call) and throws an exception if it is not cudaSuccess.
Definition: gpu_safe_call.hpp:150
#define OPM_GPU_WARN_IF_ERROR(expression)
OPM_GPU_WARN_IF_ERROR checks the return type of the GPU expression (function call) and issues a warning if it is not cudaSuccess.
Definition: gpu_safe_call.hpp:171
A small, fixed-dimension MiniVector class backed by std::array that can be used in both host and CUDA device code.
Definition: AmgxInterface.hpp:38
PointerView< T > make_view(const std::shared_ptr< T > &ptr)
Definition: gpu_smart_pointer.hpp:318