opm-simulators
GpuBuffer.hpp
1 /*
2  Copyright 2024 SINTEF AS
3 
4  This file is part of the Open Porous Media project (OPM).
5 
6  OPM is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OPM is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OPM. If not, see <http://www.gnu.org/licenses/>.
18 */
19 #ifndef OPM_GPUBUFFER_HEADER_HPP
20 #define OPM_GPUBUFFER_HEADER_HPP
#include <algorithm>
#include <cstddef>
#include <exception>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include <cuda_runtime.h>

#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>

#include <fmt/core.h>

#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/gpuistl/GpuView.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
32 
33 
34 namespace Opm::gpuistl
35 {
36 
56 template <typename T>
57 class GpuBuffer
58 {
59 public:
60  using field_type = T;
61  using size_type = size_t;
62  using value_type = T;
63 
67  GpuBuffer() = default;
68 
77  GpuBuffer(const GpuBuffer<T>& other)
78  : GpuBuffer(other.m_numberOfElements)
79  {
80  assertSameSize(other);
81  if (m_numberOfElements == 0) {
82  return;
83  }
84  OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
85  other.m_dataOnDevice,
86  m_numberOfElements * sizeof(T),
87  cudaMemcpyDeviceToDevice));
88  }
89 
99  explicit GpuBuffer(const std::vector<T>& data)
100  : GpuBuffer(data.size())
101  {
102  copyFromHost(data);
103  }
104 
110  explicit GpuBuffer(const size_t numberOfElements)
111  : m_numberOfElements(numberOfElements)
112  {
113  OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
114  }
115 
116 
126  GpuBuffer(const T* dataOnHost, const size_t numberOfElements)
127  : GpuBuffer(numberOfElements)
128  {
129  OPM_GPU_SAFE_CALL(cudaMemcpy(
130  m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
131  }
132 
133 
137  virtual ~GpuBuffer()
138  {
139  OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
140  }
141 
145  T* data()
146  {
147  return m_dataOnDevice;
148  }
149 
153  const T* data() const
154  {
155  return m_dataOnDevice;
156  }
157 
165  template <int BlockDimension>
166  void copyFromHost(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
167  {
168  // TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
169  if (m_numberOfElements != bvector.size()) {
170  OPM_THROW(std::runtime_error,
171  fmt::format("Given incompatible vector size. GpuBuffer has size {}, \n"
172  "however, BlockVector has N() = {}, and size = {}.",
173  m_numberOfElements,
174  bvector.N(),
175  bvector.size()));
176  }
177  const auto dataPointer = static_cast<const T*>(&(bvector[0][0]));
178  copyFromHost(dataPointer, m_numberOfElements);
179  }
180 
188  template <int BlockDimension>
189  void copyToHost(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector) const
190  {
191  // TODO: [perf] vector.size() can be replaced by bvector.N() * BlockDimension
192  if (m_numberOfElements != bvector.size()) {
193  OPM_THROW(std::runtime_error,
194  fmt::format("Given incompatible vector size. GpuBuffer has size {},\n however, the BlockVector "
195  "has has N() = {}, and size() = {}.",
196  m_numberOfElements,
197  bvector.N(),
198  bvector.size()));
199  }
200  const auto dataPointer = static_cast<T*>(&(bvector[0][0]));
201  copyToHost(dataPointer, m_numberOfElements);
202  }
203 
211  void copyFromHost(const T* dataPointer, size_t numberOfElements)
212  {
213  if (numberOfElements > size()) {
214  OPM_THROW(std::runtime_error,
215  fmt::format(fmt::runtime("Requesting to copy too many elements. "
216  "buffer has {} elements, while {} was requested."),
217  size(),
218  numberOfElements));
219  }
220  OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
221  }
222 
230  void copyToHost(T* dataPointer, size_t numberOfElements) const
231  {
232  assertSameSize(numberOfElements);
233  OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
234  }
235 
243  void copyFromHost(const std::vector<T>& data)
244  {
245  assertSameSize(data.size());
246 
247  if (data.empty()) {
248  return;
249  }
250 
251  if constexpr (std::is_same_v<T, bool>)
252  {
253  auto tmp = std::make_unique<bool[]>(data.size());
254  for (size_t i = 0; i < data.size(); ++i) {
255  tmp[i] = static_cast<bool>(data[i]);
256  }
257  copyFromHost(tmp.get(), data.size());
258  }
259  else {
260  copyFromHost(data.data(), data.size());
261  }
262  }
263 
271  void copyToHost(std::vector<T>& data) const
272  {
273  assertSameSize(data.size());
274 
275  if (data.empty()) {
276  return;
277  }
278 
279  if constexpr (std::is_same_v<T, bool>)
280  {
281  auto tmp = std::make_unique<bool[]>(data.size());
282  copyToHost(tmp.get(), data.size());
283  for (size_t i = 0; i < data.size(); ++i) {
284  data[i] = static_cast<bool>(tmp[i]);
285  }
286  return;
287  }
288  else {
289  copyToHost(data.data(), data.size());
290  }
291  }
292 
297  size_type size() const
298  {
299  return m_numberOfElements;
300  }
301 
306  void resize(size_t newSize)
307  {
308  if (newSize < 1) {
309  OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
310  }
311 
312  if (m_numberOfElements == 0) {
313  // We have no data, so we can just allocate new memory
314  OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * newSize));
315  }
316  else {
317  // Allocate memory for temporary buffer
318  T* tmpBuffer = nullptr;
319  OPM_GPU_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * m_numberOfElements));
320 
321  // Move the data from the old to the new buffer with truncation
322  size_t sizeOfMove = std::min({m_numberOfElements, newSize});
323  OPM_GPU_SAFE_CALL(cudaMemcpy(tmpBuffer,
324  m_dataOnDevice,
325  sizeOfMove * sizeof(T),
326  cudaMemcpyDeviceToDevice));
327 
328  // free the old buffer
329  OPM_GPU_SAFE_CALL(cudaFree(m_dataOnDevice));
330 
331  // swap the buffers
332  m_dataOnDevice = tmpBuffer;
333  }
334 
335  // update size
336  m_numberOfElements = newSize;
337  }
338 
343  std::vector<T> asStdVector() const
344  {
345  std::vector<T> temporary(m_numberOfElements);
346  copyToHost(temporary);
347  return temporary;
348  }
349 
350 private:
351  T* m_dataOnDevice = nullptr;
352  size_t m_numberOfElements = 0;
353 
354  void assertSameSize(const GpuBuffer<T>& other) const
355  {
356  assertSameSize(other.m_numberOfElements);
357  }
358 
359  void assertSameSize(size_t size) const
360  {
361  if (size != m_numberOfElements) {
362  OPM_THROW(std::invalid_argument,
363  fmt::format(fmt::runtime("Given buffer has {}, while we have {}."),
364  size, m_numberOfElements));
365  }
366  }
367 
368  void assertHasElements() const
369  {
370  if (m_numberOfElements <= 0) {
371  OPM_THROW(std::invalid_argument, "We have 0 elements");
372  }
373  }
374 };
375 
376 template <class T>
377 GpuView<T> make_view(GpuBuffer<T>& buf) {
378  return GpuView<T>(buf.data(), buf.size());
379 }
380 
381 template <class T>
382 GpuView<const T> make_view(const GpuBuffer<T>& buf) {
383  return GpuView<const T>(buf.data(), buf.size());
384 }
385 
386 } // namespace Opm::gpuistl
387 #endif
A small, fixed‑dimension MiniVector class backed by std::array that can be used in both host and CUDA code.
Definition: AmgxInterface.hpp:37