opm-simulators
GpuVector.hpp
1 /*
2  Copyright 2022-2023 SINTEF AS
3 
4  This file is part of the Open Porous Media project (OPM).
5 
6  OPM is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OPM is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OPM. If not, see <http://www.gnu.org/licenses/>.
18 */
19 #ifndef OPM_GPUVECTOR_HEADER_HPP
20 #define OPM_GPUVECTOR_HEADER_HPP
21 #include <dune/common/fvector.hh>
22 #include <dune/istl/bvector.hh>
23 
24 #include <fmt/core.h>
25 #include <opm/common/ErrorMacros.hpp>
26 #include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
27 #include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
28 #include <opm/simulators/linalg/gpuistl/detail/gpu_constants.hpp>
29 #include <vector>
30 #include <string>
31 
32 
33 namespace Opm::gpuistl
34 {
35 
65 template <typename T>
66 class GpuVector
67 {
68 public:
69  using field_type = T;
70  using size_type = size_t;
71 
72 
81  GpuVector(const GpuVector<T>& other);
82 
94  explicit GpuVector(const std::vector<T>& data);
95 
104  GpuVector& operator=(const GpuVector<T>& other);
105 
117  template<int BlockDimension>
118  explicit GpuVector(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
119  : GpuVector(bvector.dim())
120  {
121  copyFromHost(bvector);
122  }
123 
131  GpuVector& operator=(T scalar);
132 
136  GpuVector() : m_dataOnDevice(nullptr), m_numberOfElements(0), m_cuBlasHandle(detail::CuBlasHandle::getInstance()) {}
137 
145  explicit GpuVector(const size_t numberOfElements);
146 
147 
159  GpuVector(const T* dataOnHost, const size_t numberOfElements);
160 
164  virtual ~GpuVector();
165 
169  T* data();
170 
174  const T* data() const;
175 
183  template <int BlockDimension>
184  void copyFromHost(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
185  {
186  // TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
187  if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
188  OPM_THROW(std::runtime_error,
189  fmt::format("Given incompatible vector size. GpuVector has size {}, \n"
190  "however, BlockVector has N() = {}, and dim = {}.",
191  m_numberOfElements,
192  bvector.N(),
193  bvector.dim()));
194  }
195  const auto dataPointer = static_cast<const T*>(&(bvector[0][0]));
196  copyFromHost(dataPointer, m_numberOfElements);
197  }
198 
207  template <int BlockDimension>
208  void copyFromHostAsync(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector, cudaStream_t stream = detail::DEFAULT_STREAM)
209  {
210  // TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
211  if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
212  OPM_THROW(std::runtime_error,
213  fmt::format("Given incompatible vector size. GpuVector has size {}, \n"
214  "however, BlockVector has N() = {}, and dim = {}.",
215  m_numberOfElements,
216  bvector.N(),
217  bvector.dim()));
218  }
219  const auto dataPointer = static_cast<const T*>(&(bvector[0][0]));
220  copyFromHostAsync(dataPointer, m_numberOfElements, stream);
221  }
222 
230  template <int BlockDimension>
231  void copyToHost(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector) const
232  {
233  // TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
234  if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
235  OPM_THROW(std::runtime_error,
236  fmt::format(fmt::runtime("Given incompatible vector size. GpuVector has size {},\n"
237  "however, the BlockVector has has N() = {}, and dim() = {}."),
238  m_numberOfElements,
239  bvector.N(),
240  bvector.dim()));
241  }
242  const auto dataPointer = static_cast<T*>(&(bvector[0][0]));
243  copyToHost(dataPointer, m_numberOfElements);
244  }
245 
254  template <int BlockDimension>
255  void copyToHostAsync(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector, cudaStream_t stream = detail::DEFAULT_STREAM) const
256  {
257  // TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
258  if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
259  OPM_THROW(std::runtime_error,
260  fmt::format("Given incompatible vector size. GpuVector has size {},\n however, the BlockVector "
261  "has has N() = {}, and dim() = {}.",
262  m_numberOfElements,
263  bvector.N(),
264  bvector.dim()));
265  }
266  const auto dataPointer = static_cast<T*>(&(bvector[0][0]));
267  copyToHostAsync(dataPointer, m_numberOfElements, stream);
268  }
269 
277  void copyFromHost(const T* dataPointer, size_t numberOfElements);
278 
289  void copyFromHostAsync(const T* dataPointer, size_t numberOfElements, cudaStream_t stream = detail::DEFAULT_STREAM);
290 
291 
292 
300  void copyToHost(T* dataPointer, size_t numberOfElements) const;
301 
312  void copyToHostAsync(T* dataPointer, size_t numberOfElements, cudaStream_t stream = detail::DEFAULT_STREAM) const;
313 
314 
315 
323  void copyFromHost(const std::vector<T>& data);
324 
333  void copyFromHostAsync(const std::vector<T>& data, cudaStream_t stream = detail::DEFAULT_STREAM);
334 
342  void copyToHost(std::vector<T>& data) const;
343 
353  void copyToHostAsync(std::vector<T>& data, cudaStream_t stream = detail::DEFAULT_STREAM) const;
354 
355 
356 
361  void copyFromDeviceToDevice(const GpuVector<T>& other) const;
362 
363  void prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
364  void syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
365 
374  GpuVector<T>& operator*=(const T& scalar);
375 
384  GpuVector<T>& axpy(T alpha, const GpuVector<T>& y);
385 
392  GpuVector<T>& operator+=(const GpuVector<T>& other);
393 
400  GpuVector<T>& operator-=(const GpuVector<T>& other);
401 
410  T dot(const GpuVector<T>& other) const;
411 
419  T two_norm() const;
420 
426  T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
427 
433  T two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
434 
435 
441  T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const;
442 
448  T two_norm(const GpuVector<int>& indexSet) const;
449 
450 
455  size_type dim() const;
456 
465  void resize(size_t new_size);
466 
471  std::vector<T> asStdVector() const;
472 
477  template <int blockSize>
478  Dune::BlockVector<Dune::FieldVector<T, blockSize>> asDuneBlockVector() const
479  {
480  OPM_ERROR_IF(dim() % blockSize != 0,
481  fmt::format("blockSize is not a multiple of dim(). Given blockSize = {}, and dim() = {}",
482  blockSize,
483  dim()));
484 
485  Dune::BlockVector<Dune::FieldVector<T, blockSize>> returnValue(dim() / blockSize);
486  copyToHost(returnValue);
487  return returnValue;
488  }
489 
490 
504  void setZeroAtIndexSet(const GpuVector<int>& indexSet);
505 
506  // Slow method that creates a string representation of a GpuVector for debug purposes
507  std::string toDebugString()
508  {
509  std::vector<T> v = asStdVector();
510  std::string res = "";
511  for (T element : v){
512  res += std::to_string(element) + " ";
513  }
514  res += std::to_string(v[v.size()-1]);
515  return res;
516  }
517 
518 private:
519  T* m_dataOnDevice = nullptr;
520 
521  // Note that we store this as int to make sure we are always cublas compatible.
522  // This gives the added benefit that a size_t to int conversion error occurs during construction.
523  int m_numberOfElements;
524  detail::CuBlasHandle& m_cuBlasHandle;
525 
526  void assertSameSize(const GpuVector<T>& other) const;
527  void assertSameSize(int size) const;
528 
529  void assertHasElements() const;
530 };
531 
532 } // namespace Opm::gpuistl
533 
534 // ADL bridge: convert GPU vector to Dune BlockVector and delegate to Dune's writer
535 namespace Opm::gpuistl
536 {
537 template <typename T>
538 inline void writeMatrixMarket(const GpuVector<T>& vectorOnDevice, std::ostream& ostr)
539 {
540  const auto hostBlockVector = vectorOnDevice.template asDuneBlockVector<1>();
541  writeMatrixMarket(hostBlockVector, ostr);
542 }
543 } // namespace Opm::gpuistl
544 #endif
__host__ __device__ std::size_t to_size_t(int i)
to_size_t converts a (on most relevant platforms) a 32 bit signed int to a 64 bits unsigned int ...
Definition: safe_conversion.hpp:97
constexpr cudaStream_t DEFAULT_STREAM
The default GPU stream (stream 0)
Definition: gpu_constants.hpp:31
A small, fixed‑dimension MiniVector class backed by std::array that can be used in both host and CUD...
Definition: AmgxInterface.hpp:37
void setZeroAtIndexSet(const GpuVector< int > &indexSet)
The GpuVector class is a simple (arithmetic) vector class for the GPU.
Definition: GpuVector.cpp:174