GpuVector.hpp
/*
  Copyright 2022-2023 SINTEF AS

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_GPUVECTOR_HEADER_HPP
#define OPM_GPUVECTOR_HEADER_HPP

#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
#include <dune/istl/matrixmarket.hh> // Dune::writeMatrixMarket, used by the ADL bridge below

#include <fmt/core.h>

#include <opm/common/ErrorMacros.hpp>

// The gpuistl detail headers provide detail::CuBlasHandle, detail::DEFAULT_STREAM
// and detail::to_size_t used below; the include paths are assumed to follow the
// OPM source tree layout.
#include <opm/simulators/linalg/gpuistl/detail/CuBlasHandle.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_constants.hpp>
#include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>

#include <cuda_runtime.h> // cudaStream_t

#include <ostream>
#include <string>
#include <vector>

namespace Opm::gpuistl
{

/**
 * @brief The GpuVector class is a simple (arithmetic) vector class for the GPU.
 *
 * The data lives in device memory; host interaction goes through the explicit
 * copyFromHost/copyToHost family of functions below.
 *
 * @tparam T the scalar element type (typically float or double)
 */
template <typename T>
class GpuVector
{
public:
    using field_type = T;
    using size_type = size_t;

    //! @brief Copy constructor; allocates new device memory and copies the content of other.
    GpuVector(const GpuVector<T>& other);

    //! @brief Constructs a device vector holding a copy of the given host vector.
    explicit GpuVector(const std::vector<T>& data);

    //! @brief Copy assignment; copies the content of other into this vector.
    GpuVector& operator=(const GpuVector<T>& other);

    //! @brief Constructs a device vector from a Dune::BlockVector by copying its data to the GPU.
    template <int BlockDimension>
    explicit GpuVector(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
        : GpuVector(bvector.dim())
    {
        copyFromHost(bvector);
    }
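
    // Example (illustrative sketch): constructing device vectors from host data.
    // Assumes a CUDA-capable device is available; `hostData` and `hostBlocks`
    // are hypothetical names used only here.
    //
    //     std::vector<double> hostData{1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
    //     GpuVector<double> fromStdVector(hostData); // dim() == 6
    //
    //     Dune::BlockVector<Dune::FieldVector<double, 3>> hostBlocks(2);
    //     hostBlocks = 0.0;
    //     GpuVector<double> fromBlockVector(hostBlocks); // dim() == 6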

    //! @brief Sets every element of the vector to the given scalar value.
    GpuVector& operator=(T scalar);

    //! @brief Default constructor; creates an empty vector without allocating device memory.
    GpuVector()
        : m_dataOnDevice(nullptr)
        , m_numberOfElements(0)
        , m_cuBlasHandle(detail::CuBlasHandle::getInstance())
    {
    }

    //! @brief Allocates (uninitialized) device memory for numberOfElements elements.
    explicit GpuVector(const size_t numberOfElements);

    //! @brief Allocates device memory and copies numberOfElements elements from dataOnHost.
    GpuVector(const T* dataOnHost, const size_t numberOfElements);

    virtual ~GpuVector();

    //! @brief Returns the raw pointer to the device memory.
    T* data();

    //! @brief Returns the raw pointer to the device memory (const version).
    const T* data() const;

    //! @brief Copies the content of the Dune::BlockVector from the host into this vector on the device.
    //! @throws std::runtime_error if the sizes do not match.
    template <int BlockDimension>
    void copyFromHost(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
    {
        // TODO: [perf] bvector.dim() can be replaced by bvector.N() * BlockDimension
        if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
            OPM_THROW(std::runtime_error,
                      fmt::format("Given incompatible vector size. GpuVector has size {},\n"
                                  "however, BlockVector has N() = {}, and dim() = {}.",
                                  m_numberOfElements,
                                  bvector.N(),
                                  bvector.dim()));
        }
        const auto dataPointer = static_cast<const T*>(&(bvector[0][0]));
        copyFromHost(dataPointer, m_numberOfElements);
    }

    //! @brief Asynchronously copies the content of the Dune::BlockVector from the host to the device on the given stream.
    //! @note The caller must keep bvector alive and synchronize the stream before using the result.
    template <int BlockDimension>
    void copyFromHostAsync(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector, cudaStream_t stream = detail::DEFAULT_STREAM)
    {
        // TODO: [perf] bvector.dim() can be replaced by bvector.N() * BlockDimension
        if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
            OPM_THROW(std::runtime_error,
                      fmt::format("Given incompatible vector size. GpuVector has size {},\n"
                                  "however, BlockVector has N() = {}, and dim() = {}.",
                                  m_numberOfElements,
                                  bvector.N(),
                                  bvector.dim()));
        }
        const auto dataPointer = static_cast<const T*>(&(bvector[0][0]));
        copyFromHostAsync(dataPointer, m_numberOfElements, stream);
    }

    //! @brief Copies the content of this device vector into the given Dune::BlockVector on the host.
    template <int BlockDimension>
    void copyToHost(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector) const
    {
        // TODO: [perf] bvector.dim() can be replaced by bvector.N() * BlockDimension
        if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
            OPM_THROW(std::runtime_error,
                      fmt::format("Given incompatible vector size. GpuVector has size {},\n"
                                  "however, the BlockVector has N() = {}, and dim() = {}.",
                                  m_numberOfElements,
                                  bvector.N(),
                                  bvector.dim()));
        }
        const auto dataPointer = static_cast<T*>(&(bvector[0][0]));
        copyToHost(dataPointer, m_numberOfElements);
    }

    //! @brief Asynchronously copies the content of this device vector into the given Dune::BlockVector on the host.
    //! @note The caller must synchronize the stream before reading the host data.
    template <int BlockDimension>
    void copyToHostAsync(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector, cudaStream_t stream = detail::DEFAULT_STREAM) const
    {
        // TODO: [perf] bvector.dim() can be replaced by bvector.N() * BlockDimension
        if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
            OPM_THROW(std::runtime_error,
                      fmt::format("Given incompatible vector size. GpuVector has size {},\n"
                                  "however, the BlockVector has N() = {}, and dim() = {}.",
                                  m_numberOfElements,
                                  bvector.N(),
                                  bvector.dim()));
        }
        const auto dataPointer = static_cast<T*>(&(bvector[0][0]));
        copyToHostAsync(dataPointer, m_numberOfElements, stream);
    }
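
    // Example (illustrative sketch): a host -> device -> host round trip through
    // a Dune::BlockVector. The async variant runs on detail::DEFAULT_STREAM and
    // must be synchronized before the host data is read; names are hypothetical.
    //
    //     Dune::BlockVector<Dune::FieldVector<double, 2>> hostVector(4); // 8 scalars
    //     hostVector = 1.0;
    //
    //     GpuVector<double> deviceVector(hostVector.dim());
    //     deviceVector.copyFromHost(hostVector); // blocking upload
    //     deviceVector.copyToHost(hostVector);   // blocking download
    //
    //     deviceVector.copyToHostAsync(hostVector); // enqueued on DEFAULT_STREAM
    //     cudaStreamSynchronize(detail::DEFAULT_STREAM); // wait before reading hostVector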

    //! @brief Copies numberOfElements elements from dataPointer on the host to this vector on the device.
    void copyFromHost(const T* dataPointer, size_t numberOfElements);

    //! @brief Asynchronous version of copyFromHost; the copy is enqueued on the given stream.
    void copyFromHostAsync(const T* dataPointer, size_t numberOfElements, cudaStream_t stream = detail::DEFAULT_STREAM);

    //! @brief Copies numberOfElements elements from this vector on the device to dataPointer on the host.
    void copyToHost(T* dataPointer, size_t numberOfElements) const;

    //! @brief Asynchronous version of copyToHost; the copy is enqueued on the given stream.
    void copyToHostAsync(T* dataPointer, size_t numberOfElements, cudaStream_t stream = detail::DEFAULT_STREAM) const;

    //! @brief Copies the content of the given std::vector from the host to this vector on the device.
    void copyFromHost(const std::vector<T>& data);

    //! @brief Asynchronous version of copyFromHost for std::vector.
    void copyFromHostAsync(const std::vector<T>& data, cudaStream_t stream = detail::DEFAULT_STREAM);

    //! @brief Copies the content of this device vector into the given std::vector on the host.
    void copyToHost(std::vector<T>& data) const;

    //! @brief Asynchronous version of copyToHost for std::vector.
    void copyToHostAsync(std::vector<T>& data, cudaStream_t stream = detail::DEFAULT_STREAM) const;

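    // Example (illustrative sketch): overlapping an asynchronous download with
    // independent work on a user-created stream, continuing with the hypothetical
    // `deviceVector` from the sketch above; standard CUDA runtime calls.
    //
    //     std::vector<double> result(deviceVector.dim());
    //     cudaStream_t stream;
    //     cudaStreamCreate(&stream);
    //     deviceVector.copyToHostAsync(result.data(), result.size(), stream);
    //     // ... do independent work here ...
    //     cudaStreamSynchronize(stream); // `result` is valid only after this
    //     cudaStreamDestroy(stream);
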
    //! @brief Copies the content of the other device vector into this one without a round trip via the host.
    void copyFromDeviceToDevice(const GpuVector<T>& other) const;

    //! @brief Gathers the elements at the positions given by indexSet into buffer
    //! (e.g. to fill a send buffer before communicating with other processes).
    void prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
    //! @brief Scatters the elements of buffer back into this vector at the positions
    //! given by indexSet (the inverse of prepareSendBuf).
    void syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;

    //! @brief Scales every element by scalar, i.e. *this = scalar * (*this).
    GpuVector<T>& operator*=(const T& scalar);

    //! @brief Computes *this = alpha * y + *this (the BLAS axpy operation).
    GpuVector<T>& axpy(T alpha, const GpuVector<T>& y);

    //! @brief Element-wise addition; both vectors must have the same size.
    GpuVector<T>& operator+=(const GpuVector<T>& other);

    //! @brief Element-wise subtraction; both vectors must have the same size.
    GpuVector<T>& operator-=(const GpuVector<T>& other);

    //! @brief Computes the inner product of this vector and other.
    T dot(const GpuVector<T>& other) const;

    //! @brief Computes the Euclidean (l2) norm of this vector.
    T two_norm() const;

    //! @brief Computes the inner product restricted to the indices in indexSet, using buffer as scratch space.
    T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;

    //! @brief Computes the Euclidean norm restricted to the indices in indexSet, using buffer as scratch space.
    T two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;

    //! @brief Computes the inner product restricted to the indices in indexSet (allocates a temporary buffer).
    T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const;

    //! @brief Computes the Euclidean norm restricted to the indices in indexSet (allocates a temporary buffer).
    T two_norm(const GpuVector<int>& indexSet) const;

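    // Example (illustrative sketch): the BLAS-like operations above, on
    // hypothetical vectors x and y of equal size. All arithmetic runs on the
    // device; dot() and two_norm() return the scalar result to the host.
    //
    //     GpuVector<double> x(std::vector<double>{1.0, 2.0, 3.0});
    //     GpuVector<double> y(std::vector<double>{4.0, 5.0, 6.0});
    //
    //     x *= 2.0;                      // x = (2, 4, 6)
    //     x.axpy(3.0, y);                // x = 3*y + x = (14, 19, 24)
    //     const double d = x.dot(y);     // 14*4 + 19*5 + 24*6 = 295
    //     const double n = y.two_norm(); // sqrt(16 + 25 + 36) ~ 8.775
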
    //! @brief Returns the number of scalar elements in the vector.
    size_type dim() const;

    //! @brief Resizes the vector to hold new_size elements, reallocating device memory as needed.
    void resize(size_t new_size);

    //! @brief Copies the vector to the host and returns it as a std::vector.
    std::vector<T> asStdVector() const;

    //! @brief Copies the vector to the host and returns it as a Dune::BlockVector with the given block size.
    template <int blockSize>
    Dune::BlockVector<Dune::FieldVector<T, blockSize>> asDuneBlockVector() const
    {
        OPM_ERROR_IF(dim() % blockSize != 0,
                     fmt::format("dim() is not a multiple of blockSize. Given blockSize = {}, and dim() = {}",
                                 blockSize,
                                 dim()));

        Dune::BlockVector<Dune::FieldVector<T, blockSize>> returnValue(dim() / blockSize);
        copyToHost(returnValue);
        return returnValue;
    }
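
    // Example (illustrative sketch): pulling device data back into host types,
    // here a hypothetical vector of 6 scalars viewed as 2 blocks of size 3.
    //
    //     GpuVector<double> deviceVector(6);
    //     deviceVector = 0.0; // scalar assignment sets every element
    //     const auto hostBlocks = deviceVector.asDuneBlockVector<3>(); // N() == 2
    //     const std::vector<double> hostFlat = deviceVector.asStdVector();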

    //! @brief Sets the elements at the positions given by indexSet to zero.
    void setZeroAtIndexSet(const GpuVector<int>& indexSet);

    //! @brief Creates a string representation of the vector for debugging (slow: copies all data to the host).
    std::string toDebugString()
    {
        std::vector<T> v = asStdVector();
        std::string res;
        // Join the elements with single spaces.
        for (size_t i = 0; i < v.size(); ++i) {
            if (i > 0) {
                res += " ";
            }
            res += std::to_string(v[i]);
        }
        return res;
    }

private:
    T* m_dataOnDevice = nullptr;

    // Note that we store this as int to make sure we are always cuBLAS compatible.
    // This gives the added benefit that a size_t to int conversion error occurs during construction.
    int m_numberOfElements;
    detail::CuBlasHandle& m_cuBlasHandle;

    void assertSameSize(const GpuVector<T>& other) const;
    void assertSameSize(int size) const;

    void assertHasElements() const;
};

} // namespace Opm::gpuistl

// ADL bridge: converts the GPU vector to a Dune::BlockVector on the host and
// delegates to Dune's MatrixMarket writer.
namespace Opm::gpuistl
{
template <typename T>
inline void writeMatrixMarket(const GpuVector<T>& vectorOnDevice, std::ostream& ostr)
{
    const auto hostBlockVector = vectorOnDevice.template asDuneBlockVector<1>();
    writeMatrixMarket(hostBlockVector, ostr);
}
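
// Example (illustrative sketch): writing a device vector to a MatrixMarket
// stream via the bridge above. The file name is hypothetical and <fstream>
// must be included by the caller.
//
//     Opm::gpuistl::GpuVector<double> deviceVector(std::vector<double>{1.0, 2.0, 3.0});
//     std::ofstream out("vector.mm");
//     Opm::gpuistl::writeMatrixMarket(deviceVector, out);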
} // namespace Opm::gpuistl

#endif // OPM_GPUVECTOR_HEADER_HPP