GpuVector.hpp
Go to the documentation of this file.
1/*
2 Copyright 2022-2023 SINTEF AS
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef OPM_GPUVECTOR_HEADER_HPP
20#define OPM_GPUVECTOR_HEADER_HPP
21#include <dune/common/fvector.hh>
22#include <dune/istl/bvector.hh>
23
24#include <fmt/core.h>
25#include <opm/common/ErrorMacros.hpp>
29#include <vector>
30#include <string>
31
32
33namespace Opm::gpuistl
34{
35
65template <typename T>
66class GpuVector
67{
68public:
69 using field_type = T;
70 using size_type = size_t;
71
72
81 GpuVector(const GpuVector<T>& other);
82
94 explicit GpuVector(const std::vector<T>& data);
95
104 GpuVector& operator=(const GpuVector<T>& other);
105
117 template<int BlockDimension>
118 explicit GpuVector(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
119 : GpuVector(bvector.dim())
120 {
121 copyFromHost(bvector);
122 }
123
131 GpuVector& operator=(T scalar);
132
140 explicit GpuVector(const size_t numberOfElements);
141
142
154 GpuVector(const T* dataOnHost, const size_t numberOfElements);
155
159 virtual ~GpuVector();
160
164 T* data();
165
169 const T* data() const;
170
178 template <int BlockDimension>
179 void copyFromHost(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
180 {
181 // TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
182 if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
183 OPM_THROW(std::runtime_error,
184 fmt::format("Given incompatible vector size. GpuVector has size {}, \n"
185 "however, BlockVector has N() = {}, and dim = {}.",
186 m_numberOfElements,
187 bvector.N(),
188 bvector.dim()));
189 }
190 const auto dataPointer = static_cast<const T*>(&(bvector[0][0]));
191 copyFromHost(dataPointer, m_numberOfElements);
192 }
193
202 template <int BlockDimension>
203 void copyFromHostAsync(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector, cudaStream_t stream = detail::DEFAULT_STREAM)
204 {
205 // TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
206 if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
207 OPM_THROW(std::runtime_error,
208 fmt::format("Given incompatible vector size. GpuVector has size {}, \n"
209 "however, BlockVector has N() = {}, and dim = {}.",
210 m_numberOfElements,
211 bvector.N(),
212 bvector.dim()));
213 }
214 const auto dataPointer = static_cast<const T*>(&(bvector[0][0]));
215 copyFromHostAsync(dataPointer, m_numberOfElements, stream);
216 }
217
225 template <int BlockDimension>
226 void copyToHost(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector) const
227 {
228 // TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
229 if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
230 OPM_THROW(std::runtime_error,
231 fmt::format("Given incompatible vector size. GpuVector has size {},\n however, the BlockVector "
232 "has has N() = {}, and dim() = {}.",
233 m_numberOfElements,
234 bvector.N(),
235 bvector.dim()));
236 }
237 const auto dataPointer = static_cast<T*>(&(bvector[0][0]));
238 copyToHost(dataPointer, m_numberOfElements);
239 }
240
249 template <int BlockDimension>
250 void copyToHostAsync(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector, cudaStream_t stream = detail::DEFAULT_STREAM) const
251 {
252 // TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
253 if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
254 OPM_THROW(std::runtime_error,
255 fmt::format("Given incompatible vector size. GpuVector has size {},\n however, the BlockVector "
256 "has has N() = {}, and dim() = {}.",
257 m_numberOfElements,
258 bvector.N(),
259 bvector.dim()));
260 }
261 const auto dataPointer = static_cast<T*>(&(bvector[0][0]));
262 copyToHostAsync(dataPointer, m_numberOfElements, stream);
263 }
264
272 void copyFromHost(const T* dataPointer, size_t numberOfElements);
273
284 void copyFromHostAsync(const T* dataPointer, size_t numberOfElements, cudaStream_t stream = detail::DEFAULT_STREAM);
285
286
287
295 void copyToHost(T* dataPointer, size_t numberOfElements) const;
296
307 void copyToHostAsync(T* dataPointer, size_t numberOfElements, cudaStream_t stream = detail::DEFAULT_STREAM) const;
308
309
310
318 void copyFromHost(const std::vector<T>& data);
319
328 void copyFromHostAsync(const std::vector<T>& data, cudaStream_t stream = detail::DEFAULT_STREAM);
329
337 void copyToHost(std::vector<T>& data) const;
338
348 void copyToHostAsync(std::vector<T>& data, cudaStream_t stream = detail::DEFAULT_STREAM) const;
349
350
351
356 void copyFromDeviceToDevice(const GpuVector<T>& other) const;
357
358 void prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
359 void syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
360
369 GpuVector<T>& operator*=(const T& scalar);
370
379 GpuVector<T>& axpy(T alpha, const GpuVector<T>& y);
380
387 GpuVector<T>& operator+=(const GpuVector<T>& other);
388
395 GpuVector<T>& operator-=(const GpuVector<T>& other);
396
405 T dot(const GpuVector<T>& other) const;
406
414 T two_norm() const;
415
421 T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
422
428 T two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
429
430
436 T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const;
437
443 T two_norm(const GpuVector<int>& indexSet) const;
444
445
450 size_type dim() const;
451
452
457 std::vector<T> asStdVector() const;
458
463 template <int blockSize>
464 Dune::BlockVector<Dune::FieldVector<T, blockSize>> asDuneBlockVector() const
465 {
466 OPM_ERROR_IF(dim() % blockSize != 0,
467 fmt::format("blockSize is not a multiple of dim(). Given blockSize = {}, and dim() = {}",
468 blockSize,
469 dim()));
470
471 Dune::BlockVector<Dune::FieldVector<T, blockSize>> returnValue(dim() / blockSize);
472 copyToHost(returnValue);
473 return returnValue;
474 }
475
476
490 void setZeroAtIndexSet(const GpuVector<int>& indexSet);
491
492 // Slow method that creates a string representation of a GpuVector for debug purposes
493 std::string toDebugString()
494 {
495 std::vector<T> v = asStdVector();
496 std::string res = "";
497 for (T element : v){
498 res += std::to_string(element) + " ";
499 }
500 res += std::to_string(v[v.size()-1]);
501 return res;
502 }
503
504private:
505 T* m_dataOnDevice = nullptr;
506
507 // Note that we store this as int to make sure we are always cublas compatible.
508 // This gives the added benefit that a size_t to int conversion error occurs during construction.
509 const int m_numberOfElements;
510 detail::CuBlasHandle& m_cuBlasHandle;
511
512 void assertSameSize(const GpuVector<T>& other) const;
513 void assertSameSize(int size) const;
514
515 void assertHasElements() const;
516};
517
518} // namespace Opm::gpuistl
519#endif
static constexpr int dim
Definition: structuredgridvanguard.hh:68
void syncFromRecvBuf(T *deviceA, T *buffer, size_t numberOfElements, const int *indices)
constexpr cudaStream_t DEFAULT_STREAM
The default GPU stream (stream 0)
Definition: gpu_constants.hpp:31
void prepareSendBuf(const T *deviceA, T *buffer, size_t numberOfElements, const int *indices)
__host__ __device__ std::size_t to_size_t(int i)
to_size_t converts a (on most relevant platforms) a 32 bit signed int to a 64 bits unsigned int
Definition: safe_conversion.hpp:86
Definition: AmgxInterface.hpp:38
std::string toDebugString()
Definition: GpuVector.hpp:493
void setZeroAtIndexSet(const GpuVector< int > &indexSet)
The GpuVector class is a simple (arithmetic) vector class for the GPU.
std::string to_string(const ConvergenceReport::ReservoirFailure::Type t)