19#ifndef OPM_GPUVECTOR_HEADER_HPP
20#define OPM_GPUVECTOR_HEADER_HPP
21#include <dune/common/fvector.hh>
22#include <dune/istl/bvector.hh>
25#include <opm/common/ErrorMacros.hpp>
70 using size_type = size_t;
81 GpuVector(
const GpuVector<T>& other);
94 explicit GpuVector(
const std::vector<T>& data);
104 GpuVector& operator=(
const GpuVector<T>& other);
117 template<
int BlockDimension>
118 explicit GpuVector(
const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
119 : GpuVector(bvector.
dim())
121 copyFromHost(bvector);
131 GpuVector& operator=(T scalar);
136 GpuVector() : m_dataOnDevice(nullptr), m_numberOfElements(0), m_cuBlasHandle(detail::CuBlasHandle::getInstance()) {}
145 explicit GpuVector(
const size_t numberOfElements);
159 GpuVector(
const T* dataOnHost,
const size_t numberOfElements);
164 virtual ~GpuVector();
174 const T* data()
const;
183 template <
int BlockDimension>
184 void copyFromHost(
const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
188 OPM_THROW(std::runtime_error,
189 fmt::format(
"Given incompatible vector size. GpuVector has size {}, \n"
190 "however, BlockVector has N() = {}, and dim = {}.",
195 const auto dataPointer =
static_cast<const T*
>(&(bvector[0][0]));
196 copyFromHost(dataPointer, m_numberOfElements);
207 template <
int BlockDimension>
208 void copyFromHostAsync(
const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector, cudaStream_t stream =
detail::DEFAULT_STREAM)
212 OPM_THROW(std::runtime_error,
213 fmt::format(
"Given incompatible vector size. GpuVector has size {}, \n"
214 "however, BlockVector has N() = {}, and dim = {}.",
219 const auto dataPointer =
static_cast<const T*
>(&(bvector[0][0]));
220 copyFromHostAsync(dataPointer, m_numberOfElements, stream);
230 template <
int BlockDimension>
231 void copyToHost(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
const
235 OPM_THROW(std::runtime_error,
236 fmt::format(
"Given incompatible vector size. GpuVector has size {},\n however, the BlockVector "
237 "has has N() = {}, and dim() = {}.",
242 const auto dataPointer =
static_cast<T*
>(&(bvector[0][0]));
243 copyToHost(dataPointer, m_numberOfElements);
254 template <
int BlockDimension>
255 void copyToHostAsync(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector, cudaStream_t stream =
detail::DEFAULT_STREAM)
const
259 OPM_THROW(std::runtime_error,
260 fmt::format(
"Given incompatible vector size. GpuVector has size {},\n however, the BlockVector "
261 "has has N() = {}, and dim() = {}.",
266 const auto dataPointer =
static_cast<T*
>(&(bvector[0][0]));
267 copyToHostAsync(dataPointer, m_numberOfElements, stream);
277 void copyFromHost(
const T* dataPointer,
size_t numberOfElements);
289 void copyFromHostAsync(
const T* dataPointer,
size_t numberOfElements, cudaStream_t stream =
detail::DEFAULT_STREAM);
300 void copyToHost(T* dataPointer,
size_t numberOfElements)
const;
312 void copyToHostAsync(T* dataPointer,
size_t numberOfElements, cudaStream_t stream =
detail::DEFAULT_STREAM)
const;
323 void copyFromHost(
const std::vector<T>& data);
342 void copyToHost(std::vector<T>& data)
const;
361 void copyFromDeviceToDevice(
const GpuVector<T>& other)
const;
363 void prepareSendBuf(GpuVector<T>& buffer,
const GpuVector<int>& indexSet)
const;
364 void syncFromRecvBuf(GpuVector<T>& buffer,
const GpuVector<int>& indexSet)
const;
374 GpuVector<T>& operator*=(
const T& scalar);
384 GpuVector<T>& axpy(T alpha,
const GpuVector<T>& y);
392 GpuVector<T>& operator+=(
const GpuVector<T>& other);
400 GpuVector<T>& operator-=(
const GpuVector<T>& other);
410 T dot(
const GpuVector<T>& other)
const;
426 T dot(
const GpuVector<T>& other,
const GpuVector<int>& indexSet, GpuVector<T>& buffer)
const;
433 T two_norm(
const GpuVector<int>& indexSet, GpuVector<T>& buffer)
const;
441 T dot(
const GpuVector<T>& other,
const GpuVector<int>& indexSet)
const;
448 T two_norm(
const GpuVector<int>& indexSet)
const;
455 size_type
dim()
const;
465 void resize(
size_t new_size);
471 std::vector<T> asStdVector()
const;
477 template <
int blockSize>
478 Dune::BlockVector<Dune::FieldVector<T, blockSize>> asDuneBlockVector()
const
480 OPM_ERROR_IF(
dim() % blockSize != 0,
481 fmt::format(
"blockSize is not a multiple of dim(). Given blockSize = {}, and dim() = {}",
485 Dune::BlockVector<Dune::FieldVector<T, blockSize>> returnValue(
dim() / blockSize);
486 copyToHost(returnValue);
509 std::vector<T> v = asStdVector();
510 std::string res =
"";
519 T* m_dataOnDevice =
nullptr;
523 int m_numberOfElements;
524 detail::CuBlasHandle& m_cuBlasHandle;
526 void assertSameSize(
const GpuVector<T>& other)
const;
527 void assertSameSize(
int size)
const;
529 void assertHasElements()
const;
540 const auto hostBlockVector = vectorOnDevice.template asDuneBlockVector<1>();
static constexpr int dim
Definition: structuredgridvanguard.hh:68
void syncFromRecvBuf(T *deviceA, T *buffer, size_t numberOfElements, const int *indices)
constexpr cudaStream_t DEFAULT_STREAM
The default GPU stream (stream 0)
Definition: gpu_constants.hpp:31
void prepareSendBuf(const T *deviceA, T *buffer, size_t numberOfElements, const int *indices)
__host__ __device__ std::size_t to_size_t(int i)
to_size_t converts (on most relevant platforms) a 32-bit signed int to a 64-bit unsigned int
Definition: safe_conversion.hpp:86
Definition: AmgxInterface.hpp:38
std::string toDebugString()
Definition: GpuVector.hpp:507
void setZeroAtIndexSet(const GpuVector< int > &indexSet)
The GpuVector class is a simple (arithmetic) vector class for the GPU.
void writeMatrixMarket(const GpuVector< T > &vectorOnDevice, std::ostream &ostr)
Definition: GpuVector.hpp:538
std::string to_string(const ConvergenceReport::ReservoirFailure::Type t)