GpuSparseMatrixGeneric.hpp
/*
  Copyright 2025 Equinor ASA

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_GPUSPARSEMATRIXGENERIC_HPP
#define OPM_GPUSPARSEMATRIXGENERIC_HPP

#include <opm/common/ErrorMacros.hpp>

#include <cstddef>
#include <cusparse.h>

namespace Opm::gpuistl
{

/**
 * @brief The GpuSparseMatrixGeneric class uses the cuSPARSE Generic API for sparse matrix operations.
 *
 * The matrix is stored in block sparse row (BSR) format.
 */
template <typename T>
class GpuSparseMatrixGeneric
{
public:
    using field_type = T;

    //! Maximum block size supported by this implementation.
    static constexpr int max_block_size = 6;

    /**
     * @brief Construct a matrix from host-side BSR data (block values, row pointers and column indices).
     */
    GpuSparseMatrixGeneric(const T* nonZeroElements,
                           const int* rowIndices,
                           const int* columnIndices,
                           std::size_t numberOfNonzeroBlocks,
                           std::size_t blockSize,
                           std::size_t numberOfRows);

    /**
     * @brief Construct a matrix with the given sparsity pattern (row pointers and column indices) and block size.
     */
    GpuSparseMatrixGeneric(const GpuVector<int>& rowIndices, const GpuVector<int>& columnIndices, std::size_t blockSize);

    GpuSparseMatrixGeneric(const GpuSparseMatrixGeneric&);

    // We want to have this as non-mutable as possible, that is, we do not want
    // to deal with changing matrix sizes and sparsity patterns.
    GpuSparseMatrixGeneric& operator=(const GpuSparseMatrixGeneric&) = delete;

    virtual ~GpuSparseMatrixGeneric() = default;

    /**
     * @brief fromMatrix creates a new matrix with the same block size and values as the given matrix.
     */
    template <class MatrixType>
    static GpuSparseMatrixGeneric<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);

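    // A minimal usage sketch for fromMatrix (the Dune typedef and assembly routine are illustrative,
    // not part of this header):
    //
    //   Dune::BCRSMatrix<Dune::FieldMatrix<double, 3, 3>> A = assembleSystem();
    //   auto gpuA = GpuSparseMatrixGeneric<double>::fromMatrix(A);
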
    /**
     * @brief N returns the number of rows (which is equal to the number of columns).
     */
    std::size_t N() const
    {
        return detail::to_size_t(m_numberOfRows);
    }

    /**
     * @brief nonzeroes behaves like Dune::BCRSMatrix::nonzeroes() and returns the number of non-zero blocks.
     */
    std::size_t nonzeroes() const
    {
        // Technically this safe conversion is not needed since we enforce the counts to be
        // non-negative in the constructor, but we keep it for added sanity for now.
        //
        // We don't believe this will yield any performance penalty (it's used too far away from the inner loop),
        // but should that be false, it can be removed.
        return detail::to_size_t(m_numberOfNonzeroBlocks);
    }

    //! @brief getNonZeroValues returns the GPU vector containing the non-zero values (ordered by block).
    GpuVector<T>& getNonZeroValues()
    {
        return m_nonZeroElements;
    }

    //! @brief getNonZeroValues returns the GPU vector containing the non-zero values (ordered by block).
    const GpuVector<T>& getNonZeroValues() const
    {
        return m_nonZeroElements;
    }

    //! @brief getRowIndices returns the row indices used to represent the BSR structure.
    GpuVector<int>& getRowIndices()
    {
        return m_rowIndices;
    }

    //! @brief getRowIndices returns the row indices used to represent the BSR structure.
    const GpuVector<int>& getRowIndices() const
    {
        return m_rowIndices;
    }

    //! @brief getColumnIndices returns the column indices used to represent the BSR structure.
    GpuVector<int>& getColumnIndices()
    {
        return m_columnIndices;
    }

    //! @brief getColumnIndices returns the column indices used to represent the BSR structure.
    const GpuVector<int>& getColumnIndices() const
    {
        return m_columnIndices;
    }

    //! @brief dim returns the dimension of the vector space on which this matrix acts, i.e. blockSize() * N().
    std::size_t dim() const
    {
        // Technically this safe conversion is not needed since we enforce the sizes to be
        // non-negative in the constructor, but we keep it for added sanity for now.
        //
        // We don't believe this will yield any performance penalty (it's used too far away from the inner loop),
        // but should that be false, it can be removed.
        return detail::to_size_t(m_blockSize) * detail::to_size_t(m_numberOfRows);
    }

    //! @brief blockSize returns the size of the blocks.
    std::size_t blockSize() const
    {
        // Technically this safe conversion is not needed since we enforce the sizes to be
        // non-negative in the constructor, but we keep it for added sanity for now.
        //
        // We don't believe this will yield any performance penalty (it's used too far away from the inner loop),
        // but should that be false, it can be removed.
        return detail::to_size_t(m_blockSize);
    }

    //! @brief mv performs the matrix-vector multiply y = Ax.
    virtual void mv(const GpuVector<T>& x, GpuVector<T>& y) const;

    //! @brief umv computes y = Ax + y.
    virtual void umv(const GpuVector<T>& x, GpuVector<T>& y) const;

    //! @brief usmv computes y = alpha * Ax + y.
    virtual void usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const;

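    // A minimal usage sketch for the multiply operations (gpuA, x and y are illustrative; GpuVector
    // is assumed to be constructible from a size):
    //
    //   GpuVector<double> x(gpuA.dim());
    //   GpuVector<double> y(gpuA.dim());
    //   gpuA.mv(x, y);          // y  = Ax
    //   gpuA.usmv(-1.0, x, y);  // y += -1.0 * Ax
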
    //! @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix.
    template <class MatrixType>
    void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);

    //! @overload
    void updateNonzeroValues(const GpuSparseMatrixGeneric<T>& matrix);

    //! @brief Preprocess the SpMV operation to optimize for the sparsity pattern.
    void preprocessSpMV();

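    // A minimal per-timestep sketch (A and gpuA are illustrative): when the CPU-side matrix has been
    // re-assembled with an unchanged sparsity pattern, only the block values need to be pushed to the GPU:
    //
    //   gpuA.updateNonzeroValues(A);
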
    //! @brief Dispatches a function based on the block size of the matrix, passing it a
    //! std::integral_constant<int, blocksize> that matches the runtime block size.
    template <class FunctionType>
    auto dispatchOnBlocksize(FunctionType function) const
    {
        return dispatchOnBlocksizeImpl<max_block_size>(function);
    }

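    // A minimal usage sketch (the kernel launcher launchBlockedKernel is hypothetical and not part of
    // this header):
    //
    //   gpuA.dispatchOnBlocksize([&](auto blockSize) {
    //       constexpr int bs = decltype(blockSize)::value;
    //       launchBlockedKernel<T, bs>(gpuA, x, y);
    //   });
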
private:
    GpuVector<T> m_nonZeroElements;
    GpuVector<int> m_columnIndices;
    GpuVector<int> m_rowIndices;

    // Notice that we store these three as int to make sure we are cuSPARSE compatible.
    //
    // This gives the added benefit of checking the size constraints at construction of the matrix
    // rather than in some call to cuSPARSE.
    const int m_numberOfNonzeroBlocks;
    const int m_numberOfRows;
    const int m_blockSize;

    // Generic API descriptors
    decltype(detail::makeSafeMatrixDescriptor()) m_matrixDescriptor;
    detail::CuSparseHandle& m_cusparseHandle;

    // Cached buffer for operations
    mutable GpuBuffer<std::byte> m_buffer;

    // Helper method for SpMV operations, computing y = alpha * Ax + beta * y
    void spMV(T alpha, const GpuVector<T>& x, T beta, GpuVector<T>& y) const;

    // Initialize the matrix descriptor based on the block size
    void initializeMatrixDescriptor();

    template <class VectorType>
    void assertSameSize(const VectorType& vector) const;

    // Helper to get the cuSPARSE data type from the C++ type
    constexpr cudaDataType getDataType() const
    {
        if constexpr (std::is_same_v<T, float>) {
            return CUDA_R_32F;
        } else if constexpr (std::is_same_v<T, double>) {
            return CUDA_R_64F;
        } else {
            static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>,
                          "Only float and double are supported");
            return CUDA_R_32F; // Unreachable, but needed to compile
        }
    }

    template <int blockSizeCompileTime, class FunctionType>
    auto dispatchOnBlocksizeImpl(FunctionType function) const
    {
        if (blockSizeCompileTime == m_blockSize) {
            return function(std::integral_constant<int, blockSizeCompileTime>());
        }

        if constexpr (blockSizeCompileTime > 1) {
            return dispatchOnBlocksizeImpl<blockSizeCompileTime - 1>(function);
        } else {
            OPM_THROW(std::runtime_error, fmt::format("Unsupported block size: {}", m_blockSize));
        }
    }
};
} // namespace Opm::gpuistl

#endif // OPM_GPUSPARSEMATRIXGENERIC_HPP