GpuSparseMatrixGeneric.hpp
1/*
2 Copyright 2025 Equinor ASA
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef OPM_GPUSPARSEMATRIXGENERIC_HPP
20#define OPM_GPUSPARSEMATRIXGENERIC_HPP
21
22#include <opm/common/ErrorMacros.hpp>
29
30#include <cusparse.h>
31
32#include <cstddef>
33
34namespace Opm::gpuistl
35{
36
37
38
49template <typename T>
50class GpuSparseMatrixGeneric
51{
52public:
53 using field_type = T;
54
63 static constexpr int max_block_size = 6;
64
74 GpuSparseMatrixGeneric(const T* nonZeroElements,
75 const int* rowIndices,
76 const int* columnIndices,
77 size_t numberOfNonzeroBlocks,
78 size_t blockSize,
79 size_t numberOfRows);
80
87 GpuSparseMatrixGeneric(const GpuVector<int>& rowIndices, const GpuVector<int>& columnIndices, size_t blockSize);
88
90
97
98 // We want to have this as non-mutable as possible, that is, we do not want
99 // to deal with changing matrix sizes and sparsity patterns.
100 GpuSparseMatrixGeneric& operator=(const GpuSparseMatrixGeneric&) = delete;
101
103
113 template <class MatrixType>
114 static GpuSparseMatrixGeneric<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
115
119 size_t N() const
120 {
121 return detail::to_size_t(m_numberOfRows);
122 }
123
128 size_t nonzeroes() const
129 {
130 // Technically this safe conversion is not needed since we enforce these values
131 // to be non-negative in the constructor, but we keep it for added sanity for now.
132 //
133 // We don't believe this will yield any performance penalty (it's used too far away from the inner loop),
134 // but should that be false, it can be removed.
135 return detail::to_size_t(m_numberOfNonzeroBlocks);
136 }
137
143 GpuVector<T>& getNonZeroValues()
144 {
145 return m_nonZeroElements;
146 }
147
153 const GpuVector<T>& getNonZeroValues() const
154 {
155 return m_nonZeroElements;
156 }
157
163 GpuVector<int>& getRowIndices()
164 {
165 return m_rowIndices;
166 }
167
173 const GpuVector<int>& getRowIndices() const
174 {
175 return m_rowIndices;
176 }
177
183 GpuVector<int>& getColumnIndices()
184 {
185 return m_columnIndices;
186 }
187
193 const GpuVector<int>& getColumnIndices() const
194 {
195 return m_columnIndices;
196 }
197
204 size_t dim() const
205 {
206 // Technically this safe conversion is not needed since we enforce these values
207 // to be non-negative in the constructor, but we keep it for added sanity for now.
208 //
209 // We don't believe this will yield any performance penalty (it's used too far away from the inner loop),
210 // but should that be false, it can be removed.
211 return detail::to_size_t(m_blockSize) * detail::to_size_t(m_numberOfRows);
212 }
213
217 size_t blockSize() const
218 {
219 // Technically this safe conversion is not needed since we enforce these values
220 // to be non-negative in the constructor, but we keep it for added sanity for now.
221 //
222 // We don't believe this will yield any performance penalty (it's used too far away from the inner loop),
223 // but should that be false, it can be removed.
224 return detail::to_size_t(m_blockSize);
225 }
226
232 virtual void mv(const GpuVector<T>& x, GpuVector<T>& y) const;
233
239 virtual void umv(const GpuVector<T>& x, GpuVector<T>& y) const;
240
241
248 virtual void usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const;
249
260 template <class MatrixType>
261 void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
262
269
290 template<class FunctionType>
291 auto dispatchOnBlocksize(FunctionType function) const
292 {
293 return dispatchOnBlocksizeImpl<max_block_size>(function);
294 }
295
296private:
297 GpuVector<T> m_nonZeroElements;
298 GpuVector<int> m_columnIndices;
299 GpuVector<int> m_rowIndices;
300
301 // Notice that we store these three as int to make sure we are cusparse compatible.
302 //
303 // This gives the added benefit of checking the size constraints at construction of the matrix
304 // rather than in some call to cusparse.
305 const int m_numberOfNonzeroBlocks;
306 const int m_numberOfRows;
307 const int m_blockSize;
308
309 // Generic API descriptors
310 decltype(detail::makeSafeMatrixDescriptor()) m_matrixDescriptor;
311 detail::CuSparseHandle& m_cusparseHandle;
312
313 // Cached buffer for operations
314 mutable GpuBuffer<std::byte> m_buffer;
315
316 // Helper methods for SpMV operations
317 void spMV(T alpha, const GpuVector<T>& x, T beta, GpuVector<T>& y) const;
318
319 // Initialize matrix descriptor based on block size
320 void initializeMatrixDescriptor();
321
322 template <class VectorType>
323 void assertSameSize(const VectorType& vector) const;
324
325 // Helper to get cuSPARSE data type from C++ type
326 constexpr cudaDataType getDataType() const
327 {
328 if constexpr (std::is_same_v<T, float>) {
329 return CUDA_R_32F;
330 } else if constexpr (std::is_same_v<T, double>) {
331 return CUDA_R_64F;
332 } else {
333 static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>, "Only float and double are supported");
334 return CUDA_R_32F; // Unreachable, but needed to compile
335 }
336 }
337
338 template<int blockSizeCompileTime, class FunctionType>
339 auto dispatchOnBlocksizeImpl(FunctionType function) const
340 {
341 if (blockSizeCompileTime == m_blockSize) {
342 return function(std::integral_constant<int, blockSizeCompileTime>());
343 }
344
345 if constexpr (blockSizeCompileTime > 1) {
346 return dispatchOnBlocksizeImpl<blockSizeCompileTime - 1>(function);
347 } else {
348 OPM_THROW(std::runtime_error, fmt::format("Unsupported block size: {}", m_blockSize));
349 }
350 }
351};
352} // namespace Opm::gpuistl
353#endif
The GpuSparseMatrixGeneric class uses the cuSPARSE Generic API for sparse matrix operations.
Definition: GpuSparseMatrixGeneric.hpp:51
size_t dim() const
dim returns the dimension of the vector space on which this matrix acts
Definition: GpuSparseMatrixGeneric.hpp:204
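
A minimal sketch of the relation implied by the inline definition above: dim() counts scalar rows, N() counts block rows, and blockSize() is the dimension of each square block. The include path is an assumption, not taken from this page.

#include <opm/simulators/linalg/gpuistl/GpuSparseMatrixGeneric.hpp> // assumed include path
#include <cassert>

template <typename T>
void checkDimensions(const Opm::gpuistl::GpuSparseMatrixGeneric<T>& A)
{
    // dim() == blockSize() * N() follows directly from the inline definition above.
    assert(A.dim() == A.blockSize() * A.N());
}
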
GpuSparseMatrixGeneric & operator=(const GpuSparseMatrixGeneric &)=delete
GpuSparseMatrixGeneric(const GpuVector< int > &rowIndices, const GpuVector< int > &columnIndices, size_t blockSize)
const GpuVector< int > & getColumnIndices() const
getColumnIndices returns the column indices used to represent the BSR structure.
Definition: GpuSparseMatrixGeneric.hpp:193
size_t blockSize() const
blockSize size of the blocks
Definition: GpuSparseMatrixGeneric.hpp:217
virtual void umv(const GpuVector< T > &x, GpuVector< T > &y) const
umv computes y=Ax+y
virtual void mv(const GpuVector< T > &x, GpuVector< T > &y) const
mv performs matrix vector multiply y = Ax
GpuSparseMatrixGeneric(const GpuSparseMatrixGeneric &)
void updateNonzeroValues(const MatrixType &matrix, bool copyNonZeroElementsDirectly=false)
updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
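
A hedged sketch of the typical update pattern, assuming the GPU matrix was created from the same Dune matrix beforehand so the sparsity pattern already matches; the Dune block size and the include path are illustrative.

#include <opm/simulators/linalg/gpuistl/GpuSparseMatrixGeneric.hpp> // assumed include path
#include <dune/common/fmatrix.hh>
#include <dune/istl/bcrsmatrix.hh>

using HostMatrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, 3, 3>>;

// Refresh the GPU values after the host matrix has been reassembled.
// Only the non-zero values are transferred; the BSR structure is left untouched.
void refreshGpuMatrix(Opm::gpuistl::GpuSparseMatrixGeneric<double>& gpuMatrix,
                      const HostMatrix& hostMatrix)
{
    gpuMatrix.updateNonzeroValues(hostMatrix);
}
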
GpuVector< int > & getColumnIndices()
getColumnIndices returns the column indices used to represent the BSR structure.
Definition: GpuSparseMatrixGeneric.hpp:183
const GpuVector< T > & getNonZeroValues() const
getNonZeroValues returns the GPU vector containing the non-zero values (ordered by block)
Definition: GpuSparseMatrixGeneric.hpp:153
T field_type
Definition: GpuSparseMatrixGeneric.hpp:53
GpuVector< T > & getNonZeroValues()
getNonZeroValues returns the GPU vector containing the non-zero values (ordered by block)
Definition: GpuSparseMatrixGeneric.hpp:143
void updateNonzeroValues(const GpuSparseMatrixGeneric< T > &matrix)
updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
GpuVector< int > & getRowIndices()
getRowIndices returns the row indices used to represent the BSR structure.
Definition: GpuSparseMatrixGeneric.hpp:163
size_t nonzeroes() const
nonzeroes behaves as the Dune::BCRSMatrix::nonzeroes() function and returns the number of non-zero blocks.
Definition: GpuSparseMatrixGeneric.hpp:128
GpuSparseMatrixGeneric(const T *nonZeroElements, const int *rowIndices, const int *columnIndices, size_t numberOfNonzeroBlocks, size_t blockSize, size_t numberOfRows)
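
A hedged sketch of direct construction from BSR-style arrays. The index layout follows the usual BSR convention (row pointers of length numberOfRows + 1), and host-side pointers are assumed since the header does not state where the arrays must live.

#include <opm/simulators/linalg/gpuistl/GpuSparseMatrixGeneric.hpp> // assumed include path
#include <vector>

// Two block rows, one 2x2 identity block on each diagonal position.
Opm::gpuistl::GpuSparseMatrixGeneric<double> makeTinyDiagonalMatrix()
{
    const std::vector<double> values = {1.0, 0.0, 0.0, 1.0,  // block (0,0)
                                        1.0, 0.0, 0.0, 1.0}; // block (1,1)
    const std::vector<int> rowIndices = {0, 1, 2};           // row pointers
    const std::vector<int> columnIndices = {0, 1};
    return Opm::gpuistl::GpuSparseMatrixGeneric<double>(
        values.data(), rowIndices.data(), columnIndices.data(),
        /*numberOfNonzeroBlocks=*/2, /*blockSize=*/2, /*numberOfRows=*/2);
}
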
void preprocessSpMV()
Preprocess SpMV operation to optimize for sparsity pattern.
static GpuSparseMatrixGeneric< T > fromMatrix(const MatrixType &matrix, bool copyNonZeroElementsDirectly=false)
fromMatrix creates a new matrix with the same block size and values as the given matrix
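
A hedged sketch of the usual conversion path from an assembled Dune matrix; the Dune types and the include path are illustrative, and copyNonZeroElementsDirectly is left at its default.

#include <opm/simulators/linalg/gpuistl/GpuSparseMatrixGeneric.hpp> // assumed include path
#include <dune/common/fmatrix.hh>
#include <dune/istl/bcrsmatrix.hh>

using HostMatrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, 3, 3>>;

// Build the GPU-side BSR matrix with the same block size, pattern and values.
Opm::gpuistl::GpuSparseMatrixGeneric<double> uploadMatrix(const HostMatrix& hostMatrix)
{
    return Opm::gpuistl::GpuSparseMatrixGeneric<double>::fromMatrix(hostMatrix);
}
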
static constexpr int max_block_size
Maximum block size supported by this implementation.
Definition: GpuSparseMatrixGeneric.hpp:63
size_t N() const
N returns the number of rows (which is equal to the number of columns)
Definition: GpuSparseMatrixGeneric.hpp:119
auto dispatchOnBlocksize(FunctionType function) const
Dispatches a function based on the block size of the matrix.
Definition: GpuSparseMatrixGeneric.hpp:291
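
A hedged sketch of how a caller can use the dispatch: the callable receives a std::integral_constant carrying the block size as a compile-time constant, as passed by dispatchOnBlocksizeImpl() in the listing above. The kernel call is a hypothetical placeholder.

#include <opm/simulators/linalg/gpuistl/GpuSparseMatrixGeneric.hpp> // assumed include path

template <typename T>
void launchForBlockSize(const Opm::gpuistl::GpuSparseMatrixGeneric<T>& A)
{
    A.dispatchOnBlocksize([](auto blockSizeTag) {
        constexpr int blockSize = decltype(blockSizeTag)::value;
        static_assert(blockSize >= 1
                      && blockSize <= Opm::gpuistl::GpuSparseMatrixGeneric<T>::max_block_size);
        // e.g. someBlockedKernel<T, blockSize>(...); // hypothetical kernel call
    });
}
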
const GpuVector< int > & getRowIndices() const
getRowIndices returns the row indices used to represent the BSR structure.
Definition: GpuSparseMatrixGeneric.hpp:173
virtual void usmv(T alpha, const GpuVector< T > &x, GpuVector< T > &y) const
usmv computes y = alpha * Ax + y
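
A hedged sketch of the three products declared above; x and y are assumed to already live on the GPU and to hold dim() scalar entries each, and the include path is an assumption.

#include <opm/simulators/linalg/gpuistl/GpuSparseMatrixGeneric.hpp> // assumed include path

void applyAll(const Opm::gpuistl::GpuSparseMatrixGeneric<double>& A,
              const Opm::gpuistl::GpuVector<double>& x,
              Opm::gpuistl::GpuVector<double>& y)
{
    A.mv(x, y);         // y  = A x   (y is overwritten)
    A.umv(x, y);        // y += A x
    A.usmv(0.5, x, y);  // y += 0.5 * A x
}
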
The CuSparseHandle class provides a singleton for the simulator's universal cuSparseHandle.
Definition: CuSparseHandle.hpp:41
auto makeSafeMatrixDescriptor()
Create RAII-managed cuSPARSE sparse matrix descriptor.
Definition: gpusparse_matrix_utilities.hpp:56
__host__ __device__ std::size_t to_size_t(int i)
to_size_t converts (on most relevant platforms) a 32-bit signed int to a 64-bit unsigned int
Definition: safe_conversion.hpp:86