opm-simulators
GpuSparseMatrixGeneric.hpp
1 /*
2  Copyright 2025 Equinor ASA
3 
4  This file is part of the Open Porous Media project (OPM).
5 
6  OPM is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OPM is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OPM. If not, see <http://www.gnu.org/licenses/>.
18 */
19 #ifndef OPM_GPUSPARSEMATRIXGENERIC_HPP
20 #define OPM_GPUSPARSEMATRIXGENERIC_HPP
21 
22 #include <opm/common/ErrorMacros.hpp>
23 #include <opm/simulators/linalg/gpuistl/detail/CuSparseHandle.hpp>
24 #include <opm/simulators/linalg/gpuistl/detail/cusparse_safe_call.hpp>
25 #include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_utilities.hpp>
26 #include <opm/simulators/linalg/gpuistl/detail/safe_conversion.hpp>
27 #include <opm/simulators/linalg/gpuistl/GpuBuffer.hpp>
28 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
29 
30 #include <cstddef>
31 #include <cusparse.h>
32 
33 namespace Opm::gpuistl
34 {
35 
46 template <typename T>
47 class GpuSparseMatrixGeneric
48 {
49 public:
50  using field_type = T;
51 
60  static constexpr int max_block_size = 6;
61 
71  GpuSparseMatrixGeneric(const T* nonZeroElements,
72  const int* rowIndices,
73  const int* columnIndices,
74  std::size_t numberOfNonzeroBlocks,
75  std::size_t blockSize,
76  std::size_t numberOfRows);
77 
84  GpuSparseMatrixGeneric(const GpuVector<int>& rowIndices, const GpuVector<int>& columnIndices, std::size_t blockSize);
85 
87 
93  void preprocessSpMV();
94 
95  // We want to have this as non-mutable as possible, that is we do not want
96  // to deal with changing matrix sizes and sparsity patterns.
97  GpuSparseMatrixGeneric& operator=(const GpuSparseMatrixGeneric&) = delete;
98 
99  virtual ~GpuSparseMatrixGeneric();
100 
110  template <class MatrixType>
111  static GpuSparseMatrixGeneric<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
112 
116  std::size_t N() const
117  {
118  return detail::to_size_t(m_numberOfRows);
119  }
120 
125  std::size_t nonzeroes() const
126  {
127  // Technically this safe conversion is not needed since we enforce these to be
128  // non-negative in the constructor, but keeping them for added sanity for now.
129  //
130  // We don't believe this will yield any performance penality (it's used too far away from the inner loop),
131  // but should that be false, they can be removed.
132  return detail::to_size_t(m_numberOfNonzeroBlocks);
133  }
134 
141  {
142  return m_nonZeroElements;
143  }
144 
151  {
152  return m_nonZeroElements;
153  }
154 
161  {
162  return m_rowIndices;
163  }
164 
171  {
172  return m_rowIndices;
173  }
174 
181  {
182  return m_columnIndices;
183  }
184 
191  {
192  return m_columnIndices;
193  }
194 
201  std::size_t dim() const
202  {
203  // Technically this safe conversion is not needed since we enforce these to be
204  // non-negative in the constructor, but keeping them for added sanity for now.
205  //
206  // We don't believe this will yield any performance penality (it's used too far away from the inner loop),
207  // but should that be false, they can be removed.
208  return detail::to_size_t(m_blockSize) * detail::to_size_t(m_numberOfRows);
209  }
210 
214  std::size_t blockSize() const
215  {
216  // Technically this safe conversion is not needed since we enforce these to be
217  // non-negative in the constructor, but keeping them for added sanity for now.
218  //
219  // We don't believe this will yield any performance penality (it's used too far away from the inner loop),
220  // but should that be false, they can be removed.
221  return detail::to_size_t(m_blockSize);
222  }
223 
229  virtual void mv(const GpuVector<T>& x, GpuVector<T>& y) const;
230 
236  virtual void umv(const GpuVector<T>& x, GpuVector<T>& y) const;
237 
238 
245  virtual void usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const;
246 
257  template <class MatrixType>
258  void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
259 
266 
270  void setToZero();
271 
292  template<class FunctionType>
293  auto dispatchOnBlocksize(FunctionType function) const
294  {
295  return dispatchOnBlocksizeImpl<max_block_size>(function);
296  }
297 
298 private:
299  GpuVector<T> m_nonZeroElements;
300  GpuVector<int> m_columnIndices;
301  GpuVector<int> m_rowIndices;
302 
303  // Notice that we store these three as int to make sure we are cusparse compatible.
304  //
305  // This gives the added benefit of checking the size constraints at construction of the matrix
306  // rather than in some call to cusparse.
307  const int m_numberOfNonzeroBlocks;
308  const int m_numberOfRows;
309  const int m_blockSize;
310 
311  // Generic API descriptors
312  decltype(detail::makeSafeMatrixDescriptor()) m_matrixDescriptor;
313  detail::CuSparseHandle& m_cusparseHandle;
314 
315  // Cached buffer for operations
316  mutable GpuBuffer<std::byte> m_buffer;
317 
318  // Helper methods for SpMV operations
319  void spMV(T alpha, const GpuVector<T>& x, T beta, GpuVector<T>& y) const;
320 
321  // Initialize matrix descriptor based on block size
322  void initializeMatrixDescriptor();
323 
324  template <class VectorType>
325  void assertSameSize(const VectorType& vector) const;
326 
327  // Helper to get cuSPARSE data type from C++ type
328  constexpr cudaDataType getDataType() const
329  {
330  if constexpr (std::is_same_v<T, float>) {
331  return CUDA_R_32F;
332  } else if constexpr (std::is_same_v<T, double>) {
333  return CUDA_R_64F;
334  } else {
335  static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>, "Only float and double are supported");
336  return CUDA_R_32F; // Unreachable, but needed to compile
337  }
338  }
339 
340  template<int blockSizeCompileTime, class FunctionType>
341  auto dispatchOnBlocksizeImpl(FunctionType function) const
342  {
343  if (blockSizeCompileTime == m_blockSize) {
344  return function(std::integral_constant<int, blockSizeCompileTime>());
345  }
346 
347  if constexpr (blockSizeCompileTime > 1) {
348  return dispatchOnBlocksizeImpl<blockSizeCompileTime - 1>(function);
349  } else {
350  OPM_THROW(std::runtime_error, fmt::format(fmt::runtime("Unsupported block size: {}"), m_blockSize));
351  }
352  }
353 };
354 } // namespace Opm::gpuistl
355 #endif
virtual void usmv(T alpha, const GpuVector< T > &x, GpuVector< T > &y) const
umv computes y=alpha * Ax + y
Definition: GpuSparseMatrixGeneric.cpp:314
void preprocessSpMV()
Preprocess SpMV operation to optimize for sparsity pattern.
Definition: GpuSparseMatrixGeneric.cpp:140
const GpuVector< T > & getNonZeroValues() const
getNonZeroValues returns the GPU vector containing the non-zero values (ordered by block) ...
Definition: GpuSparseMatrixGeneric.hpp:150
The GpuSparseMatrixGeneric class uses cuSPARSE Generic API for sparse matrix operations.
Definition: gpu_type_detection.hpp:34
GpuSparseMatrixGeneric(const T *nonZeroElements, const int *rowIndices, const int *columnIndices, std::size_t numberOfNonzeroBlocks, std::size_t blockSize, std::size_t numberOfRows)
Create the sparse matrix specified by the raw data.
std::size_t dim() const
dim returns the dimension of the vector space on which this matrix acts
Definition: GpuSparseMatrixGeneric.hpp:201
static GpuSparseMatrixGeneric< T > fromMatrix(const MatrixType &matrix, bool copyNonZeroElementsDirectly=false)
fromMatrix creates a new matrix with the same block size and values as the given matrix ...
Definition: GpuSparseMatrixGeneric.cpp:202
std::size_t blockSize() const
blockSize size of the blocks
Definition: GpuSparseMatrixGeneric.hpp:214
void updateNonzeroValues(const MatrixType &matrix, bool copyNonZeroElementsDirectly=false)
updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix ...
Definition: GpuSparseMatrixGeneric.cpp:231
const GpuVector< int > & getRowIndices() const
getRowIndices returns the row indices used to represent the BSR structure.
Definition: GpuSparseMatrixGeneric.hpp:170
void setToZero()
setToZero resets the matrix to zero values.
Definition: GpuSparseMatrixGeneric.cpp:258
__host__ __device__ std::size_t to_size_t(int i)
to_size_t converts a (on most relevant platforms) a 32 bit signed int to a 64 bits unsigned int ...
Definition: safe_conversion.hpp:97
virtual void umv(const GpuVector< T > &x, GpuVector< T > &y) const
umv computes y=Ax+y
Definition: GpuSparseMatrixGeneric.cpp:305
GpuVector< T > & getNonZeroValues()
getNonZeroValues returns the GPU vector containing the non-zero values (ordered by block) ...
Definition: GpuSparseMatrixGeneric.hpp:140
std::size_t N() const
N returns the number of rows (which is equal to the number of columns)
Definition: GpuSparseMatrixGeneric.hpp:116
std::size_t nonzeroes() const
nonzeroes behaves as the Dune::BCRSMatrix::nonzeros() function and returns the number of non zero blo...
Definition: GpuSparseMatrixGeneric.hpp:125
A small, fixed‑dimension MiniVector class backed by std::array that can be used in both host and CUD...
Definition: AmgxInterface.hpp:37
virtual void mv(const GpuVector< T > &x, GpuVector< T > &y) const
mv performs matrix vector multiply y = Ax
Definition: GpuSparseMatrixGeneric.cpp:296
GpuVector< int > & getRowIndices()
getRowIndices returns the row indices used to represent the BSR structure.
Definition: GpuSparseMatrixGeneric.hpp:160
GpuVector< int > & getColumnIndices()
getColumnIndices returns the column indices used to represent the BSR structure.
Definition: GpuSparseMatrixGeneric.hpp:180
const GpuVector< int > & getColumnIndices() const
getColumnIndices returns the column indices used to represent the BSR structure.
Definition: GpuSparseMatrixGeneric.hpp:190
static constexpr int max_block_size
Maximum block size supported by this implementation.
Definition: GpuSparseMatrixGeneric.hpp:60
auto dispatchOnBlocksize(FunctionType function) const
Dispatches a function based on the block size of the matrix.
Definition: GpuSparseMatrixGeneric.hpp:293
auto makeSafeMatrixDescriptor()
Create RAII-managed cuSPARSE sparse matrix descriptor.
Definition: gpusparse_matrix_utilities.hpp:56