gpuistl/hypreinterface/HypreSetup.hpp
Go to the documentation of this file.
1/*
2 Copyright 2025 Equinor ASA
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20#ifndef OPM_HYPRE_SETUP_GPU_HPP
21#define OPM_HYPRE_SETUP_GPU_HPP
22
26
27#include <dune/istl/owneroverlapcopy.hh>
28#include <dune/istl/paamg/graph.hh>
29#include <dune/istl/paamg/pinfo.hh>
30#include <dune/istl/repartition.hh>
31
32#if HAVE_CUDA
33#if USE_HIP
34#include <opm/simulators/linalg/gpuistl_hip/GpuSparseMatrixWrapper.hpp>
35#else
37#endif
38#endif // HAVE_CUDA
39
40#include <HYPRE.h>
41#include <HYPRE_parcsr_ls.h>
42#include <_hypre_utilities.h>
43
44#include <algorithm>
45#include <cstddef>
46
48{
49
50// GPU-specific helper functions
// Forward declaration of the GPU sparsity-pattern builder; the definition
// appears further down in this header.
// Takes the GPU matrix, the parallel index/ownership information and the
// owner_first flag, and returns a linalg::HypreInterface::SparsityPattern.
51template <typename T, bool ForceLegacy>
52linalg::HypreInterface::SparsityPattern
53setupSparsityPatternFromGpuMatrix(const GpuSparseMatrixWrapper<T, ForceLegacy>& gpu_matrix,
54 const linalg::HypreInterface::ParallelInfo& par_info,
55 bool owner_first);
56
// Forward declaration of the row-start index computation; the definition
// appears further down in this header.
// Takes the GPU matrix and the local-Dune-to-local-Hypre index mapping
// (negative entries are treated as non-owned rows by the definition) and
// returns one HYPRE_Int per owned row.
57template <typename T, bool ForceLegacy>
58std::vector<HYPRE_Int> computeRowIndexesWithMappingGpu(const GpuSparseMatrixWrapper<T, ForceLegacy>& gpu_matrix,
59 const std::vector<int>& local_dune_to_local_hypre);
60
69template <typename T, bool ForceLegacy>
70linalg::HypreInterface::SparsityPattern
73 bool owner_first)
74{
76
77 // Determine the size for cols array based on owner_first
78 if (owner_first) {
79 std::size_t cols_size = 0;
80 // For owner_first=true case, we need to calculate how many owned entries there are
81 auto host_row_ptrs = gpu_matrix.getRowIndices().asStdVector();
82 for (int rind = 0; rind < static_cast<int>(gpu_matrix.N()); ++rind) {
83 if (par_info.local_dune_to_local_hypre[rind] >= 0) {
84 const int row_start = host_row_ptrs[rind];
85 const int row_end = host_row_ptrs[rind + 1];
86 cols_size += (row_end - row_start);
87 }
88 }
89 pattern.nnz = cols_size;
90 } else {
91 // Full matrix space case: all entries (including ghost rows)
92 pattern.nnz = gpu_matrix.nonzeroes();
93 }
94
95 // Setup host arrays
96 pattern.ncols.resize(par_info.N_owned);
97 pattern.rows.resize(par_info.N_owned);
98 pattern.cols.resize(pattern.nnz);
99
100 // Get row pointers and column indices from GPU matrix (one-time host copy during setup)
101 auto host_row_ptrs = gpu_matrix.getRowIndices().asStdVector();
102 auto host_col_indices = gpu_matrix.getColumnIndices().asStdVector();
103
104 int pos = 0;
105 for (int rind = 0; rind < static_cast<int>(gpu_matrix.N()); ++rind) {
106 const int local_rowIdx = par_info.local_dune_to_local_hypre[rind];
107
108 // For owner_first=true: skip ghost rows entirely
109 // For owner_first=false: process all rows (owned + ghost)
110 if (owner_first && local_rowIdx < 0) {
111 continue;
112 }
113
114 const int row_start = host_row_ptrs[rind];
115 const int row_end = host_row_ptrs[rind + 1];
116 const int num_cols = row_end - row_start;
117
118 if (local_rowIdx >= 0) {
119 // This is an owned row - record its metadata
120 const int global_rowIdx = par_info.local_dune_to_global_hypre[rind];
121 pattern.rows[local_rowIdx] = global_rowIdx;
122 pattern.ncols[local_rowIdx] = num_cols;
123 }
124
125 // Add column indices for this row
126 for (int col_idx = row_start; col_idx < row_end; ++col_idx) {
127 const int colIdx = host_col_indices[col_idx];
128 const int global_colIdx = par_info.local_dune_to_global_hypre[colIdx];
129 assert(global_colIdx >= 0);
130 pattern.cols[pos++] = global_colIdx;
131 }
132 }
133
134 return pattern;
135}
136
148template <typename T, bool ForceLegacy>
149std::vector<HYPRE_Int>
151 const std::vector<int>& local_dune_to_local_hypre)
152{
153 const int N = std::count_if(
154 local_dune_to_local_hypre.begin(), local_dune_to_local_hypre.end(), [](int val) { return val >= 0; });
155 std::vector<HYPRE_Int> row_indexes(N);
156
157 // Use pre-computed BSR row pointers (already contain row starting positions)
158 auto host_row_ptrs = gpu_matrix.getRowIndices().asStdVector();
159
160 // Map each owned Hypre row to its starting position in the FULL (including ghost) GPU matrix
161 for (int dune_row_idx = 0; dune_row_idx < static_cast<int>(gpu_matrix.N()); ++dune_row_idx) {
162 const int hypre_row_idx = local_dune_to_local_hypre[dune_row_idx];
163
164 if (hypre_row_idx >= 0) {
165 // This is an owned row - record where its data starts in the FULL GPU matrix
166 // Use hypre_row_idx as index (maps to Hypre ordering up to N_owned)
167 row_indexes[hypre_row_idx] = host_row_ptrs[dune_row_idx];
168 }
169 // Non-owned rows create natural gaps in the indexing
170 }
171 return row_indexes;
172}
173
174} // namespace Opm::gpuistl::HypreInterface
175
176#endif // OPM_HYPRE_SETUP_GPU_HPP
The GpuSparseMatrixWrapper checks the CUDA/HIP version and dispatches a version either using the old or t...
Definition: GpuSparseMatrixWrapper.hpp:62
GpuVector< int > & getRowIndices()
getRowIndices returns the row indices used to represent the BSR structure.
Definition: GpuSparseMatrixWrapper.hpp:271
std::size_t N() const
N returns the number of rows (which is equal to the number of columns)
Definition: GpuSparseMatrixWrapper.hpp:232
std::size_t nonzeroes() const
nonzeroes behaves as the Dune::BCRSMatrix::nonzeros() function and returns the number of non zero blo...
Definition: GpuSparseMatrixWrapper.hpp:241
GpuVector< int > & getColumnIndices()
getColumnIndices returns the column indices used to represent the BSR structure.
Definition: GpuSparseMatrixWrapper.hpp:291
Definition: gpuistl/hypreinterface/HypreCpuTransfers.hpp:30
std::vector< HYPRE_Int > computeRowIndexesWithMappingGpu(const GpuSparseMatrixWrapper< T, ForceLegacy > &gpu_matrix, const std::vector< int > &local_dune_to_local_hypre)
Compute row indexes for GPU matrix with ownership mapping.
Definition: gpuistl/hypreinterface/HypreSetup.hpp:150
linalg::HypreInterface::SparsityPattern setupSparsityPatternFromGpuMatrix(const GpuSparseMatrixWrapper< T, ForceLegacy > &gpu_matrix, const linalg::HypreInterface::ParallelInfo &par_info, bool owner_first)
Setup sparsity pattern from GPU matrix (GpuSparseMatrix)
Definition: gpuistl/hypreinterface/HypreSetup.hpp:71
Parallel domain decomposition information for HYPRE-Dune interface.
Definition: HypreDataStructures.hpp:37
std::vector< int > local_dune_to_global_hypre
Mapping from local Dune indices to global HYPRE indices.
Definition: HypreDataStructures.hpp:51
std::vector< int > local_dune_to_local_hypre
Mapping from local Dune indices to local HYPRE indices.
Definition: HypreDataStructures.hpp:44
HYPRE_Int N_owned
Number of DOFs owned by this MPI process.
Definition: HypreDataStructures.hpp:62
Compressed Sparse Row (CSR) sparsity pattern for HYPRE matrix assembly.
Definition: HypreDataStructures.hpp:86
HYPRE_Int nnz
Number of non-zero entries in matrix.
Definition: HypreDataStructures.hpp:97
std::vector< HYPRE_BigInt > cols
Global column indices in CSR format (size: nnz)
Definition: HypreDataStructures.hpp:94
std::vector< HYPRE_Int > ncols
Non-zero entries per owned row (size: N_owned)
Definition: HypreDataStructures.hpp:88
std::vector< HYPRE_BigInt > rows
Global row indices for owned rows (size: N_owned)
Definition: HypreDataStructures.hpp:91