StandardPreconditioners_gpu_serial.hpp
Go to the documentation of this file.
1/*
2 Copyright 2025 Equinor ASA
3
4 This file is part of the Open Porous Media project (OPM).
5 OPM is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9 OPM is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License
14 along with OPM. If not, see <http://www.gnu.org/licenses/>.
15*/
16
17#ifndef OPM_STANDARDPRECONDITIONERS_GPU_SERIAL_HEADER
18#define OPM_STANDARDPRECONDITIONERS_GPU_SERIAL_HEADER
19
20
21#include <dune/istl/bcrsmatrix.hh>
22#include <type_traits>
23
24
25namespace Opm
26{
27
28
29template <class Operator>
31 Dune::Amg::SequentialInformation,
32 typename std::enable_if_t<Opm::is_gpu_operator_v<Operator>>>
33{
34
35 using O = Operator;
36 using C = Dune::Amg::SequentialInformation;
38 using M = typename F::Matrix;
39 using V = typename F::Vector;
40 using P = PropertyTree;
41 using PrecPtr = typename F::PrecPtr;
42
43 using field_type = typename V::field_type;
44
45 static constexpr int maxblocksize = 6;
46 static void add()
47 {
48 F::addCreator("ilu0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
49 const double w = prm.get<double>("relaxation", 1.0);
50 using GpuILU0 =
52 return std::make_shared<GpuILU0>(op.getmat(), w);
53 });
54
55 F::addCreator("jac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
56 const double w = prm.get<double>("relaxation", 1.0);
58 return std::make_shared<GPUJac>(op.getmat(), w);
59 });
60
61 // The next two (OPMILU0 and DILU) are the GPU preconditioners that need CPU matrices.
62 // Since we are not storing the block size compile time for the GPU matrices
63 // **and** we need the CPU matrix for the creation, we need to
64 // dispatch the creation of the preconditioner based on the block size.
65 //
66 // Note that this dispatching is not needed in the future, since we will have a constructor taking GPU matrices directly.
67 F::addCreator("opmilu0", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
68 return dispatchOnBlockSize<maxblocksize>(op, [&](const auto& cpuMatrix) {
69 const bool split_matrix = prm.get<bool>("split_matrix", true);
70 const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
71 const int mixed_precision_scheme = prm.get<int>("mixed_precision_scheme", 0);
72 using CPUMatrixType = std::remove_const_t<std::remove_reference_t<decltype(cpuMatrix)>>;
73 using GPUILU0 =
75
76 return std::make_shared<GPUILU0>(op.getmat(), cpuMatrix, split_matrix, tune_gpu_kernels, mixed_precision_scheme);
77 });
78 });
79
80 F::addCreator("dilu", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
81 return dispatchOnBlockSize<maxblocksize>(op, [&](const auto& cpuMatrix) {
82 const bool split_matrix = prm.get<bool>("split_matrix", true);
83 const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
84 const int mixed_precision_scheme = prm.get<int>("mixed_precision_scheme", 0);
85 const bool reorder = prm.get<bool>("reorder", true);
86 using CPUMatrixType = std::remove_const_t<std::remove_reference_t<decltype(cpuMatrix)>>;
87 using GPUDILU =
89 return std::make_shared<GPUDILU>(op.getmat(), cpuMatrix, split_matrix, tune_gpu_kernels, mixed_precision_scheme, reorder);
90 });
91 });
92
93 // Only add AMG preconditioners to the factory if the operator
94 // is an actual matrix operator.
95 if constexpr (std::is_same_v<O, Dune::MatrixAdapter<M, V, V>>) {
96#if HAVE_AMGX
97 // Only add AMGX for scalar matrices
98 F::addCreator("amgx", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
99 // Only create AMGX preconditioner for scalar matrices
100 if (op.getmat().blockSize() == 1) {
101 auto prm_copy = prm;
102 prm_copy.put("setup_frequency", Opm::Parameters::Get<Opm::Parameters::CprReuseInterval>());
103 return std::make_shared<Amgx::AmgxPreconditioner<M, V, V>>(op.getmat(), prm_copy);
104 } else {
105 OPM_THROW(std::logic_error, "AMGX preconditioner only works with scalar matrices (block size 1)");
106 }
107 });
108#endif
109
110#if HAVE_HYPRE && HYPRE_USING_CUDA || HYPRE_USING_HIP
111 // Only register Hypre preconditioner for double precision
112 if constexpr (std::is_same_v<HYPRE_Real, typename V::field_type>) {
113 F::addCreator("hypre", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
114 // Only create Hypre preconditioner for scalar matrices
115 if (op.getmat().blockSize() == 1) {
116 return std::make_shared<Hypre::HyprePreconditioner<M, V, V, Dune::Amg::SequentialInformation>>(op.getmat(), prm, Dune::Amg::SequentialInformation());
117 } else {
118 OPM_THROW(std::logic_error, "Hypre preconditioner only works with scalar matrices (block size 1).");
119 }
120 });
121 }
122#endif
123
124 }
125 }
126
127
128private:
129
137 template<class BlockType>
138 static Dune::BCRSMatrix<BlockType> makeCPUMatrix(const O& op) {
139 // TODO: Make this more efficient. Maybe we can simply copy the memory areas directly?
140 // Do note that this function is anyway going away when we have a GPU
141 // constructor for the preconditioners, so it is not a priority.
142 const auto& gpuMatrix = op.getmat();
143
144 const auto nonZeros = gpuMatrix.getNonZeroValues().asStdVector();
145 const auto rowIndices = gpuMatrix.getRowIndices().asStdVector();
146 const auto columnIndices = gpuMatrix.getColumnIndices().asStdVector();
147
148 const auto numberOfNonZeroes = gpuMatrix.nonzeroes();
149 const auto N = gpuMatrix.N();
150
151 Dune::BCRSMatrix<BlockType> matrix(N, N, numberOfNonZeroes, Dune::BCRSMatrix<BlockType>::row_wise);
152 for (auto row = matrix.createbegin(); row != matrix.createend(); ++row) {
153
154 for (auto j = rowIndices[row.index()]; j != rowIndices[row.index() + 1]; ++j) {
155 const auto columnIndex = columnIndices[j];
156 row.insert(columnIndex);
157 }
158 }
159
160 for (std::size_t i = 0; i < N; ++i) {
161 for (auto j = rowIndices[i]; j != rowIndices[i + 1]; ++j) {
162 const auto columnIndex = columnIndices[j];
163 // Now it gets a bit tricky, first we need to fetch the block matrix
164 BlockType blockMatrix;
165 constexpr static auto rows = BlockType::rows;
166
167 for (std::size_t k = 0; k < rows; ++k) {
168 for (std::size_t l = 0; l < rows; ++l) {
169 blockMatrix[k][l] = nonZeros[j * rows * rows + k * rows + l];
170 }
171 }
172 matrix[i][columnIndex] = blockMatrix;
173 }
174 }
175
176 return matrix;
177 }
178
188 template<int blocksizeCompileTime, class CreateType>
189 static PrecPtr dispatchOnBlockSize(const O& op, CreateType create) {
190 if (op.getmat().blockSize() == blocksizeCompileTime) {
191 const auto cpuMatrix = makeCPUMatrix<Dune::FieldMatrix<field_type, blocksizeCompileTime, blocksizeCompileTime>>(op);
192 return create(cpuMatrix);
193 }
194 if constexpr (blocksizeCompileTime > 1) {
195 return dispatchOnBlockSize<blocksizeCompileTime - 1>(op, create);
196 }
197 else {
198 throw std::runtime_error(fmt::format("Unsupported block size: {}. Max blocksize supported is {}.", op.getmat().blockSize(), maxblocksize));
199 }
200 }
201
202};
203
204
205} // namespace Opm
206
207#endif // OPM_STANDARDPRECONDITIONERS_GPU_SERIAL_HEADER
Definition: PreconditionerFactory.hpp:64
typename Operator::domain_type Vector
Definition: PreconditionerFactory.hpp:68
std::shared_ptr< Dune::PreconditionerWithUpdate< Vector, Vector > > PrecPtr
The type of pointer returned by create().
Definition: PreconditionerFactory.hpp:71
typename Operator::matrix_type Matrix
Linear algebra types.
Definition: PreconditionerFactory.hpp:67
Hierarchical collection of key/value pairs.
Definition: PropertyTree.hpp:39
T get(const std::string &key) const
DILU preconditioner on the GPU.
Definition: GpuDILU.hpp:53
Jacobi preconditioner on the GPU.
Definition: GpuJac.hpp:47
Sequential ILU0 preconditioner on the GPU through the CuSparse library.
Definition: GpuSeqILU0.hpp:52
ILU0 preconditioner on the GPU.
Definition: OpmGpuILU0.hpp:51
Definition: fvbaseprimaryvariables.hh:141
Definition: blackoilboundaryratevector.hh:39
Definition: StandardPreconditioners_mpi.hpp:128