StandardPreconditioners_gpu_serial.hpp
Go to the documentation of this file.
1/*
2 Copyright 2025 Equinor ASA
3
4 This file is part of the Open Porous Media project (OPM).
5 OPM is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9 OPM is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License
14 along with OPM. If not, see <http://www.gnu.org/licenses/>.
15*/
16
17#ifndef OPM_STANDARDPRECONDITIONERS_GPU_SERIAL_HEADER
18#define OPM_STANDARDPRECONDITIONERS_GPU_SERIAL_HEADER
19
20
21#include <dune/istl/bcrsmatrix.hh>
22#include <type_traits>
23
24
25namespace Opm
26{
27
28
29template <class Operator>
31 Dune::Amg::SequentialInformation,
32 typename std::enable_if_t<Opm::is_gpu_operator_v<Operator>>>
33{
34
35 using O = Operator;
36 using C = Dune::Amg::SequentialInformation;
38 using M = typename F::Matrix;
39 using V = typename F::Vector;
40 using P = PropertyTree;
41 using PrecPtr = typename F::PrecPtr;
42
43 using field_type = typename V::field_type;
44
45 static constexpr int maxblocksize = 6;
46 static void add()
47 {
48 F::addCreator("ILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
49 const double w = prm.get<double>("relaxation", 1.0);
50 using GpuILU0 =
51 typename gpuistl::GpuSeqILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
52 return std::make_shared<GpuILU0>(op.getmat(), w);
53 });
54
55 F::addCreator("JAC", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
56 const double w = prm.get<double>("relaxation", 1.0);
57 using GPUJac = typename gpuistl::GpuJac<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
58 return std::make_shared<GPUJac>(op.getmat(), w);
59 });
60
61 // The next two (OPMILU0 and DILU) are the GPU preconditioners that need CPU matrices.
62 // Since we are not storing the block size compile time for the GPU matrices
63 // **and** we need the CPU matrix for the creation, we need to
64 // dispatch the creation of the preconditioner based on the block size.
65 //
66 // Note that this dispatching is not needed in the future, since we will have a constructor taking GPU matrices directly.
67 F::addCreator("OPMILU0", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
68 return dispatchOnBlockSize<maxblocksize>(op, [&](const auto& cpuMatrix) {
69 const bool split_matrix = prm.get<bool>("split_matrix", true);
70 const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
71 const int mixed_precision_scheme = prm.get<int>("mixed_precision_scheme", 0);
72 using CPUMatrixType = std::remove_const_t<std::remove_reference_t<decltype(cpuMatrix)>>;
73 using GPUILU0 =
74 typename gpuistl::OpmGpuILU0<CPUMatrixType, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
75
76 return std::make_shared<GPUILU0>(op.getmat(), cpuMatrix, split_matrix, tune_gpu_kernels, mixed_precision_scheme);
77 });
78 });
79
80 F::addCreator("DILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
81 return dispatchOnBlockSize<maxblocksize>(op, [&](const auto& cpuMatrix) {
82 const bool split_matrix = prm.get<bool>("split_matrix", true);
83 const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
84 const int mixed_precision_scheme = prm.get<int>("mixed_precision_scheme", 0);
85 using CPUMatrixType = std::remove_const_t<std::remove_reference_t<decltype(cpuMatrix)>>;
86 using GPUDILU =
87 typename gpuistl::GpuDILU<CPUMatrixType, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
88 return std::make_shared<GPUDILU>(op.getmat(), cpuMatrix, split_matrix, tune_gpu_kernels, mixed_precision_scheme);
89 });
90 });
91 }
92
93private:
94
102 template<class BlockType>
103 static Dune::BCRSMatrix<BlockType> makeCPUMatrix(const O& op) {
104 // TODO: Make this more efficient. Maybe we can simply copy the memory areas directly?
105 // Do note that this function is anyway going away when we have a GPU
106 // constructor for the preconditioners, so it is not a priority.
107 const auto& gpuMatrix = op.getmat();
108
109 const auto nonZeros = gpuMatrix.getNonZeroValues().asStdVector();
110 const auto rowIndices = gpuMatrix.getRowIndices().asStdVector();
111 const auto columnIndices = gpuMatrix.getColumnIndices().asStdVector();
112
113 const auto numberOfNonZeroes = gpuMatrix.nonzeroes();
114 const auto N = gpuMatrix.N();
115
116 Dune::BCRSMatrix<BlockType> matrix(N, N, numberOfNonZeroes, Dune::BCRSMatrix<BlockType>::row_wise);
117 for (auto row = matrix.createbegin(); row != matrix.createend(); ++row) {
118
119 for (auto j = rowIndices[row.index()]; j != rowIndices[row.index() + 1]; ++j) {
120 const auto columnIndex = columnIndices[j];
121 row.insert(columnIndex);
122 }
123 }
124
125 for (std::size_t i = 0; i < N; ++i) {
126 for (auto j = rowIndices[i]; j != rowIndices[i + 1]; ++j) {
127 const auto columnIndex = columnIndices[j];
128 // Now it gets a bit tricky, first we need to fetch the block matrix
129 BlockType blockMatrix;
130 constexpr static auto rows = BlockType::rows;
131
132 for (std::size_t k = 0; k < rows; ++k) {
133 for (std::size_t l = 0; l < rows; ++l) {
134 blockMatrix[k][l] = nonZeros[j * rows * rows + k * rows + l];
135 }
136 }
137 matrix[i][columnIndex] = blockMatrix;
138 }
139 }
140
141 return matrix;
142 }
143
153 template<int blocksizeCompileTime, class CreateType>
154 static PrecPtr dispatchOnBlockSize(const O& op, CreateType create) {
155 if (op.getmat().blockSize() == blocksizeCompileTime) {
156 const auto cpuMatrix = makeCPUMatrix<Dune::FieldMatrix<field_type, blocksizeCompileTime, blocksizeCompileTime>>(op);
157 return create(cpuMatrix);
158 }
159 if constexpr (blocksizeCompileTime > 1) {
160 return dispatchOnBlockSize<blocksizeCompileTime - 1>(op, create);
161 }
162 else {
163 throw std::runtime_error(fmt::format("Unsupported block size: {}. Max blocksize supported is {}.", op.getmat().blockSize(), maxblocksize));
164 }
165 }
166
167};
168
169
170} // namespace Opm
171
172#endif // OPM_STANDARDPRECONDITIONERS_GPU_SERIAL_HEADER
Definition: PreconditionerFactory.hpp:64
typename Operator::domain_type Vector
Definition: PreconditionerFactory.hpp:68
std::shared_ptr< Dune::PreconditionerWithUpdate< Vector, Vector > > PrecPtr
The type of pointer returned by create().
Definition: PreconditionerFactory.hpp:71
typename Operator::matrix_type Matrix
Linear algebra types.
Definition: PreconditionerFactory.hpp:67
Hierarchical collection of key/value pairs.
Definition: PropertyTree.hpp:39
T get(const std::string &key) const
DILU preconditioner on the GPU.
Definition: GpuDILU.hpp:50
Jacobi preconditioner on the GPU.
Definition: GpuJac.hpp:47
Sequential ILU0 preconditioner on the GPU through the CuSparse library.
Definition: GpuSeqILU0.hpp:52
ILU0 preconditioner on the GPU.
Definition: OpmGpuILU0.hpp:51
Definition: fvbaseprimaryvariables.hh:141
Definition: blackoilboundaryratevector.hh:39
Definition: StandardPreconditioners_mpi.hpp:128