GpuDILU.hpp
Go to the documentation of this file.
1/*
2 Copyright 2022-2023 SINTEF AS
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef OPM_GPUDILU_HPP
20#define OPM_GPUDILU_HPP
21
22#include <map>
23#include <memory>
24#include <opm/grid/utility/SparseTable.hpp>
32#include <utility>
33#include <vector>
34
35
36namespace Opm::gpuistl
37{
51template <class CPUMatrixT, class X, class Y, int l = 1>
53{
54public:
56 using domain_type = X;
58 using range_type = Y;
60 using field_type = typename X::field_type;
61
65
68
69
73 explicit GpuDILU(const GPUMatrix& gpuMatrix,
74 const CPUMatrixT& cpuMatrix,
75 bool splitMatrix,
76 bool tuneKernels,
77 int mixedPrecisionScheme,
78 bool reorder);
79
82 void pre(X& x, Y& b) override;
83
85 void apply(X& v, const Y& d) override;
86
89 void post(X& x) override;
90
92 Dune::SolverCategory::Category category() const override;
93
95 void update() final;
96
98 void reorderAndSplitMatrix(int moveThreadBlockSize);
99
101 void computeDiagonal(int factorizationThreadBlockSize);
102
105
106
108 static constexpr bool shouldCallPre()
109 {
110 return false;
111 }
112
114 static constexpr bool shouldCallPost()
115 {
116 return false;
117 }
118
119 virtual bool hasPerfectUpdate() const override
120 {
121 return true;
122 }
123
124
125private:
127 void apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int upperSolveThreadBlockSize);
129 void update(int moveThreadBlockSize, int factorizationThreadBlockSize);
131 static constexpr const size_t blocksize_ = CPUMatrixT::block_type::cols;
133 Opm::SparseTable<size_t> m_levelSets;
135 std::vector<int> m_reorderedToNatural;
137 std::vector<int> m_naturalToReordered;
139 const GPUMatrix& m_gpuMatrix;
141 std::unique_ptr<GPUMatrix> m_gpuMatrixReordered;
143 std::unique_ptr<GPUMatrix> m_gpuMatrixReorderedLower;
144 std::unique_ptr<GPUMatrix> m_gpuMatrixReorderedUpper;
146 std::unique_ptr<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
148 std::unique_ptr<FloatMat> m_gpuMatrixReorderedLowerFloat;
149 std::unique_ptr<FloatMat> m_gpuMatrixReorderedUpperFloat;
150 std::unique_ptr<FloatVec> m_gpuMatrixReorderedDiagFloat;
151 std::unique_ptr<FloatVec> m_gpuDInvFloat;
153 GpuVector<int> m_gpuNaturalToReorder;
155 GpuVector<int> m_gpuReorderToNatural;
157 GpuBuffer<size_t> m_gpuLevelSets; // TODO: make this an optional to not spend time on the copy if not needed
159 GpuVector<field_type> m_gpuDInv;
161 bool m_splitMatrix;
163 bool m_tuneThreadBlockSizes;
165 const MatrixStorageMPScheme m_mixedPrecisionScheme;
168 int m_upperSolveThreadBlockSize = -1;
169 int m_lowerSolveThreadBlockSize = -1;
170 int m_moveThreadBlockSize = -1;
171 int m_DILUFactorizationThreadBlockSize = -1;
173 bool m_reorder;
174
176 GpuBuffer<size_t> m_diagIdxs;
177
178 // Graphs for Apply
179 std::map<std::pair<field_type*, const field_type*>, GPUGraph> m_apply_graphs;
180 std::map<std::pair<field_type*, const field_type*>, GPUGraphExec> m_executableGraphs;
181
182 // Stream for the DILU operations on the GPU
183 GPUStream m_stream {};
184 // Events for synchronization with main stream
185 GPUEvent m_before {};
186 GPUEvent m_after {};
187};
188} // end namespace Opm::gpuistl
189
190#endif
Interface class adding the update() method to the preconditioner interface.
Definition: PreconditionerWithUpdate.hpp:32
DILU preconditioner on the GPU.
Definition: GpuDILU.hpp:53
void pre(X &x, Y &b) override
Prepare the preconditioner.
void tuneThreadBlockSizes()
function that will experimentally tune the thread block sizes of the important cuda kernels
void update() final
Updates the matrix data.
void apply(X &v, const Y &d) override
Apply the preconditoner.
GpuDILU(const GPUMatrix &gpuMatrix, const CPUMatrixT &cpuMatrix, bool splitMatrix, bool tuneKernels, int mixedPrecisionScheme, bool reorder)
Constructor.
virtual bool hasPerfectUpdate() const override
Definition: GpuDILU.hpp:119
Y range_type
The range type of the preconditioner.
Definition: GpuDILU.hpp:58
static constexpr bool shouldCallPre()
Definition: GpuDILU.hpp:108
Dune::SolverCategory::Category category() const override
Category of the preconditioner (see SolverCategory::Category)
void computeDiagonal(int factorizationThreadBlockSize)
Compute the diagonal of the DILU, and update the data of the reordered matrix.
static constexpr bool shouldCallPost()
Definition: GpuDILU.hpp:114
X domain_type
The domain type of the preconditioner.
Definition: GpuDILU.hpp:56
void reorderAndSplitMatrix(int moveThreadBlockSize)
perform matrix splitting and reordering
typename X::field_type field_type
The field type of the preconditioner.
Definition: GpuDILU.hpp:60
void post(X &x) override
Post processing.
GpuSparseMatrix< field_type > GPUMatrix
Definition: GpuDILU.hpp:62
Definition: gpu_type_detection.hpp:30
Definition: AmgxInterface.hpp:38
MatrixStorageMPScheme
Definition: kernel_enums.hpp:31