opm-simulators
OpmGpuILU0.hpp
1 /*
2  Copyright 2024 SINTEF AS
3 
4  This file is part of the Open Porous Media project (OPM).
5 
6  OPM is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OPM is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OPM. If not, see <http://www.gnu.org/licenses/>.
18 */
19 #ifndef OPM_GPUILU0_OPM_Impl_HPP
20 #define OPM_GPUILU0_OPM_Impl_HPP
21 
22 #include <memory>
23 #include <opm/grid/utility/SparseTable.hpp>
24 #include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
25 #include <opm/simulators/linalg/gpuistl/GpuSparseMatrixWrapper.hpp>
26 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
27 #include <opm/simulators/linalg/gpuistl/gpu_resources.hpp>
28 #include <opm/simulators/linalg/gpuistl/detail/kernel_enums.hpp>
29 #include <optional>
30 #include <type_traits>
31 #include <vector>
32 
33 
34 namespace Opm::gpuistl
35 {
// ILU0 preconditioner on the GPU.
//
// Template parameters, as used by the visible declarations:
//   CPUMatrixT - type of the CPU matrix handed to the constructor; its
//                block_type::cols provides blocksize_.
//   X          - domain type (also supplies field_type).
//   Y          - range type.
//   l          - defaults to 1; not referenced in the visible part of the class.
//
// NOTE(review): listing line 50 (the class head naming the class and its base)
// is missing between the template line and the opening brace, and so are the
// alias declarations for GpuMatrix and FloatMat used below - confirm against
// the original header.
49 template <class CPUMatrixT, class X, class Y, int l = 1>
51 {
52 public:
// The domain type of the preconditioner.
54  using domain_type = X;
// The range type of the preconditioner.
56  using range_type = Y;
// The field type of the preconditioner.
58  using field_type = typename X::field_type;
63 
66 
67 
// Constructor.
// Takes the GPU matrix (by const reference) and the CPU matrix, two flags
// (splitMatrix, tuneKernels) and an integer mixed precision scheme.
// NOTE(review): presumably these feed m_gpuMatrix, m_splitMatrix,
// m_tuneThreadBlockSizes and m_mixedPrecisionScheme - confirm in the .cpp.
77  explicit OpmGpuILU0(const GpuMatrix& gpuMatrix,
78  const CPUMatrixT& cpuMatrix,
79  bool splitMatrix,
80  bool tuneKernels,
81  int mixedPrecisionScheme);
82 
// Prepare the preconditioner.
85  void pre(X& x, Y& b) override;
86 
// Apply the preconditioner.
88  void apply(X& v, const Y& d) override;
89 
// Post processing.
92  void post(X& x) override;
93 
// Category of the preconditioner (see SolverCategory::Category).
95  Dune::SolverCategory::Category category() const override;
96 
// Updates the matrix data. Declared final, so it cannot be overridden further.
98  void update() final;
99 
// Perform matrix splitting and reordering, using the given thread block size.
101  void reorderAndSplitMatrix(int moveThreadBlockSize);
102 
// Compute LU factorization, and update the data of the reordered matrix.
104  void LUFactorizeMatrix(int factorizationThreadBlockSize);
105 
// Experimentally tune the thread block sizes of the important kernels.
107  void tuneThreadBlockSizes();
108 
// Compile-time flag: always false.
// NOTE(review): presumably tells callers that pre() can be skipped - confirm.
110  static constexpr bool shouldCallPre()
111  {
112  return false;
113  }
114 
// Compile-time flag: always false.
// NOTE(review): presumably tells callers that post() can be skipped - confirm.
116  static constexpr bool shouldCallPost()
117  {
118  return false;
119  }
120 
// Always reports true.
121  virtual bool hasPerfectUpdate() const override {
122  return true;
123  }
124 
125 
126 private:
// Private overload of apply() that additionally takes the thread block sizes
// for the lower and upper solves.
128  void apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int upperSolveThreadBlockSize);
// Private overload of update() that additionally takes the thread block sizes
// for the move and factorization steps.
130  void update(int moveThreadBlockSize, int factorizationThreadBlockSize);
131 
// Number of columns of a matrix block, taken from the CPU matrix type.
133  static constexpr const size_t blocksize_ = CPUMatrixT::block_type::cols;
// NOTE(review): presumably the level sets used to order the rows - confirm.
135  Opm::SparseTable<size_t> m_levelSets;
// Index maps between the reordered and the natural ordering (host side).
137  std::vector<int> m_reorderedToNatural;
139  std::vector<int> m_naturalToReordered;
// Non-owning reference to the GPU matrix; it must outlive this object.
141  const GpuMatrix& m_gpuMatrix;
// Owned matrices; which of them are populated is not visible from this header.
// NOTE(review): presumably depends on m_splitMatrix and the mixed precision
// scheme - confirm in the .cpp.
142  std::unique_ptr<GpuMatrix> m_gpuReorderedLU;
144  std::unique_ptr<GpuMatrix> m_gpuMatrixReorderedLower;
145  std::unique_ptr<GpuMatrix> m_gpuMatrixReorderedUpper;
147  std::unique_ptr<FloatMat> m_gpuMatrixReorderedLowerFloat;
148  std::unique_ptr<FloatMat> m_gpuMatrixReorderedUpperFloat;
149  std::optional<GpuVector<float>> m_gpuMatrixReorderedDiagFloat;
151  std::optional<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
// GpuVector counterparts of the two index maps above.
153  GpuVector<int> m_gpuNaturalToReorder;
155  GpuVector<int> m_gpuReorderToNatural;
// NOTE(review): presumably the inverted diagonal blocks - confirm.
157  GpuVector<field_type> m_gpuDInv;
159  bool m_splitMatrix;
161  bool m_tuneThreadBlockSizes;
163  const MatrixStorageMPScheme m_mixedPrecisionScheme;
// Thread block sizes, initialised to -1.
// NOTE(review): -1 presumably means "not tuned / use a default" - confirm.
166  int m_upperSolveThreadBlockSize = -1;
167  int m_lowerSolveThreadBlockSize = -1;
168  int m_moveThreadBlockSize = -1;
169  int m_ILU0FactorizationThreadBlockSize = -1;
170 
// NOTE(review): std::map and std::pair are used below, but <map> and <utility>
// are not among the includes visible in this header.
171  // Graphs for Apply, keyed by a (field_type*, const field_type*) pointer pair
172  std::map<std::pair<field_type*, const field_type*>, GPUGraph> m_apply_graphs;
173  std::map<std::pair<field_type*, const field_type*>, GPUGraphExec> m_executableGraphs;
174 
// NOTE(review): the comment below originally said "DILU"; this class is the
// ILU0 preconditioner, so the wording was presumably copied from elsewhere.
175  // Stream for the ILU0 operations on the GPU
176  GPUStream m_stream{};
177  // Events for synchronization with main stream
178  GPUEvent m_before{};
179  GPUEvent m_after{};
180 };
181 } // end namespace Opm::gpuistl
182 
183 #endif
void apply(X &v, const Y &d) override
Apply the preconditioner.
Definition: OpmGpuILU0.cpp:113
Y range_type
The range type of the preconditioner.
Definition: OpmGpuILU0.hpp:56
GpuSparseMatrixWrapper< field_type > GpuMatrix
The GPU matrix type.
Definition: OpmGpuILU0.hpp:60
void post(X &x) override
Post processing.
Definition: OpmGpuILU0.cpp:255
X domain_type
The domain type of the preconditioner.
Definition: OpmGpuILU0.hpp:54
void reorderAndSplitMatrix(int moveThreadBlockSize)
Perform matrix splitting and reordering.
Definition: OpmGpuILU0.cpp:295
static constexpr bool shouldCallPre()
Definition: OpmGpuILU0.hpp:110
ILU0 preconditioner on the GPU.
Definition: OpmGpuILU0.hpp:50
static constexpr bool shouldCallPost()
Definition: OpmGpuILU0.hpp:116
OpmGpuILU0(const GpuMatrix &gpuMatrix, const CPUMatrixT &cpuMatrix, bool splitMatrix, bool tuneKernels, int mixedPrecisionScheme)
Constructor.
Definition: OpmGpuILU0.cpp:42
void LUFactorizeMatrix(int factorizationThreadBlockSize)
Compute LU factorization, and update the data of the reordered matrix.
Definition: OpmGpuILU0.cpp:323
void pre(X &x, Y &b) override
Prepare the preconditioner.
Definition: OpmGpuILU0.cpp:107
Interface class adding the update() method to the preconditioner interface.
Definition: PreconditionerWithUpdate.hpp:33
A small, fixed‑dimension MiniVector class backed by std::array that can be used in both host and CUD...
Definition: AmgxInterface.hpp:37
typename X::field_type field_type
The field type of the preconditioner.
Definition: OpmGpuILU0.hpp:58
void update() final
Updates the matrix data.
Definition: OpmGpuILU0.cpp:268
void tuneThreadBlockSizes()
Function that will experimentally tune the thread block sizes of the important CUDA kernels ...
Definition: OpmGpuILU0.cpp:401
Dune::SolverCategory::Category category() const override
Category of the preconditioner (see SolverCategory::Category)
Definition: OpmGpuILU0.cpp:261