opm-simulators
openclBILU0.hpp
1 /*
2  Copyright 2019 Equinor ASA
3 
4  This file is part of the Open Porous Media project (OPM).
5 
6  OPM is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OPM is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OPM. If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #ifndef OPM_OPENCLBILU0_HPP
21 #define OPM_OPENCLBILU0_HPP
22 
23 #include <opm/simulators/linalg/gpubridge/BlockedMatrix.hpp>
24 
25 #include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
26 #include <opm/simulators/linalg/gpubridge/opencl/openclPreconditioner.hpp>
27 #include <opm/simulators/linalg/gpubridge/opencl/ChowPatelIlu.hpp>
28 
29 #include <memory>
30 #include <mutex>
31 
32 namespace Opm::Accelerator {
33 
37 template<class Scalar, unsigned int block_size>
38 class openclBILU0 : public openclPreconditioner<Scalar,block_size>
39 {
41 
42  using Base::N;
43  using Base::Nb;
44  using Base::nnz;
45  using Base::nnzb;
46  using Base::verbosity;
47  using Base::context;
48  using Base::queue;
49  using Base::events;
50  using Base::err;
51 
52 private:
53  std::unique_ptr<BlockedMatrix<Scalar>> LUmat{};
54 #if CHOW_PATEL
55  std::unique_ptr<BlockedMatrix<Scalar>> Lmat{}, Umat{};
56 #endif
57  std::vector<Scalar> invDiagVals;
58  std::vector<int> diagIndex;
59  std::vector<int> rowsPerColor; // color i contains rowsPerColor[i] rows, which are processed in parallel
60  std::vector<int> rowsPerColorPrefix; // the prefix sum of rowsPerColor
61  std::vector<int> toOrder, fromOrder;
62  int numColors;
63  std::once_flag pattern_uploaded;
64 
65  bool opencl_ilu_parallel;
66 
67  struct GPU_storage {
68  cl::Buffer invDiagVals; // nnz values of diagonal blocks of the matrix, inverted
69  cl::Buffer diagIndex; // index of diagonal block of each row, used to differentiate between lower and upper triangular part
70  cl::Buffer rowsPerColor; // number of rows for every color
71  cl::Buffer rowIndices; // mapping every row to another index
72  // after mapping, all rows that are processed in parallel are contiguous
73  // equal to the contents of fromOrder
74 #if CHOW_PATEL
75  cl::Buffer Lvals, Lcols, Lrows;
76  cl::Buffer Uvals, Ucols, Urows;
77 #else
78  cl::Buffer LUvals, LUcols, LUrows;
79 #endif
80  };
81 
82  GPU_storage s;
83 
84 #if CHOW_PATEL
85  ChowPatelIlu<block_size> chowPatelIlu;
86 #endif
87 
88 public:
89 
90  openclBILU0(bool opencl_ilu_parallel, int verbosity);
91 
92  // analysis, extract parallelism if specified
93  bool analyze_matrix(BlockedMatrix<Scalar>* mat) override;
94  bool analyze_matrix(BlockedMatrix<Scalar>* mat,
95  BlockedMatrix<Scalar>* jacMat) override;
96 
97  // ilu_decomposition
98  bool create_preconditioner(BlockedMatrix<Scalar>* mat) override;
99  bool create_preconditioner(BlockedMatrix<Scalar>* mat,
100  BlockedMatrix<Scalar>* jacMat) override;
101 
102  // apply preconditioner, x = prec(y)
103  // via Lz = y
104  // and Ux = z
105  void apply(const cl::Buffer& y,
106  cl::Buffer& x,
107  WellContributions<Scalar>& wellContribs) override;
108 
109  std::tuple<std::vector<int>, std::vector<int>, std::vector<int>>
110  get_preconditioner_structure()
111  {
112  return {{LUmat->rowPointers, LUmat->rowPointers + (Nb + 1)},
113  {LUmat->colIndices, LUmat->colIndices + nnzb}, diagIndex};
114  }
115 
116  std::pair<cl::Buffer, cl::Buffer> get_preconditioner_data()
117  {
118 #if CHOW_PATEL
119  return std::make_pair(s.Lvals, s.invDiagVals); // send dummy, BISAI is disabled when ChowPatel is selected
120 #else
121  return std::make_pair(s.LUvals, s.invDiagVals);
122 #endif
123  }
124 };
125 
126 } // namespace Opm::Accelerator
127 
128 #endif
Definition: openclPreconditioner.hpp:31
Definition: amgclSolverBackend.cpp:49
This class implements a Blocked ILU0 preconditioner. The decomposition is done on GPU, using exact decomposition, or ChowPatel decomposition. The preconditioner is applied via two exact triangular solves.
Definition: openclBILU0.hpp:38
This class serves to eliminate the need to include the WellContributions into the matrix (with –matr...
Definition: GpuBridge.hpp:30
This struct resembles a blocked csr matrix, like Dune::BCRSMatrix.
Definition: BlockedMatrix.hpp:28