opm-simulators
DILU.hpp
1 
2 /*
3  Copyright 2022-2023 SINTEF AS
4  This file is part of the Open Porous Media project (OPM).
5  OPM is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9  OPM is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13  You should have received a copy of the GNU General Public License
14  along with OPM. If not, see <http://www.gnu.org/licenses/>.
15 */
16 
17 #ifndef OPM_DILU_HEADER_INCLUDED
18 #define OPM_DILU_HEADER_INCLUDED
19 
20 #include <opm/common/ErrorMacros.hpp>
21 #include <opm/common/TimingMacros.hpp>
22 #include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
23 
24 #include <dune/common/fmatrix.hh>
25 #include <dune/common/unused.hh>
26 #include <dune/common/version.hh>
27 #include <dune/istl/bcrsmatrix.hh>
28 
29 #include <opm/simulators/linalg/GraphColoring.hpp>
30 
31 #include <cstddef>
32 #include <optional>
33 #include <vector>
34 
35 #if HAVE_OPENMP
36 #include <omp.h>
37 #endif
38 
39 // TODO: rewrite factory and constructor to keep track of a number of threads variable
40 namespace Dune
41 {
42 
51 template <class M, class X, class Y>
53 {
54 public:
56  using matrix_type = M;
58  using domain_type = X;
60  using range_type = Y;
62  using field_type = typename X::field_type;
64 
68  explicit MultithreadDILU(const M& A)
69  : A_(A)
70  {
71  OPM_TIMEBLOCK(prec_construct);
72  // TODO: rewrite so this value is set by an argument to the constructor
73 #if HAVE_OPENMP
74  use_multithreading = omp_get_max_threads() > 1;
75 #endif
76 
77  if (use_multithreading) {
78  A_reordered_.emplace(A_.N(), A_.N(), A_.nonzeroes(), M::row_wise);
79 
82  level_sets_ = Opm::getMatrixRowColoring(A_, Opm::ColoringType::LOWER);
83  reordered_to_natural_ = std::vector<std::size_t>(A_.N());
84  natural_to_reorder_ = std::vector<std::size_t>(A_.N());
85  int globCnt = 0;
86  for (const auto& level_set : level_sets_) {
87  for (const auto j : level_set) {
88  reordered_to_natural_[globCnt] = j;
89  natural_to_reorder_[j] = globCnt++;
90  }
91  }
92 
93  for (auto dst_row_it = A_reordered_->createbegin(); dst_row_it != A_reordered_->createend(); ++dst_row_it) {
94  auto src_row = A_.begin() + reordered_to_natural_[dst_row_it.index()];
95  // For elements in A
96  for (auto elem = src_row->begin(); elem != src_row->end(); elem++) {
97  dst_row_it.insert(elem.index());
98  }
99  }
100  }
101 
102  Dinv_.resize(A_.N());
103  update();
104  }
105 
110  void update() override
111  {
112  OPM_TIMEBLOCK(prec_update);
113  if (use_multithreading) {
114  parallelUpdate();
115  } else {
116  serialUpdate();
117  }
118  }
119 
124  void pre(X& v, Y& d) override
125  {
126  DUNE_UNUSED_PARAMETER(v);
127  DUNE_UNUSED_PARAMETER(d);
128  }
129 
130 
135  void apply(X& v, const Y& d) override
136  {
137  OPM_TIMEBLOCK(prec_apply);
138  if (use_multithreading) {
139  parallelApply(v, d);
140  } else {
141  serialApply(v, d);
142  }
143  }
144 
149  void post(X& x) override
150  {
151  DUNE_UNUSED_PARAMETER(x);
152  }
153 
154  std::vector<typename M::block_type> getDiagonal()
155  {
156  return Dinv_;
157  }
158 
160  virtual SolverCategory::Category category() const override
161  {
162  return SolverCategory::sequential;
163  }
164 
165  virtual bool hasPerfectUpdate() const override {
166  return true;
167  }
168 
169 private:
171  const M& A_;
174  std::optional<M> A_reordered_;
176  std::vector<typename M::block_type> Dinv_;
178  Opm::SparseTable<std::size_t> level_sets_;
180  std::vector<std::size_t> reordered_to_natural_;
182  std::vector<std::size_t> natural_to_reorder_;
184  bool use_multithreading{false};
185 
186  void serialUpdate()
187  {
188  for (std::size_t row = 0; row < A_.N(); ++row) {
189  Dinv_[row] = A_[row][row];
190  }
191  for (auto row = A_.begin(); row != A_.end(); ++row) {
192  const auto row_i = row.index();
193  auto Dinv_temp = Dinv_[row_i];
194  for (auto a_ij = row->begin(); a_ij.index() < row_i; ++a_ij) {
195  const auto col_j = a_ij.index();
196  const auto a_ji = A_[col_j].find(row_i);
197  // if A[i, j] != 0 and A[j, i] != 0
198  if (a_ji != A_[col_j].end()) {
199  // Dinv_temp -= A[i, j] * d[j] * A[j, i]
200  Dinv_temp -= (*a_ij) * Dune::FieldMatrix(Dinv_[col_j]) * (*a_ji);
201  }
202  }
203  Dinv_temp.invert();
204  Dinv_[row_i] = Dinv_temp;
205  }
206  }
207 
208  void parallelUpdate()
209  {
210 #ifdef _OPENMP
211 #pragma omp parallel for
212 #endif
213  for (std::size_t row = 0; row != A_.N(); ++row) {
214  Dinv_[natural_to_reorder_[row]] = A_[row][row];
215  }
216 
217  // TODO: is there a better/faster way of copying all values?
218  for (auto dst_row_it = A_reordered_->begin(); dst_row_it != A_reordered_->end(); ++dst_row_it) {
219  auto src_row = A_.begin() + reordered_to_natural_[dst_row_it.index()];
220  for (auto elem = src_row->begin(); elem != src_row->end(); elem++) {
221  (*A_reordered_)[dst_row_it.index()][elem.index()] = *elem;
222  }
223  }
224 
225  int level_start_idx = 0;
226  for (int level = 0; level < level_sets_.size(); ++level) {
227  const int num_of_rows_in_level = level_sets_[level].size();
228 
229 #ifdef _OPENMP
230 #pragma omp parallel for
231 #endif
232  for (int row_idx_in_level = 0; row_idx_in_level < num_of_rows_in_level; ++row_idx_in_level) {
233  auto row = A_reordered_->begin() + level_start_idx + row_idx_in_level;
234  const auto row_i = reordered_to_natural_[row.index()];
235  // auto Dinv_temp = Dinv_[row_i];
236  auto Dinv_temp = Dinv_[level_start_idx + row_idx_in_level];
237  for (auto a_ij = row->begin(); a_ij.index() < row_i; ++a_ij) {
238  const auto col_j = natural_to_reorder_[a_ij.index()];
239  const auto a_ji = (*A_reordered_)[col_j].find(row_i);
240  if (a_ji != (*A_reordered_)[col_j].end()) {
241  // Dinv_temp -= A[i, j] * d[j] * A[j, i]
242  Dinv_temp -= (*a_ij) * Dune::FieldMatrix(Dinv_[col_j]) * (*a_ji);
243  }
244  }
245  Dinv_temp.invert();
246  Dinv_[level_start_idx + row_idx_in_level] = Dinv_temp;
247  }
248 
249  level_start_idx += num_of_rows_in_level;
250  }
251  }
252 
253  void serialApply(X& v, const Y& d)
254  {
255  // M = (D + L_A) D^-1 (D + U_A) (a LU decomposition of M)
256  // where L_A and U_A are the strictly lower and upper parts of A and M has the properties:
257  // diag(A) = diag(M)
258  // Working with defect d = b - Ax and update v = x_{n+1} - x_n
259  // solving the product M^-1(d) using upper and lower triangular solve
260  // v = M^{-1}*d = (D + U_A)^{-1} D (D + L_A)^{-1} * d
261  // lower triangular solve: (D + L_A) y = d
262  using Xblock = typename X::block_type;
263  using Yblock = typename Y::block_type;
264  {
265  OPM_TIMEBLOCK(lower_solve);
266  auto endi = A_.end();
267  for (auto row = A_.begin(); row != endi; ++row) {
268  const auto row_i = row.index();
269  Yblock rhs = d[row_i];
270  for (auto a_ij = (*row).begin(); a_ij.index() < row_i; ++a_ij) {
271  // if A[i][j] != 0
272  // rhs -= A[i][j]* y[j], where v_j stores y_j
273  const auto col_j = a_ij.index();
274  a_ij->mmv(v[col_j], rhs);
275  }
276  // y_i = Dinv_i * rhs
277  // storing y_i in v_i
278  Dinv_[row_i].mv(rhs, v[row_i]); // (D + L_A)_ii = D_i
279  }
280  }
281 
282  {
283  OPM_TIMEBLOCK(upper_solve);
284 
285  // upper triangular solve: (D + U_A) v = Dy
286  auto rendi = A_.beforeBegin();
287  for (auto row = A_.beforeEnd(); row != rendi; --row) {
288  const auto row_i = row.index();
289  // rhs = 0
290  Xblock rhs(0.0);
291  for (auto a_ij = (*row).beforeEnd(); a_ij.index() > row_i; --a_ij) {
292  // if A[i][j] != 0
293  // rhs += A[i][j]*v[j]
294  const auto col_j = a_ij.index();
295  a_ij->umv(v[col_j], rhs);
296  }
297  // calculate update v = M^-1*d
298  // v_i = y_i - Dinv_i*rhs
299  // before update v_i is y_i
300  Dinv_[row_i].mmv(rhs, v[row_i]);
301  }
302  }
303  }
304 
305  void parallelApply(X& v, const Y& d)
306  {
307  using Xblock = typename X::block_type;
308  using Yblock = typename Y::block_type;
309  {
310  OPM_TIMEBLOCK(lower_solve);
311  int level_start_idx = 0;
312  for (int level = 0; level < level_sets_.size(); ++level) {
313  const int num_of_rows_in_level = level_sets_[level].size();
314 
315 #ifdef _OPENMP
316 #pragma omp parallel for
317 #endif
318  for (int row_idx_in_level = 0; row_idx_in_level < num_of_rows_in_level; ++row_idx_in_level) {
319  auto row = A_reordered_->begin() + level_start_idx + row_idx_in_level;
320  const auto row_i = reordered_to_natural_[row.index()];
321  Yblock rhs = d[row_i];
322  for (auto a_ij = (*row).begin(); a_ij.index() < row_i; ++a_ij) {
323  // if A[i][j] != 0
324  // rhs -= A[i][j]* y[j], where v_j stores y_j
325  const auto col_j = a_ij.index();
326  a_ij->mmv(v[col_j], rhs);
327  }
328  // y_i = Dinv_i * rhs
329  // storing y_i in v_i
330  Dinv_[level_start_idx + row_idx_in_level].mv(rhs, v[row_i]); // (D + L_A)_ii = D_i
331  }
332  level_start_idx += num_of_rows_in_level;
333  }
334  }
335 
336  {
337  int level_start_idx = A_.N();
338  // upper triangular solve: (D + U_A) v = Dy
339  for (int level = level_sets_.size() - 1; level >= 0; --level) {
340  const int num_of_rows_in_level = level_sets_[level].size();
341  level_start_idx -= num_of_rows_in_level;
342 #ifdef _OPENMP
343 #pragma omp parallel for
344 #endif
345  for (int row_idx_in_level = num_of_rows_in_level - 1; row_idx_in_level >= 0; --row_idx_in_level) {
346  auto row = A_reordered_->begin() + level_start_idx + row_idx_in_level;
347  const auto row_i = reordered_to_natural_[row.index()];
348  Xblock rhs(0.0);
349  for (auto a_ij = (*row).beforeEnd(); a_ij.index() > row_i; --a_ij) {
350  // rhs += A[i][j]*v[j]
351  const auto col_j = a_ij.index();
352  a_ij->umv(v[col_j], rhs);
353  }
354  // calculate update v = M^-1*d
355  // v_i = y_i - Dinv_i*rhs
356  // before update v_i is y_i
357  Dinv_[level_start_idx + row_idx_in_level].mmv(rhs, v[row_i]);
358  }
359  }
360  }
361  }
362 };
363 
364 } // namespace Dune
365 
366 #endif
void pre(X &v, Y &d) override
Prepare the preconditioner.
Definition: DILU.hpp:124
Definition: fvbaseprimaryvariables.hh:161
void update() override
Update the preconditioner.
Definition: DILU.hpp:110
void apply(X &v, const Y &d) override
Apply the preconditioner.
Definition: DILU.hpp:135
Y range_type
The range type of the preconditioner.
Definition: DILU.hpp:60
M matrix_type
The matrix type the preconditioner is for.
Definition: DILU.hpp:56
Interface class adding the update() method to the preconditioner interface.
Definition: PreconditionerWithUpdate.hpp:33
X domain_type
The domain type of the preconditioner.
Definition: DILU.hpp:58
virtual SolverCategory::Category category() const override
Category of the preconditioner (see SolverCategory::Category)
Definition: DILU.hpp:160
void post(X &x) override
Clean up.
Definition: DILU.hpp:149
typename X::field_type field_type
The field type of the preconditioner.
Definition: DILU.hpp:62
Opm::SparseTable< std::size_t > getMatrixRowColoring(const M &matrix, ColoringType coloringType)
This coloring algorithm interprets the sparsity structure of a matrix as a graph. ...
Definition: GraphColoring.hpp:248
The OpenMP thread parallelized DILU preconditioner.
Definition: DILU.hpp:52
MultithreadDILU(const M &A)
scalar type underlying the field_type
Definition: DILU.hpp:68