DILU.hpp
/*
  Copyright 2022-2023 SINTEF AS
  This file is part of the Open Porous Media project (OPM).
  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef OPM_DILU_HEADER_INCLUDED
#define OPM_DILU_HEADER_INCLUDED

#include <opm/common/ErrorMacros.hpp>
#include <opm/common/TimingMacros.hpp>
#include <opm/simulators/linalg/GraphColoring.hpp>

#include <dune/common/fmatrix.hh>
#include <dune/common/unused.hh>
#include <dune/common/version.hh>
#include <dune/istl/bcrsmatrix.hh>

#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>

#include <cstddef>
#include <optional>
#include <vector>

#if HAVE_OPENMP
#include <omp.h>
#endif

// TODO: rewrite factory and constructor to keep track of a number of threads variable
namespace Dune
{

/*! \brief The OpenMP thread parallelized DILU preconditioner.
 *
 *  When more than one OpenMP thread is available, the matrix rows are
 *  grouped into level sets by a graph coloring so that the rows within
 *  a level set can be processed in parallel.
 */
template <class M, class X, class Y>
class MultithreadDILU : public PreconditionerWithUpdate<X, Y>
{
public:
    //! \brief The matrix type the preconditioner is for.
    using matrix_type = M;
    //! \brief The domain type of the preconditioner.
    using domain_type = X;
    //! \brief The range type of the preconditioner.
    using range_type = Y;
    //! \brief The field type of the preconditioner.
    using field_type = typename X::field_type;

    /*! \brief Constructor.
     *  \param A The matrix to operate on.
     */
    MultithreadDILU(const M& A)
        : A_(A)
    {
        OPM_TIMEBLOCK(prec_construct);
        // TODO: rewrite so this value is set by an argument to the constructor
#if HAVE_OPENMP
        use_multithreading = omp_get_max_threads() > 1;
#endif

        if (use_multithreading) {
            A_reordered_.emplace(A_.N(), A_.N(), A_.nonzeroes(), M::row_wise);

            // Group the rows into level sets; a coloring of the lower
            // triangle is sufficient assuming a symmetric sparsity pattern.
            level_sets_ = Opm::getMatrixRowColoring(A_, Opm::ColoringType::LOWER);
            reordered_to_natural_ = std::vector<std::size_t>(A_.N());
            natural_to_reorder_ = std::vector<std::size_t>(A_.N());
            int globCnt = 0;
            for (const auto& level_set : level_sets_) {
                for (const auto j : level_set) {
                    reordered_to_natural_[globCnt] = j;
                    natural_to_reorder_[j] = globCnt++;
                }
            }

            // Copy the sparsity pattern of A into the reordered matrix.
            for (auto dst_row_it = A_reordered_->createbegin(); dst_row_it != A_reordered_->createend(); ++dst_row_it) {
                auto src_row = A_.begin() + reordered_to_natural_[dst_row_it.index()];
                for (auto elem = src_row->begin(); elem != src_row->end(); ++elem) {
                    dst_row_it.insert(elem.index());
                }
            }
        }

        Dinv_.resize(A_.N());
        update();
    }

    /*! \brief Update the preconditioner.
     *
     *  Recomputes the inverted diagonal blocks from the current values of
     *  the matrix.
     */
    void update() override
    {
        OPM_TIMEBLOCK(prec_update);
        if (use_multithreading) {
            parallelUpdate();
        } else {
            serialUpdate();
        }
    }

    /*! \brief Prepare the preconditioner.
     */
    void pre(X& v, Y& d) override
    {
        DUNE_UNUSED_PARAMETER(v);
        DUNE_UNUSED_PARAMETER(d);
    }

    /*! \brief Apply the preconditioner.
     */
    void apply(X& v, const Y& d) override
    {
        OPM_TIMEBLOCK(prec_apply);
        if (use_multithreading) {
            parallelApply(v, d);
        } else {
            serialApply(v, d);
        }
    }

    /*! \brief Clean up.
     */
    void post(X& x) override
    {
        DUNE_UNUSED_PARAMETER(x);
    }

    std::vector<typename M::block_type> getDiagonal()
    {
        return Dinv_;
    }

    //! Category of the preconditioner (see SolverCategory::Category)
    virtual SolverCategory::Category category() const override
    {
        return SolverCategory::sequential;
    }

    virtual bool hasPerfectUpdate() const override
    {
        return true;
    }

private:
    //! The matrix we operate on.
    const M& A_;
    //! Copy of A_ reordered by level sets; only filled when multithreading is used.
    std::optional<M> A_reordered_;
    //! The inverted diagonal blocks of the DILU decomposition.
    std::vector<typename M::block_type> Dinv_;
    //! The level sets: groups of rows that can be processed in parallel.
    Opm::SparseTable<std::size_t> level_sets_;
    //! Maps reordered row indices to natural row indices.
    std::vector<std::size_t> reordered_to_natural_;
    //! Maps natural row indices to reordered row indices.
    std::vector<std::size_t> natural_to_reorder_;
    //! Whether update and apply run with multiple OpenMP threads.
    bool use_multithreading{false};

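    // Computes the DILU diagonal in natural row order. For each row i,
    //   D_i = A_ii - sum_{j < i, A_ij != 0 and A_ji != 0} A_ij * D_j^{-1} * A_ji,
    // and Dinv_ stores the inverted blocks D_i^{-1}.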
    void serialUpdate()
    {
        for (std::size_t row = 0; row < A_.N(); ++row) {
            Dinv_[row] = A_[row][row];
        }
        for (auto row = A_.begin(); row != A_.end(); ++row) {
            const auto row_i = row.index();
            auto Dinv_temp = Dinv_[row_i];
            for (auto a_ij = row->begin(); a_ij.index() < row_i; ++a_ij) {
                const auto col_j = a_ij.index();
                const auto a_ji = A_[col_j].find(row_i);
                // if A[i, j] != 0 and A[j, i] != 0
                if (a_ji != A_[col_j].end()) {
                    // Dinv_temp -= A[i, j] * d[j] * A[j, i]
                    Dinv_temp -= (*a_ij) * Dune::FieldMatrix(Dinv_[col_j]) * (*a_ji);
                }
            }
            Dinv_temp.invert();
            Dinv_[row_i] = Dinv_temp;
        }
    }

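    // The same recurrence as in serialUpdate(), but evaluated on the
    // level-set reordered copy of the matrix: rows within one level set
    // only depend on rows from earlier levels, so each level is handled
    // with an OpenMP parallel loop while the levels run in sequence.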
    void parallelUpdate()
    {
#ifdef _OPENMP
#pragma omp parallel for
#endif
        for (std::size_t row = 0; row != A_.N(); ++row) {
            Dinv_[natural_to_reorder_[row]] = A_[row][row];
        }

        // TODO: is there a better/faster way of copying all values?
        for (auto dst_row_it = A_reordered_->begin(); dst_row_it != A_reordered_->end(); ++dst_row_it) {
            auto src_row = A_.begin() + reordered_to_natural_[dst_row_it.index()];
            for (auto elem = src_row->begin(); elem != src_row->end(); ++elem) {
                (*A_reordered_)[dst_row_it.index()][elem.index()] = *elem;
            }
        }

        int level_start_idx = 0;
        for (int level = 0; level < level_sets_.size(); ++level) {
            const int num_of_rows_in_level = level_sets_[level].size();

#ifdef _OPENMP
#pragma omp parallel for
#endif
            for (int row_idx_in_level = 0; row_idx_in_level < num_of_rows_in_level; ++row_idx_in_level) {
                auto row = A_reordered_->begin() + level_start_idx + row_idx_in_level;
                const auto row_i = reordered_to_natural_[row.index()];
                auto Dinv_temp = Dinv_[level_start_idx + row_idx_in_level];
                for (auto a_ij = row->begin(); a_ij.index() < row_i; ++a_ij) {
                    const auto col_j = natural_to_reorder_[a_ij.index()];
                    const auto a_ji = (*A_reordered_)[col_j].find(row_i);
                    if (a_ji != (*A_reordered_)[col_j].end()) {
                        // Dinv_temp -= A[i, j] * d[j] * A[j, i]
                        Dinv_temp -= (*a_ij) * Dune::FieldMatrix(Dinv_[col_j]) * (*a_ji);
                    }
                }
                Dinv_temp.invert();
                Dinv_[level_start_idx + row_idx_in_level] = Dinv_temp;
            }

            level_start_idx += num_of_rows_in_level;
        }
    }

    void serialApply(X& v, const Y& d)
    {
        // M = (D + L_A) D^-1 (D + U_A)   (an LU decomposition of M),
        // where L_A and U_A are the strictly lower and upper triangular parts
        // of A, and M has the property diag(A) = diag(M).
        // Working with the defect d = b - Ax and the update v = x_{n+1} - x_n,
        // we solve v = M^{-1} d = (D + U_A)^{-1} D (D + L_A)^{-1} d
        // with one lower and one upper triangular solve.
        using Xblock = typename X::block_type;
        using Yblock = typename Y::block_type;
        {
            OPM_TIMEBLOCK(lower_solve);
            // lower triangular solve: (D + L_A) y = d
            auto endi = A_.end();
            for (auto row = A_.begin(); row != endi; ++row) {
                const auto row_i = row.index();
                Yblock rhs = d[row_i];
                for (auto a_ij = (*row).begin(); a_ij.index() < row_i; ++a_ij) {
                    // if A[i][j] != 0
                    // rhs -= A[i][j] * y[j], where v_j stores y_j
                    const auto col_j = a_ij.index();
                    a_ij->mmv(v[col_j], rhs);
                }
                // y_i = Dinv_i * rhs
                // storing y_i in v_i
                Dinv_[row_i].mv(rhs, v[row_i]); // (D + L_A)_ii = D_i
            }
        }

        {
            OPM_TIMEBLOCK(upper_solve);

            // upper triangular solve: (D + U_A) v = D y
            auto rendi = A_.beforeBegin();
            for (auto row = A_.beforeEnd(); row != rendi; --row) {
                const auto row_i = row.index();
                // rhs = 0
                Xblock rhs(0.0);
                for (auto a_ij = (*row).beforeEnd(); a_ij.index() > row_i; --a_ij) {
                    // if A[i][j] != 0
                    // rhs += A[i][j] * v[j]
                    const auto col_j = a_ij.index();
                    a_ij->umv(v[col_j], rhs);
                }
                // calculate the update v = M^-1 * d
                // v_i = y_i - Dinv_i * rhs
                // before the update, v_i holds y_i
                Dinv_[row_i].mmv(rhs, v[row_i]);
            }
        }
    }

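    // The same lower and upper triangular solves as serialApply(), but
    // traversing the level-set reordered matrix so that the rows of each
    // level can be handled concurrently. Note that v and d are indexed by
    // natural row indices, while Dinv_ is stored in reordered order here.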
    void parallelApply(X& v, const Y& d)
    {
        using Xblock = typename X::block_type;
        using Yblock = typename Y::block_type;
        {
            OPM_TIMEBLOCK(lower_solve);
            int level_start_idx = 0;
            for (int level = 0; level < level_sets_.size(); ++level) {
                const int num_of_rows_in_level = level_sets_[level].size();

#ifdef _OPENMP
#pragma omp parallel for
#endif
                for (int row_idx_in_level = 0; row_idx_in_level < num_of_rows_in_level; ++row_idx_in_level) {
                    auto row = A_reordered_->begin() + level_start_idx + row_idx_in_level;
                    const auto row_i = reordered_to_natural_[row.index()];
                    Yblock rhs = d[row_i];
                    for (auto a_ij = (*row).begin(); a_ij.index() < row_i; ++a_ij) {
                        // if A[i][j] != 0
                        // rhs -= A[i][j] * y[j], where v_j stores y_j
                        const auto col_j = a_ij.index();
                        a_ij->mmv(v[col_j], rhs);
                    }
                    // y_i = Dinv_i * rhs
                    // storing y_i in v_i
                    Dinv_[level_start_idx + row_idx_in_level].mv(rhs, v[row_i]); // (D + L_A)_ii = D_i
                }
                level_start_idx += num_of_rows_in_level;
            }
        }

        {
            int level_start_idx = A_.N();
            // upper triangular solve: (D + U_A) v = D y
            for (int level = level_sets_.size() - 1; level >= 0; --level) {
                const int num_of_rows_in_level = level_sets_[level].size();
                level_start_idx -= num_of_rows_in_level;
#ifdef _OPENMP
#pragma omp parallel for
#endif
                for (int row_idx_in_level = num_of_rows_in_level - 1; row_idx_in_level >= 0; --row_idx_in_level) {
                    auto row = A_reordered_->begin() + level_start_idx + row_idx_in_level;
                    const auto row_i = reordered_to_natural_[row.index()];
                    Xblock rhs(0.0);
                    for (auto a_ij = (*row).beforeEnd(); a_ij.index() > row_i; --a_ij) {
                        // rhs += A[i][j] * v[j]
                        const auto col_j = a_ij.index();
                        a_ij->umv(v[col_j], rhs);
                    }
                    // calculate the update v = M^-1 * d
                    // v_i = y_i - Dinv_i * rhs
                    // before the update, v_i holds y_i
                    Dinv_[level_start_idx + row_idx_in_level].mmv(rhs, v[row_i]);
                }
            }
        }
    }
};

} // namespace Dune

#endif
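
A minimal usage sketch (not part of the header): it assumes an already
assembled scalar Dune::BCRSMatrix and wires the preconditioner into a
Dune BiCGSTAB solver. The block size, the solver choice, and the include
path for this header are illustrative assumptions, not requirements of
this file.

#include <dune/common/fmatrix.hh>
#include <dune/common/fvector.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/bvector.hh>
#include <dune/istl/operators.hh>
#include <dune/istl/solvers.hh>
#include <opm/simulators/linalg/DILU.hpp> // assumed install path of this header

using Block = Dune::FieldMatrix<double, 1, 1>;
using Mat = Dune::BCRSMatrix<Block>;
using Vec = Dune::BlockVector<Dune::FieldVector<double, 1>>;

void solveWithDILU(const Mat& A, Vec& x, Vec& b)
{
    // Constructing the preconditioner computes the inverted DILU diagonal
    // and, with more than one OpenMP thread, the level-set reordering.
    Dune::MultithreadDILU<Mat, Vec, Vec> prec(A);

    Dune::MatrixAdapter<Mat, Vec, Vec> op(A);
    Dune::BiCGSTABSolver<Vec> solver(op, prec,
                                     1e-8, // defect reduction
                                     200,  // max iterations
                                     0);   // verbosity

    Dune::InverseOperatorResult res;
    solver.apply(x, b, res);

    // If the values of A change while the sparsity pattern stays fixed,
    // refresh the factorization before the next solve.
    prec.update();
}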