getQuasiImpesWeights.hpp
Go to the documentation of this file.
1/*
2 Copyright 2019 SINTEF Digital, Mathematics and Cybernetics.
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20#ifndef OPM_GET_QUASI_IMPES_WEIGHTS_HEADER_INCLUDED
21#define OPM_GET_QUASI_IMPES_WEIGHTS_HEADER_INCLUDED
22
#include <dune/common/fvector.hh>

#include <opm/grid/utility/ElementChunks.hpp>
#include <opm/material/common/MathToolbox.hpp>
#include <opm/models/parallel/threadmanager.hpp>
#include <opm/simulators/utils/DeferredLoggingErrorHelpers.hpp>

#include <algorithm>
#include <cmath>

#if HAVE_CUDA
#if USE_HIP
#include <opm/simulators/linalg/gpuistl_hip/detail/cpr_amg_operations.hpp>
#else
#include <opm/simulators/linalg/gpuistl/detail/cpr_amg_operations.hpp>
#endif
#endif
39
40
41namespace Opm
42{
43
44namespace Details
45{
46 template <class DenseMatrix>
47 DenseMatrix transposeDenseMatrix(const DenseMatrix& M)
48 {
49 DenseMatrix tmp;
50 for (int i = 0; i < M.rows; ++i)
51 for (int j = 0; j < M.cols; ++j)
52 tmp[j][i] = M[i][j];
53
54 return tmp;
55 }
56} // namespace Details
57
58namespace Amg
59{
60 template <class Matrix, class Vector>
61 void getQuasiImpesWeights(const Matrix& matrix,
62 const int pressureVarIndex,
63 const bool transpose,
64 Vector& weights,
65 [[maybe_unused]] bool enable_thread_parallel)
66 {
67 using VectorBlockType = typename Vector::block_type;
68 using MatrixBlockType = typename Matrix::block_type;
69 const Matrix& A = matrix;
70
71 VectorBlockType rhs(0.0);
72 rhs[pressureVarIndex] = 1.0;
73
74 // Declare variables outside the loop to avoid repetitive allocation
75 MatrixBlockType diag_block;
76 VectorBlockType bweights;
77 MatrixBlockType diag_block_transpose;
78
79 // Use OpenMP to parallelize over matrix rows (runtime controlled via if clause)
80#ifdef _OPENMP
81#pragma omp parallel for private(diag_block, bweights, diag_block_transpose) if(enable_thread_parallel)
82#endif
83 for (int row_idx = 0; row_idx < static_cast<int>(A.N()); ++row_idx) {
84 diag_block = MatrixBlockType(0.0);
85 // Find diagonal block for this row
86 const auto row_it = A.begin() + row_idx;
87 const auto endj = (*row_it).end();
88 for (auto j = (*row_it).begin(); j != endj; ++j) {
89 if (row_it.index() == j.index()) {
90 diag_block = (*j);
91 break;
92 }
93 }
94 if (transpose) {
95 diag_block.solve(bweights, rhs);
96 } else {
97 diag_block_transpose = Details::transposeDenseMatrix(diag_block);
98 diag_block_transpose.solve(bweights, rhs);
99 }
100
101 const double abs_max =
102 *std::ranges::max_element(bweights,
103 [](double a, double b)
104 { return std::fabs(a) < std::fabs(b); });
105 bweights /= std::fabs(abs_max);
106 weights[row_idx] = bweights;
107 }
108 }
109
110 template <class Matrix, class Vector>
111 Vector getQuasiImpesWeights(const Matrix& matrix,
112 const int pressureVarIndex,
113 const bool transpose,
114 bool enable_thread_parallel)
115 {
116 Vector weights(matrix.N());
117 getQuasiImpesWeights(matrix, pressureVarIndex, transpose, weights, enable_thread_parallel);
118 return weights;
119 }
120
121#if HAVE_CUDA
122 template <typename T>
123 std::vector<int> precomputeDiagonalIndices(const gpuistl::GpuSparseMatrixWrapper<T>& matrix) {
124 std::vector<int> diagonalIndices(matrix.N(), -1);
125 const auto rowIndices = matrix.getRowIndices().asStdVector();
126 const auto colIndices = matrix.getColumnIndices().asStdVector();
127
128 for (auto row = 0; row < Opm::gpuistl::detail::to_int(matrix.N()); ++row) {
129 for (auto i = rowIndices[row]; i < rowIndices[row+1]; ++i) {
130 if (colIndices[i] == row) {
131 diagonalIndices[row] = i;
132 break;
133 }
134 }
135 }
136 return diagonalIndices;
137 }
138
    // GPU version that delegates to the GPU implementation
    /// \brief Compute quasi-IMPES weights on the GPU (in-place overload).
    ///
    /// Thin wrapper: forwards to gpuistl::detail::getQuasiImpesWeights.
    /// \param matrix          block-sparse matrix living on the GPU
    /// \param pressureVarIndex index of the pressure variable within a block
    /// \param[out] weights    GPU vector receiving the weights
    /// \param diagonalIndices per-row flat index of each diagonal block
    ///                        (as produced by precomputeDiagonalIndices)
    template <typename T, bool transpose>
    void getQuasiImpesWeights(const gpuistl::GpuSparseMatrixWrapper<T>& matrix,
                              const int pressureVarIndex,
                              gpuistl::GpuVector<T>& weights,
                              const gpuistl::GpuVector<int>& diagonalIndices)
    {
        gpuistl::detail::getQuasiImpesWeights<T, transpose>(matrix, pressureVarIndex, weights, diagonalIndices);
    }
148
149 template <typename T, bool transpose>
150 gpuistl::GpuVector<T> getQuasiImpesWeights(const gpuistl::GpuSparseMatrixWrapper<T>& matrix,
151 const int pressureVarIndex,
152 const gpuistl::GpuVector<int>& diagonalIndices)
153 {
154 gpuistl::GpuVector<T> weights(matrix.N() * matrix.blockSize());
155 getQuasiImpesWeights<T, transpose>(matrix, pressureVarIndex, weights, diagonalIndices);
156 return weights;
157 }
158#endif
159
160 template<class Vector, class ElementContext, class Model, class ElementChunksType>
161 void getTrueImpesWeights(int pressureVarIndex, Vector& weights,
162 const ElementContext& elemCtx,
163 const Model& model,
164 const ElementChunksType& element_chunks,
165 [[maybe_unused]] bool enable_thread_parallel)
166 {
167 using VectorBlockType = typename Vector::block_type;
168 using Matrix = typename std::decay_t<decltype(model.linearizer().jacobian())>;
169 using MatrixBlockType = typename Matrix::MatrixBlock;
170 constexpr int numEq = VectorBlockType::size();
171 using Evaluation = typename std::decay_t<decltype(model.localLinearizer(ThreadManager::threadId()).localResidual().residual(0))>
172 ::block_type;
173
174 VectorBlockType rhs(0.0);
175 rhs[pressureVarIndex] = 1.0;
176
177 // Declare variables outside the loop to avoid repetitive allocation
178 MatrixBlockType block;
179 VectorBlockType bweights;
180 MatrixBlockType block_transpose;
181 Dune::FieldVector<Evaluation, numEq> storage;
182
184#ifdef _OPENMP
185#pragma omp parallel for private(block, bweights, block_transpose, storage) if(enable_thread_parallel)
186#endif
187 for (const auto& chunk : element_chunks) {
188 const std::size_t thread_id = ThreadManager::threadId();
189 ElementContext localElemCtx(elemCtx.simulator());
190
191 for (const auto& elem : chunk) {
192 localElemCtx.updatePrimaryStencil(elem);
193 localElemCtx.updatePrimaryIntensiveQuantities(/*timeIdx=*/0);
194
195 model.localLinearizer(thread_id).localResidual().computeStorage(storage, localElemCtx, /*spaceIdx=*/0, /*timeIdx=*/0);
196
197 auto extrusionFactor = localElemCtx.intensiveQuantities(0, /*timeIdx=*/0).extrusionFactor();
198 auto scvVolume = localElemCtx.stencil(/*timeIdx=*/0).subControlVolume(0).volume() * extrusionFactor;
199 auto storage_scale = scvVolume / localElemCtx.simulator().timeStepSize();
200 const double pressure_scale = 50e5;
201
202 // Build the transposed matrix directly to avoid separate transpose step
203 for (int ii = 0; ii < numEq; ++ii) {
204 for (int jj = 0; jj < numEq; ++jj) {
205 block_transpose[jj][ii] = storage[ii].derivative(jj)/storage_scale;
206 if (jj == pressureVarIndex) {
207 block_transpose[jj][ii] *= pressure_scale;
208 }
209 }
210 }
211 block_transpose.solve(bweights, rhs);
212
213 const double abs_max =
214 *std::ranges::max_element(bweights,
215 [](double a, double b)
216 { return std::fabs(a) < std::fabs(b); });
217 // probably a scaling which could give approximately total compressibility would be better
218 bweights /= std::fabs(abs_max); // given normal densities this scales weights to about 1.
219
220 const auto index = localElemCtx.globalSpaceIndex(/*spaceIdx=*/0, /*timeIdx=*/0);
221 weights[index] = bweights;
222 }
223 }
224 OPM_END_PARALLEL_TRY_CATCH("getTrueImpesWeights() failed: ", elemCtx.simulator().vanguard().grid().comm());
225 }
226
227 template <class Vector, class ElementContext, class Model, class ElementChunksType>
228 void getTrueImpesWeightsAnalytic(int /*pressureVarIndex*/,
229 Vector& weights,
230 const ElementContext& elemCtx,
231 const Model& model,
232 const ElementChunksType& element_chunks,
233 [[maybe_unused]] bool enable_thread_parallel)
234 {
235 // The sequential residual is a linear combination of the
236 // mass balance residuals, with coefficients equal to (for
237 // water, oil, gas):
238 // 1/bw,
239 // (1/bo - rs/bg)/(1-rs*rv)
240 // (1/bg - rv/bo)/(1-rs*rv)
241 // These coefficients must be applied for both the residual and
242 // Jacobian.
243 using FluidSystem = typename Model::FluidSystem;
244 using LhsEval = double;
245
246 using PrimaryVariables = typename Model::PrimaryVariables;
247 using VectorBlockType = typename Vector::block_type;
248 using Evaluation =
249 typename std::decay_t<decltype(model.localLinearizer(ThreadManager::threadId()).localResidual().residual(0))>::block_type;
250 using Toolbox = MathToolbox<Evaluation>;
251
252 const auto& solution = model.solution(/*timeIdx*/ 0);
253 VectorBlockType bweights;
254
255 // Use OpenMP to parallelize over element chunks (runtime controlled via if clause)
257#ifdef _OPENMP
258#pragma omp parallel for private(bweights) if(enable_thread_parallel)
259#endif
260 for (const auto& chunk : element_chunks) {
261
262 // Each thread gets a unique copy of elemCtx
263 ElementContext localElemCtx(elemCtx.simulator());
264
265 for (const auto& elem : chunk) {
266 localElemCtx.updatePrimaryStencil(elem);
267 localElemCtx.updatePrimaryIntensiveQuantities(/*timeIdx=*/0);
268
269 const auto index = localElemCtx.globalSpaceIndex(/*spaceIdx=*/0, /*timeIdx=*/0);
270 const auto& intQuants = localElemCtx.intensiveQuantities(/*spaceIdx=*/0, /*timeIdx=*/0);
271 const auto& fs = intQuants.fluidState();
272
273 if (FluidSystem::phaseIsActive(FluidSystem::waterPhaseIdx)) {
274 const unsigned activeCompIdx = FluidSystem::canonicalToActiveCompIdx(
275 FluidSystem::solventComponentIndex(FluidSystem::waterPhaseIdx));
276 bweights[activeCompIdx]
277 = Toolbox::template decay<LhsEval>(1 / fs.invB(FluidSystem::waterPhaseIdx));
278 }
279
280 double denominator = 1.0;
281 double rs = Toolbox::template decay<double>(fs.Rs());
282 double rv = Toolbox::template decay<double>(fs.Rv());
283 const auto& priVars = solution[index];
284 if (priVars.primaryVarsMeaningGas() == PrimaryVariables::GasMeaning::Rv) {
285 rs = 0.0;
286 }
287 if (priVars.primaryVarsMeaningGas() == PrimaryVariables::GasMeaning::Rs) {
288 rv = 0.0;
289 }
290 if (FluidSystem::phaseIsActive(FluidSystem::oilPhaseIdx)
291 && FluidSystem::phaseIsActive(FluidSystem::gasPhaseIdx)) {
292 denominator = Toolbox::template decay<LhsEval>(1 - rs * rv);
293 }
294
295 if (FluidSystem::phaseIsActive(FluidSystem::oilPhaseIdx)) {
296 const unsigned activeCompIdx = FluidSystem::canonicalToActiveCompIdx(
297 FluidSystem::solventComponentIndex(FluidSystem::oilPhaseIdx));
298 bweights[activeCompIdx] = Toolbox::template decay<LhsEval>(
299 (1 / fs.invB(FluidSystem::oilPhaseIdx) - rs / fs.invB(FluidSystem::gasPhaseIdx))
300 / denominator);
301 }
302 if (FluidSystem::phaseIsActive(FluidSystem::gasPhaseIdx)) {
303 const unsigned activeCompIdx = FluidSystem::canonicalToActiveCompIdx(
304 FluidSystem::solventComponentIndex(FluidSystem::gasPhaseIdx));
305 bweights[activeCompIdx] = Toolbox::template decay<LhsEval>(
306 (1 / fs.invB(FluidSystem::gasPhaseIdx) - rv / fs.invB(FluidSystem::oilPhaseIdx))
307 / denominator);
308 }
309
310 weights[index] = bweights;
311 }
312 }
313 OPM_END_PARALLEL_TRY_CATCH("getTrueImpesAnalyticWeights() failed: ", elemCtx.simulator().vanguard().grid().comm());
314 }
315} // namespace Amg
316
317} // namespace Opm
318
319#endif // OPM_GET_QUASI_IMPES_WEIGHTS_HEADER_INCLUDED
#define OPM_END_PARALLEL_TRY_CATCH(prefix, comm)
Catch exceptions and rethrow them collectively in a parallel try-catch clause.
Definition: DeferredLoggingErrorHelpers.hpp:192
#define OPM_BEGIN_PARALLEL_TRY_CATCH()
Macro to setup the try of a parallel try-catch.
Definition: DeferredLoggingErrorHelpers.hpp:158
static unsigned threadId()
Return the index of the current OpenMP thread.
The GpuSparseMatrixWrapper Checks CUDA/HIP version and dispatches a version either using the old or t...
Definition: GpuSparseMatrixWrapper.hpp:62
GpuVector< int > & getRowIndices()
getRowIndices returns the row indices used to represent the BSR structure.
Definition: GpuSparseMatrixWrapper.hpp:271
std::size_t N() const
N returns the number of rows (which is equal to the number of columns)
Definition: GpuSparseMatrixWrapper.hpp:232
GpuVector< int > & getColumnIndices()
getColumnIndices returns the column indices used to represent the BSR structure.
Definition: GpuSparseMatrixWrapper.hpp:291
void getTrueImpesWeights(int pressureVarIndex, Vector &weights, const ElementContext &elemCtx, const Model &model, const ElementChunksType &element_chunks, bool enable_thread_parallel)
Definition: getQuasiImpesWeights.hpp:161
void getQuasiImpesWeights(const Matrix &matrix, const int pressureVarIndex, const bool transpose, Vector &weights, bool enable_thread_parallel)
Definition: getQuasiImpesWeights.hpp:61
void getTrueImpesWeightsAnalytic(int, Vector &weights, const ElementContext &elemCtx, const Model &model, const ElementChunksType &element_chunks, bool enable_thread_parallel)
Definition: getQuasiImpesWeights.hpp:228
DenseMatrix transposeDenseMatrix(const DenseMatrix &M)
Definition: getQuasiImpesWeights.hpp:47
int to_int(std::size_t s)
to_int converts a size_t (a 64-bit unsigned integer on most relevant platforms) to a signed 32-bit int.
Definition: safe_conversion.hpp:52
Definition: blackoilbioeffectsmodules.hh:45