opm-simulators
ISTLSolverGPUISTL.hpp
1 /*
2  Copyright 2025 Equinor ASA
3 
4  This file is part of the Open Porous Media project (OPM).
5  OPM is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9  OPM is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13  You should have received a copy of the GNU General Public License
14  along with OPM. If not, see <http://www.gnu.org/licenses/>.
15 */
16 
17 #ifndef OPM_ISTLSOLVERGPUISTL_HEADER_INCLUDED
18 #define OPM_ISTLSOLVERGPUISTL_HEADER_INCLUDED
19 
20 #include <dune/istl/operators.hh>
21 #include <memory>
22 #include <optional>
23 #include <opm/grid/utility/ElementChunks.hpp>
24 #include <opm/simulators/linalg/AbstractISTLSolver.hpp>
25 #include <opm/simulators/linalg/getQuasiImpesWeights.hpp>
26 #include <opm/simulators/linalg/ISTLSolver.hpp>
27 
28 #if USE_HIP
29 #include <opm/simulators/linalg/gpuistl_hip/GpuSparseMatrixWrapper.hpp>
30 #include <opm/simulators/linalg/gpuistl_hip/GpuVector.hpp>
31 #include <opm/simulators/linalg/gpuistl_hip/PinnedMemoryHolder.hpp>
32 #else
33 #include <opm/simulators/linalg/gpuistl/GpuSparseMatrixWrapper.hpp>
34 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
35 #include <opm/simulators/linalg/gpuistl/PinnedMemoryHolder.hpp>
36 #endif
37 
38 #include <opm/simulators/linalg/ExtractParallelGridInformationToISTL.hpp>
39 #include <opm/simulators/linalg/ParallelIstlInformation.hpp>
40 #include <opm/simulators/linalg/findOverlapRowsAndColumns.hpp>
41 #include <opm/simulators/linalg/gpuistl/detail/FlexibleSolverWrapper.hpp>
42 #include <opm/simulators/linalg/printlinearsolverparameter.hpp>
43 
44 namespace Opm::gpuistl
45 {
46 
59 template <class TypeTag>
60 class ISTLSolverGPUISTL : public AbstractISTLSolver<GetPropType<TypeTag, Properties::SparseMatrixAdapter>,
61  GetPropType<TypeTag, Properties::GlobalEqVector>>
62 {
63 public:
68  using Matrix = typename SparseMatrixAdapter::IstlMatrix;
72  using ElementChunksType = Opm::ElementChunks<GridView, Dune::Partitions::All>;
73 
74  using real_type = typename Vector::field_type;
75 
79 
80  constexpr static std::size_t pressureIndex = GetPropType<TypeTag, Properties::Indices>::pressureSwitchIdx;
81 
82 
83 #if HAVE_MPI
84  using CommunicationType = Dune::OwnerOverlapCopyCommunication<int, int>;
85 #else
86  using CommunicationType = Dune::Communication<int>;
87 #endif
88 
90 
98  ISTLSolverGPUISTL(const Simulator& simulator,
99  const FlowLinearSolverParameters& parameters,
100  bool forceSerial = false)
101  : m_parameters(parameters)
102  , m_simulator(simulator)
103  , m_forceSerial(forceSerial)
104  , m_element_chunks(simulator.vanguard().gridView(), Dune::Partitions::all, ThreadManager::maxThreads())
105  {
106 #if HAVE_MPI
107  m_comm = std::make_shared<CommunicationType>(simulator.vanguard().grid().comm());
108  // Extract parallel grid information to populate index sets
109  extractParallelGridInformationToISTL(simulator.vanguard().grid(), m_parallelInformation);
110  // Set up element mapper manually
111  ElementMapper elemMapper(simulator.vanguard().gridView(), Dune::mcmgElementLayout());
112  // Overlap rows are needed for making overlap rows invalid in parallel mode
113  Opm::detail::findOverlapAndInterior(simulator.vanguard().grid(), elemMapper, m_overlapRows, m_interiorRows);
114  if (isParallel()) {
115  const std::size_t size = simulator.vanguard().grid().leafGridView().size(0);
116  // Copy parallel information to communication object (index sets and remote indices)
117  Opm::detail::copyParValues(m_parallelInformation, size, *m_comm);
118  }
119 
120 #else
121  m_comm = std::make_shared<CommunicationType>(simulator.gridView().comm());
122 #endif
123  m_parameters.init(simulator.vanguard().eclState().getSimulationConfig().useCPR());
124  m_propertyTree = setupPropertyTree(m_parameters,
125  Parameters::IsSet<Parameters::LinearSolverMaxIter>(),
126  Parameters::IsSet<Parameters::LinearSolverReduction>());
127  if (!Parameters::Get<Parameters::MatrixAddWellContributions>()) {
128  OPM_THROW(std::logic_error, "Well operators are currently not supported for the GPU backend. "
129  "Use --matrix-add-well-contributions=true to add well contributions to the matrix instead.");
130  }
131 
132  Opm::detail::printLinearSolverParameters(m_parameters, m_propertyTree, simulator.gridView().comm());
133  }
134 
137  explicit ISTLSolverGPUISTL(const Simulator& simulator)
138  : ISTLSolverGPUISTL(simulator, FlowLinearSolverParameters(), false)
139  {
140  }
141 
147  void eraseMatrix() override
148  {
149  // Nothing, this is the same as the ISTLSolver
150  }
151 
158  void setActiveSolver(int num) override
159  {
160  if (num != 0) {
161  OPM_THROW(std::logic_error, "Only one solver available for the GPU backend.");
162  }
163  }
164 
170  int numAvailableSolvers() const override
171  {
172  return 1;
173  }
174 
184  void prepare(const SparseMatrixAdapter& M, Vector& b) override
185  {
186  prepare(M.istlMatrix(), b);
187  }
188 
198  void prepare(const Matrix& M, Vector& b) override
199  {
200  try {
201  if (isParallel() && !m_overlapRows.empty()) {
202  Opm::detail::makeOverlapRowsInvalid(const_cast<Matrix&>(M), m_overlapRows);
203  }
204  updateMatrix(M);
205  updateRhs(b);
206  }
207  OPM_CATCH_AND_RETHROW_AS_CRITICAL_ERROR("This is likely due to a faulty linear solver JSON specification. "
208  "Check for errors related to missing nodes.");
209  }
210 
216  void setResidual(Vector&) override
217  {
218  // Should be handled in prepare() instead.
219  }
220 
229  void getResidual(Vector& b) const override
230  {
231  if (!m_rhs) {
232  OPM_THROW(std::runtime_error, "m_rhs not initialized, prepare(matrix, rhs); needs to be called");
233  }
234  m_rhs->copyToHost(b);
235  }
236 
242  void setMatrix(const SparseMatrixAdapter&) override
243  {
244  // Should be handled in prepare() instead.
245  }
246 
258  bool solve(Vector& x) override
259  {
260  // TODO: Write matrix to disk if needed
261  Dune::InverseOperatorResult result;
262  if (!m_matrix) {
263  OPM_THROW(std::runtime_error, "m_matrix not initialized, prepare(matrix, rhs); needs to be called");
264  }
265  if (!m_rhs) {
266  OPM_THROW(std::runtime_error, "m_rhs not initialized, prepare(matrix, rhs); needs to be called");
267  }
268  if (!m_gpuSolver) {
269  OPM_THROW(std::runtime_error,
270  "m_gpuFlexibleSolver not initialized, prepare(matrix, rhs); needs to be called");
271  }
272 
273  if (!m_x) {
274  m_x = std::make_unique<GPUVector>(x);
275  m_pinnedXMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
276  const_cast<real_type*>(&x[0][0]),
277  x.dim()
278  );
279  } else {
280  // copy from host to device using main stream and asynchronous transfer
281  m_x->copyFromHostAsync(x);
282  }
283  m_gpuSolver->apply(*m_x, *m_rhs, result);
284 
285  m_x->copyToHost(x);
286 
287  ++m_solveCount;
288 
289  m_lastSeenIterations = result.iterations;
290  return checkConvergence(result);
291  }
292 
298  int iterations() const override
299  {
300  return m_lastSeenIterations;
301  }
302 
306  const CommunicationType* comm() const override
307  {
308  return m_comm.get();
309  }
310 
316  bool isParallel() const
317  {
318  #if HAVE_MPI
319  return !m_forceSerial && m_comm->communicator().size() > 1;
320  #else
321  return false;
322  #endif
323  }
324 
328  int getSolveCount() const override
329  {
330  return m_solveCount;
331  }
332 
333 private:
334  bool checkConvergence(const Dune::InverseOperatorResult& result) const
335  {
337  }
338 
339  // Weights to make approximate pressure equations.
340  std::function<GPUVector&()> getWeightsCalculator()
341  {
342  std::function<GPUVector&()> weightsCalculator;
343 
344  using namespace std::string_literals;
345 
346  auto preconditionerType = m_propertyTree.get("preconditioner.type"s, "cpr"s);
347  // Make the preconditioner type lowercase for internal canonical representation
348  std::transform(preconditionerType.begin(), preconditionerType.end(), preconditionerType.begin(), ::tolower);
349  if (preconditionerType == "cpr" || preconditionerType == "cprt"
350  || preconditionerType == "cprw" || preconditionerType == "cprwt") {
351  const bool transpose = preconditionerType == "cprt" || preconditionerType == "cprwt";
352  const auto weightsType = m_propertyTree.get("preconditioner.weight_type"s, "quasiimpes"s);
353  if (weightsType == "quasiimpes") {
354  m_weights.emplace(m_matrix->N() * m_matrix->blockSize());
355  // Pre-compute diagonal indices once when setting up the calculator
356  auto diagonalIndices = Amg::precomputeDiagonalIndices(*m_matrix);
357  m_diagonalIndices.emplace(diagonalIndices);
358 
359  if (transpose) {
360  weightsCalculator = [this]() -> GPUVector& {
361  Amg::getQuasiImpesWeights<real_type, true>(
362  *m_matrix, pressureIndex, *m_weights, *m_diagonalIndices);
363  return *m_weights;
364  };
365  } else {
366  weightsCalculator = [this]() -> GPUVector& {
367  Amg::getQuasiImpesWeights<real_type, false>(
368  *m_matrix, pressureIndex, *m_weights, *m_diagonalIndices);
369  return *m_weights;
370  };
371  }
372  } else if (weightsType == "trueimpes") {
373  // Create CPU vector for the weights and initialize GPU vector
374  m_cpuWeights.resize(m_matrix->N());
375  m_pinnedWeightsMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
376  const_cast<real_type*>(&m_cpuWeights[0][0]), m_cpuWeights.dim());
377  m_weights.emplace(m_cpuWeights);
378 
379  const bool enableThreadParallel = m_parameters.cpr_weights_thread_parallel_;
380  // CPU implementation wrapped for GPU
381  weightsCalculator = [this, enableThreadParallel]() -> GPUVector& {
382  // Use the CPU implementation to calculate the weights
383  ElementContext elemCtx(m_simulator);
384  Amg::getTrueImpesWeights(pressureIndex,
385  m_cpuWeights,
386  elemCtx, m_simulator.model(),
387  m_element_chunks,
388  enableThreadParallel);
389 
390  // Copy CPU vector to GPU vector using main stream and asynchronous transfer
391  m_weights->copyFromHostAsync(m_cpuWeights);
392  return *m_weights;
393  };
394  } else if (weightsType == "trueimpesanalytic") {
395  // Create CPU vector for the weights and initialize GPU vector
396  m_cpuWeights.resize(m_matrix->N());
397  m_pinnedWeightsMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
398  const_cast<real_type*>(&m_cpuWeights[0][0]), m_cpuWeights.dim());
399  m_weights.emplace(m_cpuWeights);
400  const bool enableThreadParallel = m_parameters.cpr_weights_thread_parallel_;
401  // CPU implementation wrapped for GPU
402  weightsCalculator = [this, enableThreadParallel]() -> GPUVector& {
403  // Use the CPU implementation to calculate the weights
404  ElementContext elemCtx(m_simulator);
405  Amg::getTrueImpesWeightsAnalytic(pressureIndex,
406  m_cpuWeights,
407  elemCtx, m_simulator.model(),
408  m_element_chunks,
409  enableThreadParallel);
410 
411  // Copy CPU vector to GPU vector using main stream and asynchronous transfer
412  m_weights->copyFromHostAsync(m_cpuWeights);
413  return *m_weights;
414  };
415  } else {
416  OPM_THROW(std::invalid_argument,
417  "Weights type " + weightsType
418  + " not implemented for cpr."
419  " Please use quasiimpes, trueimpes or trueimpesanalytic.");
420  }
421  }
422  return weightsCalculator;
423  }
424 
425  void updateMatrix(const Matrix& M)
426  {
427  if (!m_matrix) {
428  m_matrix.reset(new auto(GPUMatrix::fromMatrix(M)));
429  m_pinnedMatrixMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
430  const_cast<real_type*>(&M[0][0][0][0]),
431  M.nonzeroes() * M[0][0].N() * M[0][0].M()
432  );
433  std::function<GPUVector&()> weightsCalculator = getWeightsCalculator();
434  m_gpuSolver = std::make_unique<SolverType>(
435  *m_matrix, isParallel(), m_propertyTree, pressureIndex, weightsCalculator, m_forceSerial, m_comm.get());
436  } else {
437  m_matrix->updateNonzeroValues(M, true);
438  m_gpuSolver->update();
439  }
440  }
441 
442  void updateRhs(const Vector& b)
443  {
444  if (!m_rhs) {
445  m_rhs = std::make_unique<GPUVector>(b);
446  m_pinnedRhsMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
447  const_cast<real_type*>(&b[0][0]),
448  b.dim()
449  );
450  } else {
451  // copy from host to device using main stream and asynchronous transfer
452  m_rhs->copyFromHostAsync(b);
453  }
454  }
455 
456  FlowLinearSolverParameters m_parameters;
457  const Simulator& m_simulator;
458  const bool m_forceSerial;
459  PropertyTree m_propertyTree;
460  ElementChunksType m_element_chunks;
461 
462  int m_lastSeenIterations = 0;
463  int m_solveCount = 0;
464 
465  std::unique_ptr<GPUMatrix> m_matrix;
466 
467  std::unique_ptr<SolverType> m_gpuSolver;
468 
469  std::unique_ptr<GPUVector> m_rhs;
470  std::unique_ptr<GPUVector> m_x;
471 
472  std::unique_ptr<PinnedMemoryHolder<real_type>> m_pinnedMatrixMemory;
473  std::unique_ptr<PinnedMemoryHolder<real_type>> m_pinnedRhsMemory;
474  std::unique_ptr<PinnedMemoryHolder<real_type>> m_pinnedXMemory;
475  std::unique_ptr<PinnedMemoryHolder<real_type>> m_pinnedWeightsMemory;
476 
477  Vector m_cpuWeights;
478  std::optional<GPUVector> m_weights;
479  std::optional<GPUVectorInt> m_diagonalIndices;
480 
481  std::shared_ptr<CommunicationType> m_comm;
482  std::vector<int> m_interiorRows;
483  std::vector<int> m_overlapRows;
484  std::any m_parallelInformation;
485 
486 };
487 } // namespace Opm::gpuistl
488 
489 #endif
void eraseMatrix() override
Signals that the memory for the matrix internally in the solver could be erased.
Definition: ISTLSolverGPUISTL.hpp:147
static bool checkConvergence(const Dune::InverseOperatorResult &result, const FlowLinearSolverParameters &parameters)
Check the convergence of the linear solver.
Definition: AbstractISTLSolver.hpp:192
void setMatrix(const SparseMatrixAdapter &) override
Set the matrix for the solver.
Definition: ISTLSolverGPUISTL.hpp:242
typename Properties::Detail::GetPropImpl< TypeTag, Property >::type::type GetPropType
get the type alias defined in the property (equivalent to old macro GET_PROP_TYPE(...))
Definition: propertysystem.hh:233
bool isParallel() const
Check if we are running in parallel mode.
Definition: ISTLSolverGPUISTL.hpp:316
static GpuSparseMatrixWrapper< real_type, false > fromMatrix(const MatrixType &matrix, bool copyNonZeroElementsDirectly=false)
fromMatrix creates a new matrix with the same block size and values as the given matrix ...
Definition: GpuSparseMatrixWrapper.hpp:183
int iterations() const override
Definition: ISTLSolverGPUISTL.hpp:298
Definition: fvbaseprimaryvariables.hh:161
Definition: gpu_type_detection.hpp:30
void setActiveSolver(int num) override
Set the active solver by its index.
Definition: ISTLSolverGPUISTL.hpp:158
ISTL solver for GPU using the GPU ISTL backend.
Definition: ISTLSolverGPUISTL.hpp:60
int getSolveCount() const override
Get the count of how many times the solver has been called.
Definition: ISTLSolverGPUISTL.hpp:328
int numAvailableSolvers() const override
Get the number of available solvers.
Definition: ISTLSolverGPUISTL.hpp:170
FlexibleSolverWrapper is compilational trick to reduce compile time overhead.
Definition: FlexibleSolverWrapper.hpp:44
void setResidual(Vector &) override
Set the residual vector.
Definition: ISTLSolverGPUISTL.hpp:216
ISTLSolverGPUISTL(const Simulator &simulator)
Construct a system solver.
Definition: ISTLSolverGPUISTL.hpp:137
void getResidual(Vector &b) const override
Get the residual vector.
Definition: ISTLSolverGPUISTL.hpp:229
void extractParallelGridInformationToISTL(const Dune::CpGrid &, std::any &)
Extracts the information about the data decomposition from the grid for dune-istl.
Definition: ExtractParallelGridInformationToISTL.cpp:46
void prepare(const SparseMatrixAdapter &M, Vector &b) override
Prepare the solver with the given matrix and right-hand side vector.
Definition: ISTLSolverGPUISTL.hpp:184
A small, fixed‑dimension MiniVector class backed by std::array that can be used in both host and CUD...
Definition: AmgxInterface.hpp:37
This class carries all parameters for the NewtonIterationBlackoilInterleaved class.
Definition: FlowLinearSolverParameters.hpp:97
ISTLSolverGPUISTL(const Simulator &simulator, const FlowLinearSolverParameters &parameters, bool forceSerial=false)
Construct a system solver.
Definition: ISTLSolverGPUISTL.hpp:98
const CommunicationType * comm() const override
Get the communication object used by the solver.
Definition: ISTLSolverGPUISTL.hpp:306
void prepare(const Matrix &M, Vector &b) override
Prepare the solver with the given matrix and right-hand side vector.
Definition: ISTLSolverGPUISTL.hpp:198
Abstract interface for ISTL solvers.
Definition: AbstractISTLSolver.hpp:44
PropertyTree setupPropertyTree(FlowLinearSolverParameters p, bool linearSolverMaxIterSet, bool linearSolverReductionSet, bool tpsaSetup)
Set up a property tree intended for FlexibleSolver by either reading the tree from a JSON file or cre...
Definition: setupPropertyTree.cpp:182
bool solve(Vector &x) override
Solve the system of linear equations Ax = b.
Definition: ISTLSolverGPUISTL.hpp:258