opm-simulators
SolverAdapter.hpp
1 /*
2  Copyright 2022-2023 SINTEF AS
3 
4  This file is part of the Open Porous Media project (OPM).
5 
6  OPM is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OPM is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OPM. If not, see <http://www.gnu.org/licenses/>.
18 */
19 #ifndef OPM_SOLVERADAPTER_HPP
20 #define OPM_SOLVERADAPTER_HPP
21 
22 #include <dune/istl/bcrsmatrix.hh>
23 #include <dune/istl/operators.hh>
24 #include <dune/istl/paamg/pinfo.hh>
25 #include <dune/istl/schwarz.hh>
26 #include <dune/istl/solver.hh>
27 
28 #include <opm/common/ErrorMacros.hpp>
29 
30 #include <opm/simulators/linalg/gpuistl/GpuBlockPreconditioner.hpp>
31 #include <opm/simulators/linalg/gpuistl/GpuSparseMatrixWrapper.hpp>
32 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
33 #include <opm/simulators/linalg/gpuistl/PreconditionerAdapter.hpp>
34 #include <opm/simulators/linalg/gpuistl/detail/has_function.hpp>
35 
36 #if HAVE_MPI
37 #include <opm/simulators/linalg/gpuistl/GpuOwnerOverlapCopy.hpp>
38 #endif
39 
40 
41 #include <memory>
42 
43 namespace Opm::gpuistl
44 {
50 template <class Operator, template <class> class UnderlyingSolver, class X>
51 class SolverAdapter : public Dune::IterativeSolver<X, X>
52 {
53 public:
54  using typename Dune::IterativeSolver<X, X>::domain_type;
55  using typename Dune::IterativeSolver<X, X>::range_type;
56  using typename Dune::IterativeSolver<X, X>::field_type;
57  using typename Dune::IterativeSolver<X, X>::real_type;
58  using typename Dune::IterativeSolver<X, X>::scalar_real_type;
59  static constexpr auto block_size = domain_type::block_type::dimension;
61 
62 
74  template<class Comm>
75  SolverAdapter(Operator& op,
76  Dune::ScalarProduct<X>& sp,
77  std::shared_ptr<Dune::Preconditioner<X, X>> prec,
78  scalar_real_type reduction,
79  int maxit,
80  int verbose,
81  const Comm& comm)
82  : Dune::IterativeSolver<X, X>(op, sp, *prec, reduction, maxit, verbose)
83  , m_opOnCPUWithMatrix(op)
84  , m_matrix(GpuSparseMatrixWrapper<real_type>::fromMatrix(op.getmat()))
85  , m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose, comm))
86  {
87  OPM_ERROR_IF(
89  "Currently we only support operators of type MatrixAdapter in the CUDA/HIP solver. "
90  "Use --matrix-add-well-contributions=true. "
91  "Using WellModelMatrixAdapter with SolverAdapter is not well-defined so it will not work well, or at all.");
92  }
93 
94  virtual void apply(X& x, X& b, double reduction, Dune::InverseOperatorResult& res) override
95  {
96  // TODO: Can we do this without reimplementing the other function?
97  // TODO: [perf] Do we need to update the matrix every time? Probably yes
98  m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat(), true);
99 
100  if (!m_inputBuffer) {
101  m_inputBuffer.reset(new XGPU(b.dim()));
102  m_outputBuffer.reset(new XGPU(x.dim()));
103  }
104 
105  m_inputBuffer->copyFromHost(b);
106  // TODO: [perf] do we need to copy x here?
107  m_outputBuffer->copyFromHost(x);
108 
109  m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, reduction, res);
110 
111  // TODO: [perf] do we need to copy b here?
112  m_inputBuffer->copyToHost(b);
113  m_outputBuffer->copyToHost(x);
114  }
115  virtual void apply(X& x, X& b, Dune::InverseOperatorResult& res) override
116  {
117  // TODO: [perf] Do we need to update the matrix every time? Probably yes
118  m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat(), true);
119 
120  if (!m_inputBuffer) {
121  m_inputBuffer.reset(new XGPU(b.dim()));
122  m_outputBuffer.reset(new XGPU(x.dim()));
123  }
124 
125  m_inputBuffer->copyFromHost(b);
126  // TODO: [perf] do we need to copy x here?
127  m_outputBuffer->copyFromHost(x);
128 
129  m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, res);
130 
131  // TODO: [perf] do we need to copy b here?
132  m_inputBuffer->copyToHost(b);
133  m_outputBuffer->copyToHost(x);
134  }
135 
136 private:
137  Operator& m_opOnCPUWithMatrix;
138  GpuSparseMatrixWrapper<real_type> m_matrix;
139 
140  UnderlyingSolver<XGPU> m_underlyingSolver;
141 
142 
143  // TODO: Use a std::forward
144  // This is the MPI parallel case (general communication object)
145 #if HAVE_MPI
146  template <class Comm>
147  UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
148  scalar_real_type reduction,
149  int maxit,
150  int verbose,
151  const Comm& communication)
152  {
153  // TODO: See the below TODO over the definition of precHolder in the other overload of constructSolver
154  // TODO: We are currently double wrapping preconditioners in the preconditioner factory to be extra
155  // compatible with CPU. Probably a cleaner way eventually would be to do more modifications to the
156  // flexible solver to accomodate the pure GPU better.
157  auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<X, X>>(prec);
158  if (!precAsHolder) {
159  OPM_THROW(std::invalid_argument,
160  "The preconditioner needs to be a CUDA preconditioner (eg. GPUDILU) wrapped in a "
161  "Opm::gpuistl::PreconditionerAdapter wrapped in a "
162  "Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set "
163  "preconditioner to 'gpudilu'");
164  }
165 
166  auto preconditionerAdapter = precAsHolder->getUnderlyingPreconditioner();
167  auto preconditionerAdapterAsHolder
168  = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(preconditionerAdapter);
169  if (!preconditionerAdapterAsHolder) {
170  OPM_THROW(std::invalid_argument,
171  "The preconditioner needs to be a CUDA preconditioner (eg. GPUDILU) wrapped in a "
172  "Opm::gpuistl::PreconditionerAdapter wrapped in a "
173  "Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set "
174  "preconditioner to 'gpudilu'");
175  }
176  // We need to get the underlying preconditioner:
177  auto preconditionerReallyOnGPU = preconditionerAdapterAsHolder->getUnderlyingPreconditioner();
178 
179 
180  using CudaCommunication = GpuOwnerOverlapCopy<real_type, Comm>;
181  using SchwarzOperator
182  = Dune::OverlappingSchwarzOperator<GpuSparseMatrixWrapper<real_type>, XGPU, XGPU, CudaCommunication>;
183  auto cudaCommunication = makeGpuOwnerOverlapCopy<real_type, block_size, Comm>(communication);
184 
185  auto mpiPreconditioner = std::make_shared<GpuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
186  preconditionerReallyOnGPU, cudaCommunication);
187 
188  auto scalarProduct = std::make_shared<Dune::ParallelScalarProduct<XGPU, CudaCommunication>>(
189  cudaCommunication, m_opOnCPUWithMatrix.category());
190 
191 
192  // NOTE: Ownership of cudaCommunication is handled by mpiPreconditioner. However, just to make sure we
193  // remember this, we add this check. So remember that we hold one count in this scope, and one in the
194  // GpuBlockPreconditioner instance. We accommodate for the fact that it could be passed around in
195  // GpuBlockPreconditioner, hence we do not test for != 2
196  OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly.");
197  auto overlappingCudaOperator = std::make_shared<SchwarzOperator>(m_matrix, *cudaCommunication);
198 
199  return UnderlyingSolver<XGPU>(
200  overlappingCudaOperator, scalarProduct, mpiPreconditioner, reduction, maxit, verbose);
201  }
202 #endif
203 
204  // This is the serial case (specific overload for Dune::Amg::SequentialInformation)
205  UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
206  scalar_real_type reduction,
207  int maxit,
208  int verbose,
209  [[maybe_unused]] const Dune::Amg::SequentialInformation& communication)
210  {
211  // Dune::Amg::SequentialInformation is the serial case
212  return constructSolver(prec, reduction, maxit, verbose);
213  }
214 
215  // TODO: Use a std::forward
216  UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
217  scalar_real_type reduction,
218  int maxit,
219  int verbose)
220  {
221  // TODO: Fix the reliance on casting here. This is a code smell to a certain degree, and assumes
222  // a certain setup beforehand. The only reason we do it this way is to minimize edits to the
223  // flexible solver. We could design it differently, but keep this for the time being until
224  // we figure out how we want to GPU-ify the rest of the system.
225  auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(prec);
226  if (!precAsHolder) {
227  OPM_THROW(std::invalid_argument,
228  "The preconditioner needs to be a CUDA preconditioner wrapped in a "
229  "Opm::gpuistl::PreconditionerHolder (eg. GPUDILU).");
230  }
231  auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
232 
233  auto matrixOperator = std::make_shared<Dune::MatrixAdapter<GpuSparseMatrixWrapper<real_type>, XGPU, XGPU>>(m_matrix);
234  auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
235  return UnderlyingSolver<XGPU>(matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);
236  }
237 
238  std::unique_ptr<XGPU> m_inputBuffer;
239  std::unique_ptr<XGPU> m_outputBuffer;
240 };
241 
242 } // namespace Opm::gpuistl
243 
244 #endif
The GpuSparseMatrixWrapper Checks CUDA/HIP version and dispatches a version either using the old or t...
Definition: gpu_type_detection.hpp:32
SolverAdapter(Operator &op, Dune::ScalarProduct< X > &sp, std::shared_ptr< Dune::Preconditioner< X, X >> prec, scalar_real_type reduction, int maxit, int verbose, const Comm &comm)
constructor
Definition: SolverAdapter.hpp:75
Wraps a CUDA solver to work with CPU data.
Definition: SolverAdapter.hpp:51
Definition: fvbaseprimaryvariables.hh:161
Definition: gpu_type_detection.hpp:30
The is_a_well_operator class tries to guess if the operator is a well type operator.
Definition: has_function.hpp:113
A small, fixed‑dimension MiniVector class backed by std::array that can be used in both host and CUD...
Definition: AmgxInterface.hpp:37
void updateNonzeroValues(const MatrixType &matrix, bool copyNonZeroElementsDirectly=false)
updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix ...
Definition: GpuSparseMatrixWrapper.hpp:378