SolverAdapter.hpp
Go to the documentation of this file.
1/*
2 Copyright 2022-2023 SINTEF AS
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef OPM_SOLVERADAPTER_HPP
20#define OPM_SOLVERADAPTER_HPP
21
22#include <dune/istl/bcrsmatrix.hh>
23#include <dune/istl/operators.hh>
24#include <dune/istl/paamg/pinfo.hh>
25#include <dune/istl/schwarz.hh>
26#include <dune/istl/solver.hh>
27
28#include <opm/common/ErrorMacros.hpp>
29
35
36#if HAVE_MPI
38#endif
39
40
41#include <memory>
42
43namespace Opm::gpuistl
44{
50template <class Operator, template <class> class UnderlyingSolver, class X>
51class SolverAdapter : public Dune::IterativeSolver<X, X>
52{
53public:
54 using typename Dune::IterativeSolver<X, X>::domain_type;
55 using typename Dune::IterativeSolver<X, X>::range_type;
56 using typename Dune::IterativeSolver<X, X>::field_type;
57 using typename Dune::IterativeSolver<X, X>::real_type;
58 using typename Dune::IterativeSolver<X, X>::scalar_real_type;
59 static constexpr auto block_size = domain_type::block_type::dimension;
61
62
74 template<class Comm>
75 SolverAdapter(Operator& op,
76 Dune::ScalarProduct<X>& sp,
77 std::shared_ptr<Dune::Preconditioner<X, X>> prec,
78 scalar_real_type reduction,
79 int maxit,
80 int verbose,
81 const Comm& comm)
82 : Dune::IterativeSolver<X, X>(op, sp, *prec, reduction, maxit, verbose)
83 , m_opOnCPUWithMatrix(op)
84 , m_matrix(GpuSparseMatrix<real_type>::fromMatrix(op.getmat()))
85 , m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose, comm))
86 {
87 OPM_ERROR_IF(
89 "Currently we only support operators of type MatrixAdapter in the CUDA/HIP solver. "
90 "Use --matrix-add-well-contributions=true. "
91 "Using WellModelMatrixAdapter with SolverAdapter is not well-defined so it will not work well, or at all.");
92 }
93
94 virtual void apply(X& x, X& b, double reduction, Dune::InverseOperatorResult& res) override
95 {
96 // TODO: Can we do this without reimplementing the other function?
97 // TODO: [perf] Do we need to update the matrix every time? Probably yes
98 m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat(), true);
99
100 if (!m_inputBuffer) {
101 m_inputBuffer.reset(new XGPU(b.dim()));
102 m_outputBuffer.reset(new XGPU(x.dim()));
103 }
104
105 m_inputBuffer->copyFromHost(b);
106 // TODO: [perf] do we need to copy x here?
107 m_outputBuffer->copyFromHost(x);
108
109 m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, reduction, res);
110
111 // TODO: [perf] do we need to copy b here?
112 m_inputBuffer->copyToHost(b);
113 m_outputBuffer->copyToHost(x);
114 }
115 virtual void apply(X& x, X& b, Dune::InverseOperatorResult& res) override
116 {
117 // TODO: [perf] Do we need to update the matrix every time? Probably yes
118 m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat(), true);
119
120 if (!m_inputBuffer) {
121 m_inputBuffer.reset(new XGPU(b.dim()));
122 m_outputBuffer.reset(new XGPU(x.dim()));
123 }
124
125 m_inputBuffer->copyFromHost(b);
126 // TODO: [perf] do we need to copy x here?
127 m_outputBuffer->copyFromHost(x);
128
129 m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, res);
130
131 // TODO: [perf] do we need to copy b here?
132 m_inputBuffer->copyToHost(b);
133 m_outputBuffer->copyToHost(x);
134 }
135
136private:
137 Operator& m_opOnCPUWithMatrix;
139
140 UnderlyingSolver<XGPU> m_underlyingSolver;
141
142
143 // TODO: Use a std::forward
144 // This is the MPI parallel case (general communication object)
145#if HAVE_MPI
146 template <class Comm>
147 UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
148 scalar_real_type reduction,
149 int maxit,
150 int verbose,
151 const Comm& communication)
152 {
153 // TODO: See the below TODO over the definition of precHolder in the other overload of constructSolver
154 // TODO: We are currently double wrapping preconditioners in the preconditioner factory to be extra
155 // compatible with CPU. Probably a cleaner way eventually would be to do more modifications to the
156 // flexible solver to accomodate the pure GPU better.
157 auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<X, X>>(prec);
158 if (!precAsHolder) {
159 OPM_THROW(std::invalid_argument,
160 "The preconditioner needs to be a CUDA preconditioner (eg. GPUDILU) wrapped in a "
161 "Opm::gpuistl::PreconditionerAdapter wrapped in a "
162 "Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set "
163 "preconditioner to 'gpudilu'");
164 }
165
166 auto preconditionerAdapter = precAsHolder->getUnderlyingPreconditioner();
167 auto preconditionerAdapterAsHolder
168 = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(preconditionerAdapter);
169 if (!preconditionerAdapterAsHolder) {
170 OPM_THROW(std::invalid_argument,
171 "The preconditioner needs to be a CUDA preconditioner (eg. GPUDILU) wrapped in a "
172 "Opm::gpuistl::PreconditionerAdapter wrapped in a "
173 "Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set "
174 "preconditioner to 'gpudilu'");
175 }
176 // We need to get the underlying preconditioner:
177 auto preconditionerReallyOnGPU = preconditionerAdapterAsHolder->getUnderlyingPreconditioner();
178
179
180 using CudaCommunication = GpuOwnerOverlapCopy<real_type, Comm>;
181 using SchwarzOperator
182 = Dune::OverlappingSchwarzOperator<GpuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
183 auto cudaCommunication = makeGpuOwnerOverlapCopy<real_type, block_size, Comm>(communication);
184
185 auto mpiPreconditioner = std::make_shared<GpuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
186 preconditionerReallyOnGPU, cudaCommunication);
187
188 auto scalarProduct = std::make_shared<Dune::ParallelScalarProduct<XGPU, CudaCommunication>>(
189 cudaCommunication, m_opOnCPUWithMatrix.category());
190
191
192 // NOTE: Ownership of cudaCommunication is handled by mpiPreconditioner. However, just to make sure we
193 // remember this, we add this check. So remember that we hold one count in this scope, and one in the
194 // GpuBlockPreconditioner instance. We accommodate for the fact that it could be passed around in
195 // GpuBlockPreconditioner, hence we do not test for != 2
196 OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly.");
197 auto overlappingCudaOperator = std::make_shared<SchwarzOperator>(m_matrix, *cudaCommunication);
198
199 return UnderlyingSolver<XGPU>(
200 overlappingCudaOperator, scalarProduct, mpiPreconditioner, reduction, maxit, verbose);
201 }
202#endif
203
204 // This is the serial case (specific overload for Dune::Amg::SequentialInformation)
205 UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
206 scalar_real_type reduction,
207 int maxit,
208 int verbose,
209 [[maybe_unused]] const Dune::Amg::SequentialInformation& communication)
210 {
211 // Dune::Amg::SequentialInformation is the serial case
212 return constructSolver(prec, reduction, maxit, verbose);
213 }
214
215 // TODO: Use a std::forward
216 UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
217 scalar_real_type reduction,
218 int maxit,
219 int verbose)
220 {
221 // TODO: Fix the reliance on casting here. This is a code smell to a certain degree, and assumes
222 // a certain setup beforehand. The only reason we do it this way is to minimize edits to the
223 // flexible solver. We could design it differently, but keep this for the time being until
224 // we figure out how we want to GPU-ify the rest of the system.
225 auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(prec);
226 if (!precAsHolder) {
227 OPM_THROW(std::invalid_argument,
228 "The preconditioner needs to be a CUDA preconditioner wrapped in a "
229 "Opm::gpuistl::PreconditionerHolder (eg. GPUDILU).");
230 }
231 auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
232
233 auto matrixOperator = std::make_shared<Dune::MatrixAdapter<GpuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
234 auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
235 return UnderlyingSolver<XGPU>(matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);
236 }
237
238 std::unique_ptr<XGPU> m_inputBuffer;
239 std::unique_ptr<XGPU> m_outputBuffer;
240};
241
242} // namespace Opm::gpuistl
243
244#endif
Dune::OwnerOverlapCopyCommunication< int, int > Comm
Definition: FlexibleSolver_impl.hpp:304
The GpuSparseMatrix class simple wrapper class for a CuSparse matrix.
Definition: GpuSparseMatrix.hpp:60
void updateNonzeroValues(const MatrixType &matrix, bool copyNonZeroElementsDirectly=false)
updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
Wraps a CUDA solver to work with CPU data.
Definition: SolverAdapter.hpp:52
virtual void apply(X &x, X &b, Dune::InverseOperatorResult &res) override
Definition: SolverAdapter.hpp:115
virtual void apply(X &x, X &b, double reduction, Dune::InverseOperatorResult &res) override
Definition: SolverAdapter.hpp:94
Opm::gpuistl::GpuVector< real_type > XGPU
Definition: SolverAdapter.hpp:60
SolverAdapter(Operator &op, Dune::ScalarProduct< X > &sp, std::shared_ptr< Dune::Preconditioner< X, X > > prec, scalar_real_type reduction, int maxit, int verbose, const Comm &comm)
constructor
Definition: SolverAdapter.hpp:75
static constexpr auto block_size
Definition: SolverAdapter.hpp:59
The is_a_well_operator class tries to guess if the operator is a well type operator.
Definition: has_function.hpp:114
Definition: fvbaseprimaryvariables.hh:141
Definition: AmgxInterface.hpp:38
Dune::InverseOperatorResult InverseOperatorResult
Definition: GpuBridge.hpp:32