SolverAdapter.hpp
Go to the documentation of this file.
1/*
2 Copyright 2022-2023 SINTEF AS
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef OPM_SOLVERADAPTER_HPP
20#define OPM_SOLVERADAPTER_HPP
21
22#include <dune/istl/bcrsmatrix.hh>
23#include <dune/istl/operators.hh>
24#include <dune/istl/paamg/pinfo.hh>
25#include <dune/istl/schwarz.hh>
26#include <dune/istl/solver.hh>
27
28#include <opm/common/ErrorMacros.hpp>
29
35
36#if HAVE_MPI
38#endif
39
40#ifdef OPEN_MPI
41#if OPEN_MPI
42#include "mpi-ext.h"
43#endif
44#endif
45
46#include <memory>
47
48namespace Opm::gpuistl
49{
// SolverAdapter wraps a GPU (CUDA/HIP) iterative solver so it can be driven
// with CPU-side Dune vectors and matrices: apply() refreshes the GPU copy of
// the matrix, copies b and x into device buffers, runs the underlying GPU
// solver, and copies the results back to the host.
//
// NOTE(review): this is a Doxygen-extracted listing. The numeric prefixes are
// the original source line numbers, and a few original lines are missing from
// the extraction (orig. 92, 142, 202, 206) — see the hedged notes below.
55template <class Operator, template <class> class UnderlyingSolver, class X>
56class SolverAdapter : public Dune::IterativeSolver<X, X>
57{
58public:
59 using typename Dune::IterativeSolver<X, X>::domain_type;
60 using typename Dune::IterativeSolver<X, X>::range_type;
61 using typename Dune::IterativeSolver<X, X>::field_type;
62 using typename Dune::IterativeSolver<X, X>::real_type;
63 using typename Dune::IterativeSolver<X, X>::scalar_real_type;
// block_size is taken from the CPU vector's block type (e.g. number of
// equations per cell) and reused for the GPU communication type below.
64 static constexpr auto block_size = domain_type::block_type::dimension;
// XGPU is the device-side vector type corresponding to the host type X.
65 using XGPU = Opm::gpuistl::GpuVector<real_type>;
66
67
// Constructor: builds the GPU copy of op's matrix and the underlying GPU
// solver. `prec` must (transitively) be a PreconditionerHolder — see the
// constructSolver overloads below, which throw std::invalid_argument
// otherwise. `comm` selects the MPI-parallel or sequential code path.
78 template<class Comm>
79 SolverAdapter(Operator& op,
80 Dune::ScalarProduct<X>& sp,
81 std::shared_ptr<Dune::Preconditioner<X, X>> prec,
82 scalar_real_type reduction,
83 int maxit,
84 int verbose,
85 const Comm& comm)
86 : Dune::IterativeSolver<X, X>(op, sp, *prec, reduction, maxit, verbose)
87 , m_opOnCPUWithMatrix(op)
88 , m_matrix(GpuSparseMatrix<real_type>::fromMatrix(op.getmat()))
89 , m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose, comm))
90 {
// NOTE(review): the condition argument of OPM_ERROR_IF (original line 92)
// is missing from this extraction — judging by the message it presumably
// rejects well-model operators (cf. is_a_well_operator); confirm against
// the real source.
91 OPM_ERROR_IF(
93 "Currently we only support operators of type MatrixAdapter in the CUDA/HIP solver. "
94 "Use --matrix-add-well-contributions=true. "
95 "Using WellModelMatrixAdapter with SolverAdapter is not well-defined so it will not work well, or at all.");
96 }
97
// Solve with an explicit reduction target: refresh the GPU matrix, copy b
// and x to device buffers, run the wrapped solver, and copy b and the
// solution x back to the host. Results are reported through `res`.
98 virtual void apply(X& x, X& b, double reduction, Dune::InverseOperatorResult& res) override
99 {
100 // TODO: Can we do this without reimplementing the other function?
101 // TODO: [perf] Do we need to update the matrix every time? Probably yes
102 m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat())=;
103
// Device buffers are allocated lazily on first use and reused afterwards.
104 if (!m_inputBuffer) {
105 m_inputBuffer.reset(new XGPU(b.dim()));
106 m_outputBuffer.reset(new XGPU(x.dim()));
107 }
108
109 m_inputBuffer->copyFromHost(b);
110 // TODO: [perf] do we need to copy x here?
111 m_outputBuffer->copyFromHost(x);
112
113 m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, reduction, res);
114
115 // TODO: [perf] do we need to copy b here?
116 m_inputBuffer->copyToHost(b);
117 m_outputBuffer->copyToHost(x);
118 }
// Solve using the solver's configured reduction; otherwise identical to the
// overload above (same copy-in / solve / copy-out sequence).
119 virtual void apply(X& x, X& b, Dune::InverseOperatorResult& res) override
120 {
121 // TODO: [perf] Do we need to update the matrix every time? Probably yes
122 m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat());
123
124 if (!m_inputBuffer) {
125 m_inputBuffer.reset(new XGPU(b.dim()));
126 m_outputBuffer.reset(new XGPU(x.dim()));
127 }
128
129 m_inputBuffer->copyFromHost(b);
130 // TODO: [perf] do we need to copy x here?
131 m_outputBuffer->copyFromHost(x);
132
133 m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, res);
134
135 // TODO: [perf] do we need to copy b here?
136 m_inputBuffer->copyToHost(b);
137 m_outputBuffer->copyToHost(x);
138 }
139
140private:
// Reference to the CPU-side operator; its matrix is re-read on every apply().
141 Operator& m_opOnCPUWithMatrix;
// NOTE(review): the declaration on original line 142 is missing from this
// extraction — given the constructor initializer and the uses above it is
// presumably `GpuSparseMatrix<real_type> m_matrix;`; confirm against the
// real source.
143
144 UnderlyingSolver<XGPU> m_underlyingSolver;
145
146
147 // TODO: Use a std::forward
148 // This is the MPI parallel case (general communication object)
149#if HAVE_MPI
150 template <class Comm>
151 UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
152 scalar_real_type reduction,
153 int maxit,
154 int verbose,
155 const Comm& communication)
156 {
157 // TODO: See the below TODO over the definition of precHolder in the other overload of constructSolver
158 // TODO: We are currently double wrapping preconditioners in the preconditioner factory to be extra
159 // compatible with CPU. Probably a cleaner way eventually would be to do more modifications to the
160 // flexible solver to accommodate the pure GPU better.
// Unwrap the double-wrapped preconditioner: outer holder is on CPU types...
161 auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<X, X>>(prec);
162 if (!precAsHolder) {
163 OPM_THROW(std::invalid_argument,
164 "The preconditioner needs to be a CUDA preconditioner (eg. GPUDILU) wrapped in a "
165 "Opm::gpuistl::PreconditionerAdapter wrapped in a "
166 "Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set "
167 "preconditioner to 'GPUDILU'");
168 }
169
// ...and the inner holder is on GPU vector types.
170 auto preconditionerAdapter = precAsHolder->getUnderlyingPreconditioner();
171 auto preconditionerAdapterAsHolder
172 = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(preconditionerAdapter);
173 if (!preconditionerAdapterAsHolder) {
174 OPM_THROW(std::invalid_argument,
175 "The preconditioner needs to be a CUDA preconditioner (eg. GPUDILU) wrapped in a "
176 "Opm::gpuistl::PreconditionerAdapter wrapped in a "
177 "Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set "
178 "preconditioner to 'GPUDILU'");
179 }
180 // We need to get the underlying preconditioner:
181 auto preconditionerReallyOnGPU = preconditionerAdapterAsHolder->getUnderlyingPreconditioner();
182
183 // Temporary solution: use GPU Direct communication solely based on these preprocessor statements
184 bool mpiSupportsCudaAwareAtCompileTime = false;
185 bool mpiSupportsCudaAwareAtRunTime = false;
186
187#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
188 mpiSupportsCudaAwareAtCompileTime = true;
189#endif /* MPIX_CUDA_AWARE_SUPPORT */
190
// Compile-time support does not imply the runtime library has it; query it.
191#if defined(MPIX_CUDA_AWARE_SUPPORT)
192 if (1 == MPIX_Query_cuda_support()) {
193 mpiSupportsCudaAwareAtRunTime = true;
194 }
195#endif /* MPIX_CUDA_AWARE_SUPPORT */
196
197
198 // TODO add typename Operator communication type as a named type with using
// NOTE(review): the template arguments of both make_shared calls (original
// lines 202 and 206) are missing from this extraction — presumably the
// CUDA-aware GPUSender specialization in the first branch and the
// GPU-oblivious one in the second (cf. GpuOwnerOverlapCopy.hpp); confirm
// against the real source.
199 std::shared_ptr<Opm::gpuistl::GPUSender<real_type, Comm>> gpuComm;
200 if (mpiSupportsCudaAwareAtCompileTime && mpiSupportsCudaAwareAtRunTime) {
201 gpuComm = std::make_shared<
203 communication);
204 } else {
205 gpuComm = std::make_shared<
207 communication);
208 }
209
210 using CudaCommunication = GpuOwnerOverlapCopy<real_type, block_size, Comm>;
211 using SchwarzOperator
212 = Dune::OverlappingSchwarzOperator<GpuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
213 auto cudaCommunication = std::make_shared<CudaCommunication>(gpuComm);
214
215 auto mpiPreconditioner = std::make_shared<GpuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
216 preconditionerReallyOnGPU, cudaCommunication);
217
218 auto scalarProduct = std::make_shared<Dune::ParallelScalarProduct<XGPU, CudaCommunication>>(
219 cudaCommunication, m_opOnCPUWithMatrix.category());
220
221
222 // NOTE: Ownership of cudaCommunication is handled by mpiPreconditioner. However, just to make sure we
223 // remember this, we add this check. So remember that we hold one count in this scope, and one in the
224 // GpuBlockPreconditioner instance. We accommodate for the fact that it could be passed around in
225 // GpuBlockPreconditioner, hence we do not test for != 2
226 OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly.");
227 auto overlappingCudaOperator = std::make_shared<SchwarzOperator>(m_matrix, *cudaCommunication);
228
229 return UnderlyingSolver<XGPU>(
230 overlappingCudaOperator, scalarProduct, mpiPreconditioner, reduction, maxit, verbose);
231 }
232#endif
233
234 // This is the serial case (specific overload for Dune::Amg::SequentialInformation)
235 UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
236 scalar_real_type reduction,
237 int maxit,
238 int verbose,
239 [[maybe_unused]] const Dune::Amg::SequentialInformation& communication)
240 {
241 // Dune::Amg::SequentialInformation is the serial case
242 return constructSolver(prec, reduction, maxit, verbose);
243 }
244
245 // TODO: Use a std::forward
// Serial construction: unwrap the GPU preconditioner and build a sequential
// MatrixAdapter/SeqScalarProduct solver around the GPU matrix.
246 UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
247 scalar_real_type reduction,
248 int maxit,
249 int verbose)
250 {
251 // TODO: Fix the reliance on casting here. This is a code smell to a certain degree, and assumes
252 // a certain setup beforehand. The only reason we do it this way is to minimize edits to the
253 // flexible solver. We could design it differently, but keep this for the time being until
254 // we figure out how we want to GPU-ify the rest of the system.
255 auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(prec);
256 if (!precAsHolder) {
257 OPM_THROW(std::invalid_argument,
258 "The preconditioner needs to be a CUDA preconditioner wrapped in a "
259 "Opm::gpuistl::PreconditionerHolder (eg. GPUDILU).");
260 }
261 auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
262
263 auto matrixOperator = std::make_shared<Dune::MatrixAdapter<GpuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
264 auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
265 return UnderlyingSolver<XGPU>(matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);
266 }
267
// Lazily-allocated device-side buffers for b (input) and x (output), reused
// across apply() calls.
268 std::unique_ptr<XGPU> m_inputBuffer;
269 std::unique_ptr<XGPU> m_outputBuffer;
270};
271
272} // namespace Opm::gpuistl
273
274#endif
Dune::OwnerOverlapCopyCommunication< int, int > Comm
Definition: FlexibleSolver_impl.hpp:304
Derived class of GPUSender that handles MPI made with CUDA aware MPI The copyOwnerToAll function uses ...
Definition: GpuOwnerOverlapCopy.hpp:177
Derived class of GPUSender that handles MPI calls that should NOT use GPU direct communication. The im...
Definition: GpuOwnerOverlapCopy.hpp:121
The GpuSparseMatrix class simple wrapper class for a CuSparse matrix.
Definition: GpuSparseMatrix.hpp:48
void updateNonzeroValues(const MatrixType &matrix, bool copyNonZeroElementsDirectly=false)
updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
Wraps a CUDA solver to work with CPU data.
Definition: SolverAdapter.hpp:57
virtual void apply(X &x, X &b, Dune::InverseOperatorResult &res) override
Definition: SolverAdapter.hpp:119
virtual void apply(X &x, X &b, double reduction, Dune::InverseOperatorResult &res) override
Definition: SolverAdapter.hpp:98
Opm::gpuistl::GpuVector< real_type > XGPU
Definition: SolverAdapter.hpp:65
SolverAdapter(Operator &op, Dune::ScalarProduct< X > &sp, std::shared_ptr< Dune::Preconditioner< X, X > > prec, scalar_real_type reduction, int maxit, int verbose, const Comm &comm)
constructor
Definition: SolverAdapter.hpp:79
static constexpr auto block_size
Definition: SolverAdapter.hpp:64
The is_a_well_operator class tries to guess if the operator is a well type operator.
Definition: has_function.hpp:114
Definition: fvbaseprimaryvariables.hh:141
Definition: autotuner.hpp:30
Dune::InverseOperatorResult InverseOperatorResult
Definition: GpuBridge.hpp:32