SolverAdapter.hpp
1/*
2 Copyright 2022-2023 SINTEF AS
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef OPM_SOLVERADAPTER_HPP
20#define OPM_SOLVERADAPTER_HPP
21
22#include <memory>
23
24#include <dune/istl/bcrsmatrix.hh>
25#include <dune/istl/operators.hh>
26#include <dune/istl/schwarz.hh>
27#include <dune/istl/solver.hh>
28#include <opm/common/ErrorMacros.hpp>
29#include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp>
30#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp>
31#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
32#include <opm/simulators/linalg/cuistl/CuVector.hpp>
33#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
34#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
35
36#ifdef OPEN_MPI
37#if OPEN_MPI
38#include "mpi-ext.h"
39#endif
40#endif
41
42namespace Opm::cuistl
43{
44
45//! @brief Wraps a CUDA solver to work with CPU data.
46//!
47//! The right-hand side and the solution vector are copied to the GPU before the wrapped
48//! GPU solver is applied, and the results are copied back to the CPU vectors afterwards.
49template <class Operator, template <class> class UnderlyingSolver, class X>
50class SolverAdapter : public Dune::IterativeSolver<X, X>
51{
52public:
53 using typename Dune::IterativeSolver<X, X>::domain_type;
54 using typename Dune::IterativeSolver<X, X>::range_type;
55 using typename Dune::IterativeSolver<X, X>::field_type;
56 using typename Dune::IterativeSolver<X, X>::real_type;
57 using typename Dune::IterativeSolver<X, X>::scalar_real_type;
58 static constexpr auto block_size = domain_type::block_type::dimension;
59 using XGPU = Opm::cuistl::CuVector<real_type>;
60
61 // TODO: Use a std::forward
62 SolverAdapter(Operator& op,
63 Dune::ScalarProduct<X>& sp,
64 std::shared_ptr<Dune::Preconditioner<X, X>> prec,
65 scalar_real_type reduction,
66 int maxit,
67 int verbose)
68 : Dune::IterativeSolver<X, X>(op, sp, *prec, reduction, maxit, verbose)
69 , m_opOnCPUWithMatrix(op)
70 , m_matrix(CuSparseMatrix<real_type>::fromMatrix(op.getmat()))
71 , m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose))
72 {
73 }
74
75 virtual void apply(X& x, X& b, double reduction, Dune::InverseOperatorResult& res) override
76 {
77 // TODO: Can we do this without reimplementing the other function?
78 // TODO: [perf] Do we need to update the matrix every time? Probably yes
79 m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat());
80
81 if (!m_inputBuffer) {
82 m_inputBuffer.reset(new XGPU(b.dim()));
83 m_outputBuffer.reset(new XGPU(x.dim()));
84 }
85
86 m_inputBuffer->copyFromHost(b);
87 // TODO: [perf] do we need to copy x here?
88 m_outputBuffer->copyFromHost(x);
89
90 m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, reduction, res);
91
92 // TODO: [perf] do we need to copy b here?
93 m_inputBuffer->copyToHost(b);
94 m_outputBuffer->copyToHost(x);
95 }
96 virtual void apply(X& x, X& b, Dune::InverseOperatorResult& res) override
97 {
98 // TODO: [perf] Do we need to update the matrix every time? Probably yes
99 m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat());
100
101 if (!m_inputBuffer) {
102 m_inputBuffer.reset(new XGPU(b.dim()));
103 m_outputBuffer.reset(new XGPU(x.dim()));
104 }
105
106 m_inputBuffer->copyFromHost(b);
107 // TODO: [perf] do we need to copy x here?
108 m_outputBuffer->copyFromHost(x);
109
110 m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, res);
111
112 // TODO: [perf] do we need to copy b here?
113 m_inputBuffer->copyToHost(b);
114 m_outputBuffer->copyToHost(x);
115 }
116
117private:
118 Operator& m_opOnCPUWithMatrix;
119 CuSparseMatrix<real_type> m_matrix;
120
121 UnderlyingSolver<XGPU> m_underlyingSolver;
122
123
124 // TODO: Use a std::forward
125 UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
126 scalar_real_type reduction,
127 int maxit,
128 int verbose)
129 {
130
131 OPM_ERROR_IF(
132 detail::is_a_well_operator<Operator>::value,
133 "Currently we only support operators of type MatrixAdapter in the CUDA solver. "
134 "Use --matrix-add-well-contributions=true. "
135 "Using WellModelMatrixAdapter with SolverAdapter is not well-defined so it will not work well, or at all.");
136
137
138
139
140
141 if constexpr (detail::has_communication<Operator>::value) {
142 // TODO: See the TODO above the definition of precAsHolder in the other branch below.
143 // TODO: We are currently double-wrapping preconditioners in the preconditioner factory to stay
144 // extra compatible with the CPU code path.
145 // A cleaner way would eventually be to modify the flexible solver to
146 // accommodate the pure-GPU setup better.
147 auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<X, X>>(prec);
148 if (!precAsHolder) {
149 OPM_THROW(std::invalid_argument,
150 "The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
151 "Opm::cuistl::PreconditionerAdapter wrapped in a "
152 "Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
153 "preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
154 }
155
156 auto preconditionerAdapter = precAsHolder->getUnderlyingPreconditioner();
157 auto preconditionerAdapterAsHolder
158 = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(preconditionerAdapter);
159 if (!preconditionerAdapterAsHolder) {
160 OPM_THROW(std::invalid_argument,
161 "The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
162 "Opm::cuistl::PreconditionerAdapter wrapped in a "
163 "Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
164 "preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
165 }
166 // We need to get the underlying preconditioner:
167 auto preconditionerReallyOnGPU = preconditionerAdapterAsHolder->getUnderlyingPreconditioner();
168 const auto& communication = m_opOnCPUWithMatrix.getCommunication();
169
170 // Temporary solution: choose GPU-direct (CUDA-aware) MPI communication based solely on these preprocessor checks (a standalone version of this probe is sketched after this listing).
171 bool mpiSupportsCudaAwareAtCompileTime = false;
172 bool mpiSupportsCudaAwareAtRunTime = false;
173
174#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
175 mpiSupportsCudaAwareAtCompileTime = true;
176#endif /* MPIX_CUDA_AWARE_SUPPORT */
177
178#if defined(MPIX_CUDA_AWARE_SUPPORT)
179 if (1 == MPIX_Query_cuda_support()) {
180 mpiSupportsCudaAwareAtRunTime = true;
181 }
182#endif /* MPIX_CUDA_AWARE_SUPPORT */
183
184
185 // TODO: expose the Operator's communication type as a named type alias (using declaration)
186 std::shared_ptr<Opm::cuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm;
187 if (mpiSupportsCudaAwareAtCompileTime && mpiSupportsCudaAwareAtRunTime) {
188 gpuComm = std::make_shared<Opm::cuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
189 }
190 else {
191 gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
192 }
193
194 using CudaCommunication = CuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
195 using SchwarzOperator
196 = Dune::OverlappingSchwarzOperator<CuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
197 auto cudaCommunication = std::make_shared<CudaCommunication>(gpuComm);
198
199 auto mpiPreconditioner = std::make_shared<CuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
200 preconditionerReallyOnGPU, cudaCommunication);
201
202 auto scalarProduct = std::make_shared<Dune::ParallelScalarProduct<XGPU, CudaCommunication>>(
203 cudaCommunication, m_opOnCPUWithMatrix.category());
204
205
206 // NOTE: Ownership of cudaCommunication is handled by mpiPreconditioner. However, just to make
207 // sure we remember this, we add the check below: we hold one reference count in this scope
208 // and one in the CuBlockPreconditioner instance.
209 // Since the pointer could be passed around further inside CuBlockPreconditioner,
210 // we test for use_count() < 2 rather than != 2.
211 OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly.");
212 auto overlappingCudaOperator = std::make_shared<SchwarzOperator>(m_matrix, *cudaCommunication);
213
214 return UnderlyingSolver<XGPU>(
215 overlappingCudaOperator, scalarProduct, mpiPreconditioner, reduction, maxit, verbose);
216 } else {
217 // TODO: Fix the reliance on casting here. This is a code smell to a certain degree, and assumes
218 // a certain setup beforehand. The only reason we do it this way is to minimize edits to the
219 // flexible solver. We could design it differently, but keep this for the time being until
220 // we figure out how we want to GPU-ify the rest of the system.
221 auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(prec);
222 if (!precAsHolder) {
223 OPM_THROW(std::invalid_argument,
224 "The preconditioner needs to be a CUDA preconditioner wrapped in a "
225 "Opm::cuistl::PreconditionerHolder (eg. CuILU0).");
226 }
227 auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
228
229 auto matrixOperator
230 = std::make_shared<Dune::MatrixAdapter<CuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
231 auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
232 return UnderlyingSolver<XGPU>(
233 matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);
234 }
235 }
236
237 std::unique_ptr<XGPU> m_inputBuffer;
238 std::unique_ptr<XGPU> m_outputBuffer;
239};
240} // namespace Opm::cuistl
241
242#endif
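
The GPU-direct path in constructSolver() hinges on the Open MPI extension header mpi-ext.h: the MPIX_CUDA_AWARE_SUPPORT macro tells us whether the MPI library was built with CUDA support, and MPIX_Query_cuda_support() tells us whether that support is actually enabled at run time. The standalone sketch below isolates that probe. It assumes Open MPI; on other MPI implementations both flags simply stay false, which corresponds to the GPU-oblivious sender being selected in the listing. The program structure and output are illustrative only.

// Minimal, self-contained sketch (not part of SolverAdapter.hpp) of the CUDA-aware
// MPI probe used in constructSolver() above. Compile with an MPI compiler wrapper,
// e.g. mpicxx.
#include <iostream>
#include <mpi.h>
#ifdef OPEN_MPI
#if OPEN_MPI
#include "mpi-ext.h" // Open MPI extensions: MPIX_CUDA_AWARE_SUPPORT, MPIX_Query_cuda_support()
#endif
#endif

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    bool cudaAwareAtCompileTime = false;
    bool cudaAwareAtRunTime = false;

#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
    // The MPI library was built with CUDA support.
    cudaAwareAtCompileTime = true;
#endif

#if defined(MPIX_CUDA_AWARE_SUPPORT)
    // CUDA support can still be disabled at run time, so query it as well.
    if (1 == MPIX_Query_cuda_support()) {
        cudaAwareAtRunTime = true;
    }
#endif

    std::cout << "CUDA-aware MPI: compile time = " << cudaAwareAtCompileTime
              << ", run time = " << cudaAwareAtRunTime << std::endl;

    MPI_Finalize();
    return 0;
}

Checking both conditions mirrors the listing: an Open MPI build with CUDA support can still have it disabled when the job runs, in which case the GPU-oblivious sender is the safe choice.
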
The CuSparseMatrix class is a simple wrapper class for a CuSparse matrix.
Definition: CuSparseMatrix.hpp:48
void updateNonzeroValues(const MatrixType &matrix, bool copyNonZeroElementsDirectly=false)
updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
Wraps a CUDA solver to work with CPU data (a usage sketch follows this list).
Definition: SolverAdapter.hpp:51
Opm::cuistl::CuVector< real_type > XGPU
Definition: SolverAdapter.hpp:59
virtual void apply(X &x, X &b, double reduction, Dune::InverseOperatorResult &res) override
Definition: SolverAdapter.hpp:75
SolverAdapter(Operator &op, Dune::ScalarProduct< X > &sp, std::shared_ptr< Dune::Preconditioner< X, X > > prec, scalar_real_type reduction, int maxit, int verbose)
Definition: SolverAdapter.hpp:62
static constexpr auto block_size
Definition: SolverAdapter.hpp:58
virtual void apply(X &x, X &b, Dune::InverseOperatorResult &res) override
Definition: SolverAdapter.hpp:96
The has_communication class checks if the type has the member function getCommunication.
Definition: has_function.hpp:96
The is_a_well_operator class tries to guess if the operator is a well type operator.
Definition: has_function.hpp:114
Dune::InverseOperatorResult InverseOperatorResult
Definition: BdaBridge.hpp:32
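
For orientation, the following is a hedged usage sketch of SolverAdapter driven from CPU-side Dune types, matching the constructor and apply() signatures shown in the listing. The include path, the helper function solveOnGpu and its parameter names, the choice of Dune::BiCGSTABSolver as the UnderlyingSolver, and the block size are illustrative assumptions, not something this header mandates. The GPU-wrapped preconditioner must satisfy the requirements spelled out in the exception messages of constructSolver() (for the sequential MatrixAdapter case shown here, a CUDA preconditioner such as CuILU0 exposed through Opm::cuistl::PreconditionerAdapter / PreconditionerHolder); its construction is deliberately left to the caller.

// Illustrative sketch only: drives SolverAdapter from CPU-side Dune types.
// The GPU-wrapped preconditioner is assumed to be constructed elsewhere, exactly as
// the exception messages in constructSolver() describe.
#include <memory>

#include <dune/common/fmatrix.hh>
#include <dune/common/fvector.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/bvector.hh>
#include <dune/istl/operators.hh>
#include <dune/istl/scalarproducts.hh>
#include <dune/istl/solvers.hh>

#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp> // assumed install path

constexpr int blockSize = 3; // illustrative block size
using Block = Dune::FieldMatrix<double, blockSize, blockSize>;
using Matrix = Dune::BCRSMatrix<Block>;
using Vector = Dune::BlockVector<Dune::FieldVector<double, blockSize>>;
using Operator = Dune::MatrixAdapter<Matrix, Vector, Vector>;

Dune::InverseOperatorResult
solveOnGpu(Operator& op,
           std::shared_ptr<Dune::Preconditioner<Vector, Vector>> gpuWrappedPrec,
           Vector& x,
           Vector& b)
{
    // A sequential scalar product matches the MatrixAdapter (non-MPI) branch of
    // constructSolver(); parallel runs go through the Schwarz/MPI branch instead.
    Dune::SeqScalarProduct<Vector> sp;

    Opm::cuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, Vector> solver(
        op, sp, gpuWrappedPrec, /*reduction=*/1e-6, /*maxit=*/200, /*verbose=*/0);

    Dune::InverseOperatorResult result;
    // Copies x and b to GPU buffers, runs the wrapped GPU solver, and copies the
    // results back to the CPU vectors, as in the apply() overloads above.
    solver.apply(x, b, result);
    return result;
}

If the supplied preconditioner is not wrapped as required, constructSolver() throws std::invalid_argument with the messages shown in the listing.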