opm-simulators
GpuAwareMPISender.hpp
1 /*
2  Copyright 2022-2023 SINTEF AS, 2025 Equinor ASA
3 
4  This file is part of the Open Porous Media project (OPM).
5 
6  OPM is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OPM is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OPM. If not, see <http://www.gnu.org/licenses/>.
18 */
19 #ifndef OPM_GPUISTL_GPUAWAREMPISENDER_HPP
20 #define OPM_GPUISTL_GPUAWAREMPISENDER_HPP
21 
22 #include <dune/istl/owneroverlapcopy.hh>
23 
24 #include <opm/simulators/linalg/FlowLinearSolverParameters.hpp>
25 #include <opm/simulators/linalg/gpuistl/GpuSender.hpp>
26 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
27 
28 #include <mpi.h>
29 
30 #include <memory>
31 #include <vector>
32 
33 namespace Opm::gpuistl
34 {
35 
44 template <class field_type, int block_size, class OwnerOverlapCopyCommunicationType>
45 class GPUAwareMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType>
46 {
47 public:
48  using X = GpuVector<field_type>;
49 
50  explicit GPUAwareMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
52  {
53  }
54 
55  void copyOwnerToAll(const X& source, X& dest) const override
56  {
57  OPM_ERROR_IF(&source != &dest,
58  "The provided GpuVectors' address did not match"); // In this context, source == dest!!!
59  std::call_once(this->m_initializedIndices, [&]() { initIndexSet(); });
60 
61  int rank = this->m_cpuOwnerOverlapCopy.communicator().rank();
62  dest.prepareSendBuf(*m_GPUSendBuf, *m_commpairIndicesOwner);
63 
64  // Start MPI stuff here...
65  // Note: This has been taken from DUNE's parallel/communicator.hh
66  std::vector<MPI_Request> sendRequests(m_messageInformation.size());
67  std::vector<MPI_Request> recvRequests(m_messageInformation.size());
68  std::vector<int> processMap(m_messageInformation.size());
69  size_t numberOfRealRecvRequests = 0;
70 
71  using const_iterator = typename InformationMap::const_iterator;
72  const const_iterator end = m_messageInformation.end();
73 
74  {
75  size_t i = 0;
76  for (const_iterator info = m_messageInformation.begin(); info != end; ++info, ++i) {
77  processMap[i] = info->first;
78  if (info->second.second.m_size) {
79  MPI_Irecv(m_GPURecvBuf->data() + info->second.second.m_start,
80  detail::to_int(info->second.second.m_size),
81  MPI_BYTE,
82  info->first,
83  m_commTag,
84  this->m_cpuOwnerOverlapCopy.communicator(),
85  &recvRequests[i]);
86  numberOfRealRecvRequests += 1;
87  } else {
88  recvRequests[i] = MPI_REQUEST_NULL;
89  }
90  }
91  }
92 
93  {
94  size_t i = 0;
95  for (const_iterator info = m_messageInformation.begin(); info != end; ++info, ++i) {
96  if (info->second.first.m_size) {
97  MPI_Issend(m_GPUSendBuf->data() + info->second.first.m_start,
98  detail::to_int(info->second.first.m_size),
99  MPI_BYTE,
100  info->first,
101  m_commTag,
102  this->m_cpuOwnerOverlapCopy.communicator(),
103  &sendRequests[i]);
104  } else {
105  sendRequests[i] = MPI_REQUEST_NULL;
106  }
107  }
108  }
109  int finished = MPI_UNDEFINED;
110  MPI_Status status;
111  for (size_t i = 0; i < numberOfRealRecvRequests; i++) {
112  status.MPI_ERROR = MPI_SUCCESS;
113  MPI_Waitany(m_messageInformation.size(), recvRequests.data(), &finished, &status);
114 
115  if (status.MPI_ERROR != MPI_SUCCESS) {
116  OPM_THROW(std::runtime_error,
117  fmt::format(fmt::runtime("MPI_Error occurred while rank {} received a message from rank {}"),
118  rank,
119  processMap[finished]));
120  }
121  }
122  MPI_Status recvStatus;
123  for (size_t i = 0; i < m_messageInformation.size(); i++) {
124  if (MPI_SUCCESS != MPI_Wait(&sendRequests[i], &recvStatus)) {
125  OPM_THROW(std::runtime_error,
126  fmt::format(fmt::runtime("MPI_Error occurred while rank {} sent a message from rank {}"),
127  rank,
128  processMap[finished]));
129  }
130  }
131  // ...End of MPI stuff
132 
133  dest.syncFromRecvBuf(*m_GPURecvBuf, *m_commpairIndicesCopy);
134  }
135 
136 private:
137  mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesCopy;
138  mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesOwner;
139  mutable std::unique_ptr<GpuVector<field_type>> m_GPUSendBuf;
140  mutable std::unique_ptr<GpuVector<field_type>> m_GPURecvBuf;
141 
142  struct MessageInformation {
143  MessageInformation()
144  : m_start(0)
145  , m_size(0)
146  {
147  }
148  MessageInformation(size_t start, size_t size)
149  : m_start(start)
150  , m_size(size)
151  {
152  }
153  size_t m_start; // offset in elements of "field_type"
154  size_t m_size; // size in bytes
155  };
156 
157  using InformationMap = std::map<int, std::pair<MessageInformation, MessageInformation>>;
158  mutable InformationMap m_messageInformation;
159  using IM = std::map<int, std::pair<std::vector<int>, std::vector<int>>>;
160  mutable IM m_im;
161 
162  constexpr static int m_commTag = 0; // So says DUNE
163 
164  void buildCommPairIdxs() const
165  {
166  const auto& ri = this->m_cpuOwnerOverlapCopy.remoteIndices();
167  std::vector<int> commpairIndicesCopyOnCPU;
168  std::vector<int> commpairIndicesOwnerCPU;
169 
170  for (const auto& process : ri) {
171  m_im[process.first] = std::pair(std::vector<int>(), std::vector<int>());
172  for (int send = 0; send < 2; ++send) {
173  auto remoteEnd = send ? process.second.first->end() : process.second.second->end();
174  auto remote = send ? process.second.first->begin() : process.second.second->begin();
175 
176  while (remote != remoteEnd) {
177  if (send ? (remote->localIndexPair().local().attribute() == 1) : (remote->attribute() == 1)) {
178  if (send) {
179  m_im[process.first].first.push_back(remote->localIndexPair().local().local());
180  } else {
181  m_im[process.first].second.push_back(remote->localIndexPair().local().local());
182  }
183  }
184  ++remote;
185  }
186  }
187  }
188 
189  int sendBufIdx = 0;
190  int recvBufIdx = 0;
191  for (auto it = m_im.begin(); it != m_im.end(); it++) {
192  int noSend = it->second.first.size();
193  int noRecv = it->second.second.size();
194 
195  if (noSend + noRecv > 0) {
196  m_messageInformation.insert(std::make_pair(
197  it->first,
198  std::make_pair(
199  MessageInformation(sendBufIdx * block_size, noSend * block_size * sizeof(field_type)),
200  MessageInformation(recvBufIdx * block_size, noRecv * block_size * sizeof(field_type)))));
201 
202  for (int x = 0; x < noSend; x++) {
203  for (int bs = 0; bs < block_size; bs++) {
204  commpairIndicesOwnerCPU.push_back(it->second.first[x] * block_size + bs);
205  }
206  }
207  for (int x = 0; x < noRecv; x++) {
208  for (int bs = 0; bs < block_size; bs++) {
209  commpairIndicesCopyOnCPU.push_back(it->second.second[x] * block_size + bs);
210  }
211  }
212  sendBufIdx += noSend;
213  recvBufIdx += noRecv;
214  }
215  }
216 
217  m_commpairIndicesCopy = std::make_unique<GpuVector<int>>(commpairIndicesCopyOnCPU);
218  m_commpairIndicesOwner = std::make_unique<GpuVector<int>>(commpairIndicesOwnerCPU);
219 
220  m_GPUSendBuf = std::make_unique<GpuVector<field_type>>(sendBufIdx * block_size);
221  m_GPURecvBuf = std::make_unique<GpuVector<field_type>>(recvBufIdx * block_size);
222  }
223 
224  void initIndexSet() const override
225  {
226  // We need indices that we we will use in the project, dot and norm calls.
227  // TODO: [premature perf] Can this be run once per instance? Or do we need to rebuild every time?
228  const auto& pis = this->m_cpuOwnerOverlapCopy.indexSet();
229  std::vector<int> indicesCopyOnCPU;
230  std::vector<int> indicesOwnerCPU;
231  for (const auto& index : pis) {
232  if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::copy) {
233  for (int component = 0; component < block_size; ++component) {
234  indicesCopyOnCPU.push_back(index.local().local() * block_size + component);
235  }
236  }
237 
238  if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::owner) {
239  for (int component = 0; component < block_size; ++component) {
240  indicesOwnerCPU.push_back(index.local().local() * block_size + component);
241  }
242  }
243  }
244 
245  this->m_indicesCopy = std::make_unique<GpuVector<int>>(indicesCopyOnCPU);
246  this->m_indicesOwner = std::make_unique<GpuVector<int>>(indicesOwnerCPU);
247 
248  buildCommPairIdxs();
249  }
250 };
251 
252 } // namespace Opm::gpuistl
253 #endif // OPM_GPUISTL_GPUAWAREMPISENDER_HPP
int to_int(std::size_t s)
to_int converts a (on most relevant platforms) 64 bits unsigned size_t to a signed 32 bits signed int...
Definition: safe_conversion.hpp:56
GPUSender is a wrapper class for classes which will implement copOwnerToAll This is implemented with ...
Definition: GpuSender.hpp:43
A small, fixed‑dimension MiniVector class backed by std::array that can be used in both host and CUD...
Definition: AmgxInterface.hpp:37
void copyOwnerToAll(const X &source, X &dest) const override
copyOwnerToAll will copy the data in source to all processes.
Definition: GpuAwareMPISender.hpp:55
Derived class of GPUSender that handles MPI made with CUDA aware MPI The copOwnerToAll function uses ...
Definition: GpuAwareMPISender.hpp:45