19 #ifndef OPM_GPUISTL_GPUAWAREMPISENDER_HPP 20 #define OPM_GPUISTL_GPUAWAREMPISENDER_HPP 22 #include <dune/istl/owneroverlapcopy.hh> 24 #include <opm/simulators/linalg/FlowLinearSolverParameters.hpp> 25 #include <opm/simulators/linalg/gpuistl/GpuSender.hpp> 26 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp> 44 template <
class field_type,
int block_size,
class OwnerOverlapCopyCommunicationType>
50 explicit GPUAwareMPISender(
const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
57 OPM_ERROR_IF(&source != &dest,
58 "The provided GpuVectors' address did not match");
59 std::call_once(this->m_initializedIndices, [&]() { initIndexSet(); });
61 int rank = this->m_cpuOwnerOverlapCopy.communicator().rank();
62 dest.prepareSendBuf(*m_GPUSendBuf, *m_commpairIndicesOwner);
66 std::vector<MPI_Request> sendRequests(m_messageInformation.size());
67 std::vector<MPI_Request> recvRequests(m_messageInformation.size());
68 std::vector<int> processMap(m_messageInformation.size());
69 size_t numberOfRealRecvRequests = 0;
71 using const_iterator =
typename InformationMap::const_iterator;
72 const const_iterator end = m_messageInformation.end();
76 for (const_iterator info = m_messageInformation.begin(); info != end; ++info, ++i) {
77 processMap[i] = info->first;
78 if (info->second.second.m_size) {
79 MPI_Irecv(m_GPURecvBuf->data() + info->second.second.m_start,
84 this->m_cpuOwnerOverlapCopy.communicator(),
86 numberOfRealRecvRequests += 1;
88 recvRequests[i] = MPI_REQUEST_NULL;
95 for (const_iterator info = m_messageInformation.begin(); info != end; ++info, ++i) {
96 if (info->second.first.m_size) {
97 MPI_Issend(m_GPUSendBuf->data() + info->second.first.m_start,
102 this->m_cpuOwnerOverlapCopy.communicator(),
105 sendRequests[i] = MPI_REQUEST_NULL;
109 int finished = MPI_UNDEFINED;
111 for (
size_t i = 0; i < numberOfRealRecvRequests; i++) {
112 status.MPI_ERROR = MPI_SUCCESS;
113 MPI_Waitany(m_messageInformation.size(), recvRequests.data(), &finished, &status);
115 if (status.MPI_ERROR != MPI_SUCCESS) {
116 OPM_THROW(std::runtime_error,
117 fmt::format(fmt::runtime(
"MPI_Error occurred while rank {} received a message from rank {}"),
119 processMap[finished]));
122 MPI_Status recvStatus;
123 for (
size_t i = 0; i < m_messageInformation.size(); i++) {
124 if (MPI_SUCCESS != MPI_Wait(&sendRequests[i], &recvStatus)) {
125 OPM_THROW(std::runtime_error,
126 fmt::format(fmt::runtime(
"MPI_Error occurred while rank {} sent a message from rank {}"),
128 processMap[finished]));
133 dest.syncFromRecvBuf(*m_GPURecvBuf, *m_commpairIndicesCopy);
137 mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesCopy;
138 mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesOwner;
139 mutable std::unique_ptr<GpuVector<field_type>> m_GPUSendBuf;
140 mutable std::unique_ptr<GpuVector<field_type>> m_GPURecvBuf;
142 struct MessageInformation {
148 MessageInformation(
size_t start,
size_t size)
157 using InformationMap = std::map<int, std::pair<MessageInformation, MessageInformation>>;
158 mutable InformationMap m_messageInformation;
159 using IM = std::map<int, std::pair<std::vector<int>, std::vector<int>>>;
162 constexpr
static int m_commTag = 0;
164 void buildCommPairIdxs()
const 166 const auto& ri = this->m_cpuOwnerOverlapCopy.remoteIndices();
167 std::vector<int> commpairIndicesCopyOnCPU;
168 std::vector<int> commpairIndicesOwnerCPU;
170 for (
const auto& process : ri) {
171 m_im[process.first] = std::pair(std::vector<int>(), std::vector<int>());
172 for (
int send = 0; send < 2; ++send) {
173 auto remoteEnd = send ? process.second.first->end() : process.second.second->end();
174 auto remote = send ? process.second.first->begin() : process.second.second->begin();
176 while (remote != remoteEnd) {
177 if (send ? (remote->localIndexPair().local().attribute() == 1) : (remote->attribute() == 1)) {
179 m_im[process.first].first.push_back(remote->localIndexPair().local().local());
181 m_im[process.first].second.push_back(remote->localIndexPair().local().local());
191 for (
auto it = m_im.begin(); it != m_im.end(); it++) {
192 int noSend = it->second.first.size();
193 int noRecv = it->second.second.size();
195 if (noSend + noRecv > 0) {
196 m_messageInformation.insert(std::make_pair(
199 MessageInformation(sendBufIdx * block_size, noSend * block_size *
sizeof(field_type)),
200 MessageInformation(recvBufIdx * block_size, noRecv * block_size *
sizeof(field_type)))));
202 for (
int x = 0; x < noSend; x++) {
203 for (
int bs = 0; bs < block_size; bs++) {
204 commpairIndicesOwnerCPU.push_back(it->second.first[x] * block_size + bs);
207 for (
int x = 0; x < noRecv; x++) {
208 for (
int bs = 0; bs < block_size; bs++) {
209 commpairIndicesCopyOnCPU.push_back(it->second.second[x] * block_size + bs);
212 sendBufIdx += noSend;
213 recvBufIdx += noRecv;
217 m_commpairIndicesCopy = std::make_unique<GpuVector<int>>(commpairIndicesCopyOnCPU);
218 m_commpairIndicesOwner = std::make_unique<GpuVector<int>>(commpairIndicesOwnerCPU);
220 m_GPUSendBuf = std::make_unique<GpuVector<field_type>>(sendBufIdx * block_size);
221 m_GPURecvBuf = std::make_unique<GpuVector<field_type>>(recvBufIdx * block_size);
224 void initIndexSet()
const override 228 const auto& pis = this->m_cpuOwnerOverlapCopy.indexSet();
229 std::vector<int> indicesCopyOnCPU;
230 std::vector<int> indicesOwnerCPU;
231 for (
const auto& index : pis) {
232 if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::copy) {
233 for (
int component = 0; component < block_size; ++component) {
234 indicesCopyOnCPU.push_back(index.local().local() * block_size + component);
238 if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::owner) {
239 for (
int component = 0; component < block_size; ++component) {
240 indicesOwnerCPU.push_back(index.local().local() * block_size + component);
245 this->m_indicesCopy = std::make_unique<GpuVector<int>>(indicesCopyOnCPU);
246 this->m_indicesOwner = std::make_unique<GpuVector<int>>(indicesOwnerCPU);
253 #endif // OPM_GPUISTL_GPUAWAREMPISENDER_HPP int to_int(std::size_t s)
to_int converts a (on most relevant platforms) 64 bits unsigned size_t to a signed 32 bits signed int...
Definition: safe_conversion.hpp:56
GPUSender is a wrapper class for classes which will implement copOwnerToAll This is implemented with ...
Definition: GpuSender.hpp:43
A small, fixed‑dimension MiniVector class backed by std::array that can be used in both host and CUD...
Definition: AmgxInterface.hpp:37
void copyOwnerToAll(const X &source, X &dest) const override
copyOwnerToAll will copy the data in source to all processes.
Definition: GpuAwareMPISender.hpp:55
Derived class of GPUSender that handles MPI made with CUDA aware MPI The copOwnerToAll function uses ...
Definition: GpuAwareMPISender.hpp:45