HypreGpuTransfers.hpp
/*
  Copyright 2025 Equinor ASA

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef OPM_HYPRE_GPU_TRANSFERS_HPP
#define OPM_HYPRE_GPU_TRANSFERS_HPP

#if HAVE_CUDA
#if USE_HIP
#include <opm/simulators/linalg/gpuistl_hip/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl_hip/GpuVector.hpp>
#else
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
#endif
#endif // HAVE_CUDA

#include <HYPRE.h>
#include <_hypre_utilities.h>

namespace Opm::gpuistl::HypreInterface
{

template <typename VectorType>
void setContinuousGpuVectorForHypre(const VectorType& v,
                                    std::vector<HYPRE_Real>& continuous_vector_values,
                                    const std::vector<int>& local_hypre_to_local_dune);

template <typename VectorType>
void setGpuVectorFromContinuousVector(VectorType& v,
                                      const std::vector<HYPRE_Real>& continuous_vector_values,
                                      const std::vector<int>& local_hypre_to_local_dune);

#if HYPRE_USING_CUDA || HYPRE_USING_HIP

/**
 * @brief Transfer a GPU vector's values into a Hypre IJ vector, handling both GPU and
 *        CPU Hypre backends as well as owner-first and reordered local numbering.
 */
template <typename VectorType>
void
transferGpuVectorToHypre(const VectorType& gpu_vec,
                         HYPRE_IJVector hypre_vec,
                         HypreHostDataArrays& host_arrays,
                         const HypreDeviceDataArrays& device_arrays,
                         const ParallelInfo& par_info,
                         bool use_gpu_backend)
{
    const int N = static_cast<int>(host_arrays.indices.size());
    using T = typename VectorType::field_type;

    if (use_gpu_backend) {
        // GPU backend with GPU input: use pre-allocated device arrays
        if (par_info.owner_first) {
            // Direct device-to-device transfer for owner-first ordering
            const T* device_ptr = gpu_vec.data();
            OPM_HYPRE_SAFE_CALL(
                HYPRE_IJVectorSetValues(hypre_vec, N, device_arrays.indices_device, const_cast<T*>(device_ptr)));
        } else {
            // Use continuous storage and device buffer for non-owner-first ordering
            setContinuousGpuVectorForHypre(
                gpu_vec, host_arrays.continuous_vector_values, par_info.local_hypre_to_local_dune);
            hypre_TMemcpy(device_arrays.vector_buffer_device,
                          host_arrays.continuous_vector_values.data(),
                          HYPRE_Real,
                          N,
                          HYPRE_MEMORY_DEVICE,
                          HYPRE_MEMORY_HOST);
            OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorSetValues(
                hypre_vec, N, device_arrays.indices_device, device_arrays.vector_buffer_device));
        }
    } else {
        // CPU backend with GPU input: copy via host memory
        if (par_info.owner_first) {
            // Get values to host and then set
            auto host_values = gpu_vec.asStdVector();
            OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorSetValues(hypre_vec,
                                                        N,
                                                        const_cast<HYPRE_BigInt*>(host_arrays.indices.data()),
                                                        reinterpret_cast<HYPRE_Real*>(host_values.data())));
        } else {
            // Use continuous storage for non-owner-first ordering
            setContinuousGpuVectorForHypre(
                gpu_vec, host_arrays.continuous_vector_values, par_info.local_hypre_to_local_dune);
            OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorSetValues(hypre_vec,
                                                        N,
                                                        const_cast<HYPRE_BigInt*>(host_arrays.indices.data()),
                                                        host_arrays.continuous_vector_values.data()));
        }
    }
    OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorAssemble(hypre_vec));
}
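
// Illustrative usage sketch (not part of the interface; the variable names below are
// hypothetical): assuming the setup phase has filled `host_arrays`, `device_arrays`
// and `par_info`, and `x` is a GpuVector holding the Dune-ordered values, an existing
// HYPRE_IJVector `hypre_b` could be refreshed with
//
//   transferGpuVectorToHypre(x, hypre_b, host_arrays, device_arrays, par_info, use_gpu_backend);
//
// The call assembles the Hypre vector itself, so no separate HYPRE_IJVectorAssemble
// is needed afterwards.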

/**
 * @brief Transfer values from a Hypre IJ vector back into a GPU vector, handling both GPU
 *        and CPU Hypre backends as well as owner-first and reordered local numbering.
 */
template <typename VectorType>
void
transferHypreToGpuVector(HYPRE_IJVector hypre_vec,
                         VectorType& gpu_vec,
                         HypreHostDataArrays& host_arrays,
                         const HypreDeviceDataArrays& device_arrays,
                         const ParallelInfo& par_info,
                         bool use_gpu_backend)
{
    const int N = static_cast<int>(host_arrays.indices.size());
    using T = typename VectorType::field_type;

    if (use_gpu_backend) {
        // GPU backend with GPU input: use pre-allocated device arrays
        if (par_info.owner_first) {
            // Direct device-to-device transfer for owner-first ordering
            T* device_ptr = gpu_vec.data();
            OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorGetValues(hypre_vec, N, device_arrays.indices_device, device_ptr));
        } else {
            // Use device buffer and then remap for non-owner-first ordering
            OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorGetValues(
                hypre_vec, N, device_arrays.indices_device, device_arrays.vector_buffer_device));
            hypre_TMemcpy(host_arrays.continuous_vector_values.data(),
                          device_arrays.vector_buffer_device,
                          HYPRE_Real,
                          N,
                          HYPRE_MEMORY_HOST,
                          HYPRE_MEMORY_DEVICE);
            setGpuVectorFromContinuousVector(
                gpu_vec, host_arrays.continuous_vector_values, par_info.local_hypre_to_local_dune);
        }
    } else {
        // CPU backend with GPU input: copy via host memory
        if (par_info.owner_first) {
            // Get values from Hypre into a host buffer, then copy to GPU
            auto host_values = gpu_vec.asStdVector();
            OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorGetValues(hypre_vec,
                                                        N,
                                                        const_cast<HYPRE_BigInt*>(host_arrays.indices.data()),
                                                        reinterpret_cast<HYPRE_Real*>(host_values.data())));
            gpu_vec = VectorType(host_values);
        } else {
            // Use continuous storage for non-owner-first ordering
            OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorGetValues(hypre_vec,
                                                        N,
                                                        const_cast<HYPRE_BigInt*>(host_arrays.indices.data()),
                                                        host_arrays.continuous_vector_values.data()));
            setGpuVectorFromContinuousVector(
                gpu_vec, host_arrays.continuous_vector_values, par_info.local_hypre_to_local_dune);
        }
    }
}
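
// Illustrative usage sketch (hypothetical names): after a Hypre solve has updated
// `hypre_x`, the solution can be copied back into the same GpuVector, reusing the
// buffers from the forward transfer:
//
//   transferHypreToGpuVector(hypre_x, x, host_arrays, device_arrays, par_info, use_gpu_backend);
//
// Note that the CPU-backend paths rebuild the vector from a host copy obtained via
// asStdVector(), so `x` must already have its final size.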

/**
 * @brief Update the values of a Hypre IJ matrix from a GPU sparse matrix, assuming the
 *        sparsity pattern is unchanged since setup.
 */
template <typename MatrixType>
void
updateMatrixFromGpuSparseMatrix(const MatrixType& gpu_matrix,
                                HYPRE_IJMatrix hypre_matrix,
                                const SparsityPattern& sparsity_pattern,
                                const HypreHostDataArrays& host_arrays,
                                const HypreDeviceDataArrays& device_arrays,
                                bool use_gpu_backend)
{
    const auto N = sparsity_pattern.rows.size();
    using T = typename MatrixType::field_type;
    const T* values = gpu_matrix.getNonZeroValues().data();

    if (use_gpu_backend) {
        // GPU backend with GPU input: use pre-allocated device arrays
        // Direct device-to-device transfer using precomputed row_indexes
        OPM_HYPRE_SAFE_CALL(HYPRE_IJMatrixSetValues2(hypre_matrix,
                                                     N,
                                                     device_arrays.ncols_device,
                                                     device_arrays.rows_device,
                                                     device_arrays.row_indexes_device,
                                                     device_arrays.cols_device,
                                                     values));
    } else {
        // CPU backend with GPU input: copy to host first
        auto host_values = gpu_matrix.getNonZeroValues().asStdVector();
        OPM_HYPRE_SAFE_CALL(HYPRE_IJMatrixSetValues2(hypre_matrix,
                                                     N,
                                                     const_cast<HYPRE_Int*>(sparsity_pattern.ncols.data()),
                                                     const_cast<HYPRE_BigInt*>(sparsity_pattern.rows.data()),
                                                     const_cast<HYPRE_Int*>(host_arrays.row_indexes.data()),
                                                     const_cast<HYPRE_BigInt*>(sparsity_pattern.cols.data()),
                                                     host_values.data()));
    }
    OPM_HYPRE_SAFE_CALL(HYPRE_IJMatrixAssemble(hypre_matrix));
}
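
// Illustrative usage sketch (hypothetical names): with a GPU sparse matrix `A` whose
// sparsity pattern matches the one used when `hypre_A` was created, the Hypre matrix
// values can be refreshed whenever the nonzeros change, e.g. once per linearization:
//
//   updateMatrixFromGpuSparseMatrix(A, hypre_A, sparsity_pattern, host_arrays, device_arrays, use_gpu_backend);
//
// Only the values are rewritten; the row/column structure comes from the precomputed
// sparsity_pattern and the host/device index arrays.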

template <typename VectorType>
void
setContinuousGpuVectorForHypre(const VectorType& v,
                               std::vector<HYPRE_Real>& continuous_vector_values,
                               const std::vector<int>& local_hypre_to_local_dune)
{
    // Get vector data to host first
    auto host_values = v.asStdVector();
    // Set values using the mapping
    for (size_t i = 0; i < local_hypre_to_local_dune.size(); ++i) {
        continuous_vector_values[i] = host_values[local_hypre_to_local_dune[i]];
    }
}

template <typename VectorType>
void
setGpuVectorFromContinuousVector(VectorType& v,
                                 const std::vector<HYPRE_Real>& continuous_vector_values,
                                 const std::vector<int>& local_hypre_to_local_dune)
{
    // Copy values to host and update values with mapping
    auto host_values = v.asStdVector();
    for (size_t i = 0; i < local_hypre_to_local_dune.size(); ++i) {
        host_values[local_hypre_to_local_dune[i]] = continuous_vector_values[i];
    }
    // Copy back to GPU
    v = VectorType(host_values);
}
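
// Worked example of the index remapping (values chosen for illustration only): with
//
//   local_hypre_to_local_dune = {2, 0, 3};   // Hypre row i lives at Dune index map[i]
//
// setContinuousGpuVectorForHypre packs {host_values[2], host_values[0], host_values[3]}
// into continuous_vector_values[0..2], and setGpuVectorFromContinuousVector scatters
// the same three values back to Dune indices 2, 0 and 3, leaving every other entry of
// the Dune-ordered vector (e.g. non-owned overlap entries) unchanged.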
#endif // HYPRE_USING_CUDA || HYPRE_USING_HIP

} // namespace Opm::gpuistl::HypreInterface

#endif // OPM_HYPRE_GPU_TRANSFERS_HPP