opm-simulators
HypreGpuTransfers.hpp
1 /*
2  Copyright 2025 Equinor ASA
3 
4  This file is part of the Open Porous Media project (OPM).
5 
6  OPM is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OPM is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OPM. If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #ifndef OPM_HYPRE_GPU_TRANSFERS_HPP
21 #define OPM_HYPRE_GPU_TRANSFERS_HPP
22 
23 #include <opm/simulators/linalg/gpuistl/hypreinterface/HypreDataStructures.hpp>
24 #include <opm/simulators/linalg/gpuistl/hypreinterface/HypreErrorHandling.hpp>
25 
26 #if HAVE_CUDA
27 #if USE_HIP
28 #include <opm/simulators/linalg/gpuistl_hip/GpuSparseMatrixWrapper.hpp>
29 #include <opm/simulators/linalg/gpuistl_hip/GpuVector.hpp>
30 #else
31 #include <opm/simulators/linalg/gpuistl/GpuSparseMatrixWrapper.hpp>
32 #include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
33 #endif
34 #endif // HAVE_CUDA
35 
36 #include <HYPRE.h>
37 #include <_hypre_utilities.h>
38 
40 {
41 
/// Forward declaration so the transfer routines below can call this helper before
/// its definition, which appears later in this header inside the
/// HYPRE_USING_CUDA || HYPRE_USING_HIP section.
///
/// Gathers @p v into HYPRE ordering on the host: for every index i of
/// @p local_hypre_to_local_dune, continuous_vector_values[i] receives the element
/// of v.asStdVector() at position local_hypre_to_local_dune[i].
///
/// @param v                         source vector; must provide asStdVector()
/// @param continuous_vector_values  host output buffer, written in place
/// @param local_hypre_to_local_dune index map, one entry per value written
42 template <typename VectorType>
43 void setContinuousGpuVectorForHypre(const VectorType& v,
44  std::vector<HYPRE_Real>& continuous_vector_values,
45  const std::vector<int>& local_hypre_to_local_dune);
46 
/// Forward declaration so the transfer routines below can call this helper before
/// its definition, which appears later in this header inside the
/// HYPRE_USING_CUDA || HYPRE_USING_HIP section.
///
/// Scatters HYPRE-ordered host values back into @p v: a host copy of v is taken
/// with asStdVector(), the element at local_hypre_to_local_dune[i] is overwritten
/// with continuous_vector_values[i] for every i, and v is then reassigned from
/// VectorType(host copy).
///
/// @param v                         destination vector, replaced by the updated copy
/// @param continuous_vector_values  host input values in HYPRE ordering
/// @param local_hypre_to_local_dune index map, one entry per value read
47 template <typename VectorType>
48 void setGpuVectorFromContinuousVector(VectorType& v,
49  const std::vector<HYPRE_Real>& continuous_vector_values,
50  const std::vector<int>& local_hypre_to_local_dune);
51 #if HYPRE_USING_CUDA || HYPRE_USING_HIP
52 
56 template <typename VectorType>
57 void
58 transferGpuVectorToHypre(const VectorType& gpu_vec,
59  HYPRE_IJVector hypre_vec,
60  HypreHostDataArrays& host_arrays,
61  const HypreDeviceDataArrays& device_arrays,
62  const ParallelInfo& par_info,
63  bool use_gpu_backend)
64 {
65  const int N = static_cast<int>(host_arrays.indices.size());
66  using T = typename VectorType::field_type;
67 
68  if (use_gpu_backend) {
69  // GPU backend with GPU input: use pre-allocated device arrays
70  if (par_info.owner_first) {
71  // Direct device-to-device transfer for owner-first ordering
72  const T* device_ptr = gpu_vec.data();
73  OPM_HYPRE_SAFE_CALL(
74  HYPRE_IJVectorSetValues(hypre_vec, N, device_arrays.indices_device, const_cast<T*>(device_ptr)));
75  } else {
76  // Use continuous storage and device buffer for non-owner-first ordering
77  setContinuousGpuVectorForHypre(
78  gpu_vec, host_arrays.continuous_vector_values, par_info.local_hypre_to_local_dune);
79  hypre_TMemcpy(device_arrays.vector_buffer_device,
80  host_arrays.continuous_vector_values.data(),
81  HYPRE_Real,
82  N,
83  HYPRE_MEMORY_DEVICE,
84  HYPRE_MEMORY_HOST);
85  OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorSetValues(
86  hypre_vec, N, device_arrays.indices_device, device_arrays.vector_buffer_device));
87  }
88  } else {
89  // CPU backend with GPU input: copy via host memory
90  if (par_info.owner_first) {
91  // Get values to host and then set
92  auto host_values = gpu_vec.asStdVector();
93  OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorSetValues(hypre_vec,
94  N,
95  const_cast<HYPRE_BigInt*>(host_arrays.indices.data()),
96  reinterpret_cast<HYPRE_Real*>(host_values.data())));
97  } else {
98  // Use continuous storage for non-owner-first ordering
99  setContinuousGpuVectorForHypre(
100  gpu_vec, host_arrays.continuous_vector_values, par_info.local_hypre_to_local_dune);
101  OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorSetValues(hypre_vec,
102  N,
103  const_cast<HYPRE_BigInt*>(host_arrays.indices.data()),
104  host_arrays.continuous_vector_values.data()));
105  }
106  }
107  OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorAssemble(hypre_vec));
108 }
109 
113 template <typename VectorType>
114 void
115 transferHypreToGpuVector(HYPRE_IJVector hypre_vec,
116  VectorType& gpu_vec,
117  HypreHostDataArrays& host_arrays,
118  const HypreDeviceDataArrays& device_arrays,
119  const ParallelInfo& par_info,
120  bool use_gpu_backend)
121 {
122  const int N = static_cast<int>(host_arrays.indices.size());
123  using T = typename VectorType::field_type;
124 
125  if (use_gpu_backend) {
126  // GPU backend with GPU input: use pre-allocated device arrays
127  if (par_info.owner_first) {
128  // Direct device-to-device transfer for owner-first ordering
129  T* device_ptr = gpu_vec.data();
130  OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorGetValues(hypre_vec, N, device_arrays.indices_device, device_ptr));
131  } else {
132  // Use device buffer and then remap for non-owner-first ordering
133  OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorGetValues(
134  hypre_vec, N, device_arrays.indices_device, device_arrays.vector_buffer_device));
135  hypre_TMemcpy(host_arrays.continuous_vector_values.data(),
136  device_arrays.vector_buffer_device,
137  HYPRE_Real,
138  N,
139  HYPRE_MEMORY_HOST,
140  HYPRE_MEMORY_DEVICE);
141  setGpuVectorFromContinuousVector(
142  gpu_vec, host_arrays.continuous_vector_values, par_info.local_hypre_to_local_dune);
143  }
144  } else {
145  // CPU backend with GPU input: copy via host memory
146  if (par_info.owner_first) {
147  // Get values to host and then copy to GPU
148  auto host_values = gpu_vec.asStdVector();
149  OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorGetValues(hypre_vec,
150  N,
151  const_cast<HYPRE_BigInt*>(host_arrays.indices.data()),
152  reinterpret_cast<HYPRE_Real*>(host_values.data())));
153  gpu_vec = VectorType(host_values);
154  } else {
155  // Use continuous storage for non-owner-first ordering
156  OPM_HYPRE_SAFE_CALL(HYPRE_IJVectorGetValues(hypre_vec,
157  N,
158  const_cast<HYPRE_BigInt*>(host_arrays.indices.data()),
159  host_arrays.continuous_vector_values.data()));
160  setGpuVectorFromContinuousVector(
161  gpu_vec, host_arrays.continuous_vector_values, par_info.local_hypre_to_local_dune);
162  }
163  }
164 }
165 
171 template <typename MatrixType>
172 void
173 updateMatrixFromGpuSparseMatrix(const MatrixType& gpu_matrix,
174  HYPRE_IJMatrix hypre_matrix,
175  const SparsityPattern& sparsity_pattern,
176  const HypreHostDataArrays& host_arrays,
177  const HypreDeviceDataArrays& device_arrays,
178  bool use_gpu_backend)
179 {
180  const auto N = sparsity_pattern.rows.size();
181  using T = typename MatrixType::field_type;
182  const T* values = gpu_matrix.getNonZeroValues().data();
183 
184  if (use_gpu_backend) {
185  // GPU backend with GPU input: use pre-allocated device arrays
186  // Direct device-to-device transfer using smart row_indexes
187  OPM_HYPRE_SAFE_CALL(HYPRE_IJMatrixSetValues2(hypre_matrix,
188  N,
189  device_arrays.ncols_device,
190  device_arrays.rows_device,
191  device_arrays.row_indexes_device,
192  device_arrays.cols_device,
193  values));
194  } else {
195  // CPU backend with GPU input: copy to host first
196  auto host_values = gpu_matrix.getNonZeroValues().asStdVector();
197  OPM_HYPRE_SAFE_CALL(HYPRE_IJMatrixSetValues2(hypre_matrix,
198  N,
199  const_cast<HYPRE_Int*>(sparsity_pattern.ncols.data()),
200  const_cast<HYPRE_BigInt*>(sparsity_pattern.rows.data()),
201  const_cast<HYPRE_Int*>(host_arrays.row_indexes.data()),
202  const_cast<HYPRE_BigInt*>(sparsity_pattern.cols.data()),
203  host_values.data()));
204  }
205  OPM_HYPRE_SAFE_CALL(HYPRE_IJMatrixAssemble(hypre_matrix));
206 }
207 
208 template <typename VectorType>
209 void
210 setContinuousGpuVectorForHypre(const VectorType& v,
211  std::vector<HYPRE_Real>& continuous_vector_values,
212  const std::vector<int>& local_hypre_to_local_dune)
213 {
214  // Get vector data to host first
215  auto host_values = v.asStdVector();
216  // Set values using the mapping
217  for (size_t i = 0; i < local_hypre_to_local_dune.size(); ++i) {
218  continuous_vector_values[i] = host_values[local_hypre_to_local_dune[i]];
219  }
220 }
221 
222 template <typename VectorType>
223 void
224 setGpuVectorFromContinuousVector(VectorType& v,
225  const std::vector<HYPRE_Real>& continuous_vector_values,
226  const std::vector<int>& local_hypre_to_local_dune)
227 {
228  // Copy values to host and update values with mapping
229  auto host_values = v.asStdVector();
230  for (size_t i = 0; i < local_hypre_to_local_dune.size(); ++i) {
231  host_values[local_hypre_to_local_dune[i]] = continuous_vector_values[i];
232  }
233  // Copy back to GPU
234  v = VectorType(host_values);
235 }
236 #endif // HYPRE_USING_CUDA || HYPRE_USING_HIP
237 
238 } // namespace Opm::gpuistl::HypreInterface
239 
240 #endif // OPM_HYPRE_GPU_TRANSFERS_HPP
Unified interface for Hypre operations with both CPU and GPU data structures.
Definition: HypreCpuTransfers.hpp:29