opm-simulators
openclKernels.hpp
1 /*
2  Copyright 2020 Equinor ASA
3 
4  This file is part of the Open Porous Media project (OPM).
5 
6  OPM is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OPM is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OPM. If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #ifndef OPENCL_HPP
21 #define OPENCL_HPP
22 
#include <cstddef>
#include <memory>
#include <string>
#include <vector>

#include <opm/simulators/linalg/gpubridge/opencl/opencl.hpp>
28 
29 namespace Opm::Accelerator {
30 
31 using spmv_blocked_kernel_type = cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int,
32  const cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>;
33 using spmv_kernel_type = cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int,
34  const cl::Buffer&, cl::Buffer&, cl::LocalSpaceArg>;
35 using residual_blocked_kernel_type = cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int,
36  cl::Buffer&, const cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>;
37 using residual_kernel_type = cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int,
38  cl::Buffer&, const cl::Buffer&, cl::Buffer&, cl::LocalSpaceArg>;
39 using ilu_apply1_kernel_type = cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const cl::Buffer&,
40  cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>;
41 using ilu_apply2_kernel_type = cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&,
42  cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>;
43 using stdwell_apply_kernel_type = cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&,
44  cl::Buffer&, cl::Buffer&, cl::Buffer&,
45  const unsigned int, const unsigned int, cl::Buffer&,
46  cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg>;
47 using ilu_decomp_kernel_type = cl::KernelFunctor<const unsigned int, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&,
48  cl::Buffer&, cl::Buffer&, cl::Buffer&, const int, cl::LocalSpaceArg>;
49 using isaiL_kernel_type = cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&,
50  cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int>;
51 using isaiU_kernel_type = cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&,
52  cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int>;
53 
54 template<class Scalar>
56 {
57 private:
58  static int verbosity;
59  static cl::CommandQueue *queue;
60  static std::vector<Scalar> tmp; // used as tmp CPU buffer for dot() and norm()
61  static bool initialized;
62  static std::size_t preferred_workgroup_size_multiple; // stores CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
63 
64  static std::unique_ptr<cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg> > dot_k;
65  static std::unique_ptr<cl::KernelFunctor<cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg> > norm_k;
66  static std::unique_ptr<cl::KernelFunctor<cl::Buffer&, const Scalar, cl::Buffer&, const unsigned int> > axpy_k;
67  static std::unique_ptr<cl::KernelFunctor<cl::Buffer&, const Scalar, const unsigned int> > scale_k;
68  static std::unique_ptr<cl::KernelFunctor<const Scalar, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int> > vmul_k;
69  static std::unique_ptr<cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, const Scalar, const Scalar, const unsigned int> > custom_k;
70  static std::unique_ptr<cl::KernelFunctor<const cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int> > full_to_pressure_restriction_k;
71  static std::unique_ptr<cl::KernelFunctor<cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int> > add_coarse_pressure_correction_k;
72  static std::unique_ptr<cl::KernelFunctor<const cl::Buffer&, cl::Buffer&, const cl::Buffer&, const unsigned int> > prolongate_vector_k;
73  static std::unique_ptr<spmv_blocked_kernel_type> spmv_blocked_k;
74  static std::unique_ptr<spmv_blocked_kernel_type> spmv_blocked_add_k;
75  static std::unique_ptr<spmv_kernel_type> spmv_k;
76  static std::unique_ptr<spmv_kernel_type> spmv_noreset_k;
77  static std::unique_ptr<residual_blocked_kernel_type> residual_blocked_k;
78  static std::unique_ptr<residual_kernel_type> residual_k;
79  static std::unique_ptr<ilu_apply1_kernel_type> ILU_apply1_k;
80  static std::unique_ptr<ilu_apply2_kernel_type> ILU_apply2_k;
81  static std::unique_ptr<stdwell_apply_kernel_type> stdwell_apply_k;
82  static std::unique_ptr<ilu_decomp_kernel_type> ilu_decomp_k;
83  static std::unique_ptr<isaiL_kernel_type> isaiL_k;
84  static std::unique_ptr<isaiU_kernel_type> isaiU_k;
85 
86  OpenclKernels(){}; // disable instantiation
87 
88 public:
89  static const std::string axpy_str;
90  static const std::string scale_str;
91  static const std::string vmul_str;
92  static const std::string dot_1_str;
93  static const std::string norm_str;
94  static const std::string custom_str;
95  static const std::string full_to_pressure_restriction_str;
96  static const std::string add_coarse_pressure_correction_str;
97  static const std::string prolongate_vector_str;
98  static const std::string spmv_blocked_str;
99  static const std::string spmv_blocked_add_str;
100  static const std::string spmv_str;
101  static const std::string spmv_noreset_str;
102  static const std::string residual_blocked_str;
103  static const std::string residual_str;
104 #if CHOW_PATEL
105  static const std::string ILU_apply1_str;
106  static const std::string ILU_apply2_str;
107 #else
108  static const std::string ILU_apply1_fm_str;
109  static const std::string ILU_apply2_fm_str;
110 #endif
111  static const std::string stdwell_apply_str;
112  static const std::string ILU_decomp_str;
113  static const std::string isaiL_str;
114  static const std::string isaiU_str;
115 
116  static void init(cl::Context *context, cl::CommandQueue *queue, std::vector<cl::Device>& devices, int verbosity);
117 
118  static Scalar dot(cl::Buffer& in1, cl::Buffer& in2, cl::Buffer& out, int N);
119  static Scalar norm(cl::Buffer& in, cl::Buffer& out, int N);
120  static void axpy(cl::Buffer& in, const Scalar a, cl::Buffer& out, int N);
121  static void scale(cl::Buffer& in, const Scalar a, int N);
122  static void vmul(const Scalar alpha, cl::Buffer& in1, cl::Buffer& in2, cl::Buffer& out, int N);
123  static void custom(cl::Buffer& p, cl::Buffer& v, cl::Buffer& r, const Scalar omega, const Scalar beta, int N);
124  static void full_to_pressure_restriction(const cl::Buffer& fine_y, cl::Buffer& weights, cl::Buffer& coarse_y, int Nb);
125  static void add_coarse_pressure_correction(cl::Buffer& coarse_x, cl::Buffer& fine_x, int pressure_idx, int Nb);
126  static void prolongate_vector(const cl::Buffer& in, cl::Buffer& out, const cl::Buffer& cols, int N);
127  static void spmv(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, const cl::Buffer& x, cl::Buffer& b, int Nb, unsigned int block_size, bool reset = true, bool add = false);
128  static void residual(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& x, const cl::Buffer& rhs, cl::Buffer& out, int Nb, unsigned int block_size);
129 
130  static void ILU_apply1(cl::Buffer& rowIndices, cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex,
131  const cl::Buffer& y, cl::Buffer& x, cl::Buffer& rowsPerColor, int color, int Nb, unsigned int block_size);
132 
133  static void ILU_apply2(cl::Buffer& rowIndices, cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex,
134  cl::Buffer& invDiagVals, cl::Buffer& x, cl::Buffer& rowsPerColor, int color, int Nb, unsigned int block_size);
135 
136  static void ILU_decomp(int firstRow, int lastRow, cl::Buffer& rowIndices, cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows,
137  cl::Buffer& diagIndex, cl::Buffer& invDiagVals, int Nb, unsigned int block_size);
138 
139  static void apply_stdwells(cl::Buffer& d_Cnnzs_ocl, cl::Buffer &d_Dnnzs_ocl, cl::Buffer &d_Bnnzs_ocl,
140  cl::Buffer &d_Ccols_ocl, cl::Buffer &d_Bcols_ocl, cl::Buffer &d_x, cl::Buffer &d_y,
141  int dim, int dim_wells, cl::Buffer &d_val_pointers_ocl, int num_std_wells);
142 
143  static void isaiL(cl::Buffer& diagIndex, cl::Buffer& colPointers, cl::Buffer& mapping, cl::Buffer& nvc,
144  cl::Buffer& luIdxs, cl::Buffer& xxIdxs, cl::Buffer& dxIdxs, cl::Buffer& LUvals, cl::Buffer& invLvals, unsigned int Nb);
145 
146  static void isaiU(cl::Buffer& diagIndex, cl::Buffer& colPointers, cl::Buffer& rowIndices, cl::Buffer& mapping,
147  cl::Buffer& nvc, cl::Buffer& luIdxs, cl::Buffer& xxIdxs, cl::Buffer& dxIdxs, cl::Buffer& LUvals,
148  cl::Buffer& invDiagVals, cl::Buffer& invUvals, unsigned int Nb);
149 };
150 
// DECLARE_ILU(T) declares the explicit specializations of the two ILU apply
// strings of OpenclKernels<T>. Which pair is named depends on CHOW_PATEL, using
// the same condition as the member declarations inside the class.
#if CHOW_PATEL
#define DECLARE_ILU(T)                                                  \
    template<>                                                          \
    const std::string OpenclKernels<T>::ILU_apply1_str;                 \
    template<>                                                          \
    const std::string OpenclKernels<T>::ILU_apply2_str;
#else
#define DECLARE_ILU(T)                                                  \
    template<>                                                          \
    const std::string OpenclKernels<T>::ILU_apply1_fm_str;              \
    template<>                                                          \
    const std::string OpenclKernels<T>::ILU_apply2_fm_str;
#endif
160 
161 #define DECLARE_INSTANCE(T) \
162  DECLARE_ILU(T) \
163  template<> const std::string OpenclKernels<T>::axpy_str; \
164  template<> const std::string OpenclKernels<T>::scale_str; \
165  template<> const std::string OpenclKernels<T>::vmul_str; \
166  template<> const std::string OpenclKernels<T>::dot_1_str; \
167  template<> const std::string OpenclKernels<T>::norm_str; \
168  template<> const std::string OpenclKernels<T>::custom_str; \
169  template<> const std::string OpenclKernels<T>::full_to_pressure_restriction_str; \
170  template<> const std::string OpenclKernels<T>::add_coarse_pressure_correction_str; \
171  template<> const std::string OpenclKernels<T>::prolongate_vector_str; \
172  template<> const std::string OpenclKernels<T>::spmv_blocked_str; \
173  template<> const std::string OpenclKernels<T>::spmv_blocked_add_str; \
174  template<> const std::string OpenclKernels<T>::spmv_str; \
175  template<> const std::string OpenclKernels<T>::spmv_noreset_str; \
176  template<> const std::string OpenclKernels<T>::residual_blocked_str; \
177  template<> const std::string OpenclKernels<T>::residual_str; \
178  template<> const std::string OpenclKernels<T>::stdwell_apply_str; \
179  template<> const std::string OpenclKernels<T>::ILU_decomp_str; \
180  template<> const std::string OpenclKernels<T>::isaiL_str; \
181  template<> const std::string OpenclKernels<T>::isaiU_str;
182 
183 DECLARE_INSTANCE(double)
184 
185 #if FLOW_INSTANTIATE_FLOAT
186 DECLARE_INSTANCE(float)
187 #endif
188 
189 } // namespace Opm::Accelerator
190 
191 #endif
Definition: amgclSolverBackend.cpp:49
Definition: openclKernels.hpp:55