ChowPatelIlu.hpp
Go to the documentation of this file.
1/*
2 Copyright 2020 Equinor ASA
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20#ifndef CHOW_PATEL_ILU_HEADER_INCLUDED
21#define CHOW_PATEL_ILU_HEADER_INCLUDED
22
23
24#include <mutex>
25
27
28// Variables CHOW_PATEL, CHOW_PATEL_GPU and CHOW_PATEL_GPU_PARALLEL are set by CMake
29// Pass -DUSE_CHOW_PATEL_ILU=1 to cmake to define CHOW_PATEL and use the iterative ILU decomposition
30// Pass -DUSE_CHOW_PATEL_ILU_GPU=1 to run the ILU decomposition sweeps on the GPU
31// Pass -DUSE_CHOW_PATEL_ILU_GPU_PARALLEL=1 to use more parallelisation in the GPU kernel, see ChowPatelIlu.cpp
32
33// if CHOW_PATEL is 0, exact ILU decomposition is performed on CPU
34// if CHOW_PATEL is 1, iterative ILU decomposition (FGPILU) is done, as described in:
35// FINE-GRAINED PARALLEL INCOMPLETE LU FACTORIZATION, E. Chow and A. Patel, SIAM 2015, https://doi.org/10.1137/140968896
36// if CHOW_PATEL_GPU is 0, the decomposition is done on CPU
37// if CHOW_PATEL_GPU is 1, the decomposition is done by gpu_decomposition() on GPU
38// the apply phase of the ChowPatelIlu uses two triangular matrices: L and U
39// the exact decomposition uses a full matrix LU which is the superposition of L and U
40// ChowPatelIlu could also operate on a full matrix LU when L and U are merged, but it is generally better to keep them split
41
42#if CHOW_PATEL
43
44namespace Opm
45{
46namespace Accelerator
47{
48
49class BlockedMatrix;
50
51// This class implements a blocked version on GPU of the Fine-Grained Parallel ILU (FGPILU) by Chow and Patel 2015:
52// FINE-GRAINED PARALLEL INCOMPLETE LU FACTORIZATION, E. Chow and A. Patel, SIAM 2015, https://doi.org/10.1137/140968896
53// only blocksize == 3 is supported
54// decomposition() allocates the cl::Buffers on the first call, these are C++ objects that deallocate automatically
55template <unsigned int block_size>
56class ChowPatelIlu
57{
58private:
59 cl::Buffer d_Ut_vals, d_L_vals, d_LU_vals;
60 cl::Buffer d_Ut_ptrs, d_Ut_idxs;
61 cl::Buffer d_L_rows, d_L_cols;
62 cl::Buffer d_LU_rows, d_LU_cols;
63 cl::Buffer d_Ltmp, d_Utmp;
64
65 cl::Event event;
66 std::vector<cl::Event> events;
67 cl_int err;
68 std::once_flag initialize_flag;
69 std::once_flag pattern_uploaded;
70 int verbosity = 0;
71
72 std::unique_ptr<cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&,
73 cl::Buffer&, cl::Buffer&, cl::Buffer&,
74 cl::Buffer&, cl::Buffer&, cl::Buffer&,
75 cl::Buffer&, cl::Buffer&,
76 const int, cl::LocalSpaceArg, cl::LocalSpaceArg> > chow_patel_ilu_sweep_k;
77
78public:
83 void decomposition(
84 cl::CommandQueue *queue, cl::Context *context,
85 BlockedMatrix *LUmat, BlockedMatrix *Lmat, BlockedMatrix *Umat,
86 double *invDiagVals, std::vector<int>& diagIndex,
87 cl::Buffer& d_diagIndex, cl::Buffer& d_invDiagVals,
88 cl::Buffer& d_Lvals, cl::Buffer& d_Lcols, cl::Buffer& d_Lrows,
89 cl::Buffer& d_Uvals, cl::Buffer& d_Ucols, cl::Buffer& d_Urows);
90
91
110 void gpu_decomposition(
111 cl::CommandQueue *queue, cl::Context *context,
112 int *Ut_ptrs, int *Ut_idxs, double *Ut_vals, int Ut_nnzbs,
113 int *L_rows, int *L_cols, double *L_vals, int L_nnzbs,
114 int *LU_rows, int *LU_cols, double *LU_vals, int LU_nnzbs,
115 int Nb, int num_sweeps);
116
118 void setVerbosity(int verbosity_) {
119 this->verbosity = verbosity_;
120 }
121
122};
123
124} // namespace Accelerator
125} // namespace Opm
126
127#endif // CHOW_PATEL
128
129#endif // CHOW_PATEL_ILU_HEADER_INCLUDED
Definition: BlackoilPhases.hpp:27