28#ifndef TPFA_LINEARIZER_HH
29#define TPFA_LINEARIZER_HH
31#include <dune/common/version.hh>
32#include <dune/common/fvector.hh>
33#include <dune/common/fmatrix.hh>
35#include <opm/common/Exceptions.hpp>
36#include <opm/common/TimingMacros.hpp>
38#include <opm/grid/utility/SparseTable.hpp>
40#include <opm/material/common/ConditionalStorage.hpp>
42#include <opm/input/eclipse/EclipseState/Grid/FaceDir.hpp>
43#include <opm/input/eclipse/Schedule/BCProp.hpp>
54#include <opm/material/fluidsystems/BlackOilFluidSystem.hpp>
55#include <opm/material/fluidsystems/BlackOilFluidSystemNonStatic.hpp>
66#include <unordered_map>
74#include <fmt/format.h>
76#include <opm/common/utility/gpuDecorators.hpp>
77#include <opm/common/utility/gpuistl_if_available.hpp>
78#include <opm/common/utility/pointerArithmetic.hpp>
85#include <opm/simulators/linalg/gpuistl_hip/GpuSparseMatrixWrapper.hpp>
86#include <opm/simulators/linalg/gpuistl_hip/MiniMatrix.hpp>
87#include <opm/simulators/linalg/gpuistl_hip/MiniVector.hpp>
88#include <opm/simulators/linalg/gpuistl_hip/detail/gpusparse_matrix_operations.hpp>
106template<
class TypeTag>
118template<
class TypeTag>
139 using Element =
typename GridView::template Codim<0>::Entity;
140 using ElementIterator =
typename GridView::template Codim<0>::Iterator;
142 using Vector = GlobalEqVector;
146 enum { numEq = getPropValue<TypeTag, Properties::NumEq>() };
147 enum { historySize = getPropValue<TypeTag, Properties::TimeDiscHistorySize>() };
148 enum { dimWorld = GridView::dimensionworld };
150 constexpr static bool runAssemblyOnGpu = getPropValue<TypeTag, Properties::RunAssemblyOnGpu>();
152 using MatrixBlockCPU =
typename SparseMatrixAdapter::MatrixBlock;
153 using VectorBlockCPU = Dune::FieldVector<Scalar, numEq>;
160 using MatrixBlock = std::conditional_t<runAssemblyOnGpu, MatrixBlockGPU, MatrixBlockCPU>;
161 using VectorBlock = std::conditional_t<runAssemblyOnGpu, VectorBlockGPU, VectorBlockCPU>;
162 using ADVectorBlock = std::conditional_t<runAssemblyOnGpu, ADVectorBlockGPU, ADVectorBlockCPU>;
165 using VectorBlock = VectorBlockCPU;
166 using ADVectorBlock = ADVectorBlockCPU;
169 static constexpr bool linearizeNonLocalElements =
170 getPropValue<TypeTag, Properties::LinearizeNonLocalElements>();
171 static constexpr bool enableFullyImplicitThermal = (getPropValue<TypeTag, Properties::EnergyModuleType>() == EnergyModules::FullyImplicitThermal);
172 static constexpr bool enableDiffusion = getPropValue<TypeTag, Properties::EnableDiffusion>();
173 static constexpr bool enableDispersion = getPropValue<TypeTag, Properties::EnableDispersion>();
174 static const bool enableBioeffects = getPropValue<TypeTag, Properties::EnableBioeffects>();
183 simulatorPtr_ =
nullptr;
184 separateSparseSourceTerms_ = Parameters::Get<Parameters::SeparateSparseSourceTerms>();
194 Parameters::Register<Parameters::SeparateSparseSourceTerms>
195 (
"Treat well source terms all in one go, instead of on a cell by cell basis.");
209 simulatorPtr_ = &simulator;
257 catch (
const std::exception& e) {
258 std::cout <<
"rank " << simulator_().
gridView().comm().rank()
259 <<
" caught an exception while linearizing:" << e.what()
260 <<
"\n" << std::flush;
264 std::cout <<
"rank " << simulator_().
gridView().comm().rank()
265 <<
" caught an exception while linearizing"
266 <<
"\n" << std::flush;
269 OPM_TIMEBLOCK(linearizationSynch);
270 succeeded = simulator_().
gridView().comm().min(succeeded);
273 throw NumericalProblem(
"A process did not succeed in linearizing the system");
289 template <
class SubDomainType>
297 initFirstIteration_();
301 if (problem_().iterationContext().inLocalSolve()) {
312 { jacobian_->finalize(); }
320 OPM_TIMEBLOCK(linearizeAuxilaryEquations);
324 auto& model = model_();
325 const auto& comm = simulator_().
gridView().comm();
326 for (
unsigned auxModIdx = 0; auxModIdx < model.numAuxiliaryModules(); ++auxModIdx) {
327 bool succeeded =
true;
329 model.auxiliaryModule(auxModIdx)->linearize(*jacobian_, residual_);
331 catch (
const std::exception& e) {
334 std::cout <<
"rank " << simulator_().
gridView().comm().rank()
335 <<
" caught an exception while linearizing:" << e.what()
336 <<
"\n" << std::flush;
339 succeeded = comm.min(succeeded);
342 throw NumericalProblem(
"linearization of an auxiliary equation failed");
351 {
return *jacobian_; }
354 {
return *jacobian_; }
360 {
return residual_; }
363 {
return residual_; }
368 void exportSystem(
const int idx, std::string& tag,
const char *path=
"export")
370 const bool export_sparsity = exportIndex_ == -1;
373 exportCount_ = exportIndex_ == idx ? ++exportCount_ : 0;
375 tag = fmt::format(fmt::runtime(
"_{:03d}_{:02d}"), exportIndex_, exportCount_);
377 fmt::print(fmt::runtime(
"index = {:d}\n"), exportIndex_);
378 fmt::print(fmt::runtime(
"count = {:d}\n"), exportCount_);
380 Opm::exportSystem(jacobian_->istlMatrix(), residual_, export_sparsity, tag.c_str(), path);
384 { linearizationType_ = linearizationType; }
387 {
return linearizationType_; }
395 {
return flowsInfo_; }
403 {
return floresInfo_; }
412 {
return velocityInfo_; }
415 return neighborInfo_;
421 updateStoredTransmissibilities();
426 for (
auto& bdyInfo : boundaryInfo_) {
427 const auto [type, massrateAD] = problem_().boundaryCondition(bdyInfo.cell, bdyInfo.dir);
430 VectorBlockCPU massrate(0.0);
431 for (std::size_t ii = 0; ii < massrate.size(); ++ii) {
432 massrate[ii] = massrateAD[ii].value();
435 const auto& exFluidState = problem_().boundaryFluidState(bdyInfo.cell, bdyInfo.dir);
436 bdyInfo.bcdata.type = type;
437 bdyInfo.bcdata.massRate = massrate;
438 bdyInfo.bcdata.exFluidState = exFluidState;
451 template <
class SubDomainType>
455 initFirstIteration_();
457 for (
int globI : domain.cells) {
458 residual_[globI] = 0.0;
459 jacobian_->clearRow(globI, 0.0);
465 {
return *simulatorPtr_; }
467 const Simulator& simulator_()
const
468 {
return *simulatorPtr_; }
471 {
return simulator_().
problem(); }
473 const Problem& problem_()
const
474 {
return simulator_().
problem(); }
477 {
return simulator_().
model(); }
479 const Model& model_()
const
480 {
return simulator_().
model(); }
482 const GridView& gridView_()
const
483 {
return problem_().gridView(); }
485 void initFirstIteration_()
491 residual_.resize(model_().numTotalDof());
502 if (!neighborInfo_.empty()) {
507 const auto& model = model_();
508 Stencil stencil(gridView_(), model_().dofMapper());
512 using NeighborSet = std::set<unsigned>;
513 std::vector<NeighborSet> sparsityPattern(model.numTotalDof());
514 const Scalar gravity = problem_().gravity()[dimWorld - 1];
515 unsigned numCells = model.numTotalDof();
516 neighborInfo_.reserve(numCells, 6 * numCells);
517 std::vector<NeighborInfoCPU> loc_nbinfo;
518 for (
const auto& elem : elements(gridView_())) {
519 stencil.update(elem);
521 for (
unsigned primaryDofIdx = 0; primaryDofIdx < stencil.numPrimaryDof(); ++primaryDofIdx) {
522 const unsigned myIdx = stencil.globalSpaceIndex(primaryDofIdx);
523 loc_nbinfo.resize(stencil.numDof() - 1);
525 for (
unsigned dofIdx = 0; dofIdx < stencil.numDof(); ++dofIdx) {
526 const unsigned neighborIdx = stencil.globalSpaceIndex(dofIdx);
527 sparsityPattern[myIdx].insert(neighborIdx);
529 const Scalar trans = problem_().transmissibility(myIdx, neighborIdx);
530 const auto scvfIdx = dofIdx - 1;
531 const auto& scvf = stencil.interiorFace(scvfIdx);
532 const Scalar area = scvf.area();
533 const Scalar Vin = problem_().model().dofTotalVolume(myIdx);
534 const Scalar Vex = problem_().model().dofTotalVolume(neighborIdx);
535 const Scalar zIn = problem_().dofCenterDepth(myIdx);
536 const Scalar zEx = problem_().dofCenterDepth(neighborIdx);
537 const Scalar dZg = (zIn - zEx)*gravity;
538 const Scalar thpresInToEx = problem_().thresholdPressure(myIdx, neighborIdx);
539 const Scalar thpresExToIn = problem_().thresholdPressure(neighborIdx, myIdx);
540 const auto dirId = scvf.dirId();
541 auto faceDir = dirId < 0 ? FaceDir::DirEnum::Unknown
542 : FaceDir::FromIntersectionIndex(dirId);
543 ResidualNBInfo nbinfo{trans,
555 if constexpr (enableFullyImplicitThermal) {
556 nbinfo.inAlpha = problem_().thermalHalfTransmissibility(myIdx, neighborIdx);
557 nbinfo.outAlpha = problem_().thermalHalfTransmissibility(neighborIdx, myIdx);
559 if constexpr (enableDiffusion) {
560 nbinfo.diffusivity = problem_().diffusivity(myIdx, neighborIdx);
562 if constexpr (enableDispersion) {
563 nbinfo.dispersivity = problem_().dispersivity(myIdx, neighborIdx);
565 loc_nbinfo[dofIdx - 1] = NeighborInfoCPU{neighborIdx, nbinfo,
nullptr};
568 neighborInfo_.appendRow(loc_nbinfo.begin(), loc_nbinfo.end());
569 if (problem_().nonTrivialBoundaryConditions()) {
570 for (
unsigned bfIndex = 0; bfIndex < stencil.numBoundaryFaces(); ++bfIndex) {
571 const auto& bf = stencil.boundaryFace(bfIndex);
572 const int dir_id = bf.dirId();
577 const auto [type, massrateAD] = problem_().boundaryCondition(myIdx, dir_id);
579 VectorBlockCPU massrate(0.0);
580 for (std::size_t ii = 0; ii < massrate.size(); ++ii) {
581 massrate[ii] = massrateAD[ii].value();
583 const auto& exFluidState = problem_().boundaryFluidState(myIdx, dir_id);
584 BoundaryConditionDataCPU bcdata {type,
586 exFluidState.pvtRegionIndex(),
589 bf.integrationPos()[dimWorld - 1],
591 boundaryInfo_.push_back({myIdx, dir_id, bfIndex, bcdata});
599 const std::size_t numAuxMod = model.numAuxiliaryModules();
600 for (
unsigned auxModIdx = 0; auxModIdx < numAuxMod; ++auxModIdx) {
601 model.auxiliaryModule(auxModIdx)->addNeighbors(sparsityPattern);
605 jacobian_ = std::make_unique<SparseMatrixAdapter>(simulator_());
606 diagMatAddress_.resize(numCells);
608 jacobian_->reserve(sparsityPattern);
609 for (
unsigned globI = 0; globI < numCells; globI++) {
610 const auto& nbInfos = neighborInfo_[globI];
611 diagMatAddress_[globI] = jacobian_->blockAddress(globI, globI);
612 for (
auto& nbInfo : nbInfos) {
613 nbInfo.matBlockAddress = jacobian_->blockAddress(nbInfo.neighbor, globI);
617#if HAVE_CUDA && OPM_IS_COMPILING_WITH_GPU_COMPILER
618 gpuJacobian_.reset(
new gpuistl::GpuSparseMatrixWrapper<Scalar>(
620 gpuBufferDiagMatAddress_.reset(
new gpuistl::GpuBuffer<MatrixBlockGPU*>(
621 gpuistl::detail::getDiagPtrsTyped<MatrixBlockGPU>(*gpuJacobian_)));
625 fullDomain_.
cells.resize(numCells);
626 std::iota(fullDomain_.
cells.begin(), fullDomain_.
cells.end(), 0);
636#if HAVE_CUDA && OPM_IS_COMPILING_WITH_GPU_COMPILER
637 gpuJacobian_->setToZero();
644 OPM_TIMEBLOCK(createFlows);
647 const bool anyFlows = simulator_().
problem().eclWriter().outputModule().getFlows().anyFlows();
648 const auto& blockFlows = simulator_().
problem().eclWriter().outputModule().getFlows().blockFlows();
649 const auto& blockVelocity = simulator_().
problem().eclWriter().outputModule().getFlows().blockVelocity();
650 const bool isTemp = simulator_().
vanguard().eclState().getSimulationConfig().isTemp();
651 const bool anyFlores = simulator_().
problem().eclWriter().outputModule().getFlows().anyFlores() || isTemp;
652 const bool dispersionActive = simulator_().
vanguard().eclState().getSimulationConfig().rock_config().dispersion();
653 if (!dispersionActive && !enableBioeffects && blockVelocity.empty()
654 && !((anyFlows || !blockFlows.empty()) && flowsInfo_.empty())
655 && !(anyFlores && floresInfo_.empty())) {
658 const auto& model = model_();
659 const auto& nncOutput = simulator_().
problem().eclWriter().getOutputNnc().front();
660 Stencil stencil(gridView_(), model_().dofMapper());
661 const unsigned numCells = model.numTotalDof();
662 std::unordered_multimap<int, std::pair<int, int>> nncIndices;
663 std::vector<FlowInfo> loc_flinfo;
664 std::vector<VelocityInfo> loc_vlinfo;
665 unsigned int nncId = 0;
666 VectorBlock flow(0.0);
669 for (
unsigned nncIdx = 0; nncIdx < nncOutput.size(); ++nncIdx) {
670 const int ci1 = nncOutput[nncIdx].cell1;
671 const int ci2 = nncOutput[nncIdx].cell2;
672 nncIndices.emplace(ci1, std::make_pair(ci2, nncIdx));
676 flowsInfo_.reserve(numCells, 6 * numCells);
678 else if (!blockFlows.empty()) {
679 flowsInfo_.reserve(numCells, 6 * blockFlows.size());
682 floresInfo_.reserve(numCells, 6 * numCells);
684 if (dispersionActive || enableBioeffects) {
685 velocityInfo_.reserve(numCells, 6 * numCells);
687 else if (!blockVelocity.empty()) {
688 velocityInfo_.reserve(numCells, 6 * blockVelocity.size());
691 for (
const auto& elem : elements(gridView_())) {
692 stencil.update(elem);
693 for (
unsigned primaryDofIdx = 0; primaryDofIdx < stencil.numPrimaryDof(); ++primaryDofIdx) {
694 const unsigned myIdx = stencil.globalSpaceIndex(primaryDofIdx);
695 bool blockFlowFound =
false;
696 bool blockVelocityFound =
false;
697 if (!blockFlows.empty()) {
698 if (std::ranges::binary_search(blockFlows,
699 simulator_().vanguard().cartesianIndex(myIdx))) {
700 blockFlowFound =
true;
703 flowsInfo_.appendRow(loc_flinfo.begin(), loc_flinfo.begin());
704 if (!dispersionActive && !enableBioeffects && !anyFlores && blockVelocity.empty()) {
709 if (!blockVelocity.empty() && !(dispersionActive || enableBioeffects)) {
710 if (std::ranges::binary_search(blockVelocity,
711 simulator_().vanguard().cartesianIndex(myIdx))) {
712 blockVelocityFound =
true;
715 velocityInfo_.appendRow(loc_vlinfo.begin(), loc_vlinfo.begin());
716 if (!anyFlows && blockFlows.empty() && !anyFlores) {
721 const int numFaces = stencil.numBoundaryFaces() + stencil.numInteriorFaces();
722 loc_flinfo.resize(numFaces);
723 loc_vlinfo.resize(stencil.numDof() - 1);
725 for (
unsigned dofIdx = 0; dofIdx < stencil.numDof(); ++dofIdx) {
726 const unsigned neighborIdx = stencil.globalSpaceIndex(dofIdx);
728 const auto scvfIdx = dofIdx - 1;
729 const auto& scvf = stencil.interiorFace(scvfIdx);
730 int faceId = scvf.dirId();
731 const int cartMyIdx = simulator_().
vanguard().cartesianIndex(myIdx);
732 const int cartNeighborIdx = simulator_().
vanguard().cartesianIndex(neighborIdx);
733 const auto& range = nncIndices.equal_range(cartMyIdx);
734 for (
auto it = range.first; it != range.second; ++it) {
735 if (it->second.first == cartNeighborIdx){
739 nncId = it->second.second;
742 loc_flinfo[dofIdx - 1] = FlowInfo{faceId, flow, nncId};
743 loc_vlinfo[dofIdx - 1] = VelocityInfo{faceId, flow};
747 for (
unsigned bdfIdx = 0; bdfIdx < stencil.numBoundaryFaces(); ++bdfIdx) {
748 const auto& scvf = stencil.boundaryFace(bdfIdx);
749 const int faceId = scvf.dirId();
750 loc_flinfo[stencil.numInteriorFaces() + bdfIdx] = FlowInfo{faceId, flow, nncId};
753 if (anyFlows || blockFlowFound) {
754 flowsInfo_.appendRow(loc_flinfo.begin(), loc_flinfo.end());
757 floresInfo_.appendRow(loc_flinfo.begin(), loc_flinfo.end());
759 if (dispersionActive || enableBioeffects || blockVelocityFound) {
760 velocityInfo_.appendRow(loc_vlinfo.begin(), loc_vlinfo.end());
767 template <
class VectorBlockType,
class MatrixBlockType,
class ADVectorBlockType>
768 OPM_HOST_DEVICE
static void
769 setResAndJacobi(VectorBlockType& res, MatrixBlockType& bMat,
const ADVectorBlockType& resid)
771 for (
unsigned eqIdx = 0; eqIdx < numEq; ++eqIdx) {
772 res[eqIdx] = resid[eqIdx].value();
775 for (
unsigned eqIdx = 0; eqIdx < numEq; ++eqIdx) {
776 for (
unsigned pvIdx = 0; pvIdx < numEq; ++pvIdx) {
781 bMat[eqIdx][pvIdx] = resid[eqIdx].derivative(pvIdx);
788 OPM_TIMEBLOCK(updateFlows);
789 const bool enableFlows = simulator_().
problem().eclWriter().outputModule().getFlows().hasFlows();
790 const auto& blockFlows = simulator_().
problem().eclWriter().outputModule().getFlows().blockFlows();
792 const bool isTemp = simulator_().
vanguard().eclState().getSimulationConfig().isTemp();
793 const bool enableFlores = simulator_().
problem().eclWriter().outputModule().getFlows().hasFlores() || isTemp;
794 if (!enableFlows && !enableFlores && blockFlows.empty()) {
797 const unsigned int numCells = model_().numTotalDof();
799#pragma omp parallel for
801 for (
unsigned globI = 0; globI < numCells; ++globI) {
802 OPM_TIMEBLOCK_LOCAL(linearizationForEachCell, Subsystem::Assembly);
803 const auto& nbInfos = neighborInfo_[globI];
804 ADVectorBlock adres(0.0);
805 ADVectorBlock darcyFlux(0.0);
806 const IntensiveQuantities& intQuantsIn = model_().intensiveQuantities(globI, 0);
809 OPM_TIMEBLOCK_LOCAL(fluxCalculationForEachCell, Subsystem::Assembly);
811 for (
const auto& nbInfo : nbInfos) {
812 OPM_TIMEBLOCK_LOCAL(fluxCalculationForEachFace, Subsystem::Assembly);
813 const unsigned globJ = nbInfo.neighbor;
814 assert(globJ != globI);
817 const IntensiveQuantities& intQuantsEx = model_().intensiveQuantities(globJ, 0);
818 LocalResidual::computeFlux(adres, darcyFlux, globI, globJ, intQuantsIn,
819 intQuantsEx, nbInfo.res_nbinfo, problem_().moduleParams());
820 adres *= nbInfo.res_nbinfo.faceArea;
821 if (!blockFlows.empty()) {
822 if (std::ranges::binary_search(blockFlows,
823 simulator_().vanguard().cartesianIndex(globI))) {
824 for (
unsigned eqIdx = 0; eqIdx < numEq; ++eqIdx) {
825 flowsInfo_[globI][loc].flow[eqIdx] = adres[eqIdx].value();
829 else if (enableFlows) {
830 for (
unsigned eqIdx = 0; eqIdx < numEq; ++eqIdx) {
831 flowsInfo_[globI][loc].flow[eqIdx] = adres[eqIdx].value();
835 for (
unsigned eqIdx = 0; eqIdx < numEq; ++eqIdx) {
836 floresInfo_[globI][loc].flow[eqIdx] = darcyFlux[eqIdx].value();
845 for (
const auto& bdyInfo : boundaryInfo_) {
850 ADVectorBlockCPU adres(0.0);
851 const unsigned globI = bdyInfo.cell;
852 const auto& nbInfos = neighborInfo_[globI];
853 const IntensiveQuantities& insideIntQuants = model_().intensiveQuantities(globI, 0);
854 LocalResidual::computeBoundaryFlux(adres, problem_(), bdyInfo.bcdata, insideIntQuants, globI);
855 adres *= bdyInfo.bcdata.faceArea;
856 const unsigned bfIndex = bdyInfo.bfIndex;
858 for (
unsigned eqIdx = 0; eqIdx < numEq; ++eqIdx) {
859 flowsInfo_[globI][nbInfos.size() + bfIndex].flow[eqIdx] = adres[eqIdx].value();
867 template <
class SubDomainType>
868 void linearize_(
const SubDomainType& domain)
870 constexpr bool run_assembly_on_gpu = getPropValue<TypeTag, Properties::RunAssemblyOnGpu>();
875 if (!problem_().recycleFirstIterationStorage()) {
876 if (!model_().storeIntensiveQuantities() && !model_().enableStorageCache()) {
877 OPM_THROW(std::runtime_error,
"Must have cached either IQs or storage when we cannot recycle.");
888 const bool dispersionActive = simulator_().
vanguard().eclState().getSimulationConfig().rock_config().dispersion();
889 const unsigned int numCells = domain.cells.size();
891 if constexpr (!run_assembly_on_gpu) {
892 linearize_parallelization_wrapper<run_assembly_on_gpu, LocalResidual>(
903 linearize_bc<IntensiveQuantities, Model, LocalResidual>(
904 diagMatAddress_, residual_, boundaryInfo_);
906#if HAVE_CUDA && OPM_IS_COMPILING_WITH_GPU_COMPILER
908 if constexpr (std::is_same_v<SubDomainType, FullDomain<>>) {
910 using GpuParams = TpfaLinearizerGpuParams<TypeTag>;
912 int constexpr blockSize = 256;
915 GpuParams gpuParams(domain,
918 *gpuBufferDiagMatAddress_,
919 jacobian_->istlMatrix(),
926 linearize_parallelization_wrapper<run_assembly_on_gpu,
927 typename GpuParams::LocalResidualGPU>(
929 gpuParams.domainView(),
930 gpuParams.neighborInfoView(),
931 gpuParams.diagMatAddressView(),
932 gpuParams.residualView(),
933 gpuParams.modelView(),
936 gpuParams.flowProblemView());
938 if (gpuParams.boundaryInfoSize() > 0) {
939 auto boundaryInfoView = gpuParams.boundaryInfoView();
940 linearize_bc_threadsafe<TpfaLinearizer<TypeTag>,
941 typename GpuParams::GPUBOIQ,
942 decltype(gpuParams.modelView()),
943 typename GpuParams::LocalResidualGPU>
944 <<<((gpuParams.boundaryInfoSize() + blockSize - 1) / blockSize),
945 blockSize>>>(gpuParams.diagMatAddressView(),
946 gpuParams.residualView(),
948 gpuParams.modelView(),
949 gpuParams.flowProblemView());
954 gpuParams.copyResidualToHost(residual_, numCells);
955 gpuParams.copyJacobianToHost(*jacobian_, *gpuJacobian_);
957 OPM_THROW(std::logic_error,
"Only FullDomain is supported on GPU");
960 OPM_THROW(std::logic_error,
961 "Trying to run GPU assembly without compiling with GPU support");
967 linearize_source_terms(numCells, domain);
970 template <
bool useGPU,
971 class LocalResidualT,
975 class NeighborSparseTable,
978 void linearize_parallelization_wrapper(
const unsigned int numCells,
979 const DomainType& domain,
980 const NeighborSparseTable& neighborInfo,
981 DiagPtrType& diagMatAddress,
983 const ModelClass& model,
985 [[maybe_unused]]
bool dispersionActive,
986 const ProblemT& problem)
988 if constexpr (useGPU) {
989 static_assert(!enableBioeffects &&
"Bioeffects not yet supported on GPU");
990 assert(!dispersionActive &&
"Dispersion not yet supported on GPU");
991#if HAVE_CUDA && OPM_IS_COMPILING_WITH_GPU_COMPILER
992 int constexpr blockSize = 256;
993 kernel_linearize<TpfaLinearizer<TypeTag>,
1002 <<<((numCells + blockSize - 1) / blockSize), blockSize>>>(numCells,
1011 OPM_THROW(std::runtime_error,
"Trying to run GPU code without GPU support");
1015#pragma omp parallel for
1017 for (
unsigned ii = 0; ii < numCells; ++ii) {
1018 linearize_cell<false, LocalResidual>(ii,
1031 template <
class SubDomainType>
1032 void linearize_source_terms(
unsigned int numCells,
const SubDomainType& domain)
1035#pragma omp parallel for
1037 for (
unsigned ii = 0; ii < numCells; ++ii) {
1038 OPM_TIMEBLOCK_LOCAL(linearizationForEachCell, Subsystem::Assembly);
1039 const unsigned globI = domain.cells[ii];
1040 VectorBlockCPU res(0.0);
1041 MatrixBlockCPU bMat(0.0);
1042 ADVectorBlockCPU adres(0.0);
1043 const IntensiveQuantities& intQuantsIn = model_().intensiveQuantities(globI, 0);
1044 const double volume = model_().dofTotalVolume(globI);
1051 if (separateSparseSourceTerms_) {
1052 LocalResidual::computeSourceDense(adres, problem_(), intQuantsIn, globI, 0);
1054 LocalResidual::computeSource(adres, problem_(), intQuantsIn, globI, 0);
1058 residual_[globI] += res;
1060 *diagMatAddress_[globI] += bMat;
1064 if (separateSparseSourceTerms_) {
1065 problem_().wellModel().addReservoirSourceTerms(residual_, diagMatAddress_);
1070 template <
bool useGPU,
1071 class LocalResidualT,
1073 class VelocityInfoType,
1077 class NeighborSparseTable,
1080 const DomainType& domain,
1081 const NeighborSparseTable& neighborInfo,
1082 const DiagPtrType& diagMatAddress,
1084 const ModelClass& model,
1085 VelocityInfoType& velocityInfo,
1087 const ProblemType& problem)
1089#if OPM_IS_INSIDE_HOST_FUNCTION
1090 OPM_TIMEBLOCK_LOCAL(linearizationForEachCell, Subsystem::Assembly);
1092 const unsigned globI = domain.cells[ii];
1093 const auto& nbInfos = neighborInfo[globI];
1094 VectorBlock res(0.0);
1096 ADVectorBlock adres(0.0);
1097 ADVectorBlock darcyFlux(0.0);
1098 const auto& intQuantsIn = model.intensiveQuantities(globI, 0);
1102#if OPM_IS_INSIDE_HOST_FUNCTION
1103 OPM_TIMEBLOCK_LOCAL(fluxCalculationForEachCell, Subsystem::Assembly);
1106 for (
const auto& nbInfo : nbInfos) {
1107 const unsigned globJ = nbInfo.neighbor;
1113 const auto& intQuantsEx = model.intensiveQuantities(globJ, 0);
1115 LocalResidualT::computeFlux(adres,
1122 problem.moduleParams());
1124 adres *= nbInfo.res_nbinfo.faceArea;
1127 if constexpr (!useGPU) {
1129 if (std::cmp_less(globI, velocityInfo.size())) {
1130 if (velocityInfo.rowSize(globI) > 0) {
1131 for (
unsigned phaseIdx = 0; phaseIdx < numEq; ++phaseIdx) {
1132 velocityInfo[globI][loc].velocity[phaseIdx]
1133 = darcyFlux[phaseIdx].value() / nbInfo.res_nbinfo.faceArea;
1142 *diagMatAddress[globI] += bMat;
1145 *nbInfo.matBlockAddress += bMat;
1153 LocalResidualT::template computeStorage<Evaluation>(adres, intQuantsIn);
1157 if constexpr (!useGPU) {
1159 if (model.enableStorageCache()) {
1163 model.updateCachedStorage(globI, 0, res);
1170 if (problem.iterationContext().isFirstGlobalIteration()) {
1172 if (problem.recycleFirstIterationStorage()) {
1175 model.updateCachedStorage(globI, 1, res);
1178 const auto& intQuantOld = model.intensiveQuantities(globI, 1);
1179 LocalResidualT::template computeStorage<Scalar>(tmp, intQuantOld);
1180 model.updateCachedStorage(globI, 1, tmp);
1183 res -= model.cachedStorage(globI, 1);
1185#if OPM_IS_INSIDE_HOST_FUNCTION
1186 OPM_TIMEBLOCK_LOCAL(computeStorage0, Subsystem::Assembly);
1189 const auto& intQuantOld = model.intensiveQuantities(globI, 1);
1190 LocalResidualT::template computeStorage<Scalar>(tmp, intQuantOld);
1196 const auto& intQuantOld = model.intensiveQuantities(globI, 1);
1197 LocalResidualT::template computeStorage<Scalar>(tmp, intQuantOld);
1202 const Scalar volume = model.dofTotalVolume(globI);
1203 const Scalar storefac = volume / dt;
1208 *diagMatAddress[globI] += bMat;
1211#if HAVE_CUDA && OPM_IS_COMPILING_WITH_GPU_COMPILER
1212 template <
class LocalIntensiveQuantities,
1213 class LocalResidualT,
1217 class BoundaryInfoT,
1219 OPM_HOST_DEVICE
static void linearize_bc_threadsafe_single_cell(DiagPtrType diagMatAdress,
1221 const BoundaryInfoT boundaryInfoElement,
1226 constexpr int numEq = getPropValue<TypeTag, Properties::NumEq>();
1228 VectorBlock res(0.0);
1230 ADVectorBlock adres(0.0);
1231 const unsigned globI = boundaryInfoElement.cell;
1232 const LocalIntensiveQuantities& insideIntQuants
1233 = model.intensiveQuantities(globI, 0);
1234 if constexpr (!std::is_empty_v<GetPropType<TypeTag, Properties::FluidSystem>>) {
1235 LocalResidualT::computeBoundaryFlux(
1236 adres, problem, boundaryInfoElement.bcdata, insideIntQuants, globI);
1238 adres *= boundaryInfoElement.bcdata.faceArea;
1240 auto* residualPtr = &(
residual.data()[globI]);
1241 for (
int i = 0; i < numEq; ++i) {
1242 atomicAdd(&((*residualPtr)[i]), res[i]);
1244 auto* matPtr = diagMatAdress[globI];
1245 for (
int row = 0; row < bMat.size(); ++row) {
1246 for (
int col = 0; col < bMat.size(); ++col) {
1247 Scalar* elemPtr = &((*matPtr)[row][col]);
1248 atomicAdd(elemPtr, bMat[row][col]);
1256 template <
class LocalIntensiveQuantities,
1258 class LocalResidualT,
1261 class BoundaryInfoT>
1262 void linearize_bc(DiagPtrType& diagMatAdress,
1264 const BoundaryInfoT& boundaryInfo)
1267 for (
const auto& bdyInfo : boundaryInfo) {
1271 VectorBlock res(0.0);
1272 MatrixBlock bMat(0.0);
1273 ADVectorBlock adres(0.0);
1274 const unsigned globI = bdyInfo.cell;
1275 const LocalIntensiveQuantities& insideIntQuants
1276 = model_().intensiveQuantities(globI, 0);
1277 LocalResidual::computeBoundaryFlux(
1278 adres, problem_(), bdyInfo.bcdata, insideIntQuants, globI);
1279 adres *= bdyInfo.bcdata.faceArea;
1283 *diagMatAdress[globI] += bMat;
1287 void updateStoredTransmissibilities()
1289 if (neighborInfo_.empty()) {
1293 initFirstIteration_();
1296 const unsigned numCells = model_().numTotalDof();
1298#pragma omp parallel for
1300 for (
unsigned globI = 0; globI < numCells; globI++) {
1301 auto nbInfos = neighborInfo_[globI];
1302 for (
auto& nbInfo : nbInfos) {
1303 const unsigned globJ = nbInfo.neighbor;
1304 nbInfo.res_nbinfo.trans = problem_().transmissibility(globI, globJ);
1309 Simulator* simulatorPtr_{};
1312 std::unique_ptr<SparseMatrixAdapter> jacobian_{};
1314 std::unique_ptr<gpuistl::GpuSparseMatrixWrapper<Scalar>> gpuJacobian_;
1315 std::unique_ptr<gpuistl::GpuBuffer<MatrixBlockGPU*>> gpuBufferDiagMatAddress_;
1319 GlobalEqVector residual_;
1321 LinearizationType linearizationType_{};
1323 using ResidualNBInfo =
typename LocalResidual::ResidualNBInfo;
1324 using NeighborInfoCPU = NeighborInfoStruct<ResidualNBInfo, MatrixBlockCPU>;
1326 SparseTable<NeighborInfoCPU> neighborInfo_{};
1327 std::vector<MatrixBlockCPU*> diagMatAddress_ {};
1335 SparseTable<FlowInfo> flowsInfo_;
1336 SparseTable<FlowInfo> floresInfo_;
1341 VectorBlock velocity;
1343 SparseTable<VelocityInfo> velocityInfo_;
1345 using ScalarFluidState =
typename IntensiveQuantities::ScalarFluidState;
1347 using BoundaryConditionDataCPU = BoundaryConditionData<VectorBlockCPU, ScalarFluidState>;
1349 using BoundaryInfoCPU = BoundaryInfo<BoundaryConditionDataCPU>;
1351 std::vector<BoundaryInfoCPU> boundaryInfo_;
1353 bool separateSparseSourceTerms_ =
false;
1355 FullDomain<> fullDomain_;
Declares the properties required by the black oil model.
The base class for the element-centered finite-volume discretization scheme.
Definition: ecfvdiscretization.hh:160
Definition: matrixblock.hh:229
Manages the initializing and running of time dependent problems.
Definition: simulator.hh:84
Scalar timeStepSize() const
Returns the time step length so that we don't miss the beginning of the next episode or cross the en...
Definition: simulator.hh:418
Vanguard & vanguard()
Return a reference to the grid manager of simulation.
Definition: simulator.hh:239
Problem & problem()
Return the object which specifies the physical setup of the simulation.
Definition: simulator.hh:270
const GridView & gridView() const
Return the grid view for which the simulation is done.
Definition: simulator.hh:251
Model & model()
Return the physical model used in the simulation.
Definition: simulator.hh:257
The common code for the linearizers of non-linear systems of equations.
Definition: tpfalinearizer.hh:120
const auto & getFloresInfo() const
Return constant reference to the floresInfo.
Definition: tpfalinearizer.hh:402
const LinearizationType & getLinearizationType() const
Definition: tpfalinearizer.hh:386
const auto & getNeighborInfo() const
Definition: tpfalinearizer.hh:414
void updateBoundaryConditionData()
Definition: tpfalinearizer.hh:424
void linearize()
Linearize the full system of non-linear equations.
Definition: tpfalinearizer.hh:234
SparseMatrixAdapter & jacobian()
Definition: tpfalinearizer.hh:353
const auto & getFlowsInfo() const
Return constant reference to the flowsInfo.
Definition: tpfalinearizer.hh:394
std::map< unsigned, Constraints > constraintsMap() const
Returns the map of constraint degrees of freedom.
Definition: tpfalinearizer.hh:448
TpfaLinearizer()
Definition: tpfalinearizer.hh:181
void linearizeDomain(const SubDomainType &domain)
Linearize the part of the non-linear system of equations that is associated with a part of the spatia...
Definition: tpfalinearizer.hh:290
void finalize()
Definition: tpfalinearizer.hh:311
void init(Simulator &simulator)
Initialize the linearizer.
Definition: tpfalinearizer.hh:207
void updateFlowsInfo()
Definition: tpfalinearizer.hh:786
static void registerParameters()
Register all run-time parameters for the Jacobian linearizer.
Definition: tpfalinearizer.hh:192
void exportSystem(const int idx, std::string &tag, const char *path="export")
Export block-sparse linear system.
Definition: tpfalinearizer.hh:368
void linearizeDomain()
Linearize the part of the non-linear system of equations that is associated with the spatial domain.
Definition: tpfalinearizer.hh:250
const SparseMatrixAdapter & jacobian() const
Return constant reference to global Jacobian matrix backend.
Definition: tpfalinearizer.hh:350
void setLinearizationType(LinearizationType linearizationType)
Definition: tpfalinearizer.hh:383
GlobalEqVector & residual()
Definition: tpfalinearizer.hh:362
void eraseMatrix()
Causes the Jacobian matrix to be recreated from scratch before the next iteration.
Definition: tpfalinearizer.hh:220
const auto & getVelocityInfo() const
Return constant reference to the velocityInfo.
Definition: tpfalinearizer.hh:411
static OPM_HOST_DEVICE void linearize_cell(const unsigned int ii, const DomainType &domain, const NeighborSparseTable &neighborInfo, const DiagPtrType &diagMatAddress, ResidualType &residual, const ModelClass &model, VelocityInfoType &velocityInfo, const Scalar dt, const ProblemType &problem)
Definition: tpfalinearizer.hh:1079
void updateDiscretizationParameters()
Definition: tpfalinearizer.hh:419
void resetSystem_(const SubDomainType &domain)
Definition: tpfalinearizer.hh:452
static OPM_HOST_DEVICE void setResAndJacobi(VectorBlockType &res, MatrixBlockType &bMat, const ADVectorBlockType &resid)
Definition: tpfalinearizer.hh:769
void linearizeAuxiliaryEquations()
Linearize the auxiliary equations, i.e. the part of the non-linear system of equations that is contributed by the model's auxiliary modules.
Definition: tpfalinearizer.hh:318
const GlobalEqVector & residual() const
Return constant reference to global residual vector.
Definition: tpfalinearizer.hh:359
static GpuSparseMatrixWrapper< T, ForceLegacy > fromMatrix(const MatrixType &matrix, bool copyNonZeroElementsDirectly=false)
fromMatrix creates a new matrix with the same block size and values as the given matrix
Definition: GpuSparseMatrixWrapper.hpp:183
A small fixed-size square matrix class for use in CUDA kernels.
Definition: MiniMatrix.hpp:37
Definition: MiniVector.hpp:50
Declare the properties used by the infrastructure code of the finite volume discretizations.
Defines the common properties required by the porous medium multi-phase models.
@ NONE
Definition: DeferredLogger.hpp:46
Definition: blackoilnewtonmethodparams.hpp:31
HYPRE_IJMatrix createMatrix(HYPRE_Int N, HYPRE_Int dof_offset, const CommType &comm)
Create Hypre matrix.
Definition: hypreinterface/HypreSetup.hpp:165
Definition: blackoilbioeffectsmodules.hh:45
typename Properties::Detail::GetPropImpl< TypeTag, Property >::type::type GetPropType
get the type alias defined in the property (equivalent to old macro GET_PROP_TYPE(....
Definition: propertysystem.hh:233
void exportSystem(const IstlMatrix &jacobian, const GlobalEqVector &residual, const bool export_sparsity, const char *tag, const char *path="export")
Export block-sparse linear system.
Definition: exportSystem.hpp:42
Storage cells
Definition: tpfalinearizerstructs.hh:39
Definition: linearizationtype.hh:34
Definition: tpfalinearizer.hh:99
static constexpr bool value
Definition: tpfalinearizer.hh:99
GPU parameter setup class for TpfaLinearizer.