Set LU matrices to zero when jacobian is a zero element #666

Merged · 81 commits · Sep 24, 2024
Commits
1fc8931
pushing
K20shores Sep 20, 2024
a7c6c1f
pushing fix
K20shores Sep 20, 2024
8baafb2
removing unneccesary logic check
K20shores Sep 20, 2024
d930f73
adding cuda stuff
K20shores Sep 20, 2024
7a0819d
lowering tolerance
K20shores Sep 20, 2024
56caa4c
lowering tolerance
K20shores Sep 20, 2024
259f3c6
Merge branch 'zero_matrices' of https://github.com/NCAR/micm into zer…
K20shores Sep 20, 2024
3fb69a1
modified jit ludecomp
K20shores Sep 20, 2024
fec303e
raising tolerance
K20shores Sep 23, 2024
65f06e6
testing jit and cuda properly
K20shores Sep 23, 2024
31eea12
raising tolerance
K20shores Sep 23, 2024
08d4fbf
raising again
K20shores Sep 23, 2024
167fbc5
again
K20shores Sep 23, 2024
8b929ab
raising again
K20shores Sep 23, 2024
96eda4e
lowering tolerance
K20shores Sep 23, 2024
6f486db
adding prints to matrices
K20shores Sep 23, 2024
27b3b08
copy LU to host
K20shores Sep 23, 2024
bb304ce
printing A
K20shores Sep 23, 2024
11bcedc
sparsity
K20shores Sep 23, 2024
f46ed3e
bernoulli again
K20shores Sep 23, 2024
22ae288
manual engine
K20shores Sep 23, 2024
4b3c1a2
double
K20shores Sep 23, 2024
0584f92
thing
K20shores Sep 23, 2024
8a8a2ac
printing values
K20shores Sep 23, 2024
faf90ab
larger matrix
K20shores Sep 23, 2024
6fa8d3f
2 cells
K20shores Sep 23, 2024
2eb1345
now
K20shores Sep 23, 2024
fe508de
dense
K20shores Sep 23, 2024
0dc7855
20
K20shores Sep 23, 2024
a97e189
4000
K20shores Sep 23, 2024
d05be0e
things
K20shores Sep 23, 2024
a72f473
uncomment
K20shores Sep 23, 2024
ee48a96
uncomment
K20shores Sep 23, 2024
40a3714
print
K20shores Sep 23, 2024
16c2918
9
K20shores Sep 23, 2024
523eec7
8
K20shores Sep 23, 2024
f2af798
6
K20shores Sep 23, 2024
e3d4c8a
5
K20shores Sep 23, 2024
29c3f78
2e-6
K20shores Sep 23, 2024
302e66c
lu decomp
K20shores Sep 23, 2024
b4ec9b3
10
K20shores Sep 23, 2024
169b1fb
8
K20shores Sep 23, 2024
c55569a
0
K20shores Sep 23, 2024
7b763fe
comment
K20shores Sep 23, 2024
6c70384
checking
K20shores Sep 23, 2024
2437bce
uncomment
K20shores Sep 23, 2024
27fb806
7
K20shores Sep 23, 2024
0a713a3
1
K20shores Sep 23, 2024
c384b4e
10
K20shores Sep 23, 2024
fefc2ab
print
K20shores Sep 23, 2024
e56c553
print
K20shores Sep 23, 2024
3cf04fb
again
K20shores Sep 23, 2024
fe4082c
100
K20shores Sep 23, 2024
7c77437
data check
K20shores Sep 23, 2024
3f99c5c
remove check results
K20shores Sep 23, 2024
fe95eee
13
K20shores Sep 23, 2024
a371a19
16
K20shores Sep 23, 2024
c05ffef
eq
K20shores Sep 23, 2024
1a567d8
equal
K20shores Sep 23, 2024
cd278f6
uncomment
K20shores Sep 23, 2024
bd6fd8c
1 block
K20shores Sep 23, 2024
ff76e36
5
K20shores Sep 23, 2024
d6821cd
print
K20shores Sep 23, 2024
eaaa729
1
K20shores Sep 23, 2024
7ac2460
testing LU decomp specifically
K20shores Sep 24, 2024
844b32c
trying to correct cuda test
K20shores Sep 24, 2024
e660c01
lowering
K20shores Sep 24, 2024
4506588
lowering tolerance
K20shores Sep 24, 2024
09df2df
lowering again
K20shores Sep 24, 2024
a679aa4
thing
K20shores Sep 24, 2024
e1a1aad
variable
K20shores Sep 24, 2024
7c90c71
all tests pass on derecho
K20shores Sep 24, 2024
3ed3983
setting values to zero for lu decomp
K20shores Sep 24, 2024
f204f03
defaulting LU to 0 instead of 1e-30
K20shores Sep 24, 2024
8f69bae
copying block values to other blocks
K20shores Sep 24, 2024
4f741be
removing small value initialization
K20shores Sep 24, 2024
09f689f
correcting version copyright
K20shores Sep 24, 2024
a398305
using absolute error
K20shores Sep 24, 2024
0c4bce9
Merge branch 'zero_matrices' of https://github.com/NCAR/micm into zer…
K20shores Sep 24, 2024
7fc8efa
making index once
K20shores Sep 24, 2024
a82bd2d
camel case
K20shores Sep 24, 2024
18 changes: 18 additions & 0 deletions include/micm/jit/solver/jit_lu_decomposition.inl
@@ -89,6 +89,15 @@ namespace micm
func.SetArrayElement(func.arguments_[2], U_ptr_index, JitType::Double, A_val);
func.EndLoop(loop);
}
else {
auto loop = func.StartLoop("Uik_eq_zero_loop", 0, L);
llvm::Value *zero_val = llvm::ConstantFP::get(*(func.context_), llvm::APFloat(0.0));
llvm::Value *iUf = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, uik_nkj->first));
llvm::Value *U_ptr_index[1];
U_ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, iUf);
func.SetArrayElement(func.arguments_[2], U_ptr_index, JitType::Double, zero_val);
func.EndLoop(loop);
}
for (std::size_t ikj = 0; ikj < uik_nkj->second; ++ikj)
{
auto loop = func.StartLoop("Uik_seq_Lij_Ujk_loop", 0, L);
@@ -137,6 +146,15 @@ namespace micm
func.SetArrayElement(func.arguments_[1], L_ptr_index, JitType::Double, A_val);
func.EndLoop(loop);
}
else {
auto loop = func.StartLoop("Lki_eq_zero_loop", 0, L);
llvm::Value *zero_val = llvm::ConstantFP::get(*(func.context_), llvm::APFloat(0.0));
llvm::Value *iLf = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, lki_nkj->first));
llvm::Value *L_ptr_index[1];
L_ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, iLf);
func.SetArrayElement(func.arguments_[1], L_ptr_index, JitType::Double, zero_val);
func.EndLoop(loop);
}
for (std::size_t ikj = 0; ikj < lki_nkj->second; ++ikj)
{
auto loop = func.StartLoop("Lki_seq_Lkj_Uji_loop", 0, L);
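For readers less familiar with the LLVM builder calls in the JIT path above, the two new else-branches emit loops that simply zero-fill the vectorized U and L entries when the corresponding jacobian element is structurally zero. A rough scalar C++ equivalent of the emitted `Uik_eq_zero_loop` is sketched below; the function and variable names are illustrative, not part of micm's API:

```cpp
// Sketch only: approximate behavior of the JIT-emitted zero-fill loop.
// `cells` mirrors the loop bound L (the grid-cell/vector width) and `uik_first`
// mirrors uik_nkj->first, the flattened offset of the U element being filled.
#include <cstddef>

void ZeroFillUik(double* U, std::size_t uik_first, std::size_t cells)
{
  // Equivalent of StartLoop("Uik_eq_zero_loop", 0, L) followed by
  // SetArrayElement(U, loop.index_ + uik_first, 0.0) on each iteration.
  for (std::size_t i = 0; i < cells; ++i)
    U[uik_first + i] = 0.0;  // previously these elements kept their initial value
}
```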
7 changes: 7 additions & 0 deletions include/micm/solver/lu_decomposition.hpp
@@ -43,6 +43,13 @@ namespace micm
/// For the sparse matrix algorithm, the indices of non-zero terms are stored in
/// several arrays during construction. These arrays are iterated through during
/// calls to Decompose to do the actual decomposition.
/// Our LU decomposition only assigns the values of the jacobian to the LU matrices
/// when the *jacobian* element is nonzero. However, the sparsity pattern of the jacobian doesn't
/// necessarily match that of the LU matrices: there can be more nonzero elements in the LU matrices
/// than in the jacobian. When this happens, we still need to assign the value of the jacobian
/// to the LU matrix, and that value is implicitly zero when the sparsity patterns differ. The fill
/// values here perform this implicit assignment.
/// More detail in this issue: https://github.com/NCAR/micm/issues/625
class LuDecomposition
{
protected:
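To make the comment above concrete: an LU entry can be structurally nonzero (because of fill-in) even when the corresponding jacobian entry is zero, and before this change such entries kept whatever value the LU matrices were initialized with. The dense, self-contained C++ sketch below mimics the scalar Decompose loop with a toggle for the new behavior; it uses plain arrays rather than micm's sparse types, and an INFINITY initial value (as in the new AgnosticToInitialValue tests) to make the contamination obvious. Variable names such as `zero_when_A_is_zero` are illustrative only:

```cpp
// Sketch only (not micm code): Doolittle LU where, as in the sparse path,
// A is copied into U/L only where A is structurally nonzero. Without the
// explicit zeroing, the initial value of the LU matrices leaks into the result.
#include <cmath>
#include <cstdio>

int main()
{
  constexpr int n = 3;
  const double A[n][n] = { { 4, 1, 0 }, { 1, 3, 1 }, { 0, 1, 2 } };  // A[0][2] == A[2][0] == 0
  constexpr bool zero_when_A_is_zero = true;  // the behavior added by this PR
  const double initial_value = INFINITY;      // deliberately extreme, like the new tests

  double U[n][n], L[n][n];
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < n; ++j)
    {
      U[i][j] = initial_value;
      L[i][j] = (i == j) ? 1.0 : initial_value;
    }

  for (int i = 0; i < n; ++i)
  {
    for (int k = i; k < n; ++k)  // row i of U
    {
      if (A[i][k] != 0.0)
        U[i][k] = A[i][k];   // mirrors the "if (*(do_aik++))" branch
      else if (zero_when_A_is_zero)
        U[i][k] = 0.0;       // mirrors the new "else" branch
      for (int j = 0; j < i; ++j)
        U[i][k] -= L[i][j] * U[j][k];
    }
    for (int k = i + 1; k < n; ++k)  // column i of L
    {
      if (A[k][i] != 0.0)
        L[k][i] = A[k][i];
      else if (zero_when_A_is_zero)
        L[k][i] = 0.0;
      for (int j = 0; j < i; ++j)
        L[k][i] -= L[k][j] * U[j][i];
      L[k][i] /= U[i][i];
    }
  }

  // With zero_when_A_is_zero == false, INFINITY propagates through these entries.
  std::printf("U[0][2] = %g, L[2][0] = %g, U[2][2] = %g\n", U[0][2], L[2][0], U[2][2]);
  return 0;
}
```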
18 changes: 16 additions & 2 deletions include/micm/solver/lu_decomposition.inl
@@ -194,8 +194,12 @@ namespace micm
// Upper triangular matrix
for (std::size_t iU = 0; iU < inLU.second; ++iU)
{
if (*(do_aik++))
if (*(do_aik++)){
U_vector[uik_nkj->first] = A_vector[*(aik++)];
}
else {
U_vector[uik_nkj->first] = 0;
}
for (std::size_t ikj = 0; ikj < uik_nkj->second; ++ikj)
{
U_vector[uik_nkj->first] -= L_vector[lij_ujk->first] * U_vector[lij_ujk->second];
@@ -207,8 +211,12 @@ namespace micm
L_vector[(lki_nkj++)->first] = 1.0;
for (std::size_t iL = 0; iL < inLU.first; ++iL)
{
if (*(do_aki++))
if (*(do_aki++)){
L_vector[lki_nkj->first] = A_vector[*(aki++)];
}
else {
L_vector[lki_nkj->first] = 0;
}
for (std::size_t ikj = 0; ikj < lki_nkj->second; ++ikj)
{
L_vector[lki_nkj->first] -= L_vector[lkj_uji->first] * U_vector[lkj_uji->second];
@@ -275,6 +283,9 @@ namespace micm
std::copy(A_vector + *aik, A_vector + *aik + n_cells, U_vector + uik_nkj_first);
++aik;
}
else {
std::fill(U_vector + uik_nkj_first, U_vector + uik_nkj_first + n_cells, 0);
}
for (std::size_t ikj = 0; ikj < uik_nkj->second; ++ikj)
{
const std::size_t lij_ujk_first = lij_ujk->first;
@@ -297,6 +308,9 @@ namespace micm
std::copy(A_vector + *aki, A_vector + *aki + n_cells, L_vector + lki_nkj_first);
++aki;
}
else {
std::fill(L_vector + lki_nkj_first, L_vector + lki_nkj_first + n_cells, 0);
}
for (std::size_t ikj = 0; ikj < lki_nkj->second; ++ikj)
{
const std::size_t lkj_uji_first = lkj_uji->first;
3 changes: 2 additions & 1 deletion include/micm/solver/rosenbrock.inl
@@ -23,7 +23,7 @@ namespace micm
const double h_max = parameters_.h_max_ == 0.0 ? time_step : std::min(time_step, parameters_.h_max_);
const double h_start =
parameters_.h_start_ == 0.0 ? std::max(parameters_.h_min_, DELTA_MIN) : std::min(h_max, parameters_.h_start_);

SolverStats stats;

double present_time = 0.0;
@@ -243,6 +243,7 @@ namespace micm
{
double alpha = 1 / (H * gamma);
static_cast<const Derived*>(this)->AlphaMinusJacobian(state.jacobian_, alpha);

linear_solver_.Factor(state.jacobian_, state.lower_matrix_, state.upper_matrix_, singular);
stats.decompositions_ += 1;

2 changes: 0 additions & 2 deletions include/micm/version.hpp
@@ -1,5 +1,3 @@
// Copyright (C) 2023-2024 National Center for Atmospheric Research
// SPDX-License-Identifier: Apache-2.0
// clang-format off
#pragma once

9 changes: 9 additions & 0 deletions src/solver/lu_decomposition.cu
@@ -62,6 +62,10 @@ namespace micm
size_t A_idx = d_aik[aik_offset++] + tid;
d_U[U_idx] = d_A[A_idx];
}
else {
size_t U_idx = d_uik_nkj[uik_nkj_offset].first + tid;
d_U[U_idx] = 0;
}

for (size_t ikj = 0; ikj < d_uik_nkj[uik_nkj_offset].second; ++ikj)
{
@@ -85,6 +89,11 @@ namespace micm
size_t A_idx = d_aki[aki_offset++] + tid;
d_L[L_idx] = d_A[A_idx];
}
else {
size_t L_idx = d_lki_nkj[lki_nkj_offset].first + tid;
d_L[L_idx] = 0;
}

for (size_t ikj = 0; ikj < d_lki_nkj[lki_nkj_offset].second; ++ikj)
{
size_t L_idx_1 = d_lki_nkj[lki_nkj_offset].first + tid;
2 changes: 1 addition & 1 deletion test/unit/CMakeLists.txt
@@ -1,4 +1,4 @@
if(MICM_ENABLE_JSON)
if(MICM_ENABLE_CONFIG_READER)
add_subdirectory(configure)
endif()
if(MICM_ENABLE_CUDA)
4 changes: 2 additions & 2 deletions test/unit/configure/process/test_user_defined_config.cpp
@@ -46,9 +46,9 @@ TEST(UserDefinedConfig, ParseConfig)
// first reaction
{
EXPECT_EQ(process_vector[0].reactants_.size(), 3);
EXPECT_EQ(process_vector[0].reactants_[0].name_, "bar");
EXPECT_EQ(process_vector[0].reactants_[0].name_, "foo");
EXPECT_EQ(process_vector[0].reactants_[1].name_, "bar");
EXPECT_EQ(process_vector[0].reactants_[2].name_, "foo");
EXPECT_EQ(process_vector[0].reactants_[2].name_, "bar");
EXPECT_EQ(process_vector[0].products_.size(), 2);
EXPECT_EQ(process_vector[0].products_[0].first.name_, "baz");
EXPECT_EQ(process_vector[0].products_[0].second, 1.4);
14 changes: 14 additions & 0 deletions test/unit/cuda/solver/test_cuda_linear_solver.cpp
@@ -138,3 +138,17 @@ TEST(CudaLinearSolver, RandomMatrixVectorOrderingForGPU)
{
verify_gpu_against_cpu();
}

TEST(CudaLinearSolver, AgnosticToInitialValue)
{
double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
for(auto initial_value : initial_values)
{
testExtremeInitialValue<Group1CudaDenseMatrix, Group1CudaSparseMatrix, micm::CudaLinearSolver<Group1CudaSparseMatrix>>(1, initial_value);
testExtremeInitialValue<Group20CudaDenseMatrix, Group20CudaSparseMatrix, micm::CudaLinearSolver<Group20CudaSparseMatrix>>(20, initial_value);
testExtremeInitialValue<Group300CudaDenseMatrix, Group300CudaSparseMatrix, micm::CudaLinearSolver<Group300CudaSparseMatrix>>(
300, initial_value);
testExtremeInitialValue<Group4000CudaDenseMatrix, Group4000CudaSparseMatrix, micm::CudaLinearSolver<Group4000CudaSparseMatrix>>(
4000, initial_value);
}
}
8 changes: 8 additions & 0 deletions test/unit/jit/solver/test_jit_linear_solver.cpp
@@ -42,4 +42,12 @@ TEST(JitLinearSolver, DiagonalMatrixVectorOrdering)
testDiagonalMatrix<Group2VectorMatrix, Group2SparseVectorMatrix, micm::JitLinearSolver<2, Group2SparseVectorMatrix>>(2);
testDiagonalMatrix<Group3VectorMatrix, Group3SparseVectorMatrix, micm::JitLinearSolver<3, Group3SparseVectorMatrix>>(3);
testDiagonalMatrix<Group4VectorMatrix, Group4SparseVectorMatrix, micm::JitLinearSolver<4, Group4SparseVectorMatrix>>(4);
}

TEST(JitLinearSolver, AgnosticToInitialValue)
{
double initial_values[] = { -INFINITY, INFINITY };
for(auto initial_value : initial_values) {
testExtremeInitialValue<Group1VectorMatrix, Group1SparseVectorMatrix, micm::JitLinearSolver<1, Group1SparseVectorMatrix>>(1, initial_value);
}
}
18 changes: 18 additions & 0 deletions test/unit/solver/test_linear_solver.cpp
@@ -35,6 +35,13 @@ TEST(LinearSolver, DiagonalMarkowitzReorder)
testMarkowitzReordering<micm::Matrix<int>, SparseMatrixTest>();
}

TEST(LinearSolver, StandardOrderingAgnosticToInitialValue)
{
double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
for(auto initial_value : initial_values)
testExtremeInitialValue<DenseMatrixTest, SparseMatrixTest, micm::LinearSolver<SparseMatrixTest>>(5, initial_value);
}

using Group1VectorMatrix = micm::VectorMatrix<FloatingPointType, 1>;
using Group2VectorMatrix = micm::VectorMatrix<FloatingPointType, 2>;
using Group3VectorMatrix = micm::VectorMatrix<FloatingPointType, 3>;
@@ -61,6 +68,17 @@ TEST(LinearSolver, RandomMatrixVectorOrdering)
testRandomMatrix<Group4VectorMatrix, Group4SparseVectorMatrix, micm::LinearSolver<Group4SparseVectorMatrix>>(5);
}

TEST(LinearSolver, VectorOrderingAgnosticToInitialValue)
{
double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
for(auto initial_value : initial_values) {
testExtremeInitialValue<Group1VectorMatrix, Group1SparseVectorMatrix, micm::LinearSolver<Group1SparseVectorMatrix>>(5, initial_value);
testExtremeInitialValue<Group2VectorMatrix, Group2SparseVectorMatrix, micm::LinearSolver<Group2SparseVectorMatrix>>(5, initial_value);
testExtremeInitialValue<Group3VectorMatrix, Group3SparseVectorMatrix, micm::LinearSolver<Group3SparseVectorMatrix>>(5, initial_value);
testExtremeInitialValue<Group4VectorMatrix, Group4SparseVectorMatrix, micm::LinearSolver<Group4SparseVectorMatrix>>(5, initial_value);
}
}

TEST(LinearSolver, DiagonalMatrixVectorOrdering)
{
testDiagonalMatrix<Group1VectorMatrix, Group1SparseVectorMatrix, micm::LinearSolver<Group1SparseVectorMatrix>>(5);
62 changes: 60 additions & 2 deletions test/unit/solver/test_linear_solver_policy.hpp
@@ -201,7 +201,65 @@ void testRandomMatrix(std::size_t number_of_blocks)
CopyToHostDense<MatrixPolicy>(x);

check_results<FloatingPointType, MatrixPolicy, SparseMatrixPolicy>(
A, b, x, [&](const FloatingPointType a, const FloatingPointType b) -> void { EXPECT_NEAR(a, b, 1.0e-5); });
A, b, x, [&](const FloatingPointType a, const FloatingPointType b) -> void { EXPECT_NEAR(a, b, 1.0e-6); });
}

template<class MatrixPolicy, class SparseMatrixPolicy, class LinearSolverPolicy>
void testExtremeInitialValue(std::size_t number_of_blocks, double initial_value)
{
using FloatingPointType = typename MatrixPolicy::value_type;

const unsigned int seed = 12345;
std::default_random_engine generator(seed);

auto gen_bool = std::bind(std::uniform_int_distribution<>(0, 1), generator);
auto get_double = std::bind(std::lognormal_distribution(-2.0, 2.0), generator);
const size_t size = 30;

auto builder = SparseMatrixPolicy::Create(size).SetNumberOfBlocks(number_of_blocks).InitialValue(1e-30);
for (std::size_t i = 0; i < size; ++i)
for (std::size_t j = 0; j < size; ++j)
if (i == j || gen_bool())
builder = builder.WithElement(i, j);

SparseMatrixPolicy A(builder);
MatrixPolicy b(number_of_blocks, size, 0.0);
MatrixPolicy x(number_of_blocks, size, 0.0);

for (std::size_t i = 0; i < size; ++i)
for (std::size_t j = 0; j < size; ++j)
if (!A.IsZero(i, j))
for (std::size_t i_block = 0; i_block < number_of_blocks; ++i_block)
A[i_block][i][j] = get_double();

for (std::size_t i = 0; i < size; ++i)
for (std::size_t i_block = 0; i_block < number_of_blocks; ++i_block)
b[i_block][i] = get_double();

x = b;

// Only copy the data to the device when it is a CudaMatrix
CopyToDeviceSparse<SparseMatrixPolicy>(A);
CopyToDeviceDense<MatrixPolicy>(x);

LinearSolverPolicy solver = LinearSolverPolicy(A, initial_value);
auto lu = micm::LuDecomposition::GetLUMatrices<SparseMatrixPolicy>(A, initial_value);
auto lower_matrix = std::move(lu.first);
auto upper_matrix = std::move(lu.second);
bool is_singular = false;

// Only copy the data to the device when it is a CudaMatrix
CopyToDeviceSparse<SparseMatrixPolicy>(lower_matrix);
CopyToDeviceSparse<SparseMatrixPolicy>(upper_matrix);

solver.Factor(A, lower_matrix, upper_matrix, is_singular);
solver.template Solve<MatrixPolicy>(x, lower_matrix, upper_matrix);

// Only copy the data to the host when it is a CudaMatrix
CopyToHostDense<MatrixPolicy>(x);

check_results<FloatingPointType, MatrixPolicy, SparseMatrixPolicy>(
A, b, x, [&](const FloatingPointType a, const FloatingPointType b) -> void { EXPECT_NEAR(a, b, 1.0e-2); });
}

template<class MatrixPolicy, class SparseMatrixPolicy, class LinearSolverPolicy>
@@ -298,4 +356,4 @@ void testMarkowitzReordering()
EXPECT_GT(
orig_LU.first.RowIdsVector().size() + orig_LU.second.RowIdsVector().size(),
reordered_LU.first.RowIdsVector().size() + reordered_LU.second.RowIdsVector().size());
}
}