Set LU matrices to zero when jacobian is a zero element #666

Merged · 81 commits · Sep 24, 2024
Commits
1fc8931
pushing
K20shores Sep 20, 2024
a7c6c1f
pushing fix
K20shores Sep 20, 2024
8baafb2
removing unneccesary logic check
K20shores Sep 20, 2024
d930f73
adding cuda stuff
K20shores Sep 20, 2024
7a0819d
lowering tolerance
K20shores Sep 20, 2024
56caa4c
lowering tolerance
K20shores Sep 20, 2024
259f3c6
Merge branch 'zero_matrices' of https://github.com/NCAR/micm into zer…
K20shores Sep 20, 2024
3fb69a1
modified jit ludecomp
K20shores Sep 20, 2024
fec303e
raising tolerance
K20shores Sep 23, 2024
65f06e6
testing jit and cuda properly
K20shores Sep 23, 2024
31eea12
raising tolerance
K20shores Sep 23, 2024
08d4fbf
raising again
K20shores Sep 23, 2024
167fbc5
again
K20shores Sep 23, 2024
8b929ab
raising again
K20shores Sep 23, 2024
96eda4e
lowering tolerance
K20shores Sep 23, 2024
6f486db
adding prints to matrices
K20shores Sep 23, 2024
27b3b08
copy LU to host
K20shores Sep 23, 2024
bb304ce
printing A
K20shores Sep 23, 2024
11bcedc
sparsity
K20shores Sep 23, 2024
f46ed3e
bernoulli again
K20shores Sep 23, 2024
22ae288
manual engine
K20shores Sep 23, 2024
4b3c1a2
double
K20shores Sep 23, 2024
0584f92
thing
K20shores Sep 23, 2024
8a8a2ac
printing values
K20shores Sep 23, 2024
faf90ab
larger matrix
K20shores Sep 23, 2024
6fa8d3f
2 cells
K20shores Sep 23, 2024
2eb1345
now
K20shores Sep 23, 2024
fe508de
dense
K20shores Sep 23, 2024
0dc7855
20
K20shores Sep 23, 2024
a97e189
4000
K20shores Sep 23, 2024
d05be0e
things
K20shores Sep 23, 2024
a72f473
uncomment
K20shores Sep 23, 2024
ee48a96
uncomment
K20shores Sep 23, 2024
40a3714
print
K20shores Sep 23, 2024
16c2918
9
K20shores Sep 23, 2024
523eec7
8
K20shores Sep 23, 2024
f2af798
6
K20shores Sep 23, 2024
e3d4c8a
5
K20shores Sep 23, 2024
29c3f78
2e-6
K20shores Sep 23, 2024
302e66c
lu decomp
K20shores Sep 23, 2024
b4ec9b3
10
K20shores Sep 23, 2024
169b1fb
8
K20shores Sep 23, 2024
c55569a
0
K20shores Sep 23, 2024
7b763fe
comment
K20shores Sep 23, 2024
6c70384
checking
K20shores Sep 23, 2024
2437bce
uncomment
K20shores Sep 23, 2024
27fb806
7
K20shores Sep 23, 2024
0a713a3
1
K20shores Sep 23, 2024
c384b4e
10
K20shores Sep 23, 2024
fefc2ab
print
K20shores Sep 23, 2024
e56c553
print
K20shores Sep 23, 2024
3cf04fb
again
K20shores Sep 23, 2024
fe4082c
100
K20shores Sep 23, 2024
7c77437
data check
K20shores Sep 23, 2024
3f99c5c
remove check results
K20shores Sep 23, 2024
fe95eee
13
K20shores Sep 23, 2024
a371a19
16
K20shores Sep 23, 2024
c05ffef
eq
K20shores Sep 23, 2024
1a567d8
equal
K20shores Sep 23, 2024
cd278f6
uncomment
K20shores Sep 23, 2024
bd6fd8c
1 block
K20shores Sep 23, 2024
ff76e36
5
K20shores Sep 23, 2024
d6821cd
print
K20shores Sep 23, 2024
eaaa729
1
K20shores Sep 23, 2024
7ac2460
testing LU decomp specifically
K20shores Sep 24, 2024
844b32c
trying to correct cuda test
K20shores Sep 24, 2024
e660c01
lowering
K20shores Sep 24, 2024
4506588
lowering tolerance
K20shores Sep 24, 2024
09df2df
lowering again
K20shores Sep 24, 2024
a679aa4
thing
K20shores Sep 24, 2024
e1a1aad
variable
K20shores Sep 24, 2024
7c90c71
all tests pass on derecho
K20shores Sep 24, 2024
3ed3983
setting values to zero for lu decomp
K20shores Sep 24, 2024
f204f03
defaulting LU to 0 instead of 1e-30
K20shores Sep 24, 2024
8f69bae
copying block values to other blocks
K20shores Sep 24, 2024
4f741be
removing small value initialization
K20shores Sep 24, 2024
09f689f
correcting version copyright
K20shores Sep 24, 2024
a398305
using absolute error
K20shores Sep 24, 2024
0c4bce9
Merge branch 'zero_matrices' of https://github.com/NCAR/micm into zer…
K20shores Sep 24, 2024
7fc8efa
making index once
K20shores Sep 24, 2024
a82bd2d
camel case
K20shores Sep 24, 2024
18 changes: 18 additions & 0 deletions include/micm/jit/solver/jit_lu_decomposition.inl
@@ -89,6 +89,15 @@ namespace micm
func.SetArrayElement(func.arguments_[2], U_ptr_index, JitType::Double, A_val);
func.EndLoop(loop);
}
else {
auto loop = func.StartLoop("Uik_eq_zero_loop", 0, L);
llvm::Value *zero_val = llvm::ConstantFP::get(*(func.context_), llvm::APFloat(0.0));
llvm::Value *iUf = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, uik_nkj->first));
llvm::Value *U_ptr_index[1];
U_ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, iUf);
func.SetArrayElement(func.arguments_[2], U_ptr_index, JitType::Double, zero_val);
func.EndLoop(loop);
}
for (std::size_t ikj = 0; ikj < uik_nkj->second; ++ikj)
{
auto loop = func.StartLoop("Uik_seq_Lij_Ujk_loop", 0, L);
@@ -137,6 +146,15 @@ namespace micm
func.SetArrayElement(func.arguments_[1], L_ptr_index, JitType::Double, A_val);
func.EndLoop(loop);
}
else {
auto loop = func.StartLoop("Lki_eq_zero_loop", 0, L);
llvm::Value *zero_val = llvm::ConstantFP::get(*(func.context_), llvm::APFloat(0.0));
llvm::Value *iLf = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, lki_nkj->first));
llvm::Value *L_ptr_index[1];
L_ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, iLf);
func.SetArrayElement(func.arguments_[1], L_ptr_index, JitType::Double, zero_val);
func.EndLoop(loop);
}
for (std::size_t ikj = 0; ikj < lki_nkj->second; ++ikj)
{
auto loop = func.StartLoop("Lki_seq_Lkj_Uji_loop", 0, L);
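For readers less familiar with the LLVM builder calls in the JIT path above, the two new else-branches emit loops that simply zero-fill the vectorized U and L entries when the corresponding jacobian element is structurally zero. A rough scalar C++ equivalent of the emitted `Uik_eq_zero_loop` is sketched below; the function and variable names are illustrative, not part of micm's API:

```cpp
// Sketch only: approximate behavior of the JIT-emitted zero-fill loop.
// `cells` mirrors the loop bound L (the grid-cell/vector width) and `uik_first`
// mirrors uik_nkj->first, the flattened offset of the U element being filled.
#include <cstddef>

void ZeroFillUik(double* U, std::size_t uik_first, std::size_t cells)
{
  // Equivalent of StartLoop("Uik_eq_zero_loop", 0, L) followed by
  // SetArrayElement(U, loop.index_ + uik_first, 0.0) on each iteration.
  for (std::size_t i = 0; i < cells; ++i)
    U[uik_first + i] = 0.0;  // previously these elements kept their initial value
}
```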
7 changes: 7 additions & 0 deletions include/micm/solver/lu_decomposition.hpp
@@ -43,6 +43,13 @@ namespace micm
/// For the sparse matrix algorithm, the indices of non-zero terms are stored in
/// several arrays during construction. These arrays are iterated through during
/// calls to Decompose to do the actual decomposition.
/// Our LU decomposition only assigns the values of the jacobian to the LU matrices
/// when the *jacobian* element is nonzero. However, the sparsity pattern of the jacobian doesn't
/// necessarily match that of the LU matrices: there can be more nonzero elements in the LU matrices
/// than in the jacobian. When this happens, we still need to assign the value of the jacobian
/// to the LU matrix, and that value is implicitly zero when the sparsity patterns differ. The fill
/// values here perform this implicit assignment.
/// More detail in this issue: https://github.com/NCAR/micm/issues/625
class LuDecomposition
{
protected:
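To make the comment above concrete: an LU entry can be structurally nonzero (because of fill-in) even when the corresponding jacobian entry is zero, and before this change such entries kept whatever value the LU matrices were initialized with. The dense, self-contained C++ sketch below mimics the scalar Decompose loop with a toggle for the new behavior; it uses plain arrays rather than micm's sparse types, and an INFINITY initial value (as in the new AgnosticToInitialValue tests) to make the contamination obvious. Variable names such as `zero_when_A_is_zero` are illustrative only:

```cpp
// Sketch only (not micm code): Doolittle LU where, as in the sparse path,
// A is copied into U/L only where A is structurally nonzero. Without the
// explicit zeroing, the initial value of the LU matrices leaks into the result.
#include <cmath>
#include <cstdio>

int main()
{
  constexpr int n = 3;
  const double A[n][n] = { { 4, 1, 0 }, { 1, 3, 1 }, { 0, 1, 2 } };  // A[0][2] == A[2][0] == 0
  constexpr bool zero_when_A_is_zero = true;  // the behavior added by this PR
  const double initial_value = INFINITY;      // deliberately extreme, like the new tests

  double U[n][n], L[n][n];
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < n; ++j)
    {
      U[i][j] = initial_value;
      L[i][j] = (i == j) ? 1.0 : initial_value;
    }

  for (int i = 0; i < n; ++i)
  {
    for (int k = i; k < n; ++k)  // row i of U
    {
      if (A[i][k] != 0.0)
        U[i][k] = A[i][k];   // mirrors the "if (*(do_aik++))" branch
      else if (zero_when_A_is_zero)
        U[i][k] = 0.0;       // mirrors the new "else" branch
      for (int j = 0; j < i; ++j)
        U[i][k] -= L[i][j] * U[j][k];
    }
    for (int k = i + 1; k < n; ++k)  // column i of L
    {
      if (A[k][i] != 0.0)
        L[k][i] = A[k][i];
      else if (zero_when_A_is_zero)
        L[k][i] = 0.0;
      for (int j = 0; j < i; ++j)
        L[k][i] -= L[k][j] * U[j][i];
      L[k][i] /= U[i][i];
    }
  }

  // With zero_when_A_is_zero == false, INFINITY propagates through these entries.
  std::printf("U[0][2] = %g, L[2][0] = %g, U[2][2] = %g\n", U[0][2], L[2][0], U[2][2]);
  return 0;
}
```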
18 changes: 16 additions & 2 deletions include/micm/solver/lu_decomposition.inl
@@ -194,8 +194,12 @@ namespace micm
// Upper triangular matrix
for (std::size_t iU = 0; iU < inLU.second; ++iU)
{
if (*(do_aik++))
if (*(do_aik++)){
U_vector[uik_nkj->first] = A_vector[*(aik++)];
}
else {
U_vector[uik_nkj->first] = 0;
}
for (std::size_t ikj = 0; ikj < uik_nkj->second; ++ikj)
{
U_vector[uik_nkj->first] -= L_vector[lij_ujk->first] * U_vector[lij_ujk->second];
@@ -207,8 +211,12 @@ namespace micm
L_vector[(lki_nkj++)->first] = 1.0;
for (std::size_t iL = 0; iL < inLU.first; ++iL)
{
if (*(do_aki++))
if (*(do_aki++)){
L_vector[lki_nkj->first] = A_vector[*(aki++)];
}
else {
L_vector[lki_nkj->first] = 0;
}
for (std::size_t ikj = 0; ikj < lki_nkj->second; ++ikj)
{
L_vector[lki_nkj->first] -= L_vector[lkj_uji->first] * U_vector[lkj_uji->second];
@@ -275,6 +283,9 @@ namespace micm
std::copy(A_vector + *aik, A_vector + *aik + n_cells, U_vector + uik_nkj_first);
++aik;
}
else {
std::fill(U_vector + uik_nkj_first, U_vector + uik_nkj_first + n_cells, 0);
}
for (std::size_t ikj = 0; ikj < uik_nkj->second; ++ikj)
{
const std::size_t lij_ujk_first = lij_ujk->first;
@@ -297,6 +308,9 @@ namespace micm
std::copy(A_vector + *aki, A_vector + *aki + n_cells, L_vector + lki_nkj_first);
++aki;
}
else {
std::fill(L_vector + lki_nkj_first, L_vector + lki_nkj_first + n_cells, 0);
}
for (std::size_t ikj = 0; ikj < lki_nkj->second; ++ikj)
{
const std::size_t lkj_uji_first = lkj_uji->first;
3 changes: 2 additions & 1 deletion include/micm/solver/rosenbrock.inl
@@ -23,7 +23,7 @@ namespace micm
const double h_max = parameters_.h_max_ == 0.0 ? time_step : std::min(time_step, parameters_.h_max_);
const double h_start =
parameters_.h_start_ == 0.0 ? std::max(parameters_.h_min_, DELTA_MIN) : std::min(h_max, parameters_.h_start_);

SolverStats stats;

double present_time = 0.0;
@@ -243,6 +243,7 @@ namespace micm
{
double alpha = 1 / (H * gamma);
static_cast<const Derived*>(this)->AlphaMinusJacobian(state.jacobian_, alpha);

linear_solver_.Factor(state.jacobian_, state.lower_matrix_, state.upper_matrix_, singular);
stats.decompositions_ += 1;

2 changes: 0 additions & 2 deletions include/micm/version.hpp
@@ -1,5 +1,3 @@
// Copyright (C) 2023-2024 National Center for Atmospheric Research
// SPDX-License-Identifier: Apache-2.0
// clang-format off
#pragma once

9 changes: 9 additions & 0 deletions src/solver/lu_decomposition.cu
@@ -62,6 +62,10 @@ namespace micm
size_t A_idx = d_aik[aik_offset++] + tid;
d_U[U_idx] = d_A[A_idx];
}
else {
size_t U_idx = d_uik_nkj[uik_nkj_offset].first + tid;
d_U[U_idx] = 0;
}

for (size_t ikj = 0; ikj < d_uik_nkj[uik_nkj_offset].second; ++ikj)
{
@@ -85,6 +89,11 @@ namespace micm
size_t A_idx = d_aki[aki_offset++] + tid;
d_L[L_idx] = d_A[A_idx];
}
else {
size_t L_idx = d_lki_nkj[lki_nkj_offset].first + tid;
d_L[L_idx] = 0;
}

for (size_t ikj = 0; ikj < d_lki_nkj[lki_nkj_offset].second; ++ikj)
{
size_t L_idx_1 = d_lki_nkj[lki_nkj_offset].first + tid;
2 changes: 1 addition & 1 deletion test/unit/CMakeLists.txt
@@ -1,4 +1,4 @@
if(MICM_ENABLE_JSON)
if(MICM_ENABLE_CONFIG_READER)
add_subdirectory(configure)
endif()
if(MICM_ENABLE_CUDA)
4 changes: 2 additions & 2 deletions test/unit/configure/process/test_user_defined_config.cpp
@@ -46,9 +46,9 @@ TEST(UserDefinedConfig, ParseConfig)
// first reaction
{
EXPECT_EQ(process_vector[0].reactants_.size(), 3);
EXPECT_EQ(process_vector[0].reactants_[0].name_, "bar");
EXPECT_EQ(process_vector[0].reactants_[0].name_, "foo");
EXPECT_EQ(process_vector[0].reactants_[1].name_, "bar");
EXPECT_EQ(process_vector[0].reactants_[2].name_, "foo");
EXPECT_EQ(process_vector[0].reactants_[2].name_, "bar");
EXPECT_EQ(process_vector[0].products_.size(), 2);
EXPECT_EQ(process_vector[0].products_[0].first.name_, "baz");
EXPECT_EQ(process_vector[0].products_[0].second, 1.4);
14 changes: 14 additions & 0 deletions test/unit/cuda/solver/test_cuda_linear_solver.cpp
@@ -138,3 +138,17 @@ TEST(CudaLinearSolver, RandomMatrixVectorOrderingForGPU)
{
verify_gpu_against_cpu();
}

TEST(CudaLinearSolver, AgnosticToInitialValue)
{
double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
for(auto initial_value : initial_values)
{
testExtremeInitialValue<Group1CudaDenseMatrix, Group1CudaSparseMatrix, micm::CudaLinearSolver<Group1CudaSparseMatrix>>(1, initial_value);
testExtremeInitialValue<Group20CudaDenseMatrix, Group20CudaSparseMatrix, micm::CudaLinearSolver<Group20CudaSparseMatrix>>(20, initial_value);
testExtremeInitialValue<Group300CudaDenseMatrix, Group300CudaSparseMatrix, micm::CudaLinearSolver<Group300CudaSparseMatrix>>(
300, initial_value);
testExtremeInitialValue<Group4000CudaDenseMatrix, Group4000CudaSparseMatrix, micm::CudaLinearSolver<Group4000CudaSparseMatrix>>(
4000, initial_value);
}
}
8 changes: 8 additions & 0 deletions test/unit/jit/solver/test_jit_linear_solver.cpp
@@ -42,4 +42,12 @@ TEST(JitLinearSolver, DiagonalMatrixVectorOrdering)
testDiagonalMatrix<Group2VectorMatrix, Group2SparseVectorMatrix, micm::JitLinearSolver<2, Group2SparseVectorMatrix>>(2);
testDiagonalMatrix<Group3VectorMatrix, Group3SparseVectorMatrix, micm::JitLinearSolver<3, Group3SparseVectorMatrix>>(3);
testDiagonalMatrix<Group4VectorMatrix, Group4SparseVectorMatrix, micm::JitLinearSolver<4, Group4SparseVectorMatrix>>(4);
}

TEST(JitLinearSolver, AgnosticToInitialValue)
{
double initial_values[] = { -INFINITY, INFINITY };
for(auto initial_value : initial_values) {
testExtremeInitialValue<Group1VectorMatrix, Group1SparseVectorMatrix, micm::JitLinearSolver<1, Group1SparseVectorMatrix>>(1, initial_value);
}
}
18 changes: 18 additions & 0 deletions test/unit/solver/test_linear_solver.cpp
@@ -35,6 +35,13 @@ TEST(LinearSolver, DiagonalMarkowitzReorder)
testMarkowitzReordering<micm::Matrix<int>, SparseMatrixTest>();
}

TEST(LinearSolver, StandardOrderingAgnosticToInitialValue)
{
double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
for(auto initial_value : initial_values)
testExtremeInitialValue<DenseMatrixTest, SparseMatrixTest, micm::LinearSolver<SparseMatrixTest>>(5, initial_value);
}

using Group1VectorMatrix = micm::VectorMatrix<FloatingPointType, 1>;
using Group2VectorMatrix = micm::VectorMatrix<FloatingPointType, 2>;
using Group3VectorMatrix = micm::VectorMatrix<FloatingPointType, 3>;
@@ -61,6 +68,17 @@ TEST(LinearSolver, RandomMatrixVectorOrdering)
testRandomMatrix<Group4VectorMatrix, Group4SparseVectorMatrix, micm::LinearSolver<Group4SparseVectorMatrix>>(5);
}

TEST(LinearSolver, VectorOrderingAgnosticToInitialValue)
{
double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
for(auto initial_value : initial_values) {
testExtremeInitialValue<Group1VectorMatrix, Group1SparseVectorMatrix, micm::LinearSolver<Group1SparseVectorMatrix>>(5, initial_value);
testExtremeInitialValue<Group2VectorMatrix, Group2SparseVectorMatrix, micm::LinearSolver<Group2SparseVectorMatrix>>(5, initial_value);
testExtremeInitialValue<Group3VectorMatrix, Group3SparseVectorMatrix, micm::LinearSolver<Group3SparseVectorMatrix>>(5, initial_value);
testExtremeInitialValue<Group4VectorMatrix, Group4SparseVectorMatrix, micm::LinearSolver<Group4SparseVectorMatrix>>(5, initial_value);
}
}

TEST(LinearSolver, DiagonalMatrixVectorOrdering)
{
testDiagonalMatrix<Group1VectorMatrix, Group1SparseVectorMatrix, micm::LinearSolver<Group1SparseVectorMatrix>>(5);
62 changes: 60 additions & 2 deletions test/unit/solver/test_linear_solver_policy.hpp
@@ -201,7 +201,65 @@ void testRandomMatrix(std::size_t number_of_blocks)
CopyToHostDense<MatrixPolicy>(x);

check_results<FloatingPointType, MatrixPolicy, SparseMatrixPolicy>(
A, b, x, [&](const FloatingPointType a, const FloatingPointType b) -> void { EXPECT_NEAR(a, b, 1.0e-5); });
A, b, x, [&](const FloatingPointType a, const FloatingPointType b) -> void { EXPECT_NEAR(a, b, 1.0e-6); });
}

template<class MatrixPolicy, class SparseMatrixPolicy, class LinearSolverPolicy>
void testExtremeInitialValue(std::size_t number_of_blocks, double initial_value)
{
using FloatingPointType = typename MatrixPolicy::value_type;

const unsigned int seed = 12345;
std::default_random_engine generator(seed);

auto gen_bool = std::bind(std::uniform_int_distribution<>(0, 1), generator);
auto get_double = std::bind(std::lognormal_distribution(-2.0, 2.0), generator);
const size_t size = 30;

auto builder = SparseMatrixPolicy::Create(size).SetNumberOfBlocks(number_of_blocks).InitialValue(1e-30);
for (std::size_t i = 0; i < size; ++i)
for (std::size_t j = 0; j < size; ++j)
if (i == j || gen_bool())
builder = builder.WithElement(i, j);

SparseMatrixPolicy A(builder);
MatrixPolicy b(number_of_blocks, size, 0.0);
MatrixPolicy x(number_of_blocks, size, 0.0);

for (std::size_t i = 0; i < size; ++i)
for (std::size_t j = 0; j < size; ++j)
if (!A.IsZero(i, j))
for (std::size_t i_block = 0; i_block < number_of_blocks; ++i_block)
A[i_block][i][j] = get_double();

for (std::size_t i = 0; i < size; ++i)
for (std::size_t i_block = 0; i_block < number_of_blocks; ++i_block)
b[i_block][i] = get_double();

x = b;

// Only copy the data to the device when it is a CudaMatrix
CopyToDeviceSparse<SparseMatrixPolicy>(A);
CopyToDeviceDense<MatrixPolicy>(x);

LinearSolverPolicy solver = LinearSolverPolicy(A, initial_value);
auto lu = micm::LuDecomposition::GetLUMatrices<SparseMatrixPolicy>(A, initial_value);
auto lower_matrix = std::move(lu.first);
auto upper_matrix = std::move(lu.second);
bool is_singular = false;

// Only copy the data to the device when it is a CudaMatrix
CopyToDeviceSparse<SparseMatrixPolicy>(lower_matrix);
CopyToDeviceSparse<SparseMatrixPolicy>(upper_matrix);

solver.Factor(A, lower_matrix, upper_matrix, is_singular);
solver.template Solve<MatrixPolicy>(x, lower_matrix, upper_matrix);

// Only copy the data to the host when it is a CudaMatrix
CopyToHostDense<MatrixPolicy>(x);

check_results<FloatingPointType, MatrixPolicy, SparseMatrixPolicy>(
A, b, x, [&](const FloatingPointType a, const FloatingPointType b) -> void { EXPECT_NEAR(a, b, 1.0e-2); });
}

template<class MatrixPolicy, class SparseMatrixPolicy, class LinearSolverPolicy>
@@ -298,4 +356,4 @@ void testMarkowitzReordering()
EXPECT_GT(
orig_LU.first.RowIdsVector().size() + orig_LU.second.RowIdsVector().size(),
reordered_LU.first.RowIdsVector().size() + reordered_LU.second.RowIdsVector().size());
}
}