diff --git a/.github/workflows/run_tests/action.yml b/.github/workflows/run_tests/action.yml index 17a5cf769..928991ce3 100644 --- a/.github/workflows/run_tests/action.yml +++ b/.github/workflows/run_tests/action.yml @@ -23,6 +23,8 @@ runs: mpirun -n 3 --oversubscribe tests/test_StaticSVD ./tests/test_IncrementalSVDBrand mpirun -n 3 --oversubscribe tests/test_IncrementalSVDBrand + ./tests/test_NNLS + mpirun -n 3 --oversubscribe tests/test_NNLS shell: bash - name: Basis dataset update test run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index 8356f3e92..f3f1eaf15 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -275,6 +275,7 @@ if(GTEST_FOUND) IncrementalSVD IncrementalSVDBrand GreedyCustomSampler + NNLS basis_conversion) foreach(stem IN LISTS unit_test_stems) add_executable(test_${stem} unit_tests/test_${stem}.cpp) diff --git a/lib/linalg/NNLS.cpp b/lib/linalg/NNLS.cpp index 5c794f1a2..cc73e4456 100644 --- a/lib/linalg/NNLS.cpp +++ b/lib/linalg/NNLS.cpp @@ -58,15 +58,18 @@ extern "C" { NNLSSolver::NNLSSolver(double const_tol, int min_nnz, int max_nnz, int verbosity, double res_change_termination_tol, - double zero_tol, int n_outer, int n_inner) + double zero_tol, int n_outer, int n_inner, + const NNLS_termination criterion) : const_tol_(const_tol), min_nnz_(min_nnz), max_nnz_(max_nnz), verbosity_(verbosity), res_change_termination_tol_(res_change_termination_tol), zero_tol_(zero_tol), n_outer_(n_outer), n_inner_(n_inner), - n_proc_max_for_partial_matrix_(15), NNLS_qrres_on_(false), - qr_residual_mode_(QRresidualMode::hybrid) + qr_residual_mode_(QRresidualMode::hybrid), + d_criterion(criterion) { + CAROM_VERIFY((d_criterion == NNLS_termination::L2) + || (d_criterion == NNLS_termination::LINF)); MPI_Comm_rank(MPI_COMM_WORLD, &d_rank); MPI_Comm_size(MPI_COMM_WORLD, &d_num_procs); std::cout << "NNLSSolver init on rank " << d_rank << " out of " @@ -147,6 +150,13 @@ void NNLSSolver::solve_parallel_with_scalapack(const Matrix& matTrans, rhs_halfgap -= rhs_lb; rhs_halfgap *= 0.5; + double l2norm_threshold; + if (d_criterion == NNLS_termination::L2) { + l2norm_threshold = rhs_halfgap.norm(); + if (d_rank == 0 && verbosity_ > 1) + printf("L2 norm threshold: %.5e\n", l2norm_threshold); + } + Vector rhs_avg_glob(rhs_avg); Vector rhs_halfgap_glob(rhs_halfgap); @@ -159,7 +169,7 @@ void NNLSSolver::solve_parallel_with_scalapack(const Matrix& matTrans, char notrans = 'N'; char layout = 'R'; - int n_proc = std::min(n_proc_max_for_partial_matrix_,d_num_procs); + int n_proc = d_num_procs; int nb = 3; // block column size // initialize blacs process grid @@ -168,8 +178,8 @@ void NNLSSolver::solve_parallel_with_scalapack(const Matrix& matTrans, int n_dist_loc_max = 0; // maximum number of columns in distributed matrix if (d_rank < n_proc) { - // m is the maximum number of columns in the global matrix - n_dist_loc_max = ((m/nb + 1)/n_proc + 1)*nb; + // n_tot is the maximum number of columns in the global (not transposed) matrix + n_dist_loc_max = ((n_tot/nb + 1)/n_proc + 1)*nb; } std::vector mu_max_array(d_num_procs); @@ -254,6 +264,7 @@ void NNLSSolver::solve_parallel_with_scalapack(const Matrix& matTrans, double mumax_glob; double rmax; + bool tolerance_met; for (unsigned int oiter = 0; oiter < n_outer_; ++oiter) { stalledFlag = 0; @@ -265,12 +276,17 @@ void NNLSSolver::solve_parallel_with_scalapack(const Matrix& matTrans, // This norm of a non-distributed vector is identical on all ranks. l2_res_hist(oiter) = res_glob.norm(); + if (d_criterion == NNLS_termination::LINF) + tolerance_met = (rmax <= const_tol_); + else if (d_criterion == NNLS_termination::L2) + tolerance_met = (l2_res_hist(oiter) <= l2norm_threshold); + if (verbosity_ > 1 && d_rank == 0) { printf("%d %d %d %d %d %.15e %.15e\n", oiter, n_total_inner_iter, m, n_tot, n_glob, rmax, l2_res_hist(oiter)); fflush(stdout); } - if (rmax <= const_tol_ && n_glob >= min_nnz_cap) { + if (tolerance_met && n_glob >= min_nnz_cap) { if (d_rank == 0 && verbosity_ > 1) { printf("target tolerance met\n"); fflush(stdout); diff --git a/lib/linalg/NNLS.h b/lib/linalg/NNLS.h index 7ccf3a9b9..459e43379 100644 --- a/lib/linalg/NNLS.h +++ b/lib/linalg/NNLS.h @@ -19,6 +19,16 @@ namespace CAROM { +/** + * @brief Termination criterion for NNLS solver: + * LINF: L-infinity norm (maximum value) of the residual + * L2: L2 norm of the residual + */ +enum class NNLS_termination { + LINF, + L2 +}; + /** * \class NNLSSolver * Class for solving non-negative least-squares problems, cf. T. Chapman et al, @@ -34,7 +44,8 @@ class NNLSSolver { int verbosity=0, double res_change_termination_tol=1.0e-4, double zero_tol=1.0e-14, int n_outer=100000, - int n_inner=100000); + int n_inner=100000, + const NNLS_termination criterion=NNLS_termination::LINF); /** * Destructor*/ @@ -115,12 +126,6 @@ class NNLSSolver { */ double res_change_termination_tol_; - /** - * @brief Maximum number of processors used in the partial matrix containing - * only the nonzero quadrature points. - */ - int n_proc_max_for_partial_matrix_; - bool normalize_const_; bool QR_reduce_const_; bool NNLS_qrres_on_; @@ -128,6 +133,8 @@ class NNLSSolver { int d_num_procs; int d_rank; + + NNLS_termination d_criterion; }; } diff --git a/unit_tests/test_NNLS.cpp b/unit_tests/test_NNLS.cpp new file mode 100644 index 000000000..cc501127e --- /dev/null +++ b/unit_tests/test_NNLS.cpp @@ -0,0 +1,244 @@ +/****************************************************************************** + * + * Copyright (c) 2013-2024, Lawrence Livermore National Security, LLC + * and other libROM project developers. See the top-level COPYRIGHT + * file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + * + *****************************************************************************/ + +#ifdef CAROM_HAS_GTEST + +#include +#include "linalg/NNLS.h" +#include +#include +#include +#include // for memcpy +#include +#include "mpi.h" +#include "utils/mpi_utils.h" + +/** + * Simple smoke test to make sure Google Test is properly linked + */ +TEST(GoogleTestFramework, GoogleTestFrameworkFound) { + SUCCEED(); +} + +TEST(NNLS, solve_with_LINF) +{ + int nproc; + int rank; + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + const int nrow = 50; + const int ncol = 200; + const int ncol_local = CAROM::split_dimension(ncol); + std::vector row_offset(nproc + 1); + const int total_cols = CAROM::get_global_offsets(ncol_local, row_offset, + MPI_COMM_WORLD); + const double rel_tol = 0.05; + const double nnls_tol = 1.0e-11; + const int true_nnz = 44; + + std::default_random_engine generator; + generator.seed( + 1234); // fix the seed to keep the same result for different nproc. + std::uniform_real_distribution<> uniform_distribution(0.0, 1.0); + std::normal_distribution normal_distribution(0.0, 1.0); + + // distribute from a global matrix to keep the same system for different nproc. + CAROM::Matrix Gt(ncol, nrow, false); + for (int i = 0; i < ncol; i++) + for (int j = 0; j < nrow; j++) + Gt(i, j) = normal_distribution(generator); + Gt.distribute(ncol_local); + + CAROM::Vector fom_sol(ncol_local, true); + CAROM::Vector rom_sol(ncol_local, true); + CAROM::Vector rhs(nrow, false); + + // distribute from a global matrix to keep the same system for different nproc. + CAROM::Vector fom_sol_serial(ncol, false); + for (int c = 0; c < ncol; c++) + fom_sol_serial(c) = uniform_distribution(generator); + for (int c = 0; c < ncol_local; c++) + fom_sol(c) = fom_sol_serial(row_offset[rank] + c); + + Gt.transposeMult(fom_sol, rhs); + rom_sol = 0.0; + + CAROM::Vector rhs_lb(rhs); + CAROM::Vector rhs_ub(rhs); + + for (int i = 0; i < rhs.dim(); ++i) + { + double delta = rel_tol * abs(rhs(i)); + rhs_lb(i) -= delta; + rhs_ub(i) += delta; + } + + CAROM::NNLSSolver nnls(nnls_tol, 0, 0, 2); + nnls.solve_parallel_with_scalapack(Gt, rhs_lb, rhs_ub, rom_sol); + + int nnz = 0; + for (int i = 0; i < rom_sol.dim(); ++i) + { + if (rom_sol(i) != 0.0) + { + nnz++; + } + } + + std::cout << rank << ": Number of nonzeros in NNLS solution: " << nnz + << ", out of " << rom_sol.dim() << std::endl; + + MPI_Allreduce(MPI_IN_PLACE, &nnz, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + if (rank == 0) + std::cout << "Global number of nonzeros in NNLS solution: " << nnz << std::endl; + EXPECT_EQ(nnz, true_nnz); + + // Check residual of NNLS solution + CAROM::Vector res(Gt.numColumns(), false); + Gt.transposeMult(rom_sol, res); + + const double normGsol = res.norm(); + const double normRHS = rhs.norm(); + + res -= rhs; + const double relNorm = res.norm() / std::max(normGsol, normRHS); + std::cout << rank << ": relative residual norm for NNLS solution of Gs = Gw: " + << relNorm << std::endl; + + double max_error = 0.0; + for (int k = 0; k < res.dim(); k++) + { + max_error = std::max(max_error, abs(res(k) / rhs(k))); + // printf("rank %d, error(%d): %.3e\n", rank, k, abs(res(k) / rhs(k))); + EXPECT_TRUE(abs(res(k)) < rel_tol * abs(rhs(k)) + nnls_tol); + } + if (rank == 0) + printf("maximum error: %.5e\n", max_error); +} + +TEST(NNLS, solve_with_L2) +{ + int nproc; + int rank; + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + const int nrow = 30; + const int ncol = 100; + const int ncol_local = CAROM::split_dimension(ncol); + std::vector row_offset(nproc + 1); + const int total_cols = CAROM::get_global_offsets(ncol_local, row_offset, + MPI_COMM_WORLD); + const double rel_tol = 0.05; + const double nnls_tol = 1.0e-11; + const int true_nnz = 21; + + std::default_random_engine generator; + generator.seed( + 1234); // fix the seed to keep the same result for different nproc. + std::uniform_real_distribution<> uniform_distribution(0.0, 1.0); + std::normal_distribution normal_distribution(0.0, 1.0); + + // distribute from a global matrix to keep the same system for different nproc. + CAROM::Matrix Gt(ncol, nrow, false); + for (int i = 0; i < ncol; i++) + for (int j = 0; j < nrow; j++) + Gt(i, j) = normal_distribution(generator); + Gt.distribute(ncol_local); + + CAROM::Vector fom_sol(ncol_local, true); + CAROM::Vector rom_sol(ncol_local, true); + CAROM::Vector rhs(nrow, false); + + // distribute from a global matrix to keep the same system for different nproc. + CAROM::Vector fom_sol_serial(ncol, false); + for (int c = 0; c < ncol; c++) + fom_sol_serial(c) = uniform_distribution(generator); + for (int c = 0; c < ncol_local; c++) + fom_sol(c) = fom_sol_serial(row_offset[rank] + c); + + Gt.transposeMult(fom_sol, rhs); + rom_sol = 0.0; + + CAROM::Vector rhs_lb(rhs); + CAROM::Vector rhs_ub(rhs); + + for (int i = 0; i < rhs.dim(); ++i) + { + double delta = rel_tol * abs(rhs(i)); + rhs_lb(i) -= delta; + rhs_ub(i) += delta; + } + + CAROM::NNLSSolver nnls(nnls_tol, 0, 0, 2, 1.0e-4, 1.0e-14, 100000, 100000, + CAROM::NNLS_termination::L2); + nnls.solve_parallel_with_scalapack(Gt, rhs_lb, rhs_ub, rom_sol); + + int nnz = 0; + for (int i = 0; i < rom_sol.dim(); ++i) + { + if (rom_sol(i) != 0.0) + { + nnz++; + } + } + + std::cout << rank << ": Number of nonzeros in NNLS solution: " << nnz + << ", out of " << rom_sol.dim() << std::endl; + + MPI_Allreduce(MPI_IN_PLACE, &nnz, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + if (rank == 0) + std::cout << "Global number of nonzeros in NNLS solution: " << nnz << std::endl; + EXPECT_EQ(nnz, true_nnz); + + // Check residual of NNLS solution + CAROM::Vector res(Gt.numColumns(), false); + Gt.transposeMult(rom_sol, res); + + const double normGsol = res.norm(); + const double normRHS = rhs.norm(); + + res -= rhs; + const double relNorm = res.norm() / std::max(normGsol, normRHS); + std::cout << rank << ": relative residual norm for NNLS solution of Gs = Gw: " + << relNorm << std::endl; + + EXPECT_TRUE(res.norm() < rel_tol * normRHS + nnls_tol); + double max_error = 0.0; + for (int k = 0; k < res.dim(); k++) + { + max_error = std::max(max_error, abs(res(k) / rhs(k))); + } + if (rank == 0) + printf("maximum error: %.5e\n", max_error); +} + +int main(int argc, char* argv[]) +{ + ::testing::InitGoogleTest(&argc, argv); + MPI_Init(&argc, &argv); + int result = RUN_ALL_TESTS(); + MPI_Finalize(); + return result; +} + +#else // #ifndef CAROM_HAS_GTEST + +int main() +{ + std::cout << "libROM was compiled without Google Test support, so unit " + << "tests have been disabled. To enable unit tests, compile " + << "libROM with Google Test support." << std::endl; +} + +#endif // #endif CAROM_HAS_GTEST \ No newline at end of file