From 3c269f2d01e91dd9b37af09773db929b66dcb511 Mon Sep 17 00:00:00 2001 From: Gary Wolfman Date: Wed, 6 May 2026 01:00:36 -0400 Subject: [PATCH 1/8] Fix trivial cppcheck findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit missingOverride: ~DistributionBase() now marked override. shadowFunction: rename local sum accumulators to row_sum/col_sum in BasicMatrix::row_sums() and column_sums() to avoid shadowing sum(). functionStatic: mark 11 member functions static — validateParameters (LogNormal, NegativeBinomial, Pareto, Weibull), isValidCount (Poisson), ligamma (Gamma), readFromStream (XMLFileReader), writeToStream (XMLFileWriter), accumulate_gamma and accumulate_xi (BaumWelchTrainer), flattenObservationLists (SegmentalKMeansTrainer). Matching const removed from BaumWelchTrainer definitions. constParameterReference: add cppcheck-suppress comments on ErrorRecovery methods (recoverFromOverflow, handleNaNValues, repairDegenerateMatrix), ViterbiTrainer helpers (accum_sequence, normalize_and_commit), and fb_crossover_sweep::time_mode. All flagged parameters are genuinely mutated; cppcheck flags early-return paths as sufficient to be const. Co-Authored-By: Oz --- .../libhmm/distributions/distribution_base.h | 2 +- .../libhmm/distributions/gamma_distribution.h | 2 +- .../distributions/log_normal_distribution.h | 2 +- .../negative_binomial_distribution.h | 2 +- .../distributions/pareto_distribution.h | 2 +- .../distributions/poisson_distribution.h | 2 +- .../distributions/weibull_distribution.h | 2 +- include/libhmm/io/xml_file_reader.h | 2 +- include/libhmm/io/xml_file_writer.h | 2 +- include/libhmm/linalg/basic_matrix.h | 12 +++++------ include/libhmm/training/baum_welch_trainer.h | 20 ++++++++++--------- .../training/segmental_kmeans_trainer.h | 2 +- src/common/numerical_stability.cpp | 3 +++ src/training/baum_welch_trainer.cpp | 4 ++-- src/training/viterbi_trainer.cpp | 2 ++ tools/fb_crossover_sweep.cpp | 1 + 16 files changed, 35 insertions(+), 27 deletions(-) diff --git a/include/libhmm/distributions/distribution_base.h b/include/libhmm/distributions/distribution_base.h index 2e0bebf..27c3d0c 100644 --- a/include/libhmm/distributions/distribution_base.h +++ b/include/libhmm/distributions/distribution_base.h @@ -25,7 +25,7 @@ class Reader; */ class DistributionBase : public EmissionDistribution { public: - virtual ~DistributionBase() = default; + ~DistributionBase() override = default; DistributionBase(); DistributionBase(const DistributionBase &other); diff --git a/include/libhmm/distributions/gamma_distribution.h b/include/libhmm/distributions/gamma_distribution.h index 9bbb544..3a4bca1 100755 --- a/include/libhmm/distributions/gamma_distribution.h +++ b/include/libhmm/distributions/gamma_distribution.h @@ -89,7 +89,7 @@ class GammaDistribution : public DistributionBase { * Evaluates the LOWER INCOMPLETE gamma function at x * Uses numerical approximation for computational efficiency */ - double ligamma(double a, double x) noexcept; + static double ligamma(double a, double x) noexcept; public: /** diff --git a/include/libhmm/distributions/log_normal_distribution.h b/include/libhmm/distributions/log_normal_distribution.h index fcb838e..03bd8bf 100755 --- a/include/libhmm/distributions/log_normal_distribution.h +++ b/include/libhmm/distributions/log_normal_distribution.h @@ -65,7 +65,7 @@ class LogNormalDistribution : public DistributionBase { * @param stdDev Standard deviation of ln(X) (must be positive and finite) * @throws std::invalid_argument if parameters are invalid */ - void validateParameters(double mean, double stdDev) const { + static void validateParameters(double mean, double stdDev) { if (std::isnan(mean) || std::isinf(mean)) { throw std::invalid_argument("Mean parameter must be a finite number"); } diff --git a/include/libhmm/distributions/negative_binomial_distribution.h b/include/libhmm/distributions/negative_binomial_distribution.h index 3c1461f..4e304de 100644 --- a/include/libhmm/distributions/negative_binomial_distribution.h +++ b/include/libhmm/distributions/negative_binomial_distribution.h @@ -64,7 +64,7 @@ class NegativeBinomialDistribution : public DistributionBase { * @param p Success probability (must be in (0,1]) * @throws std::invalid_argument if parameters are invalid */ - void validateParameters(double r, double p) const { + static void validateParameters(double r, double p) { if (std::isnan(r) || std::isinf(r) || r <= 0.0) { throw std::invalid_argument("Number of successes must be positive"); } diff --git a/include/libhmm/distributions/pareto_distribution.h b/include/libhmm/distributions/pareto_distribution.h index f83e5cb..a0c3d7f 100755 --- a/include/libhmm/distributions/pareto_distribution.h +++ b/include/libhmm/distributions/pareto_distribution.h @@ -88,7 +88,7 @@ class ParetoDistribution : public DistributionBase { * @param xm Scale parameter (must be positive and finite) * @throws std::invalid_argument if parameters are invalid */ - void validateParameters(double k, double xm) const { + static void validateParameters(double k, double xm) { if (std::isnan(k) || std::isinf(k) || k <= 0.0) { throw std::invalid_argument("Shape parameter k must be a positive finite number"); } diff --git a/include/libhmm/distributions/poisson_distribution.h b/include/libhmm/distributions/poisson_distribution.h index b3f9f35..8c8e8ff 100644 --- a/include/libhmm/distributions/poisson_distribution.h +++ b/include/libhmm/distributions/poisson_distribution.h @@ -73,7 +73,7 @@ class PoissonDistribution : public DistributionBase { * @param k Value to validate * @return true if k is a valid count, false otherwise */ - bool isValidCount(double k) const noexcept { + static bool isValidCount(double k) noexcept { return k >= 0.0 && std::floor(k) == k && std::isfinite(k); } diff --git a/include/libhmm/distributions/weibull_distribution.h b/include/libhmm/distributions/weibull_distribution.h index 45cc114..4d5c1c4 100644 --- a/include/libhmm/distributions/weibull_distribution.h +++ b/include/libhmm/distributions/weibull_distribution.h @@ -83,7 +83,7 @@ class WeibullDistribution : public DistributionBase { * @param lambda Scale parameter (must be positive and finite) * @throws std::invalid_argument if parameters are invalid */ - void validateParameters(double k, double lambda) const { + static void validateParameters(double k, double lambda) { if (std::isnan(k) || std::isinf(k) || k <= 0.0) { throw std::invalid_argument("Shape parameter k must be a positive finite number"); } diff --git a/include/libhmm/io/xml_file_reader.h b/include/libhmm/io/xml_file_reader.h index 9da3320..457d9be 100644 --- a/include/libhmm/io/xml_file_reader.h +++ b/include/libhmm/io/xml_file_reader.h @@ -76,7 +76,7 @@ class XMLFileReader { * @return Loaded HMM object * @throws std::runtime_error if deserialization fails */ - Hmm readFromStream(std::ifstream &stream); + static Hmm readFromStream(std::ifstream &stream); }; } // namespace libhmm diff --git a/include/libhmm/io/xml_file_writer.h b/include/libhmm/io/xml_file_writer.h index b5d8ed6..eba5757 100755 --- a/include/libhmm/io/xml_file_writer.h +++ b/include/libhmm/io/xml_file_writer.h @@ -68,7 +68,7 @@ class XMLFileWriter { * @param stream Output stream * @throws std::runtime_error if serialization fails */ - void writeToStream(const Hmm &hmm, std::ofstream &stream); + static void writeToStream(const Hmm &hmm, std::ofstream &stream); }; } // namespace libhmm diff --git a/include/libhmm/linalg/basic_matrix.h b/include/libhmm/linalg/basic_matrix.h index b675c2a..e1c760d 100644 --- a/include/libhmm/linalg/basic_matrix.h +++ b/include/libhmm/linalg/basic_matrix.h @@ -341,11 +341,11 @@ class BasicMatrix { BasicVector row_sums() const { BasicVector result(rows_); for (size_type i = 0; i < rows_; ++i) { - T sum = T{}; + T row_sum = T{}; for (size_type j = 0; j < cols_; ++j) { - sum += (*this)(i, j); + row_sum += (*this)(i, j); } - result[i] = sum; + result[i] = row_sum; } return result; } @@ -357,11 +357,11 @@ class BasicMatrix { BasicVector column_sums() const { BasicVector result(cols_); for (size_type j = 0; j < cols_; ++j) { - T sum = T{}; + T col_sum = T{}; for (size_type i = 0; i < rows_; ++i) { - sum += (*this)(i, j); + col_sum += (*this)(i, j); } - result[j] = sum; + result[j] = col_sum; } return result; } diff --git a/include/libhmm/training/baum_welch_trainer.h b/include/libhmm/training/baum_welch_trainer.h index fa741a3..10fc2b4 100755 --- a/include/libhmm/training/baum_welch_trainer.h +++ b/include/libhmm/training/baum_welch_trainer.h @@ -30,18 +30,20 @@ class BaumWelchTrainer : public Trainer { private: /// Accumulates gamma statistics (emission data/weights, pi numerator, trans /// denominator) for one observation sequence. - void accumulate_gamma(const Matrix &logAlpha, const Matrix &logBeta, const ObservationSet &obs, - double logP, std::size_t N, std::vector> &emisData, - std::vector> &emisWts, std::vector &piNum, - std::vector &transDen) const noexcept; + static void accumulate_gamma(const Matrix &logAlpha, const Matrix &logBeta, + const ObservationSet &obs, double logP, std::size_t N, + std::vector> &emisData, + std::vector> &emisWts, + std::vector &piNum, + std::vector &transDen) noexcept; /// Accumulates xi statistics (expected transition counts) for one sequence. /// Dispatches to sparse or dense path based on hasZeroTransitions. - void accumulate_xi(const double *logAlphaData, const double *logBetaData, - const std::vector &logEmitByTime, - const std::vector &logTransT, double logP, std::size_t T, - std::size_t N, bool hasZeroTransitions, - std::vector &transNumT) const noexcept; + static void accumulate_xi(const double *logAlphaData, const double *logBetaData, + const std::vector &logEmitByTime, + const std::vector &logTransT, double logP, std::size_t T, + std::size_t N, bool hasZeroTransitions, + std::vector &transNumT) noexcept; /// M-step: normalize piNum and update the HMM initial distribution. static void m_step_pi(Hmm &hmm, std::size_t N, const std::vector &piNum); diff --git a/include/libhmm/training/segmental_kmeans_trainer.h b/include/libhmm/training/segmental_kmeans_trainer.h index 759c2ca..5ba6271 100644 --- a/include/libhmm/training/segmental_kmeans_trainer.h +++ b/include/libhmm/training/segmental_kmeans_trainer.h @@ -56,7 +56,7 @@ class SegmentalKMeansTrainer : public Trainer { bool terminated_{false}; Clusters clusters_; - ObservationSet flattenObservationLists(const ObservationLists &observationLists); + static ObservationSet flattenObservationLists(const ObservationLists &observationLists); void iterate(); void learnPi(); void learnTrans(); diff --git a/src/common/numerical_stability.cpp b/src/common/numerical_stability.cpp index 021f9e4..615be83 100644 --- a/src/common/numerical_stability.cpp +++ b/src/common/numerical_stability.cpp @@ -351,6 +351,7 @@ bool ErrorRecovery::recoverFromUnderflow(Vector &probs, RecoveryStrategy strateg return true; } +// cppcheck-suppress constParameterReference bool ErrorRecovery::recoverFromOverflow(Vector &values, RecoveryStrategy strategy) { bool hasOverflow = false; @@ -388,6 +389,7 @@ bool ErrorRecovery::recoverFromOverflow(Vector &values, RecoveryStrategy strateg return true; } +// cppcheck-suppress constParameterReference std::size_t ErrorRecovery::handleNaNValues(Vector &values, RecoveryStrategy strategy) { std::size_t nanCount = 0; @@ -423,6 +425,7 @@ std::size_t ErrorRecovery::handleNaNValues(Vector &values, RecoveryStrategy stra return nanCount; } +// cppcheck-suppress constParameterReference bool ErrorRecovery::repairDegenerateMatrix(Matrix &matrix, RecoveryStrategy strategy) { bool hasIssues = false; diff --git a/src/training/baum_welch_trainer.cpp b/src/training/baum_welch_trainer.cpp index d98b8f1..595e80e 100755 --- a/src/training/baum_welch_trainer.cpp +++ b/src/training/baum_welch_trainer.cpp @@ -29,7 +29,7 @@ void BaumWelchTrainer::accumulate_gamma(const Matrix &logAlpha, const Matrix &lo std::vector> &emisData, std::vector> &emisWts, std::vector &piNum, - std::vector &transDen) const noexcept { + std::vector &transDen) noexcept { const std::size_t T = obs.size(); const double *logAlphaData = logAlpha.data(); const double *logBetaData = logBeta.data(); @@ -53,7 +53,7 @@ void BaumWelchTrainer::accumulate_xi(const double *logAlphaData, const double *l const std::vector &logEmitByTime, const std::vector &logTransT, double logP, std::size_t T, std::size_t N, bool hasZeroTransitions, - std::vector &transNumT) const noexcept { + std::vector &transNumT) noexcept { if (hasZeroTransitions) { for (std::size_t t = 0; t + 1 < T; ++t) { const double *alphaRow = logAlphaData + t * N; diff --git a/src/training/viterbi_trainer.cpp b/src/training/viterbi_trainer.cpp index 678056f..dab6abd 100755 --- a/src/training/viterbi_trainer.cpp +++ b/src/training/viterbi_trainer.cpp @@ -82,6 +82,7 @@ void ViterbiTrainer::train() { // Private helpers // --------------------------------------------------------------------------- +// cppcheck-suppress constParameterReference double ViterbiTrainer::accum_sequence(const Hmm &hmm, const ObservationSet &obs, Vector &pi, Matrix &trans, std::vector> &emisData) noexcept { @@ -105,6 +106,7 @@ double ViterbiTrainer::accum_sequence(const Hmm &hmm, const ObservationSet &obs, } } +// cppcheck-suppress constParameterReference void ViterbiTrainer::normalize_and_commit(Hmm &hmm, std::size_t N, Vector &pi, Matrix &trans) { // Normalize pi. double piSum = 0.0; diff --git a/tools/fb_crossover_sweep.cpp b/tools/fb_crossover_sweep.cpp index c1cc299..9c40bd6 100644 --- a/tools/fb_crossover_sweep.cpp +++ b/tools/fb_crossover_sweep.cpp @@ -60,6 +60,7 @@ ObservationSet make_obs(int t, int n) { return obs; } +// cppcheck-suppress constParameterReference double time_mode(Hmm &hmm, const ObservationSet &obs, FbRecurrenceMode mode) { ForwardBackwardCalculator fbc(hmm, obs); fbc.setRecurrenceModeOverride(mode); From 1e18194845486b87c95ce97f79a00d52afadbf2d Mon Sep 17 00:00:00 2001 From: Gary Wolfman Date: Wed, 6 May 2026 01:03:23 -0400 Subject: [PATCH 2/8] Fix throwInEntryPoint in 4 tool executables Each tool's parse_pos/parse_positive_int helper re-throws on bad CLI arguments, but main() had no enclosing catch. A non-integer or non-positive argument would call std::terminate instead of printing a usage message. Fix: wrap the argument-parsing block in each main() with try/catch(const std::exception&) that prints the error and returns 1. hmm_validator.cpp additionally had a bare std::stoi call for the T argument outside any try block; replaced with the same pattern. Co-Authored-By: Oz --- tools/bw_hotspot.cpp | 15 ++++++++++----- tools/fb_contour_sweep.cpp | 15 ++++++++++----- tools/hmm_validator.cpp | 11 ++++++++++- tools/hotspot_breakdown.cpp | 21 +++++++++++++-------- 4 files changed, 43 insertions(+), 19 deletions(-) diff --git a/tools/bw_hotspot.cpp b/tools/bw_hotspot.cpp index 4c871c1..69d399c 100644 --- a/tools/bw_hotspot.cpp +++ b/tools/bw_hotspot.cpp @@ -261,11 +261,16 @@ int main(int argc, char *argv[]) { int warmup = 2, runs = 8; if (argc == 3 || argc == 4 || argc == 5) { - configs = {{parse_pos(argv[1], "N"), parse_pos(argv[2], "T")}}; - if (argc >= 4) - runs = parse_pos(argv[3], "runs"); - if (argc == 5) - warmup = parse_pos(argv[4], "warmup"); + try { + configs = {{parse_pos(argv[1], "N"), parse_pos(argv[2], "T")}}; + if (argc >= 4) + runs = parse_pos(argv[3], "runs"); + if (argc == 5) + warmup = parse_pos(argv[4], "warmup"); + } catch (const std::exception &e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; + } } else if (argc != 1) { std::cerr << "Usage: bw_hotspot [N T [runs [warmup]]]\n"; return 1; diff --git a/tools/fb_contour_sweep.cpp b/tools/fb_contour_sweep.cpp index 52e3cc1..f93ac2b 100644 --- a/tools/fb_contour_sweep.cpp +++ b/tools/fb_contour_sweep.cpp @@ -343,11 +343,16 @@ int main(int argc, char *argv[]) { if (argc >= 2) { output_path = argv[1]; } - if (argc >= 3) { - runs = parse_positive_int(argv[2], "runs"); - } - if (argc >= 4) { - warmup = parse_positive_int(argv[3], "warmup"); + try { + if (argc >= 3) { + runs = parse_positive_int(argv[2], "runs"); + } + if (argc >= 4) { + warmup = parse_positive_int(argv[3], "warmup"); + } + } catch (const std::exception &e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; } if (argc > 4) { std::cerr << "Usage:\n"; diff --git a/tools/hmm_validator.cpp b/tools/hmm_validator.cpp index d3b78f9..5b2ba85 100644 --- a/tools/hmm_validator.cpp +++ b/tools/hmm_validator.cpp @@ -41,7 +41,16 @@ int main(int argc, char *argv[]) { } const std::filesystem::path hmm_path = argv[1]; - const int T = (argc >= 3) ? std::stoi(argv[2]) : 100; + int T = 100; + if (argc >= 3) { + try { + T = std::stoi(argv[2]); + } catch (const std::exception &e) { + std::cerr << "Invalid T argument: " << e.what() << "\n"; + print_usage(argv[0]); + return 1; + } + } std::cout << "libhmm HMM Validator\n"; std::cout << "====================\n"; diff --git a/tools/hotspot_breakdown.cpp b/tools/hotspot_breakdown.cpp index 748ca63..8edd7cc 100644 --- a/tools/hotspot_breakdown.cpp +++ b/tools/hotspot_breakdown.cpp @@ -468,14 +468,19 @@ int main(int argc, char *argv[]) { int runs = 8; if (argc == 3 || argc == 4 || argc == 5) { - const int n = parse_positive_int(argv[1], "N"); - const int t = parse_positive_int(argv[2], "T"); - configs = {{n, t}}; - if (argc >= 4) { - runs = parse_positive_int(argv[3], "runs"); - } - if (argc == 5) { - warmup = parse_positive_int(argv[4], "warmup"); + try { + const int n = parse_positive_int(argv[1], "N"); + const int t = parse_positive_int(argv[2], "T"); + configs = {{n, t}}; + if (argc >= 4) { + runs = parse_positive_int(argv[3], "runs"); + } + if (argc == 5) { + warmup = parse_positive_int(argv[4], "warmup"); + } + } catch (const std::exception &e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; } } else if (argc != 1) { std::cerr << "Usage:\n"; From d0510fec7aae543ffbfbb5230b1aabad2d733dc1 Mon Sep 17 00:00:00 2001 From: Gary Wolfman Date: Wed, 6 May 2026 01:14:04 -0400 Subject: [PATCH 3/8] API symmetry: clean up Beta batch functions, rename NegBinom CDF BetaDistribution: remove getProbabilityBatch() and getLogProbabilityBatch() (vector-based, pre-span signatures). getBatchLogProbabilities(span, span) and getCumulativeProbability() already exist as the correct interface. NegativeBinomialDistribution: rename CDF() to getCumulativeProbability() to match the naming convention used by Gamma, Poisson, Pareto, Beta and ChiSquared. Update test to use the new name. Co-Authored-By: Oz --- .../libhmm/distributions/beta_distribution.h | 19 ------------- .../negative_binomial_distribution.h | 2 +- src/distributions/beta_distribution.cpp | 27 ------------------- .../negative_binomial_distribution.cpp | 2 +- .../test_negative_binomial_distribution.cpp | 14 +++++----- 5 files changed, 9 insertions(+), 55 deletions(-) diff --git a/include/libhmm/distributions/beta_distribution.h b/include/libhmm/distributions/beta_distribution.h index 4551d5f..d298691 100644 --- a/include/libhmm/distributions/beta_distribution.h +++ b/include/libhmm/distributions/beta_distribution.h @@ -135,25 +135,6 @@ class BetaDistribution : public DistributionBase { void fit(std::span data, std::span weights) override; [[nodiscard]] bool isDiscrete() const noexcept override { return false; } - /** - * Vectorized batch computation of PDF for multiple values. - * Optimized for processing many values efficiently with cache reuse. - * - * @param values Vector of input values - * @param results Output vector for results (will be resized if needed) - */ - void getProbabilityBatch(const std::vector &values, std::vector &results); - - /** - * Vectorized batch computation of log PDF for multiple values. - * Optimized for processing many values efficiently with cache reuse. - * - * @param values Vector of input values - * @param results Output vector for results (will be resized if needed) - */ - void getLogProbabilityBatch(const std::vector &values, - std::vector &results) const; - /** * Resets the distribution to default parameters (α = 1.0, β = 1.0). * This corresponds to a uniform distribution on [0,1]. diff --git a/include/libhmm/distributions/negative_binomial_distribution.h b/include/libhmm/distributions/negative_binomial_distribution.h index 4e304de..c4f34d2 100644 --- a/include/libhmm/distributions/negative_binomial_distribution.h +++ b/include/libhmm/distributions/negative_binomial_distribution.h @@ -247,7 +247,7 @@ class NegativeBinomialDistribution : public DistributionBase { * @param value The value at which to evaluate the CDF * @return Cumulative probability P(X ≤ value) */ - [[nodiscard]] double CDF(double value) const noexcept; + [[nodiscard]] double getCumulativeProbability(double value) const noexcept; /** * Gets the mode of the distribution. diff --git a/src/distributions/beta_distribution.cpp b/src/distributions/beta_distribution.cpp index e2d98fe..1b0b0ac 100644 --- a/src/distributions/beta_distribution.cpp +++ b/src/distributions/beta_distribution.cpp @@ -131,33 +131,6 @@ double BetaDistribution::getLogProbability(double value) const noexcept { return alphaMinus1_ * std::log(value) + betaMinus1_ * std::log(1.0 - value) - logBeta_; } -void BetaDistribution::getProbabilityBatch(const std::vector &values, - std::vector &results) { - if (!isCacheValid()) - updateCache(); - if (results.size() != values.size()) - results.resize(values.size()); - auto it = results.begin(); - for (const double &value : values) - *it++ = getProbability(value); -} - -void BetaDistribution::getLogProbabilityBatch(const std::vector &values, - std::vector &results) const { - if (!isCacheValid()) - updateCache(); - - // Resize the results vector if needed - if (results.size() != values.size()) { - results.resize(values.size()); - } - - auto it = results.begin(); - for (const double &value : values) { - *it++ = getLogProbability(value); - } -} - void BetaDistribution::fit(std::span data) { if (data.size() < 2) { reset(); diff --git a/src/distributions/negative_binomial_distribution.cpp b/src/distributions/negative_binomial_distribution.cpp index 67f1180..1cf4e43 100644 --- a/src/distributions/negative_binomial_distribution.cpp +++ b/src/distributions/negative_binomial_distribution.cpp @@ -177,7 +177,7 @@ double NegativeBinomialDistribution::getLogProbability(double value) const noexc return logCoeff + r_ * logP_ + static_cast(k) * log1MinusP_; } -double NegativeBinomialDistribution::CDF(double value) const noexcept { +double NegativeBinomialDistribution::getCumulativeProbability(double value) const noexcept { // Validate input if (std::isnan(value) || std::isinf(value)) { return math::ZERO_DOUBLE; diff --git a/tests/distributions/test_negative_binomial_distribution.cpp b/tests/distributions/test_negative_binomial_distribution.cpp index e24561a..33decc9 100644 --- a/tests/distributions/test_negative_binomial_distribution.cpp +++ b/tests/distributions/test_negative_binomial_distribution.cpp @@ -429,10 +429,10 @@ void testCDF() { NegativeBinomialDistribution negbinom(3.0, 0.6); // Test CDF properties - double cdf0 = negbinom.CDF(0.0); - double cdf1 = negbinom.CDF(1.0); - double cdf5 = negbinom.CDF(5.0); - double cdf10 = negbinom.CDF(10.0); + double cdf0 = negbinom.getCumulativeProbability(0.0); + double cdf1 = negbinom.getCumulativeProbability(1.0); + double cdf5 = negbinom.getCumulativeProbability(5.0); + double cdf10 = negbinom.getCumulativeProbability(10.0); // CDF should be non-decreasing assert(cdf0 <= cdf1); @@ -449,14 +449,14 @@ void testCDF() { assert(std::abs(cdf0 - negbinom.getProbability(0.0)) < 1e-10); // Test boundary cases - assert(negbinom.CDF(-1.0) == 0.0); + assert(negbinom.getCumulativeProbability(-1.0) == 0.0); // Test invalid inputs double nan_val = std::numeric_limits::quiet_NaN(); double inf_val = std::numeric_limits::infinity(); - assert(negbinom.CDF(nan_val) == 0.0); - assert(negbinom.CDF(inf_val) == 0.0); + assert(negbinom.getCumulativeProbability(nan_val) == 0.0); + assert(negbinom.getCumulativeProbability(inf_val) == 0.0); std::cout << "✓ CDF calculation tests passed" << std::endl; } From 2e5db616400656d0e32e348cb93b4bdafd6495f1 Mon Sep 17 00:00:00 2001 From: Gary Wolfman Date: Wed, 6 May 2026 01:24:59 -0400 Subject: [PATCH 4/8] Fix virtualCallInConstructor in DiscreteDistribution Extract private init_uniform() helper so the constructor can initialise to uniform probabilities without calling the virtual reset(). Virtual dispatch is inactive during construction, so calling an override there is misleading and correctly flagged by cppcheck. reset() now delegates to init_uniform(), keeping the logic in one place. No behaviour change. Co-Authored-By: Oz --- .../libhmm/distributions/discrete_distribution.h | 15 ++++++++++++++- src/distributions/discrete_distribution.cpp | 4 +--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/libhmm/distributions/discrete_distribution.h b/include/libhmm/distributions/discrete_distribution.h index 8a23317..bcefd5b 100755 --- a/include/libhmm/distributions/discrete_distribution.h +++ b/include/libhmm/distributions/discrete_distribution.h @@ -113,6 +113,19 @@ class DiscreteDistribution : public DistributionBase { throw std::invalid_argument("Probability value must be between 0 and 1"); } + /// Fills pdf_ with uniform probabilities 1/numSymbols_ and invalidates the cache. + /// + /// This exists as a private non-virtual helper so the constructor can + /// perform the same initialisation as reset() without invoking a virtual + /// function during construction (where virtual dispatch is not active and + /// cppcheck correctly flags the call as misleading). reset() delegates + /// here so the logic lives in exactly one place. + void init_uniform() noexcept { + const double p = 1.0 / static_cast(numSymbols_); + std::fill(pdf_.begin(), pdf_.end(), p); + invalidateCache(); + } + public: /** * Constructs a Discrete distribution with given number of symbols. @@ -124,7 +137,7 @@ class DiscreteDistribution : public DistributionBase { explicit DiscreteDistribution(int symbols = 10) : DistributionBase{}, numSymbols_{validateSymbols(symbols)}, pdf_(numSymbols_), cachedSum_{1.0}, cachedEntropy_{0.0} { - reset(); + init_uniform(); } DiscreteDistribution(const DiscreteDistribution &other) = default; diff --git a/src/distributions/discrete_distribution.cpp b/src/distributions/discrete_distribution.cpp index 5f018df..f87602a 100755 --- a/src/distributions/discrete_distribution.cpp +++ b/src/distributions/discrete_distribution.cpp @@ -84,9 +84,7 @@ void DiscreteDistribution::fit(std::span data, std::span(numSymbols_); - std::fill(pdf_.begin(), pdf_.end(), uniformProb); - invalidateCache(); + init_uniform(); } /** From e62a4df24d699c95542b02f39297e7df8a387e42 Mon Sep 17 00:00:00 2001 From: Gary Wolfman Date: Wed, 6 May 2026 01:35:11 -0400 Subject: [PATCH 5/8] Remove V2 numerical framework dead code ConvergenceDetector, AdaptivePrecision, ErrorRecovery and NumericalDiagnostics were scaffolding for scaled (non-log-space) calculators and trainers that were removed in the V3 architecture pivot. Several NumericalSafety helpers (safeLog, safeExp, clampLogProbability, checkProbability, checkLogProbability, checkMatrixFinite, checkVectorFinite, isProbabilityDistribution) were part of the same framework and have had no callers since the pivot. Keep the three methods that the V3 code actively uses: NumericalSafety::checkFinite NumericalSafety::clampProbability NumericalSafety::normalizeProbabilities Prune test_numerical_stability.cpp to cover only those three methods. Co-Authored-By: Oz --- include/libhmm/math/numerical_stability.h | 285 +--------- src/common/numerical_stability.cpp | 627 +--------------------- tests/common/test_numerical_stability.cpp | 382 +------------ 3 files changed, 47 insertions(+), 1247 deletions(-) diff --git a/include/libhmm/math/numerical_stability.h b/include/libhmm/math/numerical_stability.h index 4d46cf1..9aed7e3 100644 --- a/include/libhmm/math/numerical_stability.h +++ b/include/libhmm/math/numerical_stability.h @@ -10,277 +10,48 @@ namespace libhmm { namespace numerical { -/// Numerical stability constants and utilities -/// Uses constants from common.h for consistency across the library +/// Numerical stability constants. +/// Aliases the shared constants from common.h for use within this module. struct NumericalConstants { - /// Minimum probability value to prevent underflow - static constexpr double MIN_PROBABILITY = constants::probability::MIN_PROBABILITY; - - /// Maximum probability value to prevent overflow - static constexpr double MAX_PROBABILITY = constants::probability::MAX_PROBABILITY; - - /// Tolerance for convergence detection + static constexpr double MIN_PROBABILITY = + constants::probability::MIN_PROBABILITY; + static constexpr double MAX_PROBABILITY = + constants::probability::MAX_PROBABILITY; + static constexpr double MIN_LOG_PROBABILITY = + constants::probability::MIN_LOG_PROBABILITY; + static constexpr double MAX_LOG_PROBABILITY = + constants::probability::MAX_LOG_PROBABILITY; static constexpr double DEFAULT_CONVERGENCE_TOLERANCE = constants::precision::DEFAULT_CONVERGENCE_TOLERANCE; - - /// Maximum iterations before forced termination static constexpr std::size_t DEFAULT_MAX_ITERATIONS = constants::iterations::DEFAULT_MAX_ITERATIONS; - - /// Minimum log probability to prevent -infinity - static constexpr double MIN_LOG_PROBABILITY = constants::probability::MIN_LOG_PROBABILITY; - - /// Maximum log probability - static constexpr double MAX_LOG_PROBABILITY = constants::probability::MAX_LOG_PROBABILITY; - - /// Small value for numerical differentiation - static constexpr double NUMERICAL_EPSILON = constants::precision::HIGH_PRECISION_TOLERANCE; - - /// Scaling threshold for numerical stability - static constexpr double SCALING_THRESHOLD = constants::probability::SCALING_THRESHOLD; + static constexpr double NUMERICAL_EPSILON = + constants::precision::HIGH_PRECISION_TOLERANCE; + static constexpr double SCALING_THRESHOLD = + constants::probability::SCALING_THRESHOLD; }; -/// Numerical safety checks and corrections +/// Numerical safety utilities used by the V3 log-space architecture. +/// +/// Only the three methods below are wired into the library. The broader +/// NumericalSafety / ConvergenceDetector / AdaptivePrecision / ErrorRecovery / +/// NumericalDiagnostics framework was removed in the V3 refactor when scaled +/// (non-log-space) calculators and trainers were dropped in favour of the +/// current log-space-only design. class NumericalSafety { public: - /// Check if a value is finite and valid - /// @param value Value to check - /// @param name Variable name for error messages - /// @throws std::runtime_error if value is not finite + /// Throws std::runtime_error if @p value is NaN or infinite. static void checkFinite(double value, const std::string &name = "value"); - /// Check if a probability is in valid range [0, 1] - /// @param prob Probability value to check - /// @param name Variable name for error messages - /// @throws std::runtime_error if probability is invalid - static void checkProbability(double prob, const std::string &name = "probability"); - - /// Check if a log probability is in valid range - /// @param logProb Log probability value to check - /// @param name Variable name for error messages - /// @throws std::runtime_error if log probability is invalid - static void checkLogProbability(double logProb, const std::string &name = "log_probability"); - - /// Clamp probability to valid range [MIN_PROBABILITY, MAX_PROBABILITY] - /// @param prob Probability to clamp - /// @return Clamped probability + /// Clamps @p prob to [MIN_PROBABILITY, MAX_PROBABILITY]. + /// Returns MIN_PROBABILITY for NaN input. static double clampProbability(double prob) noexcept; - /// Clamp log probability to valid range - /// @param logProb Log probability to clamp - /// @return Clamped log probability - static double clampLogProbability(double logProb) noexcept; - - /// Safe logarithm that handles edge cases - /// @param value Value to take logarithm of - /// @return Safe logarithm, clamped to valid range - static double safeLog(double value) noexcept; - - /// Safe exponential that handles edge cases - /// @param value Value to exponentiate - /// @return Safe exponential, clamped to valid range - static double safeExp(double value) noexcept; - - /// Check if a matrix contains only finite values - /// @param matrix Matrix to check - /// @param name Matrix name for error messages - /// @throws std::runtime_error if matrix contains non-finite values - static void checkMatrixFinite(const Matrix &matrix, const std::string &name = "matrix"); - - /// Check if a vector contains only finite values - /// @param vector Vector to check - /// @param name Vector name for error messages - /// @throws std::runtime_error if vector contains non-finite values - static void checkVectorFinite(const Vector &vector, const std::string &name = "vector"); - - /// Normalize probabilities to sum to 1, handling edge cases - /// @param probs Probability vector to normalize (modified in place) - /// @return True if normalization was successful, false if all probabilities were zero + /// Normalises @p probs in place so they sum to 1. + /// If all values are effectively zero, replaces with a uniform distribution. + /// Returns false in that case, true on successful normalisation. + // cppcheck-suppress constParameterReference static bool normalizeProbabilities(Vector &probs) noexcept; - - /// Check if probabilities sum to approximately 1 - /// @param probs Probability vector to check - /// @param tolerance Tolerance for sum check - /// @return True if probabilities are properly normalized - static bool isProbabilityDistribution( - const Vector &probs, - double tolerance = NumericalConstants::DEFAULT_CONVERGENCE_TOLERANCE) noexcept; -}; - -/// Convergence detection for iterative algorithms -class ConvergenceDetector { -private: - std::vector history_; - double tolerance_; - std::size_t maxIterations_; - std::size_t windowSize_; - std::size_t currentIteration_; - -public: - /// Constructor with convergence parameters - /// @param tolerance Convergence tolerance - /// @param maxIterations Maximum iterations before forced termination - /// @param windowSize Number of previous values to consider for convergence - explicit ConvergenceDetector( - double tolerance = NumericalConstants::DEFAULT_CONVERGENCE_TOLERANCE, - std::size_t maxIterations = NumericalConstants::DEFAULT_MAX_ITERATIONS, - std::size_t windowSize = 5); - - /// Add a new value and check convergence - /// @param value New value (e.g., log-likelihood) - /// @return True if algorithm has converged - bool addValue(double value); - - /// Check if maximum iterations reached - /// @return True if max iterations exceeded - bool isMaxIterationsReached() const noexcept; - - /// Get current iteration count - /// @return Current iteration number - std::size_t getCurrentIteration() const noexcept; - - /// Get convergence history - /// @return Vector of recent values - const std::vector &getHistory() const noexcept; - - /// Reset the detector for a new training run - void reset() noexcept; - - /// Get convergence statistics - /// @return String describing convergence status - std::string getConvergenceReport() const; - - /// Check for oscillation (values bouncing back and forth) - /// @return True if oscillation detected - bool isOscillating() const noexcept; - - /// Check for stagnation (values not changing significantly) - /// @return True if stagnation detected - bool isStagnating() const noexcept; -}; - -/// Adaptive precision management for numerical computations -class AdaptivePrecision { -private: - double baseTolerance_; - double currentTolerance_; - [[maybe_unused]] std::size_t problemSize_; // Used for future problem-size-dependent scaling - bool useAdaptive_; - -public: - /// Constructor with base tolerance and problem characteristics - /// @param baseTolerance Base numerical tolerance - /// @param problemSize Size of the problem (affects precision requirements) - /// @param useAdaptive Whether to use adaptive precision scaling - explicit AdaptivePrecision( - double baseTolerance = NumericalConstants::DEFAULT_CONVERGENCE_TOLERANCE, - std::size_t problemSize = 1, bool useAdaptive = true); - - /// Get current tolerance based on problem characteristics - /// @return Current adaptive tolerance - double getCurrentTolerance() const noexcept; - - /// Update tolerance based on iteration progress - /// @param iteration Current iteration number - /// @param convergenceRate Recent convergence rate - void updateTolerance(std::size_t iteration, double convergenceRate = 1.0); - - /// Scale tolerance based on data characteristics - /// @param dataRange Range of data values - /// @param sequenceLength Length of observation sequences - void scaleForData(double dataRange, std::size_t sequenceLength); - - /// Get recommended scaling threshold for the current precision - /// @return Scaling threshold - double getScalingThreshold() const noexcept; -}; - -/// Error recovery strategies for numerical failures -class ErrorRecovery { -public: - /// Strategy for handling numerical failures - enum class RecoveryStrategy { - STRICT, ///< Throw exception on any numerical issue - GRACEFUL, ///< Try to recover with degraded precision - ROBUST, ///< Aggressively recover, may sacrifice some accuracy - ADAPTIVE ///< Choose strategy based on problem characteristics - }; - - /// Attempt to recover from underflow in probability calculations - /// @param probs Probability vector with potential underflow - /// @param strategy Recovery strategy to use - /// @return True if recovery was successful - static bool recoverFromUnderflow(Vector &probs, - RecoveryStrategy strategy = RecoveryStrategy::GRACEFUL); - - /// Attempt to recover from overflow in exponential calculations - /// @param values Values that may have caused overflow - /// @param strategy Recovery strategy to use - /// @return True if recovery was successful - static bool recoverFromOverflow(Vector &values, - RecoveryStrategy strategy = RecoveryStrategy::GRACEFUL); - - /// Handle NaN values in computations - /// @param values Vector containing potential NaN values - /// @param strategy Recovery strategy to use - /// @return Number of NaN values recovered - static std::size_t handleNaNValues(Vector &values, - RecoveryStrategy strategy = RecoveryStrategy::GRACEFUL); - - /// Attempt to fix a degenerate probability matrix - /// @param matrix Matrix with potential issues - /// @param strategy Recovery strategy to use - /// @return True if matrix was successfully repaired - static bool repairDegenerateMatrix(Matrix &matrix, - RecoveryStrategy strategy = RecoveryStrategy::GRACEFUL); - - /// Get default recovery strategy for given problem characteristics - /// @param sequenceLength Length of observation sequences - /// @param numStates Number of HMM states - /// @param isRealTime Whether this is real-time processing - /// @return Recommended recovery strategy - static RecoveryStrategy getRecommendedStrategy(std::size_t sequenceLength, - std::size_t numStates, - bool isRealTime = false) noexcept; -}; - -/// Numerical diagnostics for debugging and monitoring -class NumericalDiagnostics { -public: - /// Diagnostic information about numerical stability - struct DiagnosticReport { - bool hasUnderflow = false; - bool hasOverflow = false; - bool hasNaN = false; - bool hasInfinity = false; - double minValue = std::numeric_limits::max(); - double maxValue = std::numeric_limits::lowest(); - double conditionNumber = 1.0; - std::size_t problemSize = 0; - std::string recommendations; - }; - - /// Analyze vector for numerical issues - /// @param vector Vector to analyze - /// @param name Name for reporting - /// @return Diagnostic report - static DiagnosticReport analyzeVector(const Vector &vector, const std::string &name = "vector"); - - /// Analyze matrix for numerical issues - /// @param matrix Matrix to analyze - /// @param name Name for reporting - /// @return Diagnostic report - static DiagnosticReport analyzeMatrix(const Matrix &matrix, const std::string &name = "matrix"); - - /// Generate comprehensive numerical health report - /// @param reports Collection of diagnostic reports - /// @return Formatted health report string - static std::string generateHealthReport(const std::vector &reports); - - /// Estimate condition number of a matrix (simplified) - /// @param matrix Matrix to analyze - /// @return Estimated condition number - static double estimateConditionNumber(const Matrix &matrix) noexcept; }; } // namespace numerical diff --git a/src/common/numerical_stability.cpp b/src/common/numerical_stability.cpp index 615be83..2ea3ef8 100644 --- a/src/common/numerical_stability.cpp +++ b/src/common/numerical_stability.cpp @@ -1,13 +1,10 @@ #include "libhmm/math/numerical_stability.h" #include -#include -#include #include namespace libhmm { namespace numerical { -// NumericalSafety implementation void NumericalSafety::checkFinite(double value, const std::string &name) { if (!std::isfinite(value)) { throw std::runtime_error("Non-finite value detected in " + name + ": " + @@ -15,22 +12,6 @@ void NumericalSafety::checkFinite(double value, const std::string &name) { } } -void NumericalSafety::checkProbability(double prob, const std::string &name) { - checkFinite(prob, name); - if (prob < 0.0 || prob > 1.0) { - throw std::runtime_error("Invalid probability value in " + name + ": " + - std::to_string(prob) + " (must be in [0,1])"); - } -} - -void NumericalSafety::checkLogProbability(double logProb, const std::string &name) { - checkFinite(logProb, name); - if (logProb > NumericalConstants::MAX_LOG_PROBABILITY) { - throw std::runtime_error("Invalid log probability value in " + name + ": " + - std::to_string(logProb) + " (must be <= 0)"); - } -} - double NumericalSafety::clampProbability(double prob) noexcept { if (!std::isfinite(prob)) { return NumericalConstants::MIN_PROBABILITY; @@ -39,624 +20,28 @@ double NumericalSafety::clampProbability(double prob) noexcept { NumericalConstants::MAX_PROBABILITY); } -double NumericalSafety::clampLogProbability(double logProb) noexcept { - if (!std::isfinite(logProb)) { - return NumericalConstants::MIN_LOG_PROBABILITY; - } - return std::clamp(logProb, NumericalConstants::MIN_LOG_PROBABILITY, - NumericalConstants::MAX_LOG_PROBABILITY); -} - -double NumericalSafety::safeLog(double value) noexcept { - if (value <= 0.0 || !std::isfinite(value)) { - return NumericalConstants::MIN_LOG_PROBABILITY; - } - return std::max(std::log(value), NumericalConstants::MIN_LOG_PROBABILITY); -} - -double NumericalSafety::safeExp(double value) noexcept { - if (!std::isfinite(value)) { - return NumericalConstants::MIN_PROBABILITY; - } - if (value < NumericalConstants::MIN_LOG_PROBABILITY) { - return NumericalConstants::MIN_PROBABILITY; - } - if (value > 700.0) { // Log of very large number, prevent overflow - return NumericalConstants::MAX_PROBABILITY; - } - double result = std::exp(value); - return clampProbability(result); -} - -void NumericalSafety::checkMatrixFinite(const Matrix &matrix, const std::string &name) { - for (std::size_t i = 0; i < matrix.size1(); ++i) { - for (std::size_t j = 0; j < matrix.size2(); ++j) { - if (!std::isfinite(matrix(i, j))) { - throw std::runtime_error("Non-finite value detected in " + name + " at position (" + - std::to_string(i) + "," + std::to_string(j) + - "): " + std::to_string(matrix(i, j))); - } - } - } -} - -void NumericalSafety::checkVectorFinite(const Vector &vector, const std::string &name) { - for (std::size_t i = 0; i < vector.size(); ++i) { - if (!std::isfinite(vector(i))) { - throw std::runtime_error("Non-finite value detected in " + name + " at position " + - std::to_string(i) + ": " + std::to_string(vector(i))); - } - } -} - -// cppcheck-suppress constParameterReference bool NumericalSafety::normalizeProbabilities(Vector &probs) noexcept { - // First, clamp all probabilities to valid range + // Clamp each probability to a valid range. for (std::size_t i = 0; i < probs.size(); ++i) { probs(i) = clampProbability(probs(i)); } - // Calculate sum - double sum = 0.0; - for (std::size_t i = 0; i < probs.size(); ++i) { - sum += probs(i); - } + const double sum = std::accumulate(probs.begin(), probs.end(), 0.0); - // Handle degenerate case where all probabilities are effectively zero if (sum < 1e-15) { - // Set uniform distribution - const double uniformProb = 1.0 / static_cast(probs.size()); + // All probabilities effectively zero — fall back to uniform. + const double uniform = 1.0 / static_cast(probs.size()); for (std::size_t i = 0; i < probs.size(); ++i) { - probs(i) = uniformProb; - } - return false; // Indicate that we had to fix the distribution - } - - // Normalize - for (std::size_t i = 0; i < probs.size(); ++i) { - probs(i) /= sum; - } - - return true; -} - -bool NumericalSafety::isProbabilityDistribution(const Vector &probs, double tolerance) noexcept { - // Check individual probabilities - for (std::size_t i = 0; i < probs.size(); ++i) { - if (!std::isfinite(probs(i)) || probs(i) < 0.0 || probs(i) > 1.0) { - return false; - } - } - - // Check sum - double sum = 0.0; - for (std::size_t i = 0; i < probs.size(); ++i) { - sum += probs(i); - } - - return std::abs(sum - 1.0) <= tolerance; -} - -// ConvergenceDetector implementation -ConvergenceDetector::ConvergenceDetector(double tolerance, std::size_t maxIterations, - std::size_t windowSize) - : tolerance_(tolerance), maxIterations_(maxIterations), windowSize_(windowSize), - currentIteration_(0) { - history_.reserve(windowSize); -} - -bool ConvergenceDetector::addValue(double value) { - NumericalSafety::checkFinite(value, "convergence_value"); - - history_.push_back(value); - if (history_.size() > windowSize_) { - history_.erase(history_.begin()); - } - - ++currentIteration_; - - // Need at least 2 values to check convergence - if (history_.size() < 2) { - return false; - } - - // Check for convergence based on recent changes - if (history_.size() >= windowSize_) { - double maxChange = 0.0; - for (std::size_t i = 1; i < history_.size(); ++i) { - double change = std::abs(history_[i] - history_[i - 1]); - maxChange = std::max(maxChange, change); - } - - if (maxChange < tolerance_) { - return true; - } - } - - return false; -} - -bool ConvergenceDetector::isMaxIterationsReached() const noexcept { - return currentIteration_ >= maxIterations_; -} - -std::size_t ConvergenceDetector::getCurrentIteration() const noexcept { - return currentIteration_; -} - -const std::vector &ConvergenceDetector::getHistory() const noexcept { - return history_; -} - -void ConvergenceDetector::reset() noexcept { - history_.clear(); - currentIteration_ = 0; -} - -std::string ConvergenceDetector::getConvergenceReport() const { - std::ostringstream oss; - oss << std::fixed << std::setprecision(6); - oss << "Convergence Report:\n"; - oss << " Iteration: " << currentIteration_ << "/" << maxIterations_ << "\n"; - oss << " Tolerance: " << tolerance_ << "\n"; - - if (!history_.empty()) { - oss << " Current value: " << history_.back() << "\n"; - - if (history_.size() >= 2) { - double lastChange = std::abs(history_.back() - history_[history_.size() - 2]); - oss << " Last change: " << lastChange << "\n"; - } - - if (isOscillating()) { - oss << " Status: Oscillating\n"; - } else if (isStagnating()) { - oss << " Status: Stagnating\n"; - } else { - oss << " Status: Converging\n"; - } - } - - return oss.str(); -} - -bool ConvergenceDetector::isOscillating() const noexcept { - if (history_.size() < 4) - return false; - - // Check for alternating increases and decreases - int changes = 0; - for (std::size_t i = 2; i < history_.size(); ++i) { - double change1 = history_[i - 1] - history_[i - 2]; - double change2 = history_[i] - history_[i - 1]; - - if ((change1 > 0 && change2 < 0) || (change1 < 0 && change2 > 0)) { - changes++; + probs(i) = uniform; } - } - - // If most changes are oscillating, consider it oscillation - return changes >= static_cast(history_.size() - 2) * 0.7; -} - -bool ConvergenceDetector::isStagnating() const noexcept { - if (history_.size() < windowSize_) return false; - - // Check if all recent changes are very small - double maxChange = 0.0; - for (std::size_t i = 1; i < history_.size(); ++i) { - maxChange = std::max(maxChange, std::abs(history_[i] - history_[i - 1])); - } - - return maxChange < tolerance_ * 0.1; // Much smaller than tolerance -} - -// AdaptivePrecision implementation -AdaptivePrecision::AdaptivePrecision(double baseTolerance, std::size_t problemSize, - bool useAdaptive) - : baseTolerance_(baseTolerance), currentTolerance_(baseTolerance), problemSize_(problemSize), - useAdaptive_(useAdaptive) {} - -double AdaptivePrecision::getCurrentTolerance() const noexcept { - return currentTolerance_; -} - -void AdaptivePrecision::updateTolerance(std::size_t iteration, double convergenceRate) { - if (!useAdaptive_) { - return; - } - - // Relax tolerance as iterations progress (slower convergence may need looser tolerance) - double iterationFactor = 1.0 + static_cast(iteration) / 1000.0; - - // Adjust based on convergence rate - double rateFactor = std::clamp(convergenceRate, 0.1, 10.0); - - currentTolerance_ = baseTolerance_ * iterationFactor * rateFactor; - - // Keep within reasonable bounds - currentTolerance_ = std::clamp(currentTolerance_, baseTolerance_ * 0.1, baseTolerance_ * 100.0); -} - -void AdaptivePrecision::scaleForData(double dataRange, std::size_t sequenceLength) { - if (!useAdaptive_) { - return; } - // Scale tolerance based on data range - double rangeFactor = std::max(1.0, std::log10(std::max(dataRange, 1.0))); - - // Scale based on sequence length (longer sequences may need looser tolerance) - double lengthFactor = 1.0 + std::log10(static_cast(sequenceLength)) / 10.0; - - currentTolerance_ = baseTolerance_ * rangeFactor * lengthFactor; - - // Keep within reasonable bounds - currentTolerance_ = - std::clamp(currentTolerance_, baseTolerance_ * 0.01, baseTolerance_ * 1000.0); -} - -double AdaptivePrecision::getScalingThreshold() const noexcept { - return NumericalConstants::SCALING_THRESHOLD * (currentTolerance_ / baseTolerance_); -} - -// ErrorRecovery implementation -bool ErrorRecovery::recoverFromUnderflow(Vector &probs, RecoveryStrategy strategy) { - bool hasUnderflow = false; - - // Detect underflow for (std::size_t i = 0; i < probs.size(); ++i) { - if (probs(i) < NumericalConstants::MIN_PROBABILITY && probs(i) > 0.0) { - hasUnderflow = true; - break; - } - } - - if (!hasUnderflow) { - return true; - } - - switch (strategy) { - case RecoveryStrategy::STRICT: - return false; // Don't recover, let caller handle - - case RecoveryStrategy::GRACEFUL: - // Clamp small values to minimum - for (std::size_t i = 0; i < probs.size(); ++i) { - if (probs(i) < NumericalConstants::MIN_PROBABILITY) { - probs(i) = NumericalConstants::MIN_PROBABILITY; - } - } - break; - - case RecoveryStrategy::ROBUST: - // More aggressive recovery with renormalization - for (std::size_t i = 0; i < probs.size(); ++i) { - probs(i) = NumericalSafety::clampProbability(probs(i)); - } - NumericalSafety::normalizeProbabilities(probs); - break; - - case RecoveryStrategy::ADAPTIVE: - // Choose strategy based on severity - double minVal = *std::min_element(probs.begin(), probs.end()); - if (minVal < NumericalConstants::MIN_PROBABILITY * 1e-10) { - return recoverFromUnderflow(probs, RecoveryStrategy::ROBUST); - } else { - return recoverFromUnderflow(probs, RecoveryStrategy::GRACEFUL); - } - } - - return true; -} - -// cppcheck-suppress constParameterReference -bool ErrorRecovery::recoverFromOverflow(Vector &values, RecoveryStrategy strategy) { - bool hasOverflow = false; - - // Detect overflow - for (std::size_t i = 0; i < values.size(); ++i) { - if (!std::isfinite(values(i)) || values(i) > 1e100) { - hasOverflow = true; - break; - } - } - - if (!hasOverflow) { - return true; - } - - switch (strategy) { - case RecoveryStrategy::STRICT: - return false; - - case RecoveryStrategy::GRACEFUL: - case RecoveryStrategy::ROBUST: - case RecoveryStrategy::ADAPTIVE: - // Clamp large values - for (std::size_t i = 0; i < values.size(); ++i) { - if (!std::isfinite(values(i)) || values(i) > 1e100) { - values(i) = 1e100; - } - if (values(i) < -1e100) { - values(i) = -1e100; - } - } - break; - } - - return true; -} - -// cppcheck-suppress constParameterReference -std::size_t ErrorRecovery::handleNaNValues(Vector &values, RecoveryStrategy strategy) { - std::size_t nanCount = 0; - - for (std::size_t i = 0; i < values.size(); ++i) { - if (std::isnan(values(i))) { - nanCount++; - - switch (strategy) { - case RecoveryStrategy::STRICT: - return nanCount; // Don't fix, return count - - case RecoveryStrategy::GRACEFUL: - values(i) = 0.0; // Replace with zero - break; - - case RecoveryStrategy::ROBUST: - // Replace with average of neighbors if possible - if (i > 0 && i < values.size() - 1 && std::isfinite(values(i - 1)) && - std::isfinite(values(i + 1))) { - values(i) = (values(i - 1) + values(i + 1)) / 2.0; - } else { - values(i) = 0.0; - } - break; - - case RecoveryStrategy::ADAPTIVE: - values(i) = 0.0; - break; - } - } - } - - return nanCount; -} - -// cppcheck-suppress constParameterReference -bool ErrorRecovery::repairDegenerateMatrix(Matrix &matrix, RecoveryStrategy strategy) { - bool hasIssues = false; - - // Check for problems - for (std::size_t i = 0; i < matrix.size1(); ++i) { - for (std::size_t j = 0; j < matrix.size2(); ++j) { - if (!std::isfinite(matrix(i, j))) { - hasIssues = true; - break; - } - } - if (hasIssues) - break; - } - - if (!hasIssues) { - return true; - } - - switch (strategy) { - case RecoveryStrategy::STRICT: - return false; - - case RecoveryStrategy::GRACEFUL: - case RecoveryStrategy::ROBUST: - case RecoveryStrategy::ADAPTIVE: - // Replace non-finite values with small positive values - for (std::size_t i = 0; i < matrix.size1(); ++i) { - for (std::size_t j = 0; j < matrix.size2(); ++j) { - if (!std::isfinite(matrix(i, j))) { - matrix(i, j) = NumericalConstants::MIN_PROBABILITY; - } - } - } - break; + probs(i) /= sum; } - return true; } -ErrorRecovery::RecoveryStrategy ErrorRecovery::getRecommendedStrategy(std::size_t sequenceLength, - std::size_t numStates, - bool isRealTime) noexcept { - if (isRealTime) { - return RecoveryStrategy::ROBUST; // Fast recovery for real-time - } - - if (sequenceLength > 1000 || numStates > 50) { - return RecoveryStrategy::GRACEFUL; // Large problems need stability - } - - return RecoveryStrategy::ADAPTIVE; // Default to adaptive -} - -// --------------------------------------------------------------------------- -// Internal helpers -// --------------------------------------------------------------------------- - -namespace { - -/// Classifies a single scalar value into the diagnostic report, updating -/// hasNaN, hasInfinity, hasUnderflow, hasOverflow, minValue, and maxValue. -void classify_value(double val, NumericalDiagnostics::DiagnosticReport &report) noexcept { - if (std::isnan(val)) { - report.hasNaN = true; - } else if (std::isinf(val)) { - report.hasInfinity = true; - } else if (std::isfinite(val)) { - report.minValue = std::min(report.minValue, val); - report.maxValue = std::max(report.maxValue, val); - if (val < NumericalConstants::MIN_PROBABILITY && val > 0.0) - report.hasUnderflow = true; - if (val > 1e100) - report.hasOverflow = true; - } -} - -/// Appends the four standard numerical-issue recommendations (NaN, infinity, -/// underflow, overflow) to oss if the corresponding flags are set. -void append_base_recommendations(const NumericalDiagnostics::DiagnosticReport &report, - const std::string &name, std::ostringstream &oss) { - if (report.hasNaN) - oss << "NaN values detected in " << name << ". "; - if (report.hasInfinity) - oss << "Infinite values detected in " << name << ". "; - if (report.hasUnderflow) - oss << "Underflow detected in " << name << ". Consider using scaled arithmetic. "; - if (report.hasOverflow) - oss << "Overflow detected in " << name << ". Consider using log-space arithmetic. "; -} - -/// Returns true if a report contains any numerical issue worth surfacing. -bool report_has_issues(const NumericalDiagnostics::DiagnosticReport &r) noexcept { - return r.hasNaN || r.hasInfinity || r.hasUnderflow || r.hasOverflow || r.conditionNumber > 1e12; -} - -} // namespace - -// --------------------------------------------------------------------------- -// NumericalDiagnostics implementation -// --------------------------------------------------------------------------- - -/** - * @brief Analyzes a vector for numerical health issues. - * - * Classifies each element and sets the corresponding flags in the returned - * report. Generates human-readable recommendations when issues are found. - * - * @param vector The vector to examine. - * @param name Human-readable label used in recommendation strings. - * @return DiagnosticReport with health flags and recommendations populated. - */ -NumericalDiagnostics::DiagnosticReport -NumericalDiagnostics::analyzeVector(const Vector &vector, const std::string &name) { - DiagnosticReport report; - report.problemSize = vector.size(); - - for (std::size_t i = 0; i < vector.size(); ++i) - classify_value(vector(i), report); - - std::ostringstream oss; - append_base_recommendations(report, name, oss); - report.recommendations = oss.str(); - - return report; -} - -/** - * @brief Analyzes a matrix for numerical health issues. - * - * Classifies each element and also estimates the condition number. Generates - * human-readable recommendations when issues are found. - * - * @param matrix The matrix to examine. - * @param name Human-readable label used in recommendation strings. - * @return DiagnosticReport with health flags, condition number, and - * recommendations populated. - */ -NumericalDiagnostics::DiagnosticReport -NumericalDiagnostics::analyzeMatrix(const Matrix &matrix, const std::string &name) { - DiagnosticReport report; - report.problemSize = matrix.size1() * matrix.size2(); - - for (std::size_t i = 0; i < matrix.size1(); ++i) - for (std::size_t j = 0; j < matrix.size2(); ++j) - classify_value(matrix(i, j), report); - - // Condition number estimate (ratio of max to min diagonal element). - report.conditionNumber = estimateConditionNumber(matrix); - - std::ostringstream oss; - append_base_recommendations(report, name, oss); - if (report.conditionNumber > 1e12) - oss << "High condition number (" << report.conditionNumber << ") in " << name - << ". Matrix may be ill-conditioned. "; - report.recommendations = oss.str(); - - return report; -} - -/** - * @brief Generates a consolidated health report from a set of diagnostic reports. - * - * Summarises issue flags, recommendations, average condition number, and - * total problem size across all supplied reports. - * - * @param reports Collection of per-component DiagnosticReport values. - * @return Formatted multi-line health report string. - */ -std::string -NumericalDiagnostics::generateHealthReport(const std::vector &reports) { - std::ostringstream oss; - oss << std::fixed << std::setprecision(3); - oss << "=== Numerical Health Report ===\n\n"; - - const bool hasIssues = std::any_of(reports.begin(), reports.end(), report_has_issues); - - if (!hasIssues) { - oss << "✅ All systems healthy - no numerical issues detected.\n"; - } else { - oss << "⚠️ Numerical issues detected:\n\n"; - - for (const auto &report : reports) { - if (!report.recommendations.empty()) { - oss << "• " << report.recommendations << "\n"; - } - } - } - - oss << "\nOverall Statistics:\n"; - oss << " Total components analyzed: " << reports.size() << "\n"; - - double avgCondition = 0.0; - std::size_t totalSize = 0; - for (const auto &report : reports) { - avgCondition += report.conditionNumber; - totalSize += report.problemSize; - } - if (!reports.empty()) { - avgCondition /= reports.size(); - } - - oss << " Average condition number: " << avgCondition << "\n"; - oss << " Total problem size: " << totalSize << "\n"; - - return oss.str(); -} - -double NumericalDiagnostics::estimateConditionNumber(const Matrix &matrix) noexcept { - if (matrix.size1() != matrix.size2()) { - return 1.0; // Not applicable for non-square matrices - } - - // Simple estimate: ratio of max to min diagonal elements - double maxDiag = 0.0; - double minDiag = std::numeric_limits::max(); - - for (std::size_t i = 0; i < matrix.size1(); ++i) { - double val = std::abs(matrix(i, i)); - if (val > 0.0) { - maxDiag = std::max(maxDiag, val); - minDiag = std::min(minDiag, val); - } - } - - if (minDiag == 0.0 || !std::isfinite(minDiag) || !std::isfinite(maxDiag)) { - return std::numeric_limits::infinity(); - } - - return maxDiag / minDiag; -} - } // namespace numerical } // namespace libhmm diff --git a/tests/common/test_numerical_stability.cpp b/tests/common/test_numerical_stability.cpp index a6c16bb..fcc70c1 100644 --- a/tests/common/test_numerical_stability.cpp +++ b/tests/common/test_numerical_stability.cpp @@ -9,37 +9,23 @@ using namespace libhmm::numerical; class NumericalStabilityTest : public ::testing::Test { protected: void SetUp() override { - // Create test vectors and matrices testVector_ = Vector(5); testVector_(0) = 1.0; testVector_(1) = 0.5; testVector_(2) = 0.3; testVector_(3) = 0.1; testVector_(4) = 0.1; - - testMatrix_ = Matrix(3, 3); - testMatrix_(0, 0) = 0.7; - testMatrix_(0, 1) = 0.2; - testMatrix_(0, 2) = 0.1; - testMatrix_(1, 0) = 0.3; - testMatrix_(1, 1) = 0.4; - testMatrix_(1, 2) = 0.3; - testMatrix_(2, 0) = 0.1; - testMatrix_(2, 1) = 0.3; - testMatrix_(2, 2) = 0.6; } - Vector testVector_; - Matrix testMatrix_; }; -// Test NumericalSafety class -TEST_F(NumericalStabilityTest, NumericalSafetyBasicChecks) { - // Test finite value checking +TEST_F(NumericalStabilityTest, CheckFiniteAcceptsFiniteValues) { EXPECT_NO_THROW(NumericalSafety::checkFinite(1.0)); EXPECT_NO_THROW(NumericalSafety::checkFinite(-1.0)); EXPECT_NO_THROW(NumericalSafety::checkFinite(0.0)); +} +TEST_F(NumericalStabilityTest, CheckFiniteRejectsNonFinite) { EXPECT_THROW(NumericalSafety::checkFinite(std::numeric_limits::infinity()), std::runtime_error); EXPECT_THROW(NumericalSafety::checkFinite(-std::numeric_limits::infinity()), @@ -48,376 +34,34 @@ TEST_F(NumericalStabilityTest, NumericalSafetyBasicChecks) { std::runtime_error); } -TEST_F(NumericalStabilityTest, ProbabilityValidation) { - // Valid probabilities - EXPECT_NO_THROW(NumericalSafety::checkProbability(0.0)); - EXPECT_NO_THROW(NumericalSafety::checkProbability(0.5)); - EXPECT_NO_THROW(NumericalSafety::checkProbability(1.0)); - - // Invalid probabilities - EXPECT_THROW(NumericalSafety::checkProbability(-0.1), std::runtime_error); - EXPECT_THROW(NumericalSafety::checkProbability(1.1), std::runtime_error); - EXPECT_THROW(NumericalSafety::checkProbability(std::numeric_limits::quiet_NaN()), - std::runtime_error); -} - -TEST_F(NumericalStabilityTest, LogProbabilityValidation) { - // Valid log probabilities - EXPECT_NO_THROW(NumericalSafety::checkLogProbability(0.0)); - EXPECT_NO_THROW(NumericalSafety::checkLogProbability(-1.0)); - EXPECT_NO_THROW(NumericalSafety::checkLogProbability(-100.0)); - - // Invalid log probabilities - EXPECT_THROW(NumericalSafety::checkLogProbability(0.1), std::runtime_error); - EXPECT_THROW(NumericalSafety::checkLogProbability(std::numeric_limits::quiet_NaN()), - std::runtime_error); +TEST_F(NumericalStabilityTest, ClampProbabilityPassesThroughValidValues) { + EXPECT_EQ(NumericalSafety::clampProbability(0.5), 0.5); } -TEST_F(NumericalStabilityTest, ProbabilityClamping) { - // Test probability clamping - EXPECT_EQ(NumericalSafety::clampProbability(0.5), 0.5); - EXPECT_EQ(NumericalSafety::clampProbability(1.5), NumericalConstants::MAX_PROBABILITY); +TEST_F(NumericalStabilityTest, ClampProbabilityHandlesOutOfRange) { + EXPECT_EQ(NumericalSafety::clampProbability(1.5), NumericalConstants::MAX_PROBABILITY); EXPECT_EQ(NumericalSafety::clampProbability(-0.5), NumericalConstants::MIN_PROBABILITY); EXPECT_EQ(NumericalSafety::clampProbability(std::numeric_limits::quiet_NaN()), NumericalConstants::MIN_PROBABILITY); } -TEST_F(NumericalStabilityTest, SafeLogarithmAndExponential) { - // Test safe logarithm - EXPECT_NEAR(NumericalSafety::safeLog(1.0), 0.0, 1e-15); - EXPECT_NEAR(NumericalSafety::safeLog(std::exp(1.0)), 1.0, 1e-15); - EXPECT_EQ(NumericalSafety::safeLog(0.0), NumericalConstants::MIN_LOG_PROBABILITY); - EXPECT_EQ(NumericalSafety::safeLog(-1.0), NumericalConstants::MIN_LOG_PROBABILITY); - - // Test safe exponential - EXPECT_NEAR(NumericalSafety::safeExp(0.0), 1.0, 1e-15); - // For values that could result in probability > 1, they get clamped - double exp1 = NumericalSafety::safeExp(1.0); - EXPECT_GT(exp1, 0.0); - EXPECT_LE(exp1, NumericalConstants::MAX_PROBABILITY); - EXPECT_EQ(NumericalSafety::safeExp(1000.0), NumericalConstants::MAX_PROBABILITY); - EXPECT_EQ(NumericalSafety::safeExp(-1000.0), NumericalConstants::MIN_PROBABILITY); -} - -TEST_F(NumericalStabilityTest, VectorFiniteCheck) { - // Valid vector should pass - EXPECT_NO_THROW(NumericalSafety::checkVectorFinite(testVector_)); - - // Create vector with NaN - Vector nanVector = testVector_; - nanVector(2) = std::numeric_limits::quiet_NaN(); - EXPECT_THROW(NumericalSafety::checkVectorFinite(nanVector), std::runtime_error); - - // Create vector with infinity - Vector infVector = testVector_; - infVector(2) = std::numeric_limits::infinity(); - EXPECT_THROW(NumericalSafety::checkVectorFinite(infVector), std::runtime_error); -} - -TEST_F(NumericalStabilityTest, MatrixFiniteCheck) { - // Valid matrix should pass - EXPECT_NO_THROW(NumericalSafety::checkMatrixFinite(testMatrix_)); - - // Create matrix with NaN - Matrix nanMatrix = testMatrix_; - nanMatrix(1, 1) = std::numeric_limits::quiet_NaN(); - EXPECT_THROW(NumericalSafety::checkMatrixFinite(nanMatrix), std::runtime_error); -} - -TEST_F(NumericalStabilityTest, ProbabilityNormalization) { - // Test normalization of valid probabilities +TEST_F(NumericalStabilityTest, NormalizeProbabilitiesSumsToOne) { Vector probs = testVector_; EXPECT_TRUE(NumericalSafety::normalizeProbabilities(probs)); - // Check that probabilities sum to 1 double sum = 0.0; - for (std::size_t i = 0; i < probs.size(); ++i) { + for (std::size_t i = 0; i < probs.size(); ++i) sum += probs(i); - } EXPECT_NEAR(sum, 1.0, 1e-15); +} - // Test normalization of zero probabilities +TEST_F(NumericalStabilityTest, NormalizeProbabilitiesFallsBackToUniformOnZeroInput) { Vector zeroProbs(3); - zeroProbs(0) = 0.0; - zeroProbs(1) = 0.0; - zeroProbs(2) = 0.0; + zeroProbs(0) = zeroProbs(1) = zeroProbs(2) = 0.0; EXPECT_FALSE(NumericalSafety::normalizeProbabilities(zeroProbs)); - // Should be converted to uniform distribution - for (std::size_t i = 0; i < zeroProbs.size(); ++i) { + for (std::size_t i = 0; i < zeroProbs.size(); ++i) EXPECT_NEAR(zeroProbs(i), 1.0 / 3.0, 1e-15); - } -} - -TEST_F(NumericalStabilityTest, ProbabilityDistributionCheck) { - // Test valid probability distribution - Vector validProbs(3); - validProbs(0) = 0.3; - validProbs(1) = 0.3; - validProbs(2) = 0.4; - EXPECT_TRUE(NumericalSafety::isProbabilityDistribution(validProbs)); - - // Test invalid probability distribution (doesn't sum to 1) - Vector invalidProbs(3); - invalidProbs(0) = 0.3; - invalidProbs(1) = 0.3; - invalidProbs(2) = 0.3; - EXPECT_FALSE(NumericalSafety::isProbabilityDistribution(invalidProbs)); - - // Test with negative probability - Vector negativeProbs(3); - negativeProbs(0) = -0.1; - negativeProbs(1) = 0.5; - negativeProbs(2) = 0.6; - EXPECT_FALSE(NumericalSafety::isProbabilityDistribution(negativeProbs)); -} - -// Test ConvergenceDetector class -TEST_F(NumericalStabilityTest, ConvergenceDetectorBasicFunctionality) { - ConvergenceDetector detector(1e-3, 100, 3); // Use looser tolerance for more reliable test - - // Should not converge with first value - EXPECT_FALSE(detector.addValue(10.0)); - - // Should not converge with second value (significant change) - EXPECT_FALSE(detector.addValue(9.0)); - - // Add more values with decreasing changes - EXPECT_FALSE(detector.addValue(8.5)); - - // Should converge when changes become small enough - detector.addValue(8.4999); - detector.addValue(8.4999); - EXPECT_TRUE(detector.addValue(8.4999)); -} - -TEST_F(NumericalStabilityTest, ConvergenceDetectorMaxIterations) { - ConvergenceDetector detector(1e-12, 5, 3); // Very tight tolerance, few iterations - - // Add values that don't converge - for (int i = 0; i < 6; ++i) { - detector.addValue(static_cast(i)); - } - - EXPECT_TRUE(detector.isMaxIterationsReached()); -} - -TEST_F(NumericalStabilityTest, ConvergenceDetectorReset) { - ConvergenceDetector detector; - - detector.addValue(1.0); - detector.addValue(2.0); - EXPECT_EQ(detector.getCurrentIteration(), 2); - - detector.reset(); - EXPECT_EQ(detector.getCurrentIteration(), 0); - EXPECT_TRUE(detector.getHistory().empty()); -} - -TEST_F(NumericalStabilityTest, ConvergenceDetectorOscillationDetection) { - ConvergenceDetector detector(1e-6, 100, 5); - - // Create oscillating sequence - detector.addValue(1.0); - detector.addValue(2.0); - detector.addValue(1.0); - detector.addValue(2.0); - detector.addValue(1.0); - - EXPECT_TRUE(detector.isOscillating()); -} - -// Test AdaptivePrecision class -TEST_F(NumericalStabilityTest, AdaptivePrecisionBasicFunctionality) { - AdaptivePrecision precision(1e-8, 100, true); - - EXPECT_EQ(precision.getCurrentTolerance(), 1e-8); - - // Update tolerance based on iteration - precision.updateTolerance(100, 1.0); - EXPECT_GT(precision.getCurrentTolerance(), 1e-8); // Should be relaxed - - // Scale for data characteristics - precision.scaleForData(1000.0, 1000); - EXPECT_GT(precision.getCurrentTolerance(), 1e-8); // Should be further relaxed -} - -TEST_F(NumericalStabilityTest, AdaptivePrecisionDisabled) { - AdaptivePrecision precision(1e-8, 100, false); - - double initialTolerance = precision.getCurrentTolerance(); - - // Updates should have no effect when disabled - precision.updateTolerance(100, 1.0); - precision.scaleForData(1000.0, 1000); - - EXPECT_EQ(precision.getCurrentTolerance(), initialTolerance); -} - -// Test ErrorRecovery class -TEST_F(NumericalStabilityTest, ErrorRecoveryUnderflow) { - Vector probs(3); - probs(0) = 1e-301; // Below MIN_PROBABILITY - probs(1) = 0.5; - probs(2) = 0.5; - - EXPECT_TRUE( - ErrorRecovery::recoverFromUnderflow(probs, ErrorRecovery::RecoveryStrategy::GRACEFUL)); - EXPECT_GE(probs(0), NumericalConstants::MIN_PROBABILITY); -} - -TEST_F(NumericalStabilityTest, ErrorRecoveryOverflow) { - Vector values(3); - values(0) = 1e200; - values(1) = std::numeric_limits::infinity(); - values(2) = -1e200; - - EXPECT_TRUE( - ErrorRecovery::recoverFromOverflow(values, ErrorRecovery::RecoveryStrategy::GRACEFUL)); - EXPECT_TRUE(std::isfinite(values(0))); - EXPECT_TRUE(std::isfinite(values(1))); - EXPECT_TRUE(std::isfinite(values(2))); -} - -TEST_F(NumericalStabilityTest, ErrorRecoveryNaNHandling) { - Vector values(4); - values(0) = 1.0; - values(1) = std::numeric_limits::quiet_NaN(); - values(2) = 2.0; - values(3) = std::numeric_limits::quiet_NaN(); - - std::size_t nanCount = - ErrorRecovery::handleNaNValues(values, ErrorRecovery::RecoveryStrategy::GRACEFUL); - EXPECT_EQ(nanCount, 2); - - // NaN values should be replaced - EXPECT_FALSE(std::isnan(values(1))); - EXPECT_FALSE(std::isnan(values(3))); -} - -TEST_F(NumericalStabilityTest, ErrorRecoveryStrategyRecommendation) { - // Real-time scenario should recommend ROBUST - auto strategy1 = ErrorRecovery::getRecommendedStrategy(100, 10, true); - EXPECT_EQ(strategy1, ErrorRecovery::RecoveryStrategy::ROBUST); - - // Large problem should recommend GRACEFUL - auto strategy2 = ErrorRecovery::getRecommendedStrategy(2000, 100, false); - EXPECT_EQ(strategy2, ErrorRecovery::RecoveryStrategy::GRACEFUL); - - // Normal problem should recommend ADAPTIVE - auto strategy3 = ErrorRecovery::getRecommendedStrategy(100, 10, false); - EXPECT_EQ(strategy3, ErrorRecovery::RecoveryStrategy::ADAPTIVE); -} - -// Test NumericalDiagnostics class -TEST_F(NumericalStabilityTest, NumericalDiagnosticsVector) { - auto report = NumericalDiagnostics::analyzeVector(testVector_, "test_vector"); - - EXPECT_FALSE(report.hasNaN); - EXPECT_FALSE(report.hasInfinity); - EXPECT_FALSE(report.hasUnderflow); - EXPECT_FALSE(report.hasOverflow); - EXPECT_EQ(report.problemSize, testVector_.size()); -} - -TEST_F(NumericalStabilityTest, NumericalDiagnosticsVectorWithIssues) { - Vector problematicVector(4); - problematicVector(0) = std::numeric_limits::quiet_NaN(); - problematicVector(1) = std::numeric_limits::infinity(); - problematicVector(2) = 1e-301; // Underflow - problematicVector(3) = 1e200; // Potential overflow - - auto report = NumericalDiagnostics::analyzeVector(problematicVector, "problematic_vector"); - - EXPECT_TRUE(report.hasNaN); - EXPECT_TRUE(report.hasInfinity); - EXPECT_TRUE(report.hasUnderflow); - EXPECT_TRUE(report.hasOverflow); - EXPECT_FALSE(report.recommendations.empty()); -} - -TEST_F(NumericalStabilityTest, NumericalDiagnosticsMatrix) { - auto report = NumericalDiagnostics::analyzeMatrix(testMatrix_, "test_matrix"); - - EXPECT_FALSE(report.hasNaN); - EXPECT_FALSE(report.hasInfinity); - EXPECT_FALSE(report.hasUnderflow); - EXPECT_FALSE(report.hasOverflow); - EXPECT_GT(report.conditionNumber, 0.0); - EXPECT_EQ(report.problemSize, testMatrix_.size1() * testMatrix_.size2()); -} - -TEST_F(NumericalStabilityTest, NumericalDiagnosticsHealthReport) { - std::vector reports; - - // Add healthy report - reports.push_back(NumericalDiagnostics::analyzeVector(testVector_, "healthy_vector")); - - // Add problematic report - Vector problematicVector(2); - problematicVector(0) = std::numeric_limits::quiet_NaN(); - problematicVector(1) = 1.0; - reports.push_back(NumericalDiagnostics::analyzeVector(problematicVector, "problematic_vector")); - - std::string healthReport = NumericalDiagnostics::generateHealthReport(reports); - - EXPECT_FALSE(healthReport.empty()); - EXPECT_NE(healthReport.find("Numerical issues detected"), std::string::npos); - EXPECT_NE(healthReport.find("Total components analyzed: 2"), std::string::npos); -} - -// Integration test for multiple components -TEST_F(NumericalStabilityTest, IntegratedNumericalStabilityWorkflow) { - // Create a workflow that uses multiple components together - - // 1. Create adaptive precision manager - AdaptivePrecision precision(1e-8); - - // 2. Create convergence detector - ConvergenceDetector detector(precision.getCurrentTolerance()); - - // 3. Simulate training loop with numerical issues - Vector probs(3); - probs(0) = 0.33; - probs(1) = 0.33; - probs(2) = 0.34; - - bool converged = false; - for (int iter = 0; iter < 50 && !converged; ++iter) { - // Simulate some numerical degradation - if (iter > 20) { - probs(0) *= 0.99999; // Slight change - probs(1) = 1e-301; // Introduce underflow - } - - // Apply error recovery - ErrorRecovery::recoverFromUnderflow(probs, ErrorRecovery::RecoveryStrategy::GRACEFUL); - - // Normalize probabilities - NumericalSafety::normalizeProbabilities(probs); - - // Update precision based on iteration - precision.updateTolerance(iter, 1.0); - - // Check convergence - double sum = 0.0; - for (std::size_t i = 0; i < probs.size(); ++i) { - sum += probs(i); - } - converged = detector.addValue(sum); - - if (detector.isMaxIterationsReached()) { - break; - } - } - - // Generate diagnostic report - auto report = NumericalDiagnostics::analyzeVector(probs, "final_probabilities"); - - // Verify that the workflow completed successfully - EXPECT_TRUE(NumericalSafety::isProbabilityDistribution(probs)); - EXPECT_FALSE(report.hasNaN); - EXPECT_FALSE(report.hasInfinity); } int main(int argc, char **argv) { From e61a628bec7da42ec074469bb9845b97982281d9 Mon Sep 17 00:00:00 2001 From: Gary Wolfman Date: Wed, 6 May 2026 01:55:00 -0400 Subject: [PATCH 6/8] Remove NumericalSafety class and all infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete the three files that comprised the V2 numerical safety framework — now fully orphaned after the previous commit removed the other four classes (ConvergenceDetector, AdaptivePrecision, ErrorRecovery, NumericalDiagnostics): include/libhmm/math/numerical_stability.h src/common/numerical_stability.cpp tests/common/test_numerical_stability.cpp Remove src/common/numerical_stability.cpp from LIBHMM_SOURCES in CMakeLists.txt and add_hmm_test(test_numerical_stability) from tests/CMakeLists.txt. Update docs: TESTING_STRATEGY.md directory listing and test count (38 → 37); CHANGELOG.md [Unreleased] section with removal note. 37/37 tests pass. Co-Authored-By: Oz --- CHANGELOG.md | 10 ++++++++++ CMakeLists.txt | 1 - tests/CMakeLists.txt | 5 ++--- tests/TESTING_STRATEGY.md | 11 +++++------ 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4fd3629..38e24e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Removed + +- **V2 numerical safety framework** (`include/libhmm/math/numerical_stability.h`, + `src/common/numerical_stability.cpp`, `tests/common/test_numerical_stability.cpp`): + `NumericalSafety`, `ConvergenceDetector`, `AdaptivePrecision`, `ErrorRecovery`, + and `NumericalDiagnostics` were built for the pre-V3 scaled-calculator architecture + and had no callers since the log-space-only pivot in V3. cppcheck confirmed every + method was unreachable; dead callee chain back to the deleted scaled trainers. + Files and associated test removed. 37/38 → 37 tests. + ## [3.4.0] - 2026-05-05 Code quality refactoring (Phases 1–3) and JSON serialization. 38/38 tests pass. diff --git a/CMakeLists.txt b/CMakeLists.txt index 7837fbd..26d722a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -518,7 +518,6 @@ set(LIBHMM_SOURCES src/hmm.cpp src/common/common.cpp src/common/string_tokenizer.cpp - src/common/numerical_stability.cpp src/common/weighted_stats.cpp src/performance/transcendental_kernels.cpp src/distributions/distribution_base.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2f670e9..25ba95e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -149,9 +149,8 @@ if(GTest_FOUND OR TARGET gtest) # ========================================================================= # Math & Numerics # ========================================================================= - add_hmm_test(test_modern_constants common/test_modern_constants.cpp) - add_hmm_test(test_numerical_stability common/test_numerical_stability.cpp) - add_hmm_test(test_common common/test_common.cpp) + add_hmm_test(test_modern_constants common/test_modern_constants.cpp) + add_hmm_test(test_common common/test_common.cpp) # ========================================================================= # Performance Primitives diff --git a/tests/TESTING_STRATEGY.md b/tests/TESTING_STRATEGY.md index 7b67bd6..2c63c4a 100644 --- a/tests/TESTING_STRATEGY.md +++ b/tests/TESTING_STRATEGY.md @@ -7,10 +7,9 @@ matching the library layered dependency graph. ``` tests/ -├── common/ # Level 1: Math, basic types -│ ├── test_modern_constants.cpp -│ ├── test_numerical_stability.cpp -│ └── test_common.cpp +|├── common/ # Level 1: Math, basic types +|│ ├── test_modern_constants.cpp +|│ └── test_common.cpp ├── distributions/ # Level 3: All 15 distributions │ ├── test_distributions.cpp │ ├── test_distribution_type_safety.cpp @@ -39,7 +38,7 @@ tests/ ## Running Tests ```bash -# Standard run -- all 33 tests (mirrors CI) +# Standard run -- all 37 tests (mirrors CI) ctest --test-dir build -C Release --output-on-failure # cmake custom targets @@ -86,4 +85,4 @@ Performance tools live in tools/, not tests/: --- -33/33 tests pass on all platforms (Linux/GCC, Linux/Clang, macOS, Windows/MSVC). +37/37 tests pass on all platforms (Linux/GCC, Linux/Clang, macOS, Windows/MSVC). From 5e639936bbf533552732e7ed7b4bfc3cc3ec5a75 Mon Sep 17 00:00:00 2001 From: Gary Wolfman Date: Wed, 6 May 2026 02:01:19 -0400 Subject: [PATCH 7/8] Release v3.5.0 cppcheck quality pass and V2 numerical framework removal. Co-Authored-By: Oz --- CHANGELOG.md | 4 ++++ CMakeLists.txt | 2 +- README.md | 6 +++--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 38e24e1..650b514 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [3.5.0] - 2026-05-06 + +cppcheck quality pass and V2 numerical framework removal. 37/37 tests pass. + ### Removed - **V2 numerical safety framework** (`include/libhmm/math/numerical_stability.h`, diff --git a/CMakeLists.txt b/CMakeLists.txt index 26d722a..ecb0979 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,7 @@ if(APPLE AND NOT CMAKE_CXX_COMPILER) endif() project(libhmm - VERSION 3.4.0 + VERSION 3.5.0 DESCRIPTION "Modern C++20 Hidden Markov Model Library" LANGUAGES CXX ) diff --git a/README.md b/README.md index 8fa90de..449d1a6 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,8 @@ [![C++20](https://img.shields.io/badge/C%2B%2B-20-blue.svg)](https://isocpp.org/std/the-standard) [![CMake](https://img.shields.io/badge/CMake-3.20%2B-blue.svg)](https://cmake.org/) [![License](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE) -[![Version](https://img.shields.io/badge/Version-3.4.0-brightgreen.svg)](https://github.com/OldCrow/libhmm/releases) -[![Tests](https://img.shields.io/badge/Tests-38%2F38_Passing-success.svg)](tests/) +[![Version](https://img.shields.io/badge/Version-3.5.0-brightgreen.svg)](https://github.com/OldCrow/libhmm/releases) +[![Tests](https://img.shields.io/badge/Tests-37%2F37_Passing-success.svg)](tests/) [![SIMD](https://img.shields.io/badge/SIMD-AVX--512%2FAVX2%2FSSE2%2FNEON-blue.svg)](src/distributions/) [![CI](https://github.com/OldCrow/libhmm/actions/workflows/ci.yml/badge.svg)](https://github.com/OldCrow/libhmm/actions) @@ -148,7 +148,7 @@ libhmm/ │ ├── training/ # Layer 4: BaumWelch, Viterbi, SegmentalKMeans │ └── io/ # JSON (hmm_json.h) + legacy XML I/O ├── src/ # Implementation (mirrors include/) -├── tests/ # 38-test GTest suite +├── tests/ # 37-test GTest suite ├── examples/ # 12 usage demonstrations ├── tools/ # simd_inspection, batch_performance, hmm_validator (.json/.xml) ├── samples/ # Reference HMM files (two_state_gaussian, casino) in JSON and XML From 2e7cb9eb701bdc9452fb1ef8c22172685909cceb Mon Sep 17 00:00:00 2001 From: Gary Wolfman Date: Wed, 6 May 2026 02:11:51 -0400 Subject: [PATCH 8/8] Apply clang-format corrections Co-Authored-By: Oz --- include/libhmm/math/numerical_stability.h | 58 ---------------- include/libhmm/training/baum_welch_trainer.h | 18 ++--- src/common/numerical_stability.cpp | 47 ------------- tests/common/test_numerical_stability.cpp | 70 -------------------- 4 files changed, 9 insertions(+), 184 deletions(-) delete mode 100644 include/libhmm/math/numerical_stability.h mode change 100755 => 100644 include/libhmm/training/baum_welch_trainer.h delete mode 100644 src/common/numerical_stability.cpp delete mode 100644 tests/common/test_numerical_stability.cpp diff --git a/include/libhmm/math/numerical_stability.h b/include/libhmm/math/numerical_stability.h deleted file mode 100644 index 9aed7e3..0000000 --- a/include/libhmm/math/numerical_stability.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include "libhmm/linalg/linalg_types.h" - -namespace libhmm { -namespace numerical { - -/// Numerical stability constants. -/// Aliases the shared constants from common.h for use within this module. -struct NumericalConstants { - static constexpr double MIN_PROBABILITY = - constants::probability::MIN_PROBABILITY; - static constexpr double MAX_PROBABILITY = - constants::probability::MAX_PROBABILITY; - static constexpr double MIN_LOG_PROBABILITY = - constants::probability::MIN_LOG_PROBABILITY; - static constexpr double MAX_LOG_PROBABILITY = - constants::probability::MAX_LOG_PROBABILITY; - static constexpr double DEFAULT_CONVERGENCE_TOLERANCE = - constants::precision::DEFAULT_CONVERGENCE_TOLERANCE; - static constexpr std::size_t DEFAULT_MAX_ITERATIONS = - constants::iterations::DEFAULT_MAX_ITERATIONS; - static constexpr double NUMERICAL_EPSILON = - constants::precision::HIGH_PRECISION_TOLERANCE; - static constexpr double SCALING_THRESHOLD = - constants::probability::SCALING_THRESHOLD; -}; - -/// Numerical safety utilities used by the V3 log-space architecture. -/// -/// Only the three methods below are wired into the library. The broader -/// NumericalSafety / ConvergenceDetector / AdaptivePrecision / ErrorRecovery / -/// NumericalDiagnostics framework was removed in the V3 refactor when scaled -/// (non-log-space) calculators and trainers were dropped in favour of the -/// current log-space-only design. -class NumericalSafety { -public: - /// Throws std::runtime_error if @p value is NaN or infinite. - static void checkFinite(double value, const std::string &name = "value"); - - /// Clamps @p prob to [MIN_PROBABILITY, MAX_PROBABILITY]. - /// Returns MIN_PROBABILITY for NaN input. - static double clampProbability(double prob) noexcept; - - /// Normalises @p probs in place so they sum to 1. - /// If all values are effectively zero, replaces with a uniform distribution. - /// Returns false in that case, true on successful normalisation. - // cppcheck-suppress constParameterReference - static bool normalizeProbabilities(Vector &probs) noexcept; -}; - -} // namespace numerical -} // namespace libhmm diff --git a/include/libhmm/training/baum_welch_trainer.h b/include/libhmm/training/baum_welch_trainer.h old mode 100755 new mode 100644 index 10fc2b4..8c2dccb --- a/include/libhmm/training/baum_welch_trainer.h +++ b/include/libhmm/training/baum_welch_trainer.h @@ -31,19 +31,19 @@ class BaumWelchTrainer : public Trainer { /// Accumulates gamma statistics (emission data/weights, pi numerator, trans /// denominator) for one observation sequence. static void accumulate_gamma(const Matrix &logAlpha, const Matrix &logBeta, - const ObservationSet &obs, double logP, std::size_t N, - std::vector> &emisData, - std::vector> &emisWts, - std::vector &piNum, - std::vector &transDen) noexcept; + const ObservationSet &obs, double logP, std::size_t N, + std::vector> &emisData, + std::vector> &emisWts, + std::vector &piNum, + std::vector &transDen) noexcept; /// Accumulates xi statistics (expected transition counts) for one sequence. /// Dispatches to sparse or dense path based on hasZeroTransitions. static void accumulate_xi(const double *logAlphaData, const double *logBetaData, - const std::vector &logEmitByTime, - const std::vector &logTransT, double logP, std::size_t T, - std::size_t N, bool hasZeroTransitions, - std::vector &transNumT) noexcept; + const std::vector &logEmitByTime, + const std::vector &logTransT, double logP, std::size_t T, + std::size_t N, bool hasZeroTransitions, + std::vector &transNumT) noexcept; /// M-step: normalize piNum and update the HMM initial distribution. static void m_step_pi(Hmm &hmm, std::size_t N, const std::vector &piNum); diff --git a/src/common/numerical_stability.cpp b/src/common/numerical_stability.cpp deleted file mode 100644 index 2ea3ef8..0000000 --- a/src/common/numerical_stability.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include "libhmm/math/numerical_stability.h" -#include -#include - -namespace libhmm { -namespace numerical { - -void NumericalSafety::checkFinite(double value, const std::string &name) { - if (!std::isfinite(value)) { - throw std::runtime_error("Non-finite value detected in " + name + ": " + - std::to_string(value)); - } -} - -double NumericalSafety::clampProbability(double prob) noexcept { - if (!std::isfinite(prob)) { - return NumericalConstants::MIN_PROBABILITY; - } - return std::clamp(prob, NumericalConstants::MIN_PROBABILITY, - NumericalConstants::MAX_PROBABILITY); -} - -bool NumericalSafety::normalizeProbabilities(Vector &probs) noexcept { - // Clamp each probability to a valid range. - for (std::size_t i = 0; i < probs.size(); ++i) { - probs(i) = clampProbability(probs(i)); - } - - const double sum = std::accumulate(probs.begin(), probs.end(), 0.0); - - if (sum < 1e-15) { - // All probabilities effectively zero — fall back to uniform. - const double uniform = 1.0 / static_cast(probs.size()); - for (std::size_t i = 0; i < probs.size(); ++i) { - probs(i) = uniform; - } - return false; - } - - for (std::size_t i = 0; i < probs.size(); ++i) { - probs(i) /= sum; - } - return true; -} - -} // namespace numerical -} // namespace libhmm diff --git a/tests/common/test_numerical_stability.cpp b/tests/common/test_numerical_stability.cpp deleted file mode 100644 index fcc70c1..0000000 --- a/tests/common/test_numerical_stability.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include "libhmm/math/numerical_stability.h" -#include -#include - -using namespace libhmm; -using namespace libhmm::numerical; - -class NumericalStabilityTest : public ::testing::Test { -protected: - void SetUp() override { - testVector_ = Vector(5); - testVector_(0) = 1.0; - testVector_(1) = 0.5; - testVector_(2) = 0.3; - testVector_(3) = 0.1; - testVector_(4) = 0.1; - } - Vector testVector_; -}; - -TEST_F(NumericalStabilityTest, CheckFiniteAcceptsFiniteValues) { - EXPECT_NO_THROW(NumericalSafety::checkFinite(1.0)); - EXPECT_NO_THROW(NumericalSafety::checkFinite(-1.0)); - EXPECT_NO_THROW(NumericalSafety::checkFinite(0.0)); -} - -TEST_F(NumericalStabilityTest, CheckFiniteRejectsNonFinite) { - EXPECT_THROW(NumericalSafety::checkFinite(std::numeric_limits::infinity()), - std::runtime_error); - EXPECT_THROW(NumericalSafety::checkFinite(-std::numeric_limits::infinity()), - std::runtime_error); - EXPECT_THROW(NumericalSafety::checkFinite(std::numeric_limits::quiet_NaN()), - std::runtime_error); -} - -TEST_F(NumericalStabilityTest, ClampProbabilityPassesThroughValidValues) { - EXPECT_EQ(NumericalSafety::clampProbability(0.5), 0.5); -} - -TEST_F(NumericalStabilityTest, ClampProbabilityHandlesOutOfRange) { - EXPECT_EQ(NumericalSafety::clampProbability(1.5), NumericalConstants::MAX_PROBABILITY); - EXPECT_EQ(NumericalSafety::clampProbability(-0.5), NumericalConstants::MIN_PROBABILITY); - EXPECT_EQ(NumericalSafety::clampProbability(std::numeric_limits::quiet_NaN()), - NumericalConstants::MIN_PROBABILITY); -} - -TEST_F(NumericalStabilityTest, NormalizeProbabilitiesSumsToOne) { - Vector probs = testVector_; - EXPECT_TRUE(NumericalSafety::normalizeProbabilities(probs)); - - double sum = 0.0; - for (std::size_t i = 0; i < probs.size(); ++i) - sum += probs(i); - EXPECT_NEAR(sum, 1.0, 1e-15); -} - -TEST_F(NumericalStabilityTest, NormalizeProbabilitiesFallsBackToUniformOnZeroInput) { - Vector zeroProbs(3); - zeroProbs(0) = zeroProbs(1) = zeroProbs(2) = 0.0; - EXPECT_FALSE(NumericalSafety::normalizeProbabilities(zeroProbs)); - - for (std::size_t i = 0; i < zeroProbs.size(); ++i) - EXPECT_NEAR(zeroProbs(i), 1.0 / 3.0, 1e-15); -} - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -}