From 15000b62f5b14bcfb965785e39b8765caae4012a Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Wed, 20 Aug 2025 09:41:24 -0400 Subject: [PATCH] record censored death values as missing rather than infinite --- NEWS.md | 7 +++++++ R/PHom.R | 3 ++- R/data.r | 2 +- man/aegypti.Rd | 2 +- man/ripserr.Rd | 4 ++-- src/ripser.cpp | 14 ++++++++++---- 6 files changed, 23 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index c13005f..085440c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# next version + +## censored death values + +This version addresses #39 by encoding deaths that exceed the threshold as undefined (`NaN`) rather than infinite (`Inf`) then converting these values to missing (`NA_REAL`) while populating the `Rcpp::NumericMatrix` returned to R. +A single infinite degree-0 feature for the connected component is retained. + # ripserr 1.0.0 This major version replaces an outdated version of the Ripser C++ library with its current version. diff --git a/R/PHom.R b/R/PHom.R index f5c9d95..a047925 100644 --- a/R/PHom.R +++ b/R/PHom.R @@ -61,7 +61,8 @@ validate_PHom <- function(x, error = TRUE) { } # make sure all deaths are after corresponding births - if (!all(x$birth < x$death)) { + # TODO: Check for consistency with extended persistence. + if (!all(x$birth < x$death, na.rm = TRUE)) { if (error) { stop(paste("In PHom objects, all births must be before corresponding", "deaths.")) diff --git a/R/data.r b/R/data.r index 90837f3..2c96d30 100644 --- a/R/data.r +++ b/R/data.r @@ -27,7 +27,7 @@ #' acre_coord <- aegypti[aegypti$state_code == "AC", c("x", "y"), drop = FALSE] #' acre_rips <- vietoris_rips(acre_coord) #' plot.new() -#' xymax <- max(setdiff(acre_rips$death, Inf)) +#' xymax <- max(setdiff(acre_rips$death, Inf), na.rm = TRUE) #' plot.window( #' xlim = c(0, xymax), #' ylim = c(0, xymax), diff --git a/man/aegypti.Rd b/man/aegypti.Rd index 1a512c1..2d0f827 100644 --- a/man/aegypti.Rd +++ b/man/aegypti.Rd @@ -36,7 +36,7 @@ and reverse-geocoded to states. acre_coord <- aegypti[aegypti$state_code == "AC", c("x", "y"), drop = FALSE] acre_rips <- vietoris_rips(acre_coord) plot.new() -xymax <- max(setdiff(acre_rips$death, Inf)) +xymax <- max(setdiff(acre_rips$death, Inf), na.rm = TRUE) plot.window( xlim = c(0, xymax), ylim = c(0, xymax), diff --git a/man/ripserr.Rd b/man/ripserr.Rd index 4a73a2e..1800a82 100644 --- a/man/ripserr.Rd +++ b/man/ripserr.Rd @@ -22,7 +22,7 @@ Useful links: Authors: \itemize{ - \item Raoul Wadhwa \email{raoulwadhwa@gmail.com} (\href{https://orcid.org/0000-0003-0503-9580}{ORCID}) + \item Raoul R. Wadhwa \email{raoulwadhwa@gmail.com} (\href{https://orcid.org/0000-0003-0503-9580}{ORCID}) \item Matt Piekenbrock \email{matt.piekenbrock@gmail.com} \item Xinyi Zhang \email{ezhang0927@gmail.com} \item Alice Zhang \email{aliscezhang@gmail.com} (\href{https://orcid.org/0009-0001-3584-5869}{ORCID}) @@ -36,7 +36,7 @@ Other contributors: \item Aymeric Stamm \email{aymeric.stamm@cnrs.fr} (\href{https://orcid.org/0000-0002-8725-3654}{ORCID}) [contributor] \item Aidan Bryant \email{bryant.aidan15@gmail.com} [contributor] \item James Golabek \email{jamesgolabek@gmail.com} [contributor] - \item Jacob Scott (\href{https://orcid.org/0000-0003-2971-7673}{ORCID}) [laboratory director] + \item Jacob G. Scott (\href{https://orcid.org/0000-0003-2971-7673}{ORCID}) [laboratory director] \item Ulrich Bauer (Ripser; MIT license) [copyright holder, contributor] \item Takeki Sudo (Cubical Ripser; GPL-3 license) [copyright holder, contributor] \item Kazushi Ahara (Cubical Ripser; GPL-3 license) [copyright holder, contributor] diff --git a/src/ripser.cpp b/src/ripser.cpp index 0868dfa..fd4c665 100644 --- a/src/ripser.cpp +++ b/src/ripser.cpp @@ -671,8 +671,10 @@ template class ripser { #endif // ripserq: Accumulate pairs in an object to be returned to the user. #ifdef COLLECT_PERSISTENCE_PAIRS - for (index_t i = 0; i < n; ++i) - if (dset.find(i) == i) persistence_pairs[0].emplace_back(0.0, std::numeric_limits::infinity()); + for (index_t i = 0; i < n - 1; ++i) + // ripserq: `quiet_NaN()` for deaths that subceed threshold. + if (dset.find(i) == i) persistence_pairs[0].emplace_back(0.0, std::numeric_limits::quiet_NaN()); + if (dset.find(n - 1) == n - 1) persistence_pairs[0].emplace_back(0.0, std::numeric_limits::infinity()); #endif } @@ -863,7 +865,8 @@ template class ripser { #endif // ripserq: Accumulate pairs in an object to be returned to the user. #ifdef COLLECT_PERSISTENCE_PAIRS - persistence_pairs[dim].emplace_back(diameter, std::numeric_limits::infinity()); + // ripserq: `quiet_NaN()` for deaths that subceed threshold. + persistence_pairs[dim].emplace_back(diameter, std::numeric_limits::quiet_NaN()); #endif break; } @@ -1404,7 +1407,10 @@ Rcpp::List ripser_cpp_dist(const Rcpp::NumericVector &dataset, int dim, double t Rcpp::NumericMatrix mat(pairs.size(), 2); for (size_t i = 0; i < pairs.size(); ++i) { mat(i, 0) = pairs[i].first; - mat(i, 1) = pairs[i].second; + if (std::isnan(pairs[i].second)) + mat(i, 1) = NA_REAL; + else + mat(i, 1) = pairs[i].second; } output[d] = mat; }