diff --git a/.Rbuildignore b/.Rbuildignore index ed4b6b5..f4cc1e4 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -16,5 +16,6 @@ ^NEWS\.md$ ^CONTRIBUTING\.md$ ^CODE_OF_CONDUCT\.md$ +^preprint$ ^doc$ ^Meta$ diff --git a/R/PHom.R b/R/PHom.R index f5c9d95..5505622 100644 --- a/R/PHom.R +++ b/R/PHom.R @@ -28,7 +28,8 @@ validate_PHom <- function(x, error = TRUE) { if (!("PHom" %in% class(x) & "data.frame" %in% class(x))) { if (error) { stop(paste("PHom objects must have classes `PHom` and `data.frame`;", - "paseed value has class =", class(x))) + "paseed value has class =", class(x)), + call. = FALSE) } else { return(FALSE) } @@ -36,7 +37,8 @@ validate_PHom <- function(x, error = TRUE) { if (ncol(x) != 3) { if (error) { stop(paste("PHom objects must be data frames with exactly 3 columns;", - "passed object has", ncol(x), "columns.")) + "passed object has", ncol(x), "columns."), + call. = FALSE) } else { return(FALSE) } @@ -45,7 +47,8 @@ validate_PHom <- function(x, error = TRUE) { if (error) { stop(paste("`dimension` column in PHom objects must have class integer;", "`dimension` column in passed object has class =", - class(x$dimension))) + class(x$dimension)), + call. = FALSE) } else { return(FALSE) } @@ -54,7 +57,8 @@ validate_PHom <- function(x, error = TRUE) { if (error) { stop(paste("`birth` and `death` columns in PHom objects must have", "class integer; respective columns in passed object have", - "classes =", class(x$birth), "and", class(x$death))) + "classes =", class(x$birth), "and", class(x$death)), + call. = FALSE) } else { return(FALSE) } @@ -64,7 +68,8 @@ validate_PHom <- function(x, error = TRUE) { if (!all(x$birth < x$death)) { if (error) { stop(paste("In PHom objects, all births must be before corresponding", - "deaths.")) + "deaths."), + call. = FALSE) } else { return(FALSE) } @@ -85,16 +90,19 @@ valid_colval <- function(df, val, val_name) { if (!(val %in% seq_len(ncol(df)))) { stop(paste(val_name, "parameter must be a valid column index for the", - "provided data frame; passed value =", val)) + "provided data frame; passed value =", val), + call. = FALSE) } } else if (is.character(val)) { if (!(val %in% colnames(df))) { stop(paste(val_name, "parameter must be a valid column name for the", - "provided data frame; passed value =", val)) + "provided data frame; passed value =", val), + call. = FALSE) } } else { stop(paste(val_name, "parameter must either be a valid column name or", - "index; passed value =", val)) + "index; passed value =", val), + call. = FALSE) } } diff --git a/preprint/.gitignore b/preprint/.gitignore new file mode 100644 index 0000000..1123ee1 --- /dev/null +++ b/preprint/.gitignore @@ -0,0 +1,3 @@ +# ignore Rmarkdown output files +*.pdf +*.tex diff --git a/preprint/arxiv.sty b/preprint/arxiv.sty new file mode 100644 index 0000000..0cc11a8 --- /dev/null +++ b/preprint/arxiv.sty @@ -0,0 +1,256 @@ +\NeedsTeXFormat{LaTeX2e} + +\ProcessOptions\relax + +% fonts +\renewcommand{\rmdefault}{ptm} +\renewcommand{\sfdefault}{phv} + +% set page geometry +\usepackage[verbose=true,letterpaper]{geometry} +\AtBeginDocument{ + \newgeometry{ + textheight=9in, + textwidth=6.5in, + top=1in, + headheight=14pt, + headsep=25pt, + footskip=30pt + } +} + +\widowpenalty=10000 +\clubpenalty=10000 +\flushbottom +\sloppy + +\usepackage{graphicx} +\usepackage{fancyhdr} +\fancyhf{} +\pagestyle{fancy} +\renewcommand{\headrulewidth}{0pt} +\fancyheadoffset{0pt} +\rhead{\scshape A preprint - \today} +\cfoot{\thepage} + + +%Handling Keywords +\def\keywordname{{\bfseries \emph Keywords}}% +\def\keywords#1{\par\addvspace\medskipamount{\rightskip=0pt plus1cm +\def\and{\ifhmode\unskip\nobreak\fi\ $\cdot$ +}\noindent\keywordname\enspace\ignorespaces#1\par}} + +% font sizes with reduced leading +\renewcommand{\normalsize}{% + \@setfontsize\normalsize\@xpt\@xipt + \abovedisplayskip 7\p@ \@plus 2\p@ \@minus 5\p@ + \abovedisplayshortskip \z@ \@plus 3\p@ + \belowdisplayskip \abovedisplayskip + \belowdisplayshortskip 4\p@ \@plus 3\p@ \@minus 3\p@ +} +\normalsize +\renewcommand{\small}{% + \@setfontsize\small\@ixpt\@xpt + \abovedisplayskip 6\p@ \@plus 1.5\p@ \@minus 4\p@ + \abovedisplayshortskip \z@ \@plus 2\p@ + \belowdisplayskip \abovedisplayskip + \belowdisplayshortskip 3\p@ \@plus 2\p@ \@minus 2\p@ +} +\renewcommand{\footnotesize}{\@setfontsize\footnotesize\@ixpt\@xpt} +\renewcommand{\scriptsize}{\@setfontsize\scriptsize\@viipt\@viiipt} +\renewcommand{\tiny}{\@setfontsize\tiny\@vipt\@viipt} +\renewcommand{\large}{\@setfontsize\large\@xiipt{14}} +\renewcommand{\Large}{\@setfontsize\Large\@xivpt{16}} +\renewcommand{\LARGE}{\@setfontsize\LARGE\@xviipt{20}} +\renewcommand{\huge}{\@setfontsize\huge\@xxpt{23}} +\renewcommand{\Huge}{\@setfontsize\Huge\@xxvpt{28}} + +% sections with less space +\providecommand{\section}{} +\renewcommand{\section}{% + \@startsection{section}{1}{\z@}% + {-2.0ex \@plus -0.5ex \@minus -0.2ex}% + { 1.5ex \@plus 0.3ex \@minus 0.2ex}% + {\large\bf\raggedright}% +} +\providecommand{\subsection}{} +\renewcommand{\subsection}{% + \@startsection{subsection}{2}{\z@}% + {-1.8ex \@plus -0.5ex \@minus -0.2ex}% + { 0.8ex \@plus 0.2ex}% + {\normalsize\bf\raggedright}% +} +\providecommand{\subsubsection}{} +\renewcommand{\subsubsection}{% + \@startsection{subsubsection}{3}{\z@}% + {-1.5ex \@plus -0.5ex \@minus -0.2ex}% + { 0.5ex \@plus 0.2ex}% + {\normalsize\bf\raggedright}% +} +\providecommand{\paragraph}{} +\renewcommand{\paragraph}{% + \@startsection{paragraph}{4}{\z@}% + {1.5ex \@plus 0.5ex \@minus 0.2ex}% + {-1em}% + {\normalsize\bf}% +} +\providecommand{\subparagraph}{} +\renewcommand{\subparagraph}{% + \@startsection{subparagraph}{5}{\z@}% + {1.5ex \@plus 0.5ex \@minus 0.2ex}% + {-1em}% + {\normalsize\bf}% +} +\providecommand{\subsubsubsection}{} +\renewcommand{\subsubsubsection}{% + \vskip5pt{\noindent\normalsize\rm\raggedright}% +} + +% float placement +\renewcommand{\topfraction }{0.85} +\renewcommand{\bottomfraction }{0.4} +\renewcommand{\textfraction }{0.1} +\renewcommand{\floatpagefraction}{0.7} + +\newlength{\@abovecaptionskip}\setlength{\@abovecaptionskip}{7\p@} +\newlength{\@belowcaptionskip}\setlength{\@belowcaptionskip}{\z@} + +\setlength{\abovecaptionskip}{\@abovecaptionskip} +\setlength{\belowcaptionskip}{\@belowcaptionskip} + +% swap above/belowcaptionskip lengths for tables +\renewenvironment{table} + {\setlength{\abovecaptionskip}{\@belowcaptionskip}% + \setlength{\belowcaptionskip}{\@abovecaptionskip}% + \@float{table}} + {\end@float} + +% footnote formatting +\setlength{\footnotesep }{6.65\p@} +\setlength{\skip\footins}{9\p@ \@plus 4\p@ \@minus 2\p@} +\renewcommand{\footnoterule}{\kern-3\p@ \hrule width 12pc \kern 2.6\p@} +\setcounter{footnote}{0} + +% paragraph formatting +\setlength{\parindent}{\z@} +\setlength{\parskip }{5.5\p@} + +% list formatting +\setlength{\topsep }{4\p@ \@plus 1\p@ \@minus 2\p@} +\setlength{\partopsep }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@} +\setlength{\itemsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@} +\setlength{\parsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@} +\setlength{\leftmargin }{3pc} +\setlength{\leftmargini }{\leftmargin} +\setlength{\leftmarginii }{2em} +\setlength{\leftmarginiii}{1.5em} +\setlength{\leftmarginiv }{1.0em} +\setlength{\leftmarginv }{0.5em} +\def\@listi {\leftmargin\leftmargini} +\def\@listii {\leftmargin\leftmarginii + \labelwidth\leftmarginii + \advance\labelwidth-\labelsep + \topsep 2\p@ \@plus 1\p@ \@minus 0.5\p@ + \parsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@ + \itemsep \parsep} +\def\@listiii{\leftmargin\leftmarginiii + \labelwidth\leftmarginiii + \advance\labelwidth-\labelsep + \topsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@ + \parsep \z@ + \partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@ + \itemsep \topsep} +\def\@listiv {\leftmargin\leftmarginiv + \labelwidth\leftmarginiv + \advance\labelwidth-\labelsep} +\def\@listv {\leftmargin\leftmarginv + \labelwidth\leftmarginv + \advance\labelwidth-\labelsep} +\def\@listvi {\leftmargin\leftmarginvi + \labelwidth\leftmarginvi + \advance\labelwidth-\labelsep} + +% create title +\providecommand{\maketitle}{} +\renewcommand{\maketitle}{% + \par + \begingroup + \renewcommand{\thefootnote}{\fnsymbol{footnote}} + % for perfect author name centering + \renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}} + % The footnote-mark was overlapping the footnote-text, + % added the following to fix this problem (MK) + \long\def\@makefntext##1{% + \parindent 1em\noindent + \hbox to 1.8em{\hss $\m@th ^{\@thefnmark}$}##1 + } + \thispagestyle{empty} + \@maketitle + \@thanks + %\@notice + \endgroup + \let\maketitle\relax + \let\thanks\relax +} + +% rules for title box at top of first page +\newcommand{\@toptitlebar}{ + \hrule height 2\p@ + \vskip 0.25in + \vskip -\parskip% +} +\newcommand{\@bottomtitlebar}{ + \vskip 0.29in + \vskip -\parskip + \hrule height 2\p@ + \vskip 0.09in% +} + +% create title (includes both anonymized and non-anonymized versions) +\providecommand{\@maketitle}{} +\renewcommand{\@maketitle}{% + \vbox{% + \hsize\textwidth + \linewidth\hsize + \vskip 0.1in + \@toptitlebar + \centering + {\LARGE\sc \@title\par} + \@bottomtitlebar + \textsc{A Preprint}\\ + \vskip 0.1in + \def\And{% + \end{tabular}\hfil\linebreak[0]\hfil% + \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces% + } + \def\AND{% + \end{tabular}\hfil\linebreak[4]\hfil% + \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces% + } + \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}% + \vskip 0.4in \@minus 0.1in \center{\today} \vskip 0.2in + } +} + +% add conference notice to bottom of first page +\newcommand{\ftype@noticebox}{8} +\newcommand{\@notice}{% + % give a bit of extra room back to authors on first page + \enlargethispage{2\baselineskip}% + \@float{noticebox}[b]% + \footnotesize\@noticestring% + \end@float% +} + +% abstract styling +\renewenvironment{abstract} +{ + \centerline + {\large \bfseries \scshape Abstract} + \begin{quote} +} +{ + \end{quote} +} + +\endinput diff --git a/preprint/references.bib b/preprint/references.bib new file mode 100644 index 0000000..263d6f9 --- /dev/null +++ b/preprint/references.bib @@ -0,0 +1,161 @@ +@manual{icon, + author = {Aaron Clauset and Ellen Tucker and Matthias Sainz}, + title = {The Colorado Index of Complex Networks}, + year = {2016}, + url = {https://icon.colorado.edu/} +} + +@article{ripser, + title = {Ripser: efficient computation of Vietoris-Rips persistence barcodes}, + author = {Ulrich Bauer}, + year = {2019}, + pages = {1908.02518}, + journal = {arXiv} +} + +@article{cubicalripser, + title = {Cubical Ripser: Software for computing persistent homology of image and volume data}, + author = {Shizuo Kaji and Takeki Sudo and Kazushi Ahara}, + year = {2020}, + pages = {2005.12692}, + journal = {arXiv} +} + +@article{flagser, + title = {Computing persistent homology of directed flag complexes}, + author = {Daniel Luetgehetmann and Dejan Govc and Jason Smith and Ran Levi}, + year = {2019}, + pages = {1906.10458}, + journal = {arXiv} +} + +@article{lockfreeripser, + title = {Towards Lockfree Persistent Homology}, + author = {Dmitriy Morozov and Arnur Nigmetov}, + year = {2020}, + journal = {Proceedings of the 32nd ACM Symposium on Parallelism in Algorithms and Architectures}, + pages = {555--557} +} + +@article{ripserpp, + title = {GPU-Accelerated Computation of Vietoris-Rips Persistence Barcodes}, + author = {Simon Zhang and Mengbai Xiao and Hao Wang}, + year = {2020}, + pages = {2003.07989}, + journal = {arXiv} +} + +@book{python, + author = {Van Rossum, Guido and Drake, Fred L.}, + title = {Python 3 Reference Manual}, + year = {2009}, + isbn = {1441412697}, + publisher = {CreateSpace}, + address = {Scotts Valley, CA} +} + +@Article{tdastats, + author = {Raoul R Wadhwa and Drew F K Williamson and Andrew Dhawan and Jacob G Scott}, + title = {{TDAstats}: {R} pipeline for computing persistent homology in topological data analysis}, + journal = {Journal of Open Source Software}, + year = {2018}, + volume = {3}, + number = {28}, + pages = {860} +} + +@Article{ext-persist, + author = {David Cohen-Steiner and Herbert Edelsbrunner and John Harer}, + title = {Extending Persistence Using Poincaré and Lefschetz Duality}, + journal = {Foundations of Computational Mathematics}, + year = {2009}, + volume = {9}, + pages = {79--103} +} + +@Manual{rlang, + title = {R: A Language and Environment for Statistical Computing}, + author = {{R Core Team}}, + organization = {R Foundation for Statistical Computing}, + address = {Vienna, Austria}, + year = {2020}, + url = {https://www.R-project.org}, +} + +@Article{roadmap-phom, + author = {Nina Otter and Mason A Porter and Ulrike Tillmann and Peter Grindrod and Heather A Harrington}, + title = {A roadmap for the computation of persistent homology}, + journal = {EPJ Data Science}, + year = {2017}, + volume = {6}, + pages = {17} +} + +@Book{tda-compute, + author = {Afra J Zomorodian}, + title = {Topology for Computing}, + year = {2009}, + publisher = {Cambridge University Press}, + address = {New York, NY} +} + +@Article{ts-phom, + author = {Yuhei Umeda}, + title = {Time Series Classification via Topological Data Analysis}, + journal = {Transactions of the Japanese Society for Artificial Intelligence}, + year = {2017}, + volume = {32}, + number = {3}, + pages = {D-G72_1--12} +} + +@Article{cubical-phom, + author = {Michael Werman and Matthew L Wright}, + title = {Intrinsic Volumes of Random Cubical Complexes}, + journal = {Discrete and Computational Geometry}, + year = {2016}, + volume = {56}, + pages = {93--113} +} + +@Article{rcpp, + author = {Dirk Eddelbuettel and Romain Francois}, + title = {Rcpp: Seamless {R} and {C++} Integration}, + journal = {Journal of Statistical Software}, + year = {2011}, + volume = {40}, + number = {8}, + pages = {1--18} +} + +@Manual{simplextree, + title = {simplextree: Provides Tools for Working with General Simplicial Complexes}, + author = {Matt Piekenbrock}, + year = {2020}, + note = {R package version 1.0.1}, + url = {https://CRAN.R-project.org/package=simplextree}, +} + +@Manual{tdaunif, + title = {tdaunif: Uniform Manifold Samplers for Topological Data Analysis}, + author = {Jason Cory Brunson and Brandon Demkowicz and Sanmati Choudhary}, + year = {2020}, + note = {R package version 0.1.0}, + url = {https://CRAN.R-project.org/package=tdaunif}, +} + +@Manual{tdamapper, + title = {TDAmapper: Analyze High-Dimensional Data Using Discrete Morse Theory}, + author = {Paul Pearson and Daniel Muellner and Gurjeet Singh}, + year = {2015}, + note = {R package version 1.0}, + url = {https://CRAN.R-project.org/package=TDAmapper}, +} + +@Manual{mapper, + title = {Mapper: Topological Data Analysis using Mapper}, + author = {Matt Piekenbrock}, + year = {2019}, + note = {R package version 0.9.2}, + url = {https://CRAN.R-project.org/package=Mapper} +} diff --git a/preprint/ripserr-preprint.Rmd b/preprint/ripserr-preprint.Rmd new file mode 100644 index 0000000..6dbe645 --- /dev/null +++ b/preprint/ripserr-preprint.Rmd @@ -0,0 +1,221 @@ +--- +title: Calculating persistent homology in R with ripserr +authors: + - name: Raoul R. Wadhwa + department: Cleveland Clinic Lerner College of Medicine + affiliation: Case Western Reserve University + location: Cleveland, OH 44195, United States + email: raoul.wadhwa@case.edu + - name: Matthew Piekenbrock, MS + department: Computational Mathematics, Science, \& Engineering + affiliation: Michigan State University + location: East Lansing, MI 48824, United States + email: matt.piekenbrock@gmail.com + - name: Jacob G. Scott, MD, DPhil + department: Translational Hematology \& Oncology Research + affiliation: Lerner Research Institute, Cleveland Clinic + location: Cleveland, OH 44195, United States + email: ScottJ10@ccf.org +abstract: | + We introduce ripserr, an R package that enables efficient persistent homology computation by wrapping Ripser-based C++ libraries via Rcpp. + The PHom S3 class provides a container for calculated persistence data and will serve as a means for integrating ripserr with other R packages that implement functionality related to topological data analysis. + The vietoris_rips generic wraps the Ripser library, which computes Vietoris-Rips simplicial homology and the cubical generic wraps the Cubical Ripser library, which computes cubical simplicial homology. + Currently, the Comprehensive R Archive Network hosts ripserr v0.1.1. + We hope that ripserr will be used by researchers to incorporate topological insight into data analysis and machine learning pipelines. + The open source code for ripserr and for this reproducible report can be found at http://github.com/rrrlw/ripserr. +keywords: + - persistent homology + - topological data analysis + - simplicial complex + - R programming language + - Ripser +bibliography: references.bib +output: rticles::arxiv_article +--- + +# Introduction + +Calculating persistent homology presents a computationally expensive challenge, particularly for large and high-dimensional datasets. +Ripser, a C++ library, addresses this obstacle and significantly reduces processing time and memory usage compared to contemporary persistent homology engines [@ripser]. +Although Ripser calculates persistent homology of a Vietoris-Rips complex, other libraries have built upon its foundation [@cubicalripser; @flagser; @lockfreeripser; @ripserpp]. +With the increasing popularity of persistent homology, wrapping Ripser and related C++ engines in the R programming language would permit easier incorporation into data analysis and machine learning pipelines [@rlang;@rcpp]. +Here, we introduce `ripserr`, an R package that wraps the Ripser and Cubical Ripser C++ libraries, allowing efficient persistent homology computation of Vieoris-Rips and cubical complexes. + +Before we explore the `ripserr` package, we load the required libraries and set a PRNG seed for reproducibility. + +```{r setup} +library("ripserr") +library("tdaunif") +library("ggplot2") +library("ggtda") # remotes::install_github("rrrlw/ggtda") + +set.seed(42) +``` + +# `PHom` S3 class + +When incorporating persistent homology in R pipelines, we must consider the best container to hold persistence data. +`ripserr` uses the `PHom` (portmanteau of persistent homology) S3 class to manage persistence data. +Note that more complex forms of persistence (e.g. extended persistence [@ext-persist]) are not discussed. + +At its core, the `PHom` class simply encompasses a data frame storing persistence feature details. +Each data frame row represents a single feature and each column represents a single feature property. +Structurally, the data frame within a `PHom` object has three columns named `"dimension"`, `"birth"`, and `"death"` (in that order) of class `integer`, `numeric`, and `numeric`, respectively. +Although `PHom`-specific S3 generic methods have been implemented, users may also benefit by using `data.frame`-specific S3 generic methods; for this reason, `PHom` objects have class `PHom` and `data.frame`. + +To get a better intuition for `PHom` objects, the following code block demonstrates the class's basic functionality. +We first create a data frame containing synthetic persistence data, convert it to a `PHom` instance, then explore how to work with its new form. + +```{r basic-ex, error=TRUE} +# synthetic persistence data +df <- data.frame(dimension = c(rep(0, 2), rep(1, 3), rep(2, 1)), + birth = rnorm(6), + death = rnorm(6, mean = 15)) + +# print synthetic persistence data +print(df) + +# convert to PHom +df_phom <- PHom(df) + +# print +print(df_phom) + +# print feature details of PHom object +print.data.frame(df_phom) + +# can convert with PHom or as.PHom +df_phom2 <- as.PHom(df) +all.equal(df_phom, df_phom2) + +# adjust synthetic data with invalid persistence data (death < birth) +df$death[1] <- df$birth[1] - 0.1 +print(df) + +# error when converting to PHom object +PHom(df) +``` + +# Persistent homology of a Vietoris-Rips complex + +The `vietoris_rips` S3 generic enables computation of persistent homology via a Vietoris-Rips complex. +Within `ripserr`, users can calculate Vietoris-Rips homology of point clouds (stored in `data.frame`, `matrix`, or `dist` objects) or time series (stored in `numeric` vectors or `ts` objects). +The methodology behind the former can be found in @roadmap-phom and @tda-compute and the quasi-attractor method to calculate persistent homology of time series can be found in @ts-phom. + +The following code demonstrates the various point cloud formats that `vietoris_rips` accepts and shows that the same dataset in different formats results in the same persistent data output. + +```{r vr-cloud, fig.height=2.5} +# use tdaunif library for sample dataset +circ_mat <- sample_circle(25) +colnames(circ_mat) <- c("x", "y") + +# peek at generated point cloud +ggplot(as.data.frame(circ_mat), aes(x = x, y = y)) + + geom_point() + coord_fixed() + theme_bw() + +# calculate persistent homology +circ_phom <- vietoris_rips(circ_mat) + +# print persistent homology summary and details +print(circ_phom) +head(circ_phom) +tail(circ_phom) +``` + +```{r vr-phom-2, fig.height=2} +# plot topological barcode of persistence data +ggplot(circ_phom, aes(start = birth, end = death, colour = as.factor(dimension))) + + geom_barcode() + theme_barcode() + + scale_color_discrete(name = "dimension") + +# calculate persistent homology for other data formats +circ_df <- as.data.frame(circ_mat) +circ_dist <- dist(circ_mat) + +# confirm equality across formats +all.equal(circ_phom, vietoris_rips(circ_df)) +all.equal(circ_phom, vietoris_rips(circ_dist)) +``` + +By adjusting the `max_dim` parameter, `vietoris_rips` can calculate persistence data including higher dimensional features. +We demonstrate this by sampling points from the surface of a sphere. + +```{r vr-maxdim, fig.height=2.5} +# sample points from surface of unit sphere +sphere <- sample_sphere(100, dim = 2) + +# calculate persistent homology +sphere_phom <- vietoris_rips(sphere, max_dim = 2L) + +# print and plot persistence data +sphere_phom +ggplot(sphere_phom, aes(start = birth, end = death, colour = as.factor(dimension))) + + geom_barcode() + theme_barcode() + + scale_color_manual(name = "dimension", values = c("red", "black", "blue")) +``` + +Persistence data can also be calculated in various prime finite fields ($\mathbb{Z}/p\mathbb{Z}$) determined by the `p` parameter; all valid values of `p` should result in the same persistence data output. + +```{r primefield} +# calculate circle persistence in various prime fields +circ_phom1 <- vietoris_rips(circ_mat, p = 2L) +circ_phom2 <- vietoris_rips(circ_mat, p = 3L) +circ_phom3 <- vietoris_rips(circ_mat, p = 5L) + +# confirm equality +all.equal(circ_phom1, circ_phom2) +all.equal(circ_phom1, circ_phom3) +``` + +# Persistent homology of a cubical complex + +The `cubical` S3 generic enables computation of persistent homology via a cubical complex. +Users can calculate simplicial homology of 2-dimensional (e.g. image), 3-dimensional (e.g. video), or 4-dimensional (e.g. multiple videos over time) `array` objects. +More details on cubical complexes can be found in @cubical-phom. +The following code block illustrates basic use of `cubical`, including the `method` parameter. + +```{r cubical, fig.height=2.5, fig.width=2.5} +# create synthetic dataset +sample_image <- array(data = 0, dim = rep(10, 2)) +i <- c(2, 9) +j <- 2:9 +sample_image[i, j] <- rnorm(n = 16, mean = 10) +sample_image[j, i] <- rnorm(n = 16, mean = 10) + +# view image +par(mar = rep(0, 4)) +graphics::image(sample_image, useRaster = TRUE, axes = FALSE) +``` + +```{r cubical-2,fig.height=2.5} +# calculate cubical homology +image_phom <- cubical(sample_image) + +# print homology +print(image_phom) +print.data.frame(image_phom) + +# plot homology +ggplot(image_phom, aes(start = birth, end = death, color = as.factor(dimension))) + + geom_barcode() + theme_barcode() + + scale_color_discrete(name = "dimension") + +# compare output from two methods +# "lj" = Link Join; "cp" = Compute Pairs +image_phom2 <- cubical(sample_image, method = "cp") +all.equal(image_phom, image_phom2) +``` + +Due to intrinsic properties of the underlying C++ Cubical Ripser library, `array`s passed to `cubical` have the following size limitations: 2-dimensional `array`s must be smaller than `2000x1000` elements; 3-dimensional `array`s must be smaller than `512x512x512`; and 4-dimensional `array`s must be smaller than `64x64x64x64`. + +# Discussion + +As a low-level topological data analysis package, `ripserr` provides the basic tools necessary to calculate persistent homology. +By providing the `PHom` class as a container for persistence data that preserves data frame functionality, `ripserr` can be easily integrated in an exploratory manner in data analysis pipelines, for feature extraction in machine learning pipelines, and for early calculation in topological inference pipelines. +Future directions for `ripserr` could involve inclusion of more Ripser-based engines, refinement of the `PHom` class, and improved integration with other R packages implementing topological data analysis [@simplextree;@tdaunif;@tdamapper;@mapper;@tdastats]. + +# Acknowledgments + +The authors thank Jason Cory Brunson, PhD for advice regarding `ripserr` package design. + +# References