From dc2ccc2bca7cb37eb625a0b897990113cdd585fa Mon Sep 17 00:00:00 2001 From: Raoul W Date: Sat, 24 Oct 2020 10:55:23 -0400 Subject: [PATCH 01/10] added basic arxiv template --- .Rbuildignore | 1 + preprint/.gitignore | 3 + preprint/arxiv.sty | 255 ++++++++++++++++++++++++++++++++++ preprint/references.bib | 65 +++++++++ preprint/ripserr preprint.Rmd | 41 ++++++ 5 files changed, 365 insertions(+) create mode 100644 preprint/.gitignore create mode 100644 preprint/arxiv.sty create mode 100644 preprint/references.bib create mode 100644 preprint/ripserr preprint.Rmd diff --git a/.Rbuildignore b/.Rbuildignore index 5d29c6e..94cc404 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -15,3 +15,4 @@ ^NEWS\.md$ ^CONTRIBUTING\.md$ ^CODE_OF_CONDUCT\.md$ +^preprint$ diff --git a/preprint/.gitignore b/preprint/.gitignore new file mode 100644 index 0000000..1123ee1 --- /dev/null +++ b/preprint/.gitignore @@ -0,0 +1,3 @@ +# ignore Rmarkdown output files +*.pdf +*.tex diff --git a/preprint/arxiv.sty b/preprint/arxiv.sty new file mode 100644 index 0000000..f32d6d8 --- /dev/null +++ b/preprint/arxiv.sty @@ -0,0 +1,255 @@ +\NeedsTeXFormat{LaTeX2e} + +\ProcessOptions\relax + +% fonts +\renewcommand{\rmdefault}{ptm} +\renewcommand{\sfdefault}{phv} + +% set page geometry +\usepackage[verbose=true,letterpaper]{geometry} +\AtBeginDocument{ + \newgeometry{ + textheight=9in, + textwidth=6.5in, + top=1in, + headheight=14pt, + headsep=25pt, + footskip=30pt + } +} + +\widowpenalty=10000 +\clubpenalty=10000 +\flushbottom +\sloppy + +\usepackage{fancyhdr} +\fancyhf{} +\pagestyle{fancy} +\renewcommand{\headrulewidth}{0pt} +\fancyheadoffset{0pt} +\rhead{\scshape A preprint - \today} +\cfoot{\thepage} + + +%Handling Keywords +\def\keywordname{{\bfseries \emph Keywords}}% +\def\keywords#1{\par\addvspace\medskipamount{\rightskip=0pt plus1cm +\def\and{\ifhmode\unskip\nobreak\fi\ $\cdot$ +}\noindent\keywordname\enspace\ignorespaces#1\par}} + +% font sizes with reduced leading +\renewcommand{\normalsize}{% + \@setfontsize\normalsize\@xpt\@xipt + \abovedisplayskip 7\p@ \@plus 2\p@ \@minus 5\p@ + \abovedisplayshortskip \z@ \@plus 3\p@ + \belowdisplayskip \abovedisplayskip + \belowdisplayshortskip 4\p@ \@plus 3\p@ \@minus 3\p@ +} +\normalsize +\renewcommand{\small}{% + \@setfontsize\small\@ixpt\@xpt + \abovedisplayskip 6\p@ \@plus 1.5\p@ \@minus 4\p@ + \abovedisplayshortskip \z@ \@plus 2\p@ + \belowdisplayskip \abovedisplayskip + \belowdisplayshortskip 3\p@ \@plus 2\p@ \@minus 2\p@ +} +\renewcommand{\footnotesize}{\@setfontsize\footnotesize\@ixpt\@xpt} +\renewcommand{\scriptsize}{\@setfontsize\scriptsize\@viipt\@viiipt} +\renewcommand{\tiny}{\@setfontsize\tiny\@vipt\@viipt} +\renewcommand{\large}{\@setfontsize\large\@xiipt{14}} +\renewcommand{\Large}{\@setfontsize\Large\@xivpt{16}} +\renewcommand{\LARGE}{\@setfontsize\LARGE\@xviipt{20}} +\renewcommand{\huge}{\@setfontsize\huge\@xxpt{23}} +\renewcommand{\Huge}{\@setfontsize\Huge\@xxvpt{28}} + +% sections with less space +\providecommand{\section}{} +\renewcommand{\section}{% + \@startsection{section}{1}{\z@}% + {-2.0ex \@plus -0.5ex \@minus -0.2ex}% + { 1.5ex \@plus 0.3ex \@minus 0.2ex}% + {\large\bf\raggedright}% +} +\providecommand{\subsection}{} +\renewcommand{\subsection}{% + \@startsection{subsection}{2}{\z@}% + {-1.8ex \@plus -0.5ex \@minus -0.2ex}% + { 0.8ex \@plus 0.2ex}% + {\normalsize\bf\raggedright}% +} +\providecommand{\subsubsection}{} +\renewcommand{\subsubsection}{% + \@startsection{subsubsection}{3}{\z@}% + {-1.5ex \@plus -0.5ex \@minus -0.2ex}% + { 0.5ex \@plus 0.2ex}% + {\normalsize\bf\raggedright}% +} +\providecommand{\paragraph}{} +\renewcommand{\paragraph}{% + \@startsection{paragraph}{4}{\z@}% + {1.5ex \@plus 0.5ex \@minus 0.2ex}% + {-1em}% + {\normalsize\bf}% +} +\providecommand{\subparagraph}{} +\renewcommand{\subparagraph}{% + \@startsection{subparagraph}{5}{\z@}% + {1.5ex \@plus 0.5ex \@minus 0.2ex}% + {-1em}% + {\normalsize\bf}% +} +\providecommand{\subsubsubsection}{} +\renewcommand{\subsubsubsection}{% + \vskip5pt{\noindent\normalsize\rm\raggedright}% +} + +% float placement +\renewcommand{\topfraction }{0.85} +\renewcommand{\bottomfraction }{0.4} +\renewcommand{\textfraction }{0.1} +\renewcommand{\floatpagefraction}{0.7} + +\newlength{\@abovecaptionskip}\setlength{\@abovecaptionskip}{7\p@} +\newlength{\@belowcaptionskip}\setlength{\@belowcaptionskip}{\z@} + +\setlength{\abovecaptionskip}{\@abovecaptionskip} +\setlength{\belowcaptionskip}{\@belowcaptionskip} + +% swap above/belowcaptionskip lengths for tables +\renewenvironment{table} + {\setlength{\abovecaptionskip}{\@belowcaptionskip}% + \setlength{\belowcaptionskip}{\@abovecaptionskip}% + \@float{table}} + {\end@float} + +% footnote formatting +\setlength{\footnotesep }{6.65\p@} +\setlength{\skip\footins}{9\p@ \@plus 4\p@ \@minus 2\p@} +\renewcommand{\footnoterule}{\kern-3\p@ \hrule width 12pc \kern 2.6\p@} +\setcounter{footnote}{0} + +% paragraph formatting +\setlength{\parindent}{\z@} +\setlength{\parskip }{5.5\p@} + +% list formatting +\setlength{\topsep }{4\p@ \@plus 1\p@ \@minus 2\p@} +\setlength{\partopsep }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@} +\setlength{\itemsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@} +\setlength{\parsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@} +\setlength{\leftmargin }{3pc} +\setlength{\leftmargini }{\leftmargin} +\setlength{\leftmarginii }{2em} +\setlength{\leftmarginiii}{1.5em} +\setlength{\leftmarginiv }{1.0em} +\setlength{\leftmarginv }{0.5em} +\def\@listi {\leftmargin\leftmargini} +\def\@listii {\leftmargin\leftmarginii + \labelwidth\leftmarginii + \advance\labelwidth-\labelsep + \topsep 2\p@ \@plus 1\p@ \@minus 0.5\p@ + \parsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@ + \itemsep \parsep} +\def\@listiii{\leftmargin\leftmarginiii + \labelwidth\leftmarginiii + \advance\labelwidth-\labelsep + \topsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@ + \parsep \z@ + \partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@ + \itemsep \topsep} +\def\@listiv {\leftmargin\leftmarginiv + \labelwidth\leftmarginiv + \advance\labelwidth-\labelsep} +\def\@listv {\leftmargin\leftmarginv + \labelwidth\leftmarginv + \advance\labelwidth-\labelsep} +\def\@listvi {\leftmargin\leftmarginvi + \labelwidth\leftmarginvi + \advance\labelwidth-\labelsep} + +% create title +\providecommand{\maketitle}{} +\renewcommand{\maketitle}{% + \par + \begingroup + \renewcommand{\thefootnote}{\fnsymbol{footnote}} + % for perfect author name centering + \renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}} + % The footnote-mark was overlapping the footnote-text, + % added the following to fix this problem (MK) + \long\def\@makefntext##1{% + \parindent 1em\noindent + \hbox to 1.8em{\hss $\m@th ^{\@thefnmark}$}##1 + } + \thispagestyle{empty} + \@maketitle + \@thanks + %\@notice + \endgroup + \let\maketitle\relax + \let\thanks\relax +} + +% rules for title box at top of first page +\newcommand{\@toptitlebar}{ + \hrule height 2\p@ + \vskip 0.25in + \vskip -\parskip% +} +\newcommand{\@bottomtitlebar}{ + \vskip 0.29in + \vskip -\parskip + \hrule height 2\p@ + \vskip 0.09in% +} + +% create title (includes both anonymized and non-anonymized versions) +\providecommand{\@maketitle}{} +\renewcommand{\@maketitle}{% + \vbox{% + \hsize\textwidth + \linewidth\hsize + \vskip 0.1in + \@toptitlebar + \centering + {\LARGE\sc \@title\par} + \@bottomtitlebar + \textsc{A Preprint}\\ + \vskip 0.1in + \def\And{% + \end{tabular}\hfil\linebreak[0]\hfil% + \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces% + } + \def\AND{% + \end{tabular}\hfil\linebreak[4]\hfil% + \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces% + } + \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}% + \vskip 0.4in \@minus 0.1in \center{\today} \vskip 0.2in + } +} + +% add conference notice to bottom of first page +\newcommand{\ftype@noticebox}{8} +\newcommand{\@notice}{% + % give a bit of extra room back to authors on first page + \enlargethispage{2\baselineskip}% + \@float{noticebox}[b]% + \footnotesize\@noticestring% + \end@float% +} + +% abstract styling +\renewenvironment{abstract} +{ + \centerline + {\large \bfseries \scshape Abstract} + \begin{quote} +} +{ + \end{quote} +} + +\endinput diff --git a/preprint/references.bib b/preprint/references.bib new file mode 100644 index 0000000..e4b5731 --- /dev/null +++ b/preprint/references.bib @@ -0,0 +1,65 @@ +@manual{icon, + author = {Aaron Clauset and Ellen Tucker and Matthias Sainz}, + title = {The Colorado Index of Complex Networks}, + year = {2016}, + url = {https://icon.colorado.edu/} +} + +@article{ripser, + title = {Ripser: efficient computation of Vietoris-Rips persistence barcodes}, + author = {Ulrich Bauer}, + year = {2019}, + pages = {1908.02518}, + journal = {arXiv} +} + +@article{cubicalripser, + title = {Cubical Ripser: Software for computing persistent homology of image and volume data}, + author = {Shizuo Kaji and Takeki Sudo and Kazushi Ahara}, + year = {2020}, + pages = {2005.12692}, + journal = {arXiv} +} + +@article{flagser, + title = {Computing persistent homology of directed flag complexes}, + author = {Daniel Luetgehetmann and Dejan Govc and Jason Smith and Ran Levi}, + year = {2019}, + pages = {1906.10458}, + journal = {arXiv} +} + +@article{lockfreeripser, + title = {Towards Lockfree Persistent Homology}, + author = {Dmitriy Morozov and Arnur Nigmetov}, + year = {2020}, + journal = {Proceedings of the 32nd ACM Symposium on Parallelism in Algorithms and Architectures}, + pages = {555--557}, + doi = {10.1145/3350755.3400244} +} + +@article{ripserpp, + title = {GPU-Accelerated Computation of Vietoris-Rips Persistence Barcodes}, + author = {Simon Zhang and Mengbai Xiao and Hao Wang}, + year = {2020}, + pages = {2003.07989}, + journal = {arXiv} +} + +@book{python, + author = {Van Rossum, Guido and Drake, Fred L.}, + title = {Python 3 Reference Manual}, + year = {2009}, + isbn = {1441412697}, + publisher = {CreateSpace}, + address = {Scotts Valley, CA} +} + +@Manual{rlang, + title = {R: A Language and Environment for Statistical Computing}, + author = {{R Core Team}}, + organization = {R Foundation for Statistical Computing}, + address = {Vienna, Austria}, + year = {2020}, + url = {https://www.R-project.org}, +} \ No newline at end of file diff --git a/preprint/ripserr preprint.Rmd b/preprint/ripserr preprint.Rmd new file mode 100644 index 0000000..b90831e --- /dev/null +++ b/preprint/ripserr preprint.Rmd @@ -0,0 +1,41 @@ +--- +title: Calculating persistent homology with ripserr +authors: + - name: Raoul R. Wadhwa + department: Cleveland Clinic Lerner College of Medicine + affiliation: Case Western Reserve University + location: Cleveland, OH 44195 + email: raoul.wadhwa@case.edu + - name: Jacob G. Scott + department: Translational Hematology \& Oncology Research + affiliation: Lerner Research Institute, Cleveland Clinic + location: Cleveland, OH 44195 + email: ScottJ10@ccf.org +abstract: | + Enter the text of your abstract here. +keywords: + - blah + - blee + - bloo +bibliography: references.bib +output: rticles::arxiv_article +--- + +# Introduction + +Calculating persistent homology via a Vietoris-Rips complex presents a computationally expensive challenge, particularly for high-dimensional datasets. +Ripser, a C++ library, addresses this obstacle and significantly reduces processing time and memory usage compared to other available persistent homology libraries [@ripser]. +Although Ripser calculates persistent homology of a Vietoris-Rips complex, other libraries have taken advantage of its general technique to calculate persistent homology of cubical complexes [@cubicalripser], directed flag complexes [@flagser], or to provide other functionality [@lockfreeripser; @ripserpp]. +With increasing use of persistent homology, effective incorporation of Ripser and Ripser-based engines in data analysis pipelines would serve a useful purpose. +The R programming language [@rlang] has been popular with data scientists [@cite-something +Here, we introduce `ripserr`, a software package that wraps Ripser and Ripser-based C++ libraries to calculate persistent homology in the R programming language. + +# Implementation + +\lipsum[1] + +# Use case + +# Discussion/Conclusions + +# References From 26ed17a0416591b811e8a3509d008aa88ba8185f Mon Sep 17 00:00:00 2001 From: Raoul W Date: Mon, 16 Nov 2020 13:53:44 -0500 Subject: [PATCH 02/10] updated skeleton --- preprint/ripserr preprint.Rmd | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/preprint/ripserr preprint.Rmd b/preprint/ripserr preprint.Rmd index b90831e..1b75c15 100644 --- a/preprint/ripserr preprint.Rmd +++ b/preprint/ripserr preprint.Rmd @@ -6,6 +6,11 @@ authors: affiliation: Case Western Reserve University location: Cleveland, OH 44195 email: raoul.wadhwa@case.edu + - name: Matthew Piekenbrock + department: Department of Computational Mathematics, Science, and Engineering + affiliation: Michigan State University + location: East Lansing, MI 48824 + email: matt.piekenbrock@gmail.com - name: Jacob G. Scott department: Translational Hematology \& Oncology Research affiliation: Lerner Research Institute, Cleveland Clinic @@ -30,9 +35,27 @@ With increasing use of persistent homology, effective incorporation of Ripser an The R programming language [@rlang] has been popular with data scientists [@cite-something Here, we introduce `ripserr`, a software package that wraps Ripser and Ripser-based C++ libraries to calculate persistent homology in the R programming language. -# Implementation +# PHom S3 class -\lipsum[1] +## structure + +## basic functions + +# vietoris_rips + +## overview and basic parameters + +## generic methods + +## prime fields + +# cubical + +## overview and basic parameters + +## generic methods + +## size limitations # Use case From 602e25fe6c6657d4035be2fe0a3db9ad98b9920b Mon Sep 17 00:00:00 2001 From: Raoul W Date: Tue, 17 Nov 2020 12:14:25 -0500 Subject: [PATCH 03/10] updated preprint skeleton --- preprint/references.bib | 10 ++++++++++ preprint/ripserr preprint.Rmd | 24 +++++++++++++++--------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/preprint/references.bib b/preprint/references.bib index e4b5731..4cdcbf7 100644 --- a/preprint/references.bib +++ b/preprint/references.bib @@ -55,6 +55,16 @@ @book{python address = {Scotts Valley, CA} } +@Article{tdastats, + author = {Raoul R Wadhwa and Drew F K Williamson and Andrew Dhawan and Jacob G Scott}, + title = {{TDAstats}: {R} pipeline for computing persistent homology in topological data analysis}, + journal = {Journal of Open Source Software}, + year = {2018}, + volume = {3}, + number = {28}, + pages = {860} +} + @Manual{rlang, title = {R: A Language and Environment for Statistical Computing}, author = {{R Core Team}}, diff --git a/preprint/ripserr preprint.Rmd b/preprint/ripserr preprint.Rmd index 1b75c15..d417a8a 100644 --- a/preprint/ripserr preprint.Rmd +++ b/preprint/ripserr preprint.Rmd @@ -1,20 +1,20 @@ --- -title: Calculating persistent homology with ripserr +title: Calculating persistent homology in R with ripserr authors: - name: Raoul R. Wadhwa department: Cleveland Clinic Lerner College of Medicine affiliation: Case Western Reserve University - location: Cleveland, OH 44195 + location: Cleveland, OH 44195, United States email: raoul.wadhwa@case.edu - - name: Matthew Piekenbrock - department: Department of Computational Mathematics, Science, and Engineering + - name: Matthew Piekenbrock, MS + department: Computational Mathematics, Science, \& Engineering affiliation: Michigan State University - location: East Lansing, MI 48824 + location: East Lansing, MI 48824, United States email: matt.piekenbrock@gmail.com - - name: Jacob G. Scott + - name: Jacob G. Scott, MD, DPhil department: Translational Hematology \& Oncology Research affiliation: Lerner Research Institute, Cleveland Clinic - location: Cleveland, OH 44195 + location: Cleveland, OH 44195, United States email: ScottJ10@ccf.org abstract: | Enter the text of your abstract here. @@ -32,7 +32,7 @@ Calculating persistent homology via a Vietoris-Rips complex presents a computati Ripser, a C++ library, addresses this obstacle and significantly reduces processing time and memory usage compared to other available persistent homology libraries [@ripser]. Although Ripser calculates persistent homology of a Vietoris-Rips complex, other libraries have taken advantage of its general technique to calculate persistent homology of cubical complexes [@cubicalripser], directed flag complexes [@flagser], or to provide other functionality [@lockfreeripser; @ripserpp]. With increasing use of persistent homology, effective incorporation of Ripser and Ripser-based engines in data analysis pipelines would serve a useful purpose. -The R programming language [@rlang] has been popular with data scientists [@cite-something +The R programming language [@rlang] has been popular with data scientists [@cite-something]. Here, we introduce `ripserr`, a software package that wraps Ripser and Ripser-based C++ libraries to calculate persistent homology in the R programming language. # PHom S3 class @@ -49,6 +49,8 @@ Here, we introduce `ripserr`, a software package that wraps Ripser and Ripser-ba ## prime fields +## use case + # cubical ## overview and basic parameters @@ -57,8 +59,12 @@ Here, we introduce `ripserr`, a software package that wraps Ripser and Ripser-ba ## size limitations -# Use case +## use case # Discussion/Conclusions +## comparison to TDAstats and TDA + +## future directions + # References From eddebe857c327018e942fdcc7fd438be35c2de8f Mon Sep 17 00:00:00 2001 From: Raoul W Date: Wed, 18 Nov 2020 11:10:04 -0500 Subject: [PATCH 04/10] updated intro --- preprint/ripserr preprint.Rmd | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/preprint/ripserr preprint.Rmd b/preprint/ripserr preprint.Rmd index d417a8a..14b6072 100644 --- a/preprint/ripserr preprint.Rmd +++ b/preprint/ripserr preprint.Rmd @@ -28,12 +28,11 @@ output: rticles::arxiv_article # Introduction -Calculating persistent homology via a Vietoris-Rips complex presents a computationally expensive challenge, particularly for high-dimensional datasets. -Ripser, a C++ library, addresses this obstacle and significantly reduces processing time and memory usage compared to other available persistent homology libraries [@ripser]. -Although Ripser calculates persistent homology of a Vietoris-Rips complex, other libraries have taken advantage of its general technique to calculate persistent homology of cubical complexes [@cubicalripser], directed flag complexes [@flagser], or to provide other functionality [@lockfreeripser; @ripserpp]. -With increasing use of persistent homology, effective incorporation of Ripser and Ripser-based engines in data analysis pipelines would serve a useful purpose. -The R programming language [@rlang] has been popular with data scientists [@cite-something]. -Here, we introduce `ripserr`, a software package that wraps Ripser and Ripser-based C++ libraries to calculate persistent homology in the R programming language. +Calculating persistent homology presents a computationally expensive challenge, particularly for large and high-dimensional datasets. +Ripser, a C++ library, addresses this obstacle and significantly reduces processing time and memory usage compared to contemporary persistent homology engines [@ripser]. +Although Ripser calculates persistent homology of a Vietoris-Rips complex, other libraries have built upon its foundation [@cubicalripser; @flagser; @lockfreeripser; @ripserpp]. +With the increasing popularity of persistent homology, wrapping Ripser and related engines in the R programming language [@rlang] would permit easier incorporation into data analysis and machine learning pipelines. +Here, we introduce `ripserr`, an R package that wraps the Ripser and Cubical Ripser C++ libraries, allowing efficient persistent homology computation of Vieoris-Rips and cubical complexes. # PHom S3 class From 7997ef9ebf99d810af049295eda4b4f52af92e4f Mon Sep 17 00:00:00 2001 From: Raoul W Date: Thu, 19 Nov 2020 10:36:41 -0500 Subject: [PATCH 05/10] added PHom class details --- preprint/references.bib | 9 +++++++ ...serr preprint.Rmd => ripserr-preprint.Rmd} | 26 ++++++++++++++----- 2 files changed, 29 insertions(+), 6 deletions(-) rename preprint/{ripserr preprint.Rmd => ripserr-preprint.Rmd} (61%) diff --git a/preprint/references.bib b/preprint/references.bib index 4cdcbf7..771b1d7 100644 --- a/preprint/references.bib +++ b/preprint/references.bib @@ -65,6 +65,15 @@ @Article{tdastats pages = {860} } +@Article{ext-persist, + author = {David Cohen-Steiner and Herbert Edelsbrunner and John Harer}, + title = {Extending Persistence Using Poincaré and Lefschetz Duality}, + journal = {Foundations of Computational Mathematics}, + year = {2009}, + volume = {9}, + pages = {79--103} +} + @Manual{rlang, title = {R: A Language and Environment for Statistical Computing}, author = {{R Core Team}}, diff --git a/preprint/ripserr preprint.Rmd b/preprint/ripserr-preprint.Rmd similarity index 61% rename from preprint/ripserr preprint.Rmd rename to preprint/ripserr-preprint.Rmd index 14b6072..6289d31 100644 --- a/preprint/ripserr preprint.Rmd +++ b/preprint/ripserr-preprint.Rmd @@ -19,9 +19,11 @@ authors: abstract: | Enter the text of your abstract here. keywords: - - blah - - blee - - bloo + - persistent homology + - topological data analysis + - simplicial complex + - R programming language + - Ripser bibliography: references.bib output: rticles::arxiv_article --- @@ -34,11 +36,19 @@ Although Ripser calculates persistent homology of a Vietoris-Rips complex, other With the increasing popularity of persistent homology, wrapping Ripser and related engines in the R programming language [@rlang] would permit easier incorporation into data analysis and machine learning pipelines. Here, we introduce `ripserr`, an R package that wraps the Ripser and Cubical Ripser C++ libraries, allowing efficient persistent homology computation of Vieoris-Rips and cubical complexes. -# PHom S3 class +# `PHom` S3 class -## structure +When incorporating persistent homology in R pipelines, we must consider the best container to hold persistence data. +`ripserr` uses the `PHom` (portmanteau of persistent homology) S3 class to manage persistence data. +Note that more complex forms of persistence (e.g. extended persistence [@ext-persist]) are not discussed. + +At its core, the `PHom` class simply encompasses a data frame storing persistence feature details. +Each data frame row represents a single feature and each column represents a single feature property. +Structurally, the data frame within a `PHom` object has three columns named `"dimension"`, `"birth"`, and `"death"` (in that order) of class `integer`, `numeric`, and `numeric`, respectively. +Although `PHom`-specific S3 generic methods have been implemented, users may also benefit by using `data.frame`-specific S3 generic methods; for this reason, `PHom` objects have class `PHom` and `data.frame`. + +To get a better intuition for `PHom` objects, the following code blocks demonstrate the class's basic functionality. -## basic functions # vietoris_rips @@ -66,4 +76,8 @@ Here, we introduce `ripserr`, an R package that wraps the Ripser and Cubical Rip ## future directions +# Acknowledgments + +The authors thank Jason Cory Brunson, PhD for advice regarding `ripserr` package design. + # References From 0da17a1f858d543e95bf6253fc248a46b5909b44 Mon Sep 17 00:00:00 2001 From: Raoul W Date: Thu, 19 Nov 2020 10:58:33 -0500 Subject: [PATCH 06/10] shorten error msgs --- R/PHom.R | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/R/PHom.R b/R/PHom.R index 1f02a6a..cc8ddda 100644 --- a/R/PHom.R +++ b/R/PHom.R @@ -28,7 +28,8 @@ validate_PHom <- function(x, error = TRUE) { if (!("PHom" %in% class(x) & "data.frame" %in% class(x))) { if (error) { stop(paste("PHom objects must have classes `PHom` and `data.frame`;", - "paseed value has class =", class(x))) + "paseed value has class =", class(x)), + call. = FALSE) } else { return(FALSE) } @@ -36,7 +37,8 @@ validate_PHom <- function(x, error = TRUE) { if (ncol(x) != 3) { if (error) { stop(paste("PHom objects must be data frames with exactly 3 columns;", - "passed object has", ncol(x), "columns.")) + "passed object has", ncol(x), "columns."), + call. = FALSE) } else { return(FALSE) } @@ -45,7 +47,8 @@ validate_PHom <- function(x, error = TRUE) { if (error) { stop(paste("`dimension` column in PHom objects must have class integer;", "`dimension` column in passed object has class =", - class(x$dimension))) + class(x$dimension)), + call. = FALSE) } else { return(FALSE) } @@ -54,7 +57,8 @@ validate_PHom <- function(x, error = TRUE) { if (error) { stop(paste("`birth` and `death` columns in PHom objects must have", "class integer; respective columns in passed object have", - "classes =", class(x$birth), "and", class(x$death))) + "classes =", class(x$birth), "and", class(x$death)), + call. = FALSE) } else { return(FALSE) } @@ -64,7 +68,8 @@ validate_PHom <- function(x, error = TRUE) { if (!all(x$birth < x$death)) { if (error) { stop(paste("In PHom objects, all births must be before corresponding", - "deaths.")) + "deaths."), + call. = FALSE) } else { return(FALSE) } @@ -85,16 +90,19 @@ valid_colval <- function(df, val, val_name) { if (!(val %in% seq_len(ncol(df)))) { stop(paste(val_name, "parameter must be a valid column index for the", - "provided data frame; passed value =", val)) + "provided data frame; passed value =", val), + call. = FALSE) } } else if (is.character(val)) { if (!(val %in% colnames(df))) { stop(paste(val_name, "parameter must be a valid column name for the", - "provided data frame; passed value =", val)) + "provided data frame; passed value =", val), + call. = FALSE) } } else { stop(paste(val_name, "parameter must either be a valid column name or", - "index; passed value =", val)) + "index; passed value =", val), + call. = FALSE) } } From 83f346cd8ea34943add6a1988e30b08d5ba7faee Mon Sep 17 00:00:00 2001 From: Raoul W Date: Thu, 19 Nov 2020 10:58:40 -0500 Subject: [PATCH 07/10] updated preprint --- preprint/ripserr-preprint.Rmd | 39 ++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/preprint/ripserr-preprint.Rmd b/preprint/ripserr-preprint.Rmd index 6289d31..9c91d84 100644 --- a/preprint/ripserr-preprint.Rmd +++ b/preprint/ripserr-preprint.Rmd @@ -47,10 +47,43 @@ Each data frame row represents a single feature and each column represents a sin Structurally, the data frame within a `PHom` object has three columns named `"dimension"`, `"birth"`, and `"death"` (in that order) of class `integer`, `numeric`, and `numeric`, respectively. Although `PHom`-specific S3 generic methods have been implemented, users may also benefit by using `data.frame`-specific S3 generic methods; for this reason, `PHom` objects have class `PHom` and `data.frame`. -To get a better intuition for `PHom` objects, the following code blocks demonstrate the class's basic functionality. +To get a better intuition for `PHom` objects, the following code block demonstrates the class's basic functionality. +We first create a data frame containing synthetic persistence data, convert it to a `PHom` instance, then explore how to work with its new form. +```{r basic-ex, error=TRUE} +# setup +library("ripserr") -# vietoris_rips +# synthetic persistence data +df <- data.frame(dimension = c(rep(0, 2), rep(1, 3), rep(2, 1)), + birth = rnorm(6), + death = rnorm(6, mean = 15)) + +# print synthetic persistence data +print(df) + +# convert to PHom +df_phom <- PHom(df) + +# print +print(df_phom) + +# print feature details of PHom object +print.data.frame(df_phom) + +# can convert with PHom or as.PHom +df_phom2 <- as.PHom(df) +all.equal(df_phom, df_phom2) + +# adjust synthetic data with invalid persistence data (death < birth) +df$death[1] <- df$birth[1] - 0.1 +print(df) + +# error when converting to PHom object +PHom(df) +``` + +# Persistent homology of a Vietoris-Rips complex ## overview and basic parameters @@ -60,7 +93,7 @@ To get a better intuition for `PHom` objects, the following code blocks demonstr ## use case -# cubical +# Persistent homology of a cubical complex ## overview and basic parameters From 0c34e232bbb5d2752b2ed5c98586994fb6155968 Mon Sep 17 00:00:00 2001 From: Raoul W Date: Fri, 20 Nov 2020 12:34:32 -0500 Subject: [PATCH 08/10] added content to preprint --- preprint/arxiv.sty | 1 + preprint/references.bib | 41 ++++++++++- preprint/ripserr-preprint.Rmd | 126 ++++++++++++++++++++++++++++++---- 3 files changed, 151 insertions(+), 17 deletions(-) diff --git a/preprint/arxiv.sty b/preprint/arxiv.sty index f32d6d8..0cc11a8 100644 --- a/preprint/arxiv.sty +++ b/preprint/arxiv.sty @@ -24,6 +24,7 @@ \flushbottom \sloppy +\usepackage{graphicx} \usepackage{fancyhdr} \fancyhf{} \pagestyle{fancy} diff --git a/preprint/references.bib b/preprint/references.bib index 771b1d7..b7d207b 100644 --- a/preprint/references.bib +++ b/preprint/references.bib @@ -34,8 +34,7 @@ @article{lockfreeripser author = {Dmitriy Morozov and Arnur Nigmetov}, year = {2020}, journal = {Proceedings of the 32nd ACM Symposium on Parallelism in Algorithms and Architectures}, - pages = {555--557}, - doi = {10.1145/3350755.3400244} + pages = {555--557} } @article{ripserpp, @@ -81,4 +80,40 @@ @Manual{rlang address = {Vienna, Austria}, year = {2020}, url = {https://www.R-project.org}, -} \ No newline at end of file +} + +@Article{roadmap-phom, + author = {Nina Otter and Mason A Porter and Ulrike Tillmann and Peter Grindrod and Heather A Harrington}, + title = {A roadmap for the computation of persistent homology}, + journal = {EPJ Data Science}, + year = {2017}, + volume = {6}, + pages = {17} +} + +@Book{tda-compute, + author = {Afra J Zomorodian}, + title = {Topology for Computing}, + year = {2009}, + publisher = {Cambridge University Press}, + address = {New York, NY} +} + +@Article{ts-phom, + author = {Yuhei Umeda}, + title = {Time Series Classification via Topological Data Analysis}, + journal = {Transactions of the Japanese Society for Artificial Intelligence}, + year = {2017}, + volume = {32}, + number = {3}, + pages = {D-G72_1--12} +} + +@Article{cubical-phom, + author = {Michael Werman and Matthew L Wright}, + title = {Intrinsic Volumes of Random Cubical Complexes}, + journal = {Discrete and Computational Geometry}, + year = {2016}, + volume = {56}, + pages = {93--113} +} diff --git a/preprint/ripserr-preprint.Rmd b/preprint/ripserr-preprint.Rmd index 9c91d84..8d9ff3c 100644 --- a/preprint/ripserr-preprint.Rmd +++ b/preprint/ripserr-preprint.Rmd @@ -36,6 +36,17 @@ Although Ripser calculates persistent homology of a Vietoris-Rips complex, other With the increasing popularity of persistent homology, wrapping Ripser and related engines in the R programming language [@rlang] would permit easier incorporation into data analysis and machine learning pipelines. Here, we introduce `ripserr`, an R package that wraps the Ripser and Cubical Ripser C++ libraries, allowing efficient persistent homology computation of Vieoris-Rips and cubical complexes. +Before we explore the `ripserr` package, we load the required libraries and set a PRNG seed for reproducibility. + +```{r setup} +library("ripserr") +library("tdaunif") +library("ggplot2") +library("ggtda") # remotes::install_github("rrrlw/ggtda") + +set.seed(42) +``` + # `PHom` S3 class When incorporating persistent homology in R pipelines, we must consider the best container to hold persistence data. @@ -51,9 +62,6 @@ To get a better intuition for `PHom` objects, the following code block demonstra We first create a data frame containing synthetic persistence data, convert it to a `PHom` instance, then explore how to work with its new form. ```{r basic-ex, error=TRUE} -# setup -library("ripserr") - # synthetic persistence data df <- data.frame(dimension = c(rep(0, 2), rep(1, 3), rep(2, 1)), birth = rnorm(6), @@ -85,29 +93,119 @@ PHom(df) # Persistent homology of a Vietoris-Rips complex -## overview and basic parameters +The `vietoris_rips` S3 generic enables computation of persistent homology via a Vietoris-Rips complex. +Within `ripserr`, users can calculate Vietoris-Rips homology of point clouds (stored in `data.frame`, `matrix`, or `dist` objects) or time series (stored in `numeric` vectors or `ts` objects). +The methodology behind the former can be found in @roadmap-phom and @tda-compute and the quasi-attractor method to calculate persistent homology of time series can be found in @ts-phom. + +The following code demonstrates the various point cloud formats that `vietoris_rips` accepts and shows that the same dataset in different formats results in the same persistent data output. + +```{r vr-cloud, fig.height=2.5} +# use tdaunif library for sample dataset +circ_mat <- sample_circle(25) +colnames(circ_mat) <- c("x", "y") + +# peek at generated point cloud +ggplot(as.data.frame(circ_mat), aes(x = x, y = y)) + + geom_point() + coord_fixed() + theme_bw() + +# calculate persistent homology +circ_phom <- vietoris_rips(circ_mat) + +# print persistent homology summary and details +print(circ_phom) +head(circ_phom) +tail(circ_phom) + +# plot topological barcode of persistence data +ggplot(circ_phom, aes(start = birth, end = death, colour = as.factor(dimension))) + + geom_barcode() + theme_barcode() + + scale_color_discrete(name = "dimension") -## generic methods +# calculate persistent homology for other data formats +circ_df <- as.data.frame(circ_mat) +circ_dist <- dist(circ_mat) -## prime fields +# confirm equality across formats +all.equal(circ_phom, vietoris_rips(circ_df)) +all.equal(circ_phom, vietoris_rips(circ_dist)) +``` + +By adjusting the `max_dim` parameter, `vietoris_rips` can calculate persistence data including higher dimensional features. +We demonstrate this by sampling points from the surface of a sphere. + +```{r vr-maxdim} +# sample points from surface of unit sphere +sphere <- sample_sphere(100, dim = 2) + +# calculate persistent homology +sphere_phom <- vietoris_rips(sphere, max_dim = 2L) + +# print and plot persistence data +sphere_phom +ggplot(sphere_phom, aes(start = birth, end = death, colour = as.factor(dimension))) + + geom_barcode() + theme_barcode() + + scale_color_manual(name = "dimension", values = c("red", "black", "blue")) +``` + +Persistence data can also be calculated in various prime finite fields ($\mathbb{Z}/p\mathbb{Z}$) determined by the `p` parameter; all valid values of `p` should result in the same persistence data output. -## use case +```{r primefield} +# calculate circle persistence in various prime fields +circ_phom1 <- vietoris_rips(circ_mat, p = 2L) +circ_phom2 <- vietoris_rips(circ_mat, p = 3L) +circ_phom3 <- vietoris_rips(circ_mat, p = 5L) + +# confirm equality +all.equal(circ_phom1, circ_phom2) +all.equal(circ_phom1, circ_phom3) +``` # Persistent homology of a cubical complex -## overview and basic parameters +The `cubical` S3 generic enables computation of persistent homology via a cubical complex. +Users can calculate simplicial homology of 2-dimensional (e.g. image), 3-dimensional (e.g. video), or 4-dimensional (e.g. multiple videos over time) `array` objects. +More details on cubical complexes can be found in @cubical-phom. +The following code block illustrates basic use of `cubical`, including the `method` parameter. + +```{r cubical, fig.height=2.5, fig.width=2.5} +# create synthetic dataset +sample_image <- array(data = 0, dim = rep(10, 2)) +i <- c(2, 9) +j <- 2:9 +sample_image[i, j] <- rnorm(n = 16, mean = 10) +sample_image[j, i] <- rnorm(n = 16, mean = 10) + +# view image +par(mar = rep(0, 4)) +graphics::image(sample_image, useRaster = TRUE, axes = FALSE) +``` + +```{r cubical-2,fig.height=2} +# calculate cubical homology +image_phom <- cubical(sample_image) + +# print homology +print(image_phom) +print.data.frame(image_phom) -## generic methods +# plot homology +ggplot(image_phom, aes(start = birth, end = death, color = as.factor(dimension))) + + geom_barcode() + theme_barcode() + + scale_color_discrete(name = "dimension") -## size limitations +# compare output from two methods +# "lj" = Link Join; "cp" = Compute Pairs +image_phom2 <- cubical(sample_image, method = "cp") +all.equal(image_phom, image_phom2) +``` -## use case +Due to intrinsic properties of the underlying C++ Cubical Ripser library, `array`s passed to `cubical` have the following size limitations: 2-dimensional `array`s must be smaller than `2000x1000` elements; 3-dimensional `array`s must be smaller than `512x512x512`; and 4-dimensional `array`s must be smaller than `64x64x64x64`. -# Discussion/Conclusions +# Discussion and future directions -## comparison to TDAstats and TDA +## role in TDA pipelines -## future directions +## extending `vietoris_rips` and `cubical` # Acknowledgments From 637c748e0a09873c08f867a1694a5092ecec1efe Mon Sep 17 00:00:00 2001 From: Raoul W Date: Fri, 20 Nov 2020 13:22:31 -0500 Subject: [PATCH 09/10] added preprint content --- preprint/references.bib | 42 +++++++++++++++++++++++++++++++++++ preprint/ripserr-preprint.Rmd | 10 ++++----- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/preprint/references.bib b/preprint/references.bib index b7d207b..263d6f9 100644 --- a/preprint/references.bib +++ b/preprint/references.bib @@ -117,3 +117,45 @@ @Article{cubical-phom volume = {56}, pages = {93--113} } + +@Article{rcpp, + author = {Dirk Eddelbuettel and Romain Francois}, + title = {Rcpp: Seamless {R} and {C++} Integration}, + journal = {Journal of Statistical Software}, + year = {2011}, + volume = {40}, + number = {8}, + pages = {1--18} +} + +@Manual{simplextree, + title = {simplextree: Provides Tools for Working with General Simplicial Complexes}, + author = {Matt Piekenbrock}, + year = {2020}, + note = {R package version 1.0.1}, + url = {https://CRAN.R-project.org/package=simplextree}, +} + +@Manual{tdaunif, + title = {tdaunif: Uniform Manifold Samplers for Topological Data Analysis}, + author = {Jason Cory Brunson and Brandon Demkowicz and Sanmati Choudhary}, + year = {2020}, + note = {R package version 0.1.0}, + url = {https://CRAN.R-project.org/package=tdaunif}, +} + +@Manual{tdamapper, + title = {TDAmapper: Analyze High-Dimensional Data Using Discrete Morse Theory}, + author = {Paul Pearson and Daniel Muellner and Gurjeet Singh}, + year = {2015}, + note = {R package version 1.0}, + url = {https://CRAN.R-project.org/package=TDAmapper}, +} + +@Manual{mapper, + title = {Mapper: Topological Data Analysis using Mapper}, + author = {Matt Piekenbrock}, + year = {2019}, + note = {R package version 0.9.2}, + url = {https://CRAN.R-project.org/package=Mapper} +} diff --git a/preprint/ripserr-preprint.Rmd b/preprint/ripserr-preprint.Rmd index 8d9ff3c..5978255 100644 --- a/preprint/ripserr-preprint.Rmd +++ b/preprint/ripserr-preprint.Rmd @@ -33,7 +33,7 @@ output: rticles::arxiv_article Calculating persistent homology presents a computationally expensive challenge, particularly for large and high-dimensional datasets. Ripser, a C++ library, addresses this obstacle and significantly reduces processing time and memory usage compared to contemporary persistent homology engines [@ripser]. Although Ripser calculates persistent homology of a Vietoris-Rips complex, other libraries have built upon its foundation [@cubicalripser; @flagser; @lockfreeripser; @ripserpp]. -With the increasing popularity of persistent homology, wrapping Ripser and related engines in the R programming language [@rlang] would permit easier incorporation into data analysis and machine learning pipelines. +With the increasing popularity of persistent homology, wrapping Ripser and related C++ engines in the R programming language would permit easier incorporation into data analysis and machine learning pipelines [@rlang;@rcpp]. Here, we introduce `ripserr`, an R package that wraps the Ripser and Cubical Ripser C++ libraries, allowing efficient persistent homology computation of Vieoris-Rips and cubical complexes. Before we explore the `ripserr` package, we load the required libraries and set a PRNG seed for reproducibility. @@ -201,11 +201,11 @@ all.equal(image_phom, image_phom2) Due to intrinsic properties of the underlying C++ Cubical Ripser library, `array`s passed to `cubical` have the following size limitations: 2-dimensional `array`s must be smaller than `2000x1000` elements; 3-dimensional `array`s must be smaller than `512x512x512`; and 4-dimensional `array`s must be smaller than `64x64x64x64`. -# Discussion and future directions +# Discussion -## role in TDA pipelines - -## extending `vietoris_rips` and `cubical` +As a low-level topological data analysis package, `ripserr` provides the basic tools necessary to calculate persistent homology. +By providing the `PHom` class as a container for persistence data that preserves data frame functionality, `ripserr` can be easily integrated in an exploratory manner in data analysis pipelines, for feature extraction in machine learning pipelines, and for early calculation in topological inference pipelines. +Future directions for `ripserr` could involve inclusion of more Ripser-based engines, refinement of the `PHom` class, and improved integration with other R packages implementing topological data analysis [@simplextree;@tdaunif;@tdamapper;@mapper;@tdastats]. # Acknowledgments From 50807b3f59c3ea2337edc485c78602fad94b6c13 Mon Sep 17 00:00:00 2001 From: Raoul W Date: Fri, 20 Nov 2020 13:42:09 -0500 Subject: [PATCH 10/10] added abstract --- preprint/ripserr-preprint.Rmd | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/preprint/ripserr-preprint.Rmd b/preprint/ripserr-preprint.Rmd index 5978255..6dbe645 100644 --- a/preprint/ripserr-preprint.Rmd +++ b/preprint/ripserr-preprint.Rmd @@ -17,7 +17,12 @@ authors: location: Cleveland, OH 44195, United States email: ScottJ10@ccf.org abstract: | - Enter the text of your abstract here. + We introduce ripserr, an R package that enables efficient persistent homology computation by wrapping Ripser-based C++ libraries via Rcpp. + The PHom S3 class provides a container for calculated persistence data and will serve as a means for integrating ripserr with other R packages that implement functionality related to topological data analysis. + The vietoris_rips generic wraps the Ripser library, which computes Vietoris-Rips simplicial homology and the cubical generic wraps the Cubical Ripser library, which computes cubical simplicial homology. + Currently, the Comprehensive R Archive Network hosts ripserr v0.1.1. + We hope that ripserr will be used by researchers to incorporate topological insight into data analysis and machine learning pipelines. + The open source code for ripserr and for this reproducible report can be found at http://github.com/rrrlw/ripserr. keywords: - persistent homology - topological data analysis @@ -115,7 +120,9 @@ circ_phom <- vietoris_rips(circ_mat) print(circ_phom) head(circ_phom) tail(circ_phom) +``` +```{r vr-phom-2, fig.height=2} # plot topological barcode of persistence data ggplot(circ_phom, aes(start = birth, end = death, colour = as.factor(dimension))) + geom_barcode() + theme_barcode() + @@ -133,7 +140,7 @@ all.equal(circ_phom, vietoris_rips(circ_dist)) By adjusting the `max_dim` parameter, `vietoris_rips` can calculate persistence data including higher dimensional features. We demonstrate this by sampling points from the surface of a sphere. -```{r vr-maxdim} +```{r vr-maxdim, fig.height=2.5} # sample points from surface of unit sphere sphere <- sample_sphere(100, dim = 2) @@ -180,7 +187,7 @@ par(mar = rep(0, 4)) graphics::image(sample_image, useRaster = TRUE, axes = FALSE) ``` -```{r cubical-2,fig.height=2} +```{r cubical-2,fig.height=2.5} # calculate cubical homology image_phom <- cubical(sample_image)