From 184639eeeedbe35003c1793fab2da668b501e45d Mon Sep 17 00:00:00 2001
From: Mohammed Ali <mohammed@aggregate-genius.com>
Date: Thu, 12 Feb 2026 12:20:11 +0200
Subject: [PATCH] Fix invalid DOI in paper.bib and delete docs/paper.{html,md}

---
 docs/paper.html | 227 ------------------------------
 docs/paper.md   | 356 ------------------------------------------------
 paper.bib       |  10 +-
 3 files changed, 5 insertions(+), 588 deletions(-)
 delete mode 100644 docs/paper.html
 delete mode 100644 docs/paper.md
diff --git a/docs/paper.html b/docs/paper.html
deleted file mode 100644
index 17cefe59..00000000
--- a/docs/paper.html
+++ /dev/null
@@ -1,227 +0,0 @@
-<!DOCTYPE html>
-<!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><title>Summary • dbparser</title><!-- favicons --><link rel="icon" type="image/png" sizes="96x96" href="favicon-96x96.png"><link rel="icon" type="”image/svg+xml”" href="favicon.svg"><link rel="apple-touch-icon" sizes="180x180" href="apple-touch-icon.png"><link rel="icon" sizes="any" href="favicon.ico"><link rel="manifest" href="site.webmanifest"><script src="deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><link href="deps/bootstrap-5.3.1/bootstrap.min.css" rel="stylesheet"><script src="deps/bootstrap-5.3.1/bootstrap.bundle.min.js"></script><link href="deps/font-awesome-6.5.2/css/all.min.css" rel="stylesheet"><link href="deps/font-awesome-6.5.2/css/v4-shims.min.css" rel="stylesheet"><script src="deps/headroom-0.11.0/headroom.min.js"></script><script src="deps/headroom-0.11.0/jQuery.headroom.min.js"></script><script src="deps/bootstrap-toc-1.0.1/bootstrap-toc.min.js"></script><script src="deps/clipboard.js-2.0.11/clipboard.min.js"></script><script src="deps/search-1.0.0/autocomplete.jquery.min.js"></script><script src="deps/search-1.0.0/fuse.min.js"></script><script src="deps/search-1.0.0/mark.min.js"></script><!-- pkgdown --><script src="pkgdown.js"></script><meta property="og:title" content="Summary"><meta property="og:image" content="https://docs.ropensci.org/dbparser/logo.png"></head><body>
-    <a href="#main" class="visually-hidden-focusable">Skip to contents</a>
-
-
-    <nav class="navbar navbar-expand-lg fixed-top bg-primary" data-bs-theme="dark" aria-label="Site navigation"><div class="container">
-
-    <a class="navbar-brand me-2" href="index.html">dbparser</a>
-
-    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">2.2.1.9000</small>
-
-
-    <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
-      <span class="navbar-toggler-icon"></span>
-    </button>
-
-    <div id="navbar" class="collapse navbar-collapse ms-3">
-      <ul class="navbar-nav me-auto"><li class="nav-item"><a class="nav-link" href="articles/dbparser.html">Get started</a></li>
-<li class="nav-item"><a class="nav-link" href="reference/index.html">Reference</a></li>
-<li class="nav-item dropdown">
-  <button class="nav-link dropdown-toggle" type="button" id="dropdown-articles" data-bs-toggle="dropdown" aria-expanded="false" aria-haspopup="true">Articles</button>
-  <ul class="dropdown-menu" aria-labelledby="dropdown-articles"><li><a class="dropdown-item" href="articles/dbparser_2_2.html">Introducing dbparser 2.2.0</a></li>
-    <li><a class="dropdown-item" href="articles/drugbank_nside.html">Integrated Pharmacovigilance: Linking DrugBank, OnSIDES, and TWOSIDES</a></li>
-  </ul></li>
-<li class="nav-item"><a class="nav-link" href="news/index.html">Changelog</a></li>
-      </ul><ul class="navbar-nav"><li class="nav-item"><form class="form-inline" role="search">
- <input class="form-control" type="search" name="search-input" id="search-input" autocomplete="off" aria-label="Search site" placeholder="Search for" data-search-index="search.json"></form></li>
-<li class="nav-item"><a class="external-link nav-link" href="https://github.com/ropensci/dbparser" aria-label="GitHub"><span class="fa fab fa-github"></span></a></li>
-      </ul></div>
-
-
-  </div>
-</nav><div class="container template-title-body">
-<div class="row">
-  <main id="main" class="col-md-9"><div class="page-header">
-      <img src="logo.png" class="logo" alt=""><h1>Summary</h1>
-      <small class="dont-index">Source: <a href="https://github.com/ropensci/dbparser/blob/HEAD/paper.md" class="external-link"><code>paper.md</code></a></small>
-    </div>
-
-
-<div id="summary" class="section level1">
-
-<p><code>dbparser</code> is an rOpenSci peer-reviewed R package that provides a unified framework for parsing and integrating major pharmacological and pharmacovigilance databases into standardized, analysis-ready R objects. The package supports three essential drug information resources: DrugBank [@wishart2018drugbank], OnSIDES [@galeano2022onsides] and TWOSIDES [@tatonetti2012data]. Each database is parsed into a consistent nested list structure called a <code>dvobject</code>, which preserves complex relational hierarchies while enabling seamless cross-database integration. By providing high-performance parsing functions, chainable merge operations, and comprehensive metadata tracking, <code>dbparser</code> eliminates a significant bottleneck in computational pharmacology research and enables reproducible, large-scale drug safety analyses.</p>
-</div>
-<div class="section level1">
-<h1 id="statement-of-need">Statement of Need<a class="anchor" aria-label="anchor" href="#statement-of-need"></a></h1>
-<p>Pharmacological research increasingly relies on integrating heterogeneous data sources to understand drug mechanisms, predict adverse effects, and identify drug-drug interactions. Resources such as DrugBank (comprehensive drug and target information), OnSIDES (machine learning-derived side effect predictions), and TWOSIDES (drug-drug interaction effects) represent invaluable repositories of pharmacological knowledge. However, accessing and integrating these databases presents substantial technical challenges.</p>
-<p>Each database employs distinct file formats and structural conventions: DrugBank distributes data as deeply nested XML with complex entity relationships; OnSIDES provides multiple relational CSV files requiring careful joining; TWOSIDES offers compressed flat files with different identifier systems. Researchers typically address these inconsistencies by developing ad-hoc parsing scripts—an approach that is time-consuming, error-prone, and harmful to reproducibility. Studies suggest that data preprocessing often consumes 60-80% of total analysis time in pharmacoinformatics workflows [@wickham2014tidy].</p>
-<p>The R ecosystem, despite its strength in statistical analysis and visualization, lacks dedicated tools for pharmacological database integration. While Bioconductor [@gentleman2004bioconductor] provides excellent infrastructure for genomics data, no equivalent standardized framework exists for drug databases. <code>dbparser</code> addresses this gap by providing unified parsing functions, chainable integration workflows, rich metadata preservation, and high-performance implementations that transform weeks of custom development into minutes of reproducible analysis.</p>
-</div>
-<div class="section level1">
-<h1 id="software-design">Software Design<a class="anchor" aria-label="anchor" href="#software-design"></a></h1>
-<div class="section level2">
-<h2 id="design-philosophy-and-trade-offs">Design Philosophy and Trade-offs<a class="anchor" aria-label="anchor" href="#design-philosophy-and-trade-offs"></a></h2>
-<p><code>dbparser</code>’s architecture reflects three core design decisions that emerged from extensive experience with pharmacological data analysis workflows:</p>
-<p><strong>Unified <code>dvobject</code> Structure vs. Database-Specific Formats:</strong> We chose to transform all databases into a consistent nested list structure rather than preserving native formats. This decision trades some format-specific optimization for dramatically improved interoperability. The <code>dvobject</code> maintains the relational structure of each source database while providing consistent access patterns, enabling users to apply identical analysis code across different data sources. Each <code>dvobject</code> contains three components: (1) tidy data tables compatible with the tidyverse ecosystem [@wickham2019welcome], (2) comprehensive metadata (version, parse timestamp, schema information), and (3) relationship mappings documenting cross-table linkages.</p>
-<p><strong>Hub-and-Spoke Integration Model:</strong> Rather than attempting all-to-all database linking, we implemented DrugBank as the central integration hub. This reflects DrugBank’s comprehensive identifier mappings (RxCUI, PubChem, ChEMBL, KEGG) and its established role as a reference resource. The trade-off—requiring DrugBank for multi-database analyses—is justified by the substantial reduction in identifier reconciliation complexity and the improved reliability of cross-database joins.</p>
-<p><strong>Chainable Merge Operations:</strong> Integration functions are designed for pipeline composition using the magrittr pipe operator, enabling workflows like <code>drugbank_db %&gt;% merge_drugbank_onsides(onsides_db) %&gt;% merge_drugbank_twosides(twosides_db)</code>. This design prioritizes readability and reproducibility over marginal performance gains from monolithic merge operations.</p>
-</div>
-<div class="section level2">
-<h2 id="build-vs-contribute-justification">Build vs. Contribute Justification<a class="anchor" aria-label="anchor" href="#build-vs-contribute-justification"></a></h2>
-<p>We evaluated contributing to existing projects before creating <code>dbparser</code>. The primary alternatives were:</p>
-<ul><li>
-<strong>Bioconductor’s AnnotationHub</strong>: Focused on genomic annotations rather than drug databases; its infrastructure assumes different data models than pharmacological resources require.</li>
-<li>
-<strong>drugbank R package (archived)</strong>: Provided only DrugBank parsing without integration capabilities; was unmaintained and lacked modern software quality standards.</li>
-<li>
-<strong>Python alternatives</strong> (e.g., <code>drugbank-downloader</code>, <code>pyDrugBank</code>): Language-specific and database-specific without cross-database integration frameworks.</li>
-</ul><p>None provided the unified, multi-database integration framework that pharmacovigilance research requires. Rather than forcing pharmacological data into genomics-oriented infrastructure, we created purpose-built tooling that respects the unique characteristics of drug databases while adhering to rOpenSci’s rigorous software quality standards.</p>
-</div>
-<div class="section level2">
-<h2 id="validation-through-ecosystem-development">Validation Through Ecosystem Development<a class="anchor" aria-label="anchor" href="#validation-through-ecosystem-development"></a></h2>
-<p>The extensibility of <code>dbparser</code>’s architecture has been validated through the development of two downstream packages that build upon its infrastructure:</p>
-<p><strong>dbdataset</strong> [@dbdataset]: Provides pre-parsed DrugBank datasets in ready-to-use R dataframe format, eliminating the need for users to download and parse large XML files. This package leverages <code>dbparser</code>’s parsing functions to create versioned, reproducible datasets for machine learning and exploratory analysis.</p>
-<p><strong>covid19dbcand</strong> [@covid19dbcand]: Delivers curated COVID-19 drug candidate datasets extracted from DrugBank during the pandemic response. This package demonstrated <code>dbparser</code>’s value for rapid response research, enabling researchers to quickly access potential therapeutic candidates without time-consuming data extraction.</p>
-<p>These downstream packages demonstrate that <code>dbparser</code>’s <code>dvobject</code> structure and parsing functions provide a stable foundation for building domain-specific data products—a key indicator of successful research software design.</p>
-</div>
-</div>
-<div class="section level1">
-<h1 id="research-impact-statement">Research Impact Statement<a class="anchor" aria-label="anchor" href="#research-impact-statement"></a></h1>
-<div class="section level2">
-<h2 id="demonstrated-community-adoption-and-recognition">Demonstrated Community Adoption and Recognition<a class="anchor" aria-label="anchor" href="#demonstrated-community-adoption-and-recognition"></a></h2>
-<p><code>dbparser</code> has established itself as essential infrastructure for the R pharmacoinformatics community since its initial release in 2019:</p>
-<p><strong>Download Metrics:</strong> Over 50,000 cumulative downloads from CRAN with sustained adoption of approximately 780 downloads per month, demonstrating consistent growth over six years. Download trends show strong retention and expanding user base across multiple continents.</p>
-<p><strong>Community Recognition:</strong> Featured in the CRAN Epidemiology Task View, indicating recognition by domain experts as essential infrastructure for epidemiological and pharmacovigilance research. This curated list represents packages deemed essential for applied statistical work in epidemiology, signaling the package’s established role in the field.</p>
-<p><strong>Code Quality and Review:</strong> Achieves 98% test coverage and has earned OpenSSF Best Practices passing badge, placing it in the top tier of R research software. Successfully completed rigorous rOpenSci software peer review (Issue #347, February 2020), with reviewers Hao Zhu and Emma Mendelsohn providing substantial feedback that improved API design, error handling, and documentation comprehensiveness.</p>
-</div>
-<div class="section level2">
-<h2 id="development-history-and-collaborative-engagement">Development History and Collaborative Engagement<a class="anchor" aria-label="anchor" href="#development-history-and-collaborative-engagement"></a></h2>
-<p>The package demonstrates sustained, collaborative development characteristic of meaningful research software:</p>
-<ul><li>
-<strong>Timeline</strong>: 6+ years of active development (first commit: September 29, 2018; first CRAN release: January 2019)</li>
-<li>
-<strong>Commits</strong>: 614 commits demonstrating iterative refinement and continuous improvement</li>
-<li>
-<strong>Contributors</strong>: 7 contributors spanning multiple institutions and career stages</li>
-<li>
-<strong>User Diversity</strong>: Actively used by researchers ranging from Master’s students to NIH scientists across multiple countries</li>
-<li>
-<strong>Issue Resolution</strong>: Responsive maintenance with active engagement on GitHub issues from users with diverse scientific backgrounds (academia, government, industry)</li>
-<li>
-<strong>Maintenance</strong>: Regular releases following semantic versioning (currently version 2.2.1, published January 8, 2026)</li>
-</ul></div>
-<div class="section level2">
-<h2 id="published-research-applications">Published Research Applications<a class="anchor" aria-label="anchor" href="#published-research-applications"></a></h2>
-<p><code>dbparser</code> has enabled peer-reviewed research across multiple high-impact domains, demonstrating substantial realized impact:</p>
-<p><strong>Drug Repurposing Studies:</strong> - Parolo et al. (2023) used <code>dbparser</code> in <em>Nature Scientific Reports</em> for single-cell-led drug repurposing in Alzheimer’s disease research [@parolo2023single] - Pérez-Moraga et al. (2021) employed the package in <em>Pharmaceutics</em> for COVID-19 drug repurposing using topological data analysis [@perez2021covid] - Schubert et al. (2022) applied <code>dbparser</code> in <em>Biomolecules</em> for transcriptome-guided identification of drugs for age-related hearing loss [@schubert2022transcriptome]</p>
-<p><strong>Systems Biology and Network Analysis:</strong> - Mercatelli et al. (2022) integrated <code>dbparser</code> into the SURFACER workflow published in <em>Briefings in Bioinformatics</em> (Oxford Academic) for pan-cancer surface protein biomarker detection [@mercatelli2022detection] - Yang et al. (2021) utilized the package in research published in <em>Pharmacological Research</em> for mapping synthetic lethal interactions in liver cancer [@yang2021mapping] - Su et al. (2024) incorporated <code>dbparser</code> in multi-ancestry proteome-phenome-wide Mendelian randomization analysis on <em>medRxiv</em> [@su2024multiancestry]</p>
-<p><strong>Clinical and Epidemiological Research:</strong> - Rischke et al. (2023) employed <code>dbparser</code> in <em>Nature Scientific Reports</em> for machine learning identification of psoriatic arthritis activity signals [@rischke2023machine] - Namiot et al. (2023) used the package in <em>Frontiers in Pharmacology</em> for analyzing trends in clinical trials from the International Clinical Trials Registry Platform [@namiot2023international]</p>
-<p><strong>Software Integration and Ecosystem Development:</strong> - Hammoud &amp; Kramer (2020) integrated <code>dbparser</code> into the Multipath package published in <em>Biology (MDPI)</em> for generating reproducible pathway models [@hammoud2020multipath] - Hammoud et al. (2025) extended this integration in Multipath 2.0 published in <em>Computer Methods and Programs in Biomedicine (Elsevier)</em> [@hammoud2025multipath2]</p>
-<p>This body of work—spanning Nature publications, Oxford Academic journals, and domain-specific outlets—demonstrates that <code>dbparser</code> is actively enabling cutting-edge research in drug discovery, systems pharmacology, machine learning applications, and clinical epidemiology.</p>
-</div>
-<div class="section level2">
-<h2 id="impact-beyond-citations">Impact Beyond Citations<a class="anchor" aria-label="anchor" href="#impact-beyond-citations"></a></h2>
-<p>The package lowers technical barriers to multi-database pharmacology research, transforming weeks of custom parsing code into minutes of standardized workflow. This democratization of access particularly benefits:</p>
-<ul><li>
-<strong>Early-career researchers</strong> who lack extensive bioinformatics infrastructure</li>
-<li>
-<strong>Interdisciplinary teams</strong> requiring reproducible data pipelines</li>
-<li>
-<strong>Resource-limited institutions</strong> without dedicated computational support</li>
-<li>
-<strong>Educational contexts</strong> where students learn computational pharmacology</li>
-</ul><p>The integration of DrugBank with modern pharmacovigilance databases (OnSIDES, TWOSIDES) enables analyses that were previously technically prohibitive, accelerating the pace of drug safety research and repurposing studies.</p>
-</div>
-<div class="section level2">
-<h2 id="downstream-package-ecosystem">Downstream Package Ecosystem<a class="anchor" aria-label="anchor" href="#downstream-package-ecosystem"></a></h2>
-<p>The robustness of <code>dbparser</code>’s design is evidenced by its use as foundational infrastructure for additional R packages:</p>
-<ul><li><p><strong>dbdataset</strong>: Provides pre-parsed DrugBank datasets in ready-to-analyze format, built entirely on <code>dbparser</code>’s parsing infrastructure. With 16 GitHub stars and active maintenance, it serves researchers who need immediate access to DrugBank data without local parsing.</p></li>
-<li><p><strong>covid19dbcand</strong>: Created in response to the COVID-19 pandemic, this package delivered curated drug candidate datasets for therapeutic research. It demonstrated <code>dbparser</code>’s capability to support rapid-response research during public health emergencies, with data extracted using <code>dbparser</code> version 1.2.0.</p></li>
-</ul><p>Both packages maintain their own development histories, documentation, and user bases while relying on <code>dbparser</code> as stable infrastructure—the hallmark of sustainable research software that enables further innovation.</p>
-</div>
-</div>
-<div class="section level1">
-<h1 id="functionality">Functionality<a class="anchor" aria-label="anchor" href="#functionality"></a></h1>
-<div class="section level2">
-<h2 id="core-parsing-architecture">Core Parsing Architecture<a class="anchor" aria-label="anchor" href="#core-parsing-architecture"></a></h2>
-<p><code>dbparser</code> provides dedicated parsing functions for each supported database:</p>
-<table class="table"><colgroup><col width="21%"><col width="21%"><col width="29%"><col width="27%"></colgroup><thead><tr><th>Function</th>
-<th>Database</th>
-<th>Input Format</th>
-<th>Key Content</th>
-</tr></thead><tbody><tr><td><code><a href="reference/parseDrugBank.html">parseDrugBank()</a></code></td>
-<td>DrugBank</td>
-<td>XML</td>
-<td>Drug properties, targets, pathways, interactions</td>
-</tr><tr><td><code><a href="reference/parseOnSIDES.html">parseOnSIDES()</a></code></td>
-<td>OnSIDES</td>
-<td>Relational CSVs</td>
-<td>ML-derived side effects with confidence scores</td>
-</tr><tr><td><code><a href="reference/parseTWOSIDES.html">parseTWOSIDES()</a></code></td>
-<td>TWOSIDES</td>
-<td>Compressed CSV</td>
-<td>Drug-drug interaction adverse events</td>
-</tr></tbody></table><p>Performance is achieved through streaming XML parsing via <code>xml2</code> [@wickham2023xml2] and high-speed CSV parsing via <code><a href="https://rdatatable.gitlab.io/data.table/reference/fread.html" class="external-link">data.table::fread()</a></code> [@dowle2023datatable]. Typical parsing times on commodity hardware (8-core CPU, 16GB RAM): DrugBank full XML (~2.5GB) completes in approximately 3-5 minutes; OnSIDES (~500MB total) parses in under 30 seconds; TWOSIDES (~1.2GB) completes in approximately 1 minute.</p>
-</div>
-<div class="section level2">
-<h2 id="example-workflow-anticoagulant-side-effect-analysis">Example Workflow: Anticoagulant Side Effect Analysis<a class="anchor" aria-label="anchor" href="#example-workflow-anticoagulant-side-effect-analysis"></a></h2>
-<div class="sourceCode" id="cb1"><pre class="downlit sourceCode r">
-<code class="sourceCode R"><span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="va"><a href="https://docs.ropensci.org/dbparser/">dbparser</a></span><span class="op">)</span></span>
-<span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="va"><a href="https://dplyr.tidyverse.org" class="external-link">dplyr</a></span><span class="op">)</span></span>
-<span></span>
-<span><span class="co"># Parse and integrate databases</span></span>
-<span><span class="va">drugbank_db</span> <span class="op">&lt;-</span> <span class="fu"><a href="reference/parseDrugBank.html">parseDrugBank</a></span><span class="op">(</span><span class="st">"drugbank_all_full_database.xml"</span><span class="op">)</span></span>
-<span><span class="va">onsides_db</span> <span class="op">&lt;-</span> <span class="fu"><a href="reference/parseOnSIDES.html">parseOnSIDES</a></span><span class="op">(</span><span class="st">"onsides_v2.0.0/"</span><span class="op">)</span></span>
-<span></span>
-<span><span class="co"># Chain merge operations for integrated analysis</span></span>
-<span><span class="va">merged_db</span> <span class="op">&lt;-</span> <span class="va">drugbank_db</span> <span class="op"><a href="https://magrittr.tidyverse.org/reference/pipe.html" class="external-link">%&gt;%</a></span></span>
-<span>  <span class="fu"><a href="reference/merge_drugbank_onsides.html">merge_drugbank_onsides</a></span><span class="op">(</span><span class="va">onsides_db</span><span class="op">)</span></span>
-<span></span>
-<span><span class="co"># Identify anticoagulant drugs via therapeutic category</span></span>
-<span><span class="va">anticoagulant_ids</span> <span class="op">&lt;-</span> <span class="va">merged_db</span><span class="op">$</span><span class="va">drugbank</span><span class="op">$</span><span class="va">drugs</span><span class="op">$</span><span class="va">categories</span> <span class="op"><a href="https://magrittr.tidyverse.org/reference/pipe.html" class="external-link">%&gt;%</a></span></span>
-<span>  <span class="fu"><a href="https://dplyr.tidyverse.org/reference/filter.html" class="external-link">filter</a></span><span class="op">(</span><span class="va">category</span> <span class="op">==</span> <span class="st">"Anticoagulants"</span><span class="op">)</span> <span class="op"><a href="https://magrittr.tidyverse.org/reference/pipe.html" class="external-link">%&gt;%</a></span></span>
-<span>  <span class="fu"><a href="https://dplyr.tidyverse.org/reference/pull.html" class="external-link">pull</a></span><span class="op">(</span><span class="va">drugbank_id</span><span class="op">)</span></span>
-<span></span>
-<span><span class="co"># Analyze side effect frequencies from integrated data</span></span>
-<span><span class="va">side_effects</span> <span class="op">&lt;-</span> <span class="va">merged_db</span><span class="op">$</span><span class="va">integrated_data</span><span class="op">$</span><span class="va">drugbank_onsides</span> <span class="op"><a href="https://magrittr.tidyverse.org/reference/pipe.html" class="external-link">%&gt;%</a></span></span>
-<span>  <span class="fu"><a href="https://dplyr.tidyverse.org/reference/filter.html" class="external-link">filter</a></span><span class="op">(</span><span class="va">drugbank_id</span> <span class="op"><a href="https://rdrr.io/r/base/match.html" class="external-link">%in%</a></span> <span class="va">anticoagulant_ids</span><span class="op">)</span> <span class="op"><a href="https://magrittr.tidyverse.org/reference/pipe.html" class="external-link">%&gt;%</a></span></span>
-<span>  <span class="fu"><a href="https://dplyr.tidyverse.org/reference/count.html" class="external-link">count</a></span><span class="op">(</span><span class="va">meddra_name</span>, sort <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span></span>
-<span></span>
-<span><span class="fu"><a href="https://rdrr.io/r/utils/head.html" class="external-link">head</a></span><span class="op">(</span><span class="va">side_effects</span>, <span class="fl">5</span><span class="op">)</span></span>
-<span><span class="co">#&gt;            meddra_name frequency</span></span>
-<span><span class="co">#&gt; 1          Haemorrhage       847</span></span>
-<span><span class="co">#&gt; 2             Anaemia       623</span></span>
-<span><span class="co">#&gt; 3   Thrombocytopenia       412</span></span>
-<span><span class="co">#&gt; 4          Ecchymosis       389</span></span>
-<span><span class="co">#&gt; 5           Epistaxis       356</span></span></code></pre></div>
-<p>This analysis validates against known clinical findings—hemorrhagic events represent the primary safety concern for anticoagulant therapy [@garcia2012anticoagulant]. The integrated database enables researchers to immediately cross-reference these findings with mechanistic target information from DrugBank or examine potential interaction effects from TWOSIDES.</p>
-</div>
-</div>
-<div class="section level1">
-<h1 id="ai-usage-disclosure">AI Usage Disclosure<a class="anchor" aria-label="anchor" href="#ai-usage-disclosure"></a></h1>
-<p>Generative AI tools (Claude, Anthropic) were used to assist with drafting portions of this manuscript, including reformatting bibliographic entries and suggesting organizational structure. All AI-generated content was thoroughly reviewed, verified for accuracy, and substantially edited by the authors. The core <code>dbparser</code> software implementation, architectural decisions, and research contributions represent original human intellectual work developed over six years (2018-2024) prior to the widespread availability of modern generative AI coding assistants. Initial development and the majority of the codebase predate AI-assisted programming tools.</p>
-</div>
-<div class="section level1">
-<h1 id="availability">Availability<a class="anchor" aria-label="anchor" href="#availability"></a></h1>
-<p><code>dbparser</code> is available from CRAN (<code>install.packages("dbparser")</code>) and the development version is hosted on GitHub (<a href="https://github.com/ropensci/dbparser" class="external-link uri">https://github.com/ropensci/dbparser</a>). Comprehensive documentation is available at <a href="https://docs.ropensci.org/dbparser/" class="uri">https://docs.ropensci.org/dbparser/</a>. The package is released under the MIT license. As an rOpenSci package, it adheres to a strict code of conduct. Community contributions, bug reports, and feature requests are welcomed through the GitHub issue tracker (<a href="https://github.com/ropensci/dbparser/issues" class="external-link uri">https://github.com/ropensci/dbparser/issues</a>).</p>
-</div>
-<div class="section level1">
-<h1 id="acknowledgements">Acknowledgements<a class="anchor" aria-label="anchor" href="#acknowledgements"></a></h1>
-<p>We gratefully acknowledge the creators and maintainers of DrugBank, OnSIDES, TWOSIDES, SIDER, and OFFSIDES for making their invaluable data resources publicly available to the research community. We thank the rOpenSci community and peer reviewers Hao Zhu and Emma Mendelsohn for their constructive feedback during the software review process (ropensci/software-review#347) that substantially improved the package’s quality, documentation, and API design. Special thanks to the Tatonetti Lab at Columbia University (now Cedars-Sinai) for developing and maintaining the OnSIDES, TWOSIDES, and OFFSIDES resources. We acknowledge all contributors to the dbparser codebase and the users who have provided feedback, bug reports, and feature suggestions over the past six years.</p>
-</div>
-<div class="section level1">
-<h1 id="references">References<a class="anchor" aria-label="anchor" href="#references"></a></h1>
-</div>
-
-
-  </main><aside class="col-md-3"><nav id="toc" aria-label="Table of contents"><h2>On this page</h2>
-    </nav></aside></div>
-
-
-    <footer><div class="pkgdown-footer-left">
-  <p>Developed by <a href="http://mohammedfcis.github.io" class="external-link">Mohammed Ali</a>, Ali Ezzat.</p>
-</div>
-
-<div class="pkgdown-footer-right">
-  <p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.2.0.</p>
-</div>
-
-    </footer></div>
-
-
-
-
-
-  </body></html>
-
diff --git a/docs/paper.md b/docs/paper.md
deleted file mode 100644
index fb25f7bc..00000000
--- a/docs/paper.md
+++ /dev/null
@@ -1,356 +0,0 @@
-# Summary
-
-`dbparser` is an rOpenSci peer-reviewed R package that provides a
-unified framework for parsing and integrating major pharmacological and
-pharmacovigilance databases into standardized, analysis-ready R objects.
-The package supports three essential drug information resources:
-DrugBank \[@wishart2018drugbank\], OnSIDES \[@galeano2022onsides\] and
-TWOSIDES \[@tatonetti2012data\]. Each database is parsed into a
-consistent nested list structure called a `dvobject`, which preserves
-complex relational hierarchies while enabling seamless cross-database
-integration. By providing high-performance parsing functions, chainable
-merge operations, and comprehensive metadata tracking, `dbparser`
-eliminates a significant bottleneck in computational pharmacology
-research and enables reproducible, large-scale drug safety analyses.
-
-# Statement of Need
-
-Pharmacological research increasingly relies on integrating
-heterogeneous data sources to understand drug mechanisms, predict
-adverse effects, and identify drug-drug interactions. Resources such as
-DrugBank (comprehensive drug and target information), OnSIDES (machine
-learning-derived side effect predictions), and TWOSIDES (drug-drug
-interaction effects) represent invaluable repositories of
-pharmacological knowledge. However, accessing and integrating these
-databases presents substantial technical challenges.
-
-Each database employs distinct file formats and structural conventions:
-DrugBank distributes data as deeply nested XML with complex entity
-relationships; OnSIDES provides multiple relational CSV files requiring
-careful joining; TWOSIDES offers compressed flat files with different
-identifier systems. Researchers typically address these inconsistencies
-by developing ad-hoc parsing scripts—an approach that is time-consuming,
-error-prone, and harmful to reproducibility. Studies suggest that data
-preprocessing often consumes 60-80% of total analysis time in
-pharmacoinformatics workflows \[@wickham2014tidy\].
-
-The R ecosystem, despite its strength in statistical analysis and
-visualization, lacks dedicated tools for pharmacological database
-integration. While Bioconductor \[@gentleman2004bioconductor\] provides
-excellent infrastructure for genomics data, no equivalent standardized
-framework exists for drug databases. `dbparser` addresses this gap by
-providing unified parsing functions, chainable integration workflows,
-rich metadata preservation, and high-performance implementations that
-transform weeks of custom development into minutes of reproducible
-analysis.
-
-# Software Design
-
-## Design Philosophy and Trade-offs
-
-`dbparser`’s architecture reflects three core design decisions that
-emerged from extensive experience with pharmacological data analysis
-workflows:
-
-**Unified `dvobject` Structure vs. Database-Specific Formats:** We chose
-to transform all databases into a consistent nested list structure
-rather than preserving native formats. This decision trades some
-format-specific optimization for dramatically improved interoperability.
-The `dvobject` maintains the relational structure of each source
-database while providing consistent access patterns, enabling users to
-apply identical analysis code across different data sources. Each
-`dvobject` contains three components: (1) tidy data tables compatible
-with the tidyverse ecosystem \[@wickham2019welcome\], (2) comprehensive
-metadata (version, parse timestamp, schema information), and (3)
-relationship mappings documenting cross-table linkages.
-
-**Hub-and-Spoke Integration Model:** Rather than attempting all-to-all
-database linking, we implemented DrugBank as the central integration
-hub. This reflects DrugBank’s comprehensive identifier mappings (RxCUI,
-PubChem, ChEMBL, KEGG) and its established role as a reference resource.
-The trade-off—requiring DrugBank for multi-database analyses—is
-justified by the substantial reduction in identifier reconciliation
-complexity and the improved reliability of cross-database joins.
-
-**Chainable Merge Operations:** Integration functions are designed for
-pipeline composition using the magrittr pipe operator, enabling
-workflows like
-`drugbank_db %>% merge_drugbank_onsides(onsides_db) %>% merge_drugbank_twosides(twosides_db)`.
-This design prioritizes readability and reproducibility over marginal
-performance gains from monolithic merge operations.
-
-## Build vs. Contribute Justification
-
-We evaluated contributing to existing projects before creating
-`dbparser`. The primary alternatives were:
-
-- **Bioconductor’s AnnotationHub**: Focused on genomic annotations
-  rather than drug databases; its infrastructure assumes different data
-  models than pharmacological resources require.
-- **drugbank R package (archived)**: Provided only DrugBank parsing
-  without integration capabilities; was unmaintained and lacked modern
-  software quality standards.
-- **Python alternatives** (e.g., `drugbank-downloader`, `pyDrugBank`):
-  Language-specific and database-specific without cross-database
-  integration frameworks.
-
-None provided the unified, multi-database integration framework that
-pharmacovigilance research requires. Rather than forcing pharmacological
-data into genomics-oriented infrastructure, we created purpose-built
-tooling that respects the unique characteristics of drug databases while
-adhering to rOpenSci’s rigorous software quality standards.
-
-## Validation Through Ecosystem Development
-
-The extensibility of `dbparser`’s architecture has been validated
-through the development of two downstream packages that build upon its
-infrastructure:
-
-**dbdataset** \[@dbdataset\]: Provides pre-parsed DrugBank datasets in
-ready-to-use R dataframe format, eliminating the need for users to
-download and parse large XML files. This package leverages `dbparser`’s
-parsing functions to create versioned, reproducible datasets for machine
-learning and exploratory analysis.
-
-**covid19dbcand** \[@covid19dbcand\]: Delivers curated COVID-19 drug
-candidate datasets extracted from DrugBank during the pandemic response.
-This package demonstrated `dbparser`’s value for rapid response
-research, enabling researchers to quickly access potential therapeutic
-candidates without time-consuming data extraction.
-
-These downstream packages demonstrate that `dbparser`’s `dvobject`
-structure and parsing functions provide a stable foundation for building
-domain-specific data products—a key indicator of successful research
-software design.
-
-# Research Impact Statement
-
-## Demonstrated Community Adoption and Recognition
-
-`dbparser` has established itself as essential infrastructure for the R
-pharmacoinformatics community since its initial release in 2019:
-
-**Download Metrics:** Over 50,000 cumulative downloads from CRAN with
-sustained adoption of approximately 780 downloads per month,
-demonstrating consistent growth over six years. Download trends show
-strong retention and an expanding user base across multiple continents.
-
-**Community Recognition:** Featured in the CRAN Epidemiology Task View,
-indicating recognition by domain experts as essential infrastructure for
-epidemiological and pharmacovigilance research. This curated list
-represents packages deemed essential for applied statistical work in
-epidemiology, signaling the package’s established role in the field.
-
-**Code Quality and Review:** Achieves 98% test coverage and has earned
-the OpenSSF Best Practices passing badge, placing it in the top tier of R
-research software. Successfully completed rigorous rOpenSci software
-peer review (Issue \#347, February 2020), with reviewers Hao Zhu and
-Emma Mendelsohn providing substantial feedback that improved API design,
-error handling, and documentation comprehensiveness.
-
-## Development History and Collaborative Engagement
-
-The package demonstrates sustained, collaborative development
-characteristic of meaningful research software:
-
-- **Timeline**: 6+ years of active development (first commit: September
-  29, 2018; first CRAN release: January 2019)
-- **Commits**: 614 commits demonstrating iterative refinement and
-  continuous improvement
-- **Contributors**: 7 contributors spanning multiple institutions and
-  career stages
-- **User Diversity**: Actively used by researchers ranging from Master’s
-  students to NIH scientists across multiple countries
-- **Issue Resolution**: Responsive maintenance with active engagement on
-  GitHub issues from users with diverse scientific backgrounds
-  (academia, government, industry)
-- **Maintenance**: Regular releases following semantic versioning
-  (currently version 2.2.1, published January 8, 2026)
-
-## Published Research Applications
-
-`dbparser` has enabled peer-reviewed research across multiple
-high-impact domains, demonstrating substantial realized impact:
-
-**Drug Repurposing Studies:**
-
-- Parolo et al. (2023) used `dbparser` in *Nature Scientific Reports* for
-  single-cell-led drug repurposing in Alzheimer’s disease research \[@parolo2023single\]
-- Pérez-Moraga et al. (2021) employed the package in *Pharmaceutics* for
-  COVID-19 drug repurposing using topological data analysis \[@perez2021covid\]
-- Schubert et al. (2022) applied `dbparser` in *Biomolecules* for transcriptome-guided
-  identification of drugs for age-related hearing loss \[@schubert2022transcriptome\]
-
-**Systems Biology and Network Analysis:**
-
-- Mercatelli et al. (2022) integrated `dbparser` into the SURFACER
-  workflow published in *Briefings in Bioinformatics* (Oxford Academic)
-  for pan-cancer surface protein biomarker detection \[@mercatelli2022detection\]
-- Yang et al. (2021) utilized the package in research published in
-  *Pharmacological Research* for mapping synthetic lethal interactions in liver cancer \[@yang2021mapping\]
-- Su et al. (2024) incorporated `dbparser` in multi-ancestry proteome-phenome-wide
-  Mendelian randomization analysis on *medRxiv* \[@su2024multiancestry\]
-
-**Clinical and Epidemiological Research:**
-
-- Rischke et al. (2023) employed `dbparser` in *Nature Scientific Reports* for machine
-  learning identification of psoriatic arthritis activity signals \[@rischke2023machine\]
-- Namiot et al. (2023) used the package in *Frontiers in Pharmacology*
-  for analyzing trends in clinical trials from the International
-  Clinical Trials Registry Platform \[@namiot2023international\]
-
-**Software Integration and Ecosystem Development:**
-
-- Hammoud & Kramer (2020) integrated `dbparser` into the Multipath package published
-  in *Biology (MDPI)* for generating reproducible pathway models \[@hammoud2020multipath\]
-- Hammoud et al. (2025) extended this integration in Multipath 2.0 published in
-  *Computer Methods and Programs in Biomedicine (Elsevier)* \[@hammoud2025multipath2\]
-
-This body of work—spanning Nature publications, Oxford Academic
-journals, and domain-specific outlets—demonstrates that `dbparser` is
-actively enabling cutting-edge research in drug discovery, systems
-pharmacology, machine learning applications, and clinical epidemiology.
-
-## Impact Beyond Citations
-
-The package lowers technical barriers to multi-database pharmacology
-research, transforming weeks of custom parsing code into minutes of
-standardized workflow. This democratization of access particularly
-benefits:
-
-- **Early-career researchers** who lack extensive bioinformatics
-  infrastructure
-- **Interdisciplinary teams** requiring reproducible data pipelines
-- **Resource-limited institutions** without dedicated computational
-  support
-- **Educational contexts** where students learn computational
-  pharmacology
-
-The integration of DrugBank with modern pharmacovigilance databases
-(OnSIDES, TWOSIDES) enables analyses that were previously technically
-prohibitive, accelerating the pace of drug safety research and
-repurposing studies.
-
-## Downstream Package Ecosystem
-
-The robustness of `dbparser`’s design is evidenced by its use as
-foundational infrastructure for additional R packages:
-
-- **dbdataset**: Provides pre-parsed DrugBank datasets in
-  ready-to-analyze format, built entirely on `dbparser`’s parsing
-  infrastructure. With 16 GitHub stars and active maintenance, it serves
-  researchers who need immediate access to DrugBank data without local
-  parsing.
-
-- **covid19dbcand**: Created in response to the COVID-19 pandemic, this
-  package delivered curated drug candidate datasets for therapeutic
-  research. It demonstrated `dbparser`’s capability to support
-  rapid-response research during public health emergencies, with data
-  extracted using `dbparser` version 1.2.0.
-
-Both packages maintain their own development histories, documentation,
-and user bases while relying on `dbparser` as stable infrastructure—the
-hallmark of sustainable research software that enables further
-innovation.
-
-# Functionality
-
-## Core Parsing Architecture
-
-`dbparser` provides dedicated parsing functions for each supported
-database:
-
-| Function | Database | Input Format | Key Content |
-|----|----|----|----|
-| [`parseDrugBank()`](https://docs.ropensci.org/dbparser/reference/parseDrugBank.md) | DrugBank | XML | Drug properties, targets, pathways, interactions |
-| [`parseOnSIDES()`](https://docs.ropensci.org/dbparser/reference/parseOnSIDES.md) | OnSIDES | Relational CSVs | ML-derived side effects with confidence scores |
-| [`parseTWOSIDES()`](https://docs.ropensci.org/dbparser/reference/parseTWOSIDES.md) | TWOSIDES | Compressed CSV | Drug-drug interaction adverse events |
-
-Performance is achieved through streaming XML parsing via `xml2`
-\[@wickham2023xml2\] and high-speed CSV parsing via
-[`data.table::fread()`](https://rdatatable.gitlab.io/data.table/reference/fread.html)
-\[@dowle2023datatable\]. Typical parsing times on commodity hardware
-(8-core CPU, 16GB RAM): DrugBank full XML (~2.5GB) completes in
-approximately 3-5 minutes; OnSIDES (~500MB total) parses in under 30
-seconds; TWOSIDES (~1.2GB) completes in approximately 1 minute.
-
-## Example Workflow: Anticoagulant Side Effect Analysis
-
-``` r
-library(dbparser)
-library(dplyr)
-
-# Parse each source database into its own dvobject
-drugbank_db <- parseDrugBank("drugbank_all_full_database.xml")
-onsides_db <- parseOnSIDES("onsides_v2.0.0/")
-
-# Chain merge operations for integrated analysis
-merged_db <- drugbank_db %>%
-  merge_drugbank_onsides(onsides_db)
-
-# Identify anticoagulant drugs via therapeutic category
-anticoagulant_ids <- merged_db$drugbank$drugs$categories %>%
-  filter(category == "Anticoagulants") %>%
-  pull(drugbank_id)
-
-# Tally side effects; name = "frequency" labels the count column (default is "n")
-side_effects <- merged_db$integrated_data$drugbank_onsides %>%
-  filter(drugbank_id %in% anticoagulant_ids) %>%
-  count(meddra_name, sort = TRUE, name = "frequency")
-
-head(side_effects, 5)
-#>        meddra_name frequency
-#> 1      Haemorrhage       847
-#> 2          Anaemia       623
-#> 3 Thrombocytopenia       412
-#> 4       Ecchymosis       389
-#> 5        Epistaxis       356
-```
-
-These results are consistent with known clinical findings—hemorrhagic
-events represent the primary safety concern for anticoagulant therapy
-\[@garcia2012anticoagulant\]. The integrated database enables
-researchers to immediately cross-reference these findings with
-mechanistic target information from DrugBank or examine potential
-interaction effects from TWOSIDES.
-
-# AI Usage Disclosure
-
-Generative AI tools (Claude, Anthropic) were used to assist with
-drafting portions of this manuscript, including reformatting
-bibliographic entries and suggesting organizational structure. All
-AI-generated content was thoroughly reviewed, verified for accuracy, and
-substantially edited by the authors. The core `dbparser` software
-implementation, architectural decisions, and research contributions
-represent original human intellectual work developed over six years
-(2018-2024) prior to the widespread availability of modern generative AI
-coding assistants. Initial development and the majority of the codebase
-predate AI-assisted programming tools.
-
-# Availability
-
-`dbparser` is available from CRAN (`install.packages("dbparser")`) and
-the development version is hosted on GitHub
-(<https://github.com/ropensci/dbparser>). Comprehensive documentation is
-available at <https://docs.ropensci.org/dbparser/>. The package is
-released under the MIT license. As an rOpenSci package, it adheres to a
-strict code of conduct. Community contributions, bug reports, and
-feature requests are welcomed through the GitHub issue tracker
-(<https://github.com/ropensci/dbparser/issues>).
-
-# Acknowledgements
-
-We gratefully acknowledge the creators and maintainers of DrugBank,
-OnSIDES, TWOSIDES, SIDER, and OFFSIDES for making their invaluable data
-resources publicly available to the research community. We thank the
-rOpenSci community and peer reviewers Hao Zhu and Emma Mendelsohn for
-their constructive feedback during the software review process
-(ropensci/software-review#347) that substantially improved the package’s
-quality, documentation, and API design. Special thanks to the Tatonetti
-Lab at Columbia University (now Cedars-Sinai) for developing and
-maintaining the OnSIDES, TWOSIDES, and OFFSIDES resources. We
-acknowledge all contributors to the dbparser codebase and the users who
-have provided feedback, bug reports, and feature suggestions over the
-past six years.
-
-# References
diff --git a/paper.bib b/paper.bib
index c6d70853..8ef3b390 100644
--- a/paper.bib
+++ b/paper.bib
@@ -29,7 +29,7 @@ @article{galeano2022onsides
   journal={medRxiv},
   year={2022},
   publisher={Cold Spring Harbor Laboratory Press},
-  doi={10.1101/2022.05.18.22275166}
+  doi={10.1101/2024.03.22.24304724}
 }
 
 @article{kuhn2016sider,
@@ -86,7 +86,7 @@ @article{parolo2023single
   pages={8497},
   year={2023},
   publisher={Nature Publishing Group},
-  doi={10.1038/s41598-023-35621-w}
+  doi={10.1038/s41598-023-27420-x}
 }
 
 @article{perez2021covid,
@@ -142,7 +142,7 @@ @article{su2024multiancestry
   journal={medRxiv},
   year={2024},
   publisher={Cold Spring Harbor Laboratory Press},
-  doi={10.1101/2024.01.15.24301159}
+  doi={10.1101/2024.10.17.24315553}
 }
 
 @article{rischke2023machine,
@@ -153,7 +153,7 @@ @article{rischke2023machine
   pages={10965},
   year={2023},
   publisher={Nature Publishing Group},
-  doi={10.1038/s41598-023-37911-w}
+  doi={10.1038/s41598-023-49574-4}
 }
 
 @article{namiot2023international,
@@ -164,7 +164,7 @@ @article{namiot2023international
   pages={1106591},
   year={2023},
   publisher={Frontiers Media SA},
-  doi={10.3389/fphar.2023.1106591}
+  doi={10.3389/fphar.2023.1228148}
 }
 
 @article{hammoud2020multipath,

Function	Database	Input Format	Key Content
`parseDrugBank()`	DrugBank	XML	Drug properties, targets, pathways, interactions
`parseOnSIDES()`	OnSIDES	Relational CSVs	ML-derived side effects with confidence scores
`parseTWOSIDES()`	TWOSIDES	Compressed CSV	Drug-drug interaction adverse events