From 2c0271c5c566aa981667b7f5f6ab2ba730b3c3ab Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 17 Jan 2026 19:16:43 +0300 Subject: [PATCH 1/3] Use xml2 to manipulate Rd2HTML output --- site/generate_reference.R | 103 +++++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 41 deletions(-) diff --git a/site/generate_reference.R b/site/generate_reference.R index 8a070a741..4f3ed2588 100644 --- a/site/generate_reference.R +++ b/site/generate_reference.R @@ -2,6 +2,7 @@ # Generate individual reference pages from .Rd files library(tools) +library(xml2) # Create reference directory ref_dir = "site/reference" @@ -35,6 +36,7 @@ get_rd_info = function(rd_file) { # Generate HTML for each .Rd file cat("Generating individual reference pages...\n") all_info = list() +html_meta = xfun::yaml_load(readLines('site/_litedown.yml'))$output$html$meta for (rd_file in rd_files) { info = get_rd_info(rd_file) @@ -44,47 +46,66 @@ for (rd_file in rd_files) { temp_html = tempfile(fileext = ".html") Rd2HTML(rd_file, out = temp_html, package = "data.table") - - html_content = readLines(temp_html, warn = FALSE) - navbar_html = readLines("site/_navbar.html", warn = FALSE) - navbar_html = gsub('href="index.html"', 'href="../index.html"', navbar_html) - navbar_html = gsub('href="news.html"', 'href="../news.html"', navbar_html) - navbar_html = gsub('href="manual.html"', 'href="../manual.html"', navbar_html) - navbar_html = gsub('href="articles/', 'href="../articles/', navbar_html) - navbar_html = gsub('href="assets/', 'href="../assets/', navbar_html) - - wrapped_html = c( - '', - '', - '', - '', - '', - '', - sprintf('%s — %s • data.table', paste(info$aliases, collapse = ", "), info$title), - '', - '', - '', - '', - '', - '', - '', - navbar_html, - '
', - html_content, - '
', - '', - '', - '' - ) - - writeLines(wrapped_html, out_file) + html_root = read_html(temp_html) + + if (!is.null(html_meta$lang)) + xml_attr(html_root, 'lang') = html_meta$lang + + html_title = xml_find_first(html_root, '/html/head/title') + xml_text(html_title) <- sprintf('%s — %s • data.table', paste(info$aliases, collapse = ", "), info$title) + + html_head = xml_find_first(html_root, '//head') + + if (!is.null(add_head <- html_meta$header_includes)) { + add_head = xml_find_first(read_html(paste0('', add_head, '')), '//head') + for (tag in xml_children(add_head)) + xml_add_child(html_head, tag) + } + + for (css in html_meta$css2) { + link = xml_add_child(html_head, "link") + xml_set_attrs(link, list( + rel = "stylesheet", + href = css + )) + } + + for (js in html_meta$js2) { + script = xml_add_child(html_head, "script") + xml_set_attr(script, 'src', js) + } + + body = xml_find_first(html_root, '//body') + new_body = xml_add_child(html_root, 'body') + if (!is.null(include <- html_meta$include_before)) { + include = read_html(file.path('site', include)) + include = xml_find_first(include, '//body') + for (node in xml_children(include)) + xml_add_child(new_body, node) + } + + new_body_div = xml_add_child(new_body, 'div') + xml_set_attr(new_body_div, 'class', 'body') + for (node in xml_children(body)) + xml_add_child(new_body_div, node) + + if (!is.null(include <- html_meta$include_after)) { + include = read_html(include) + include = xml_find_first(include, '//body') + for (node in xml_children(include)) + xml_add_child(new_body, node) + } + + xml_remove(body) + + for (link in xml_find_all(html_root, '//a|//link')) { + href = xml_attr(link, 'href') + if (!is.na(href) && !grepl('^https?://', href)) { + xml_attr(link, 'href') <- paste0('../', href) + } + } + + write_html(html_root, out_file) unlink(temp_html) cat(sprintf(" Generated %s\n", out_file)) From cdfad1f28629aa9e6d06e8b19044334b427adc18 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 17 Jan 2026 19:33:05 +0300 Subject: [PATCH 2/3] Use all_info to resolve links between help pages --- site/generate_reference.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/site/generate_reference.R b/site/generate_reference.R index 4f3ed2588..9a2e88af1 100644 --- a/site/generate_reference.R +++ b/site/generate_reference.R @@ -35,17 +35,20 @@ get_rd_info = function(rd_file) { # Generate HTML for each .Rd file cat("Generating individual reference pages...\n") -all_info = list() + +all_info = lapply(setNames(nm = rd_files), get_rd_info) +topic_links = character() +for (topic in all_info) topic_links[topic$aliases] = paste0('reference/', topic$name, '.html') + html_meta = xfun::yaml_load(readLines('site/_litedown.yml'))$output$html$meta for (rd_file in rd_files) { - info = get_rd_info(rd_file) - all_info[[length(all_info) + 1]] <- info + info = all_info[[rd_file]] out_file = file.path(ref_dir, paste0(info$name, ".html")) temp_html = tempfile(fileext = ".html") - Rd2HTML(rd_file, out = temp_html, package = "data.table") + Rd2HTML(rd_file, out = temp_html, package = "data.table", Links = topic_links, Links2 = character()) html_root = read_html(temp_html) if (!is.null(html_meta$lang)) From 9f3efe257db9c6a6c24052460c807b2338bdde45 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 17 Jan 2026 21:30:47 +0300 Subject: [PATCH 3/3] Scrub links to external help pages --- site/generate_reference.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/site/generate_reference.R b/site/generate_reference.R index 9a2e88af1..5b90c41fa 100644 --- a/site/generate_reference.R +++ b/site/generate_reference.R @@ -103,9 +103,11 @@ for (rd_file in rd_files) { for (link in xml_find_all(html_root, '//a|//link')) { href = xml_attr(link, 'href') - if (!is.na(href) && !grepl('^https?://', href)) { - xml_attr(link, 'href') <- paste0('../', href) - } + if (!is.na(href)) + if (startsWith(href, '../../')) # \link[package]{topic} + xml_attr(link, 'href') <- NULL + else if (!grepl('^https?://', href)) + xml_attr(link, 'href') <- paste0('../', href) } write_html(html_root, out_file)