From 13a190ce6be4cbb80fc5ca3f7a15bcdf3d39589d Mon Sep 17 00:00:00 2001 From: TroyHernandez Date: Tue, 3 Mar 2026 20:14:11 -0600 Subject: [PATCH 1/5] Add join_else option, fix else_same_line docs join_else (default TRUE) is a new AST-level transform that moves else to the same line as the preceding }. This is the actual style enforcement that else_same_line was incorrectly documented as doing. else_same_line docs now correctly describe it as text-level parse repair only: it joins top-level }\nelse (a parse error in R) so the code can be formatted. The transform respects line_limit (skips joining if the combined line would be too long) and preserves comments between } and else. 126-package stress test: 0 FAIL, 0 IDEMP with randomized join_else. --- R/RcppExports.R | 14 +++----- R/ast_else.R | 63 ++++++++++++++++++++++++++++++++++++ R/ast_pipeline.R | 7 +++- R/format_tokens.R | 7 ++-- R/rformat.R | 33 +++++++++++++------ inst/tinytest/test_rformat.R | 29 ++++++++++++++++- man/format_pipeline.Rd | 4 ++- man/format_tokens.Rd | 4 ++- man/join_else_transform.Rd | 19 +++++++++++ man/rformat.Rd | 11 +++++-- man/rformat_dir.Rd | 8 +++-- man/rformat_file.Rd | 8 +++-- src/RcppExports.cpp | 18 ++++++----- src/else.cpp | 52 +++++++++++++++++++++++++++++ src/format_pipeline.cpp | 12 +++++-- src/token.h | 3 ++ vignettes/r-core-style.md | 4 +-- vignettes/rformat-options.md | 39 ++++++++++++++++++---- 18 files changed, 283 insertions(+), 52 deletions(-) create mode 100644 R/ast_else.R create mode 100644 man/join_else_transform.Rd create mode 100644 src/else.cpp diff --git a/R/RcppExports.R b/R/RcppExports.R index b003c86..da32e71 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,18 +1,12 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -cpp_format_pipeline <- function(code, indent_str, wrap, expand_if, - brace_style, line_limit, function_space, - control_braces) { - .Call(`_rformat_cpp_format_pipeline`, code, indent_str, wrap, - expand_if, brace_style, line_limit, function_space, - control_braces) +cpp_format_pipeline <- function(code, indent_str, wrap, expand_if, brace_style, line_limit, function_space, control_braces, join_else) { + .Call(`_rformat_cpp_format_pipeline`, code, indent_str, wrap, expand_if, brace_style, line_limit, function_space, control_braces, join_else) } -cpp_format_all <- function(code, indent_str, wrap, expand_if, brace_style, - line_limit, function_space, control_braces) { - .Call(`_rformat_cpp_format_all`, code, indent_str, wrap, expand_if, - brace_style, line_limit, function_space, control_braces) +cpp_format_all <- function(code, indent_str, wrap, expand_if, brace_style, line_limit, function_space, control_braces, join_else) { + .Call(`_rformat_cpp_format_all`, code, indent_str, wrap, expand_if, brace_style, line_limit, function_space, control_braces, join_else) } cpp_set_trace <- function(enable) { diff --git a/R/ast_else.R b/R/ast_else.R new file mode 100644 index 0000000..5cfe499 --- /dev/null +++ b/R/ast_else.R @@ -0,0 +1,63 @@ +#' Join Else to Preceding Close Brace +#' +#' AST transform that moves ELSE tokens (and any following tokens on the +#' same line, like `if` in `else if`) to the same output line as the +#' preceding `}`. Skips if a COMMENT exists between `}` and `else`, +#' or if joining would exceed the line limit. +#' +#' @param terms Enriched terminal DataFrame. +#' @param indent_str Indent string for line width calculation. +#' @param line_limit Maximum line width. +#' @return Updated DataFrame. +#' @keywords internal +join_else_transform <- function(terms, indent_str, line_limit) { + terms <- terms[order(terms$out_line, terms$out_order),] + terms$out_order <- seq_len(nrow(terms)) + n <- nrow(terms) + if (n < 2L) return(terms) + + # Find all ELSE tokens + else_idx <- which(terms$token == "ELSE") + if (length(else_idx) == 0L) return(terms) + + for (ei in else_idx) { + else_line <- terms$out_line[ei] + + # Walk backwards to find preceding non-comment token + rbrace_idx <- NA_integer_ + has_comment <- FALSE + j <- ei - 1L + while (j >= 1L) { + if (terms$token[j] == "COMMENT") { + has_comment <- TRUE + j <- j - 1L + next + } + if (terms$token[j] == "'}'") { + rbrace_idx <- j + } + break + } + + # Skip if no preceding }, or comment between } and else + if (is.na(rbrace_idx) || has_comment) next + + rbrace_line <- terms$out_line[rbrace_idx] + if (rbrace_line == else_line) next # already on same line + + # Check if joining would exceed line_limit + rbrace_width <- ast_line_width(terms, rbrace_line, indent_str) + else_width <- ast_line_width(terms, else_line, indent_str) + if (rbrace_width + 1L + else_width > line_limit) next + + # Move ELSE and all following tokens on the same line to the } line + # This handles "else if (y) {" — moving else, if, (, y, ), { together + k <- ei + while (k <= n && terms$out_line[k] == else_line) { + terms$out_line[k] <- rbrace_line + k <- k + 1L + } + } + + terms +} diff --git a/R/ast_pipeline.R b/R/ast_pipeline.R index 5fd212e..d77778c 100644 --- a/R/ast_pipeline.R +++ b/R/ast_pipeline.R @@ -11,11 +11,12 @@ #' @param line_limit Maximum line length. #' @param function_space Add space after `function`. #' @param control_braces Control brace mode. +#' @param join_else If TRUE, move else to same line as preceding `}`. #' @return Formatted code string. #' @keywords internal format_pipeline <- function(code, indent, wrap, expand_if, brace_style, line_limit, function_space = FALSE, - control_braces = FALSE) { + control_braces = FALSE, join_else = TRUE) { if (is.numeric(indent)) { indent_str <- strrep(" ", indent) } else { @@ -90,6 +91,10 @@ format_pipeline <- function(code, indent, wrap, expand_if, brace_style, terms <- recompute_nesting(terms) } + if (isTRUE(join_else)) { + terms <- join_else_transform(terms, indent_str, line_limit) + } + serialize_tokens(terms, indent_str, wrap, line_limit) } diff --git a/R/format_tokens.R b/R/format_tokens.R index c9d9c56..25fc655 100644 --- a/R/format_tokens.R +++ b/R/format_tokens.R @@ -18,13 +18,14 @@ #' @param line_limit Maximum line length before wrapping (default 80). #' @param function_space If TRUE, add space before `(` in function definitions. #' @param control_braces If TRUE, add braces to bare one-line control flow bodies. +#' @param join_else If TRUE, move else to same line as preceding `}`. #' @return Formatted code as character string. #' @importFrom utils getParseData #' @keywords internal format_tokens <- function(code, indent = 4L, wrap = "paren", expand_if = FALSE, brace_style = "kr", line_limit = 80L, function_space = FALSE, - control_braces = FALSE) { + control_braces = FALSE, join_else = TRUE) { if (is.character(indent)) { indent_str <- indent } else { @@ -43,7 +44,7 @@ format_tokens <- function(code, indent = 4L, wrap = "paren", if (is.loaded("_rformat_cpp_format_all")) { return(cpp_format_all(code, indent_str, wrap, expand_if, brace_style, line_limit, function_space, - cb_str)) + cb_str, join_else)) } # Pure R fallback: split into top-level expressions, format each @@ -57,7 +58,7 @@ format_tokens <- function(code, indent = 4L, wrap = "paren", parts[i] <- format_pipeline(chunks[[i]]$text, indent_str, wrap, expand_if, brace_style, line_limit, function_space, - control_braces) + control_braces, join_else) } } result <- paste(parts, collapse = "\n") diff --git a/R/rformat.R b/R/rformat.R index 690558d..625291a 100644 --- a/R/rformat.R +++ b/R/rformat.R @@ -14,11 +14,15 @@ #' bodies (e.g., `if (x) y` becomes `if (x) { y }`). Default FALSE #' matches R Core source code where 59% of control flow bodies are bare. #' @param expand_if Expand inline if-else to multi-line (default FALSE). -#' @param else_same_line If TRUE (default), fix `}\\nelse` to `} else {` on -#' the same line. Matches R Core source code where 70% use same-line else. +#' @param else_same_line If TRUE (default), repair top-level `}\\nelse` +#' (which is a parse error in R) by joining to `} else` before formatting. +#' When FALSE, unparseable input is returned unchanged with a warning. #' @param function_space If TRUE, add space before `(` in function definitions: #' `function (x)` instead of `function(x)`. Default FALSE matches 96% of #' R Core source code. +#' @param join_else If TRUE (default), move `else` to the same line as the +#' preceding `}`: `} else {`. Matches R Core source code where 70% +#' use same-line else. When FALSE, `}\\nelse` on separate lines is preserved. #' @return Formatted code as a character string. #' @export #' @examples @@ -45,7 +49,7 @@ rformat <- function(code, indent = 4L, line_limit = 80L, wrap = "paren", brace_style = "kr", control_braces = FALSE, expand_if = FALSE, else_same_line = TRUE, - function_space = FALSE) { + function_space = FALSE, join_else = TRUE) { if (!is.character(code)) { stop("`code` must be a character string") } @@ -68,7 +72,8 @@ rformat <- function(code, indent = 4L, line_limit = 80L, wrap = "paren", brace_style = brace_style, line_limit = line_limit, function_space = function_space, - control_braces = control_braces) + control_braces = control_braces, + join_else = join_else) formatted <- format_blank_lines(formatted) # Fix } else if formatter broke valid input @@ -99,10 +104,13 @@ rformat <- function(code, indent = 4L, line_limit = 80L, wrap = "paren", #' @param control_braces If TRUE, add braces to bare one-line control flow #' bodies. Default FALSE matches R Core majority style. #' @param expand_if Expand inline if-else to multi-line (default FALSE). -#' @param else_same_line If TRUE (default), fix `}\\nelse` to `} else {`. +#' @param else_same_line If TRUE (default), repair top-level `}\\nelse` +#' (which is a parse error in R) by joining to `} else` before formatting. #' @param function_space If TRUE, add space before `(` in function definitions: #' `function (x)` instead of `function(x)`. Default FALSE matches 96% of #' R Core source code. +#' @param join_else If TRUE (default), move `else` to the same line as the +#' preceding `}`. #' @return Invisibly returns formatted code. #' @export #' @examples @@ -119,7 +127,7 @@ rformat_file <- function(path, output = NULL, dry_run = FALSE, indent = 4L, line_limit = 80L, wrap = "paren", brace_style = "kr", control_braces = FALSE, expand_if = FALSE, else_same_line = TRUE, - function_space = FALSE) { + function_space = FALSE, join_else = TRUE) { if (!file.exists(path)) { stop("File not found: ", path) } @@ -130,7 +138,8 @@ rformat_file <- function(path, output = NULL, dry_run = FALSE, indent = 4L, line_limit = line_limit, function_space = function_space, control_braces = control_braces, - else_same_line = else_same_line) + else_same_line = else_same_line, + join_else = join_else) if (!dry_run) { if (is.null(output)) { @@ -161,10 +170,13 @@ rformat_file <- function(path, output = NULL, dry_run = FALSE, indent = 4L, #' @param control_braces If TRUE, add braces to bare one-line control flow #' bodies. Default FALSE matches R Core majority style. #' @param expand_if Expand inline if-else to multi-line (default FALSE). -#' @param else_same_line If TRUE (default), fix `}\\nelse` to `} else {`. +#' @param else_same_line If TRUE (default), repair top-level `}\\nelse` +#' (which is a parse error in R) by joining to `} else` before formatting. #' @param function_space If TRUE, add space before `(` in function definitions: #' `function (x)` instead of `function(x)`. Default FALSE matches 96% of #' R Core source code. +#' @param join_else If TRUE (default), move `else` to the same line as the +#' preceding `}`. #' @return Invisibly returns vector of modified file paths. #' @export #' @examples @@ -181,7 +193,7 @@ rformat_dir <- function(path = ".", recursive = TRUE, dry_run = FALSE, indent = 4L, line_limit = 80L, wrap = "paren", brace_style = "kr", control_braces = FALSE, expand_if = FALSE, else_same_line = TRUE, - function_space = FALSE) { + function_space = FALSE, join_else = TRUE) { if (!dir.exists(path)) { stop("Directory not found: ", path) } @@ -198,7 +210,8 @@ rformat_dir <- function(path = ".", recursive = TRUE, dry_run = FALSE, line_limit = line_limit, function_space = function_space, control_braces = control_braces, - else_same_line = else_same_line) + else_same_line = else_same_line, + join_else = join_else) if (formatted != original) { modified <- c(modified, f) diff --git a/inst/tinytest/test_rformat.R b/inst/tinytest/test_rformat.R index 41250c7..d46718d 100644 --- a/inst/tinytest/test_rformat.R +++ b/inst/tinytest/test_rformat.R @@ -232,11 +232,38 @@ expect_equal( ) # else_same_line = FALSE inside braces (where } \n else does parse) +# Also need join_else = FALSE to preserve the separate-line layout expect_equal( - rformat("{\nif (x) {\n y\n}\nelse {\n z\n}\n}", else_same_line = FALSE), + rformat("{\nif (x) {\n y\n}\nelse {\n z\n}\n}", + else_same_line = FALSE, join_else = FALSE), "{\n if (x) {\n y\n }\n else {\n z\n }\n}\n" ) +# join_else = TRUE (default) moves else to same line as } +expect_equal( + rformat("{\nif (x) {\n y\n}\nelse {\n z\n}\n}"), + "{\n if (x) {\n y\n } else {\n z\n }\n}\n" +) + +# join_else = FALSE preserves } / else on separate lines +expect_equal( + rformat("{\nif (x) {\n y\n}\nelse {\n z\n}\n}", join_else = FALSE), + "{\n if (x) {\n y\n }\n else {\n z\n }\n}\n" +) + +# join_else skips when comment between } and else +expect_equal( + rformat("{\nif (x) {\n y\n} # comment\nelse {\n z\n}\n}", + join_else = TRUE), + "{\n if (x) {\n y\n } # comment\n else {\n z\n }\n}\n" +) + +# join_else with nested if-else chains +expect_equal( + rformat("{\nif (x) {\n 1\n}\nelse if (y) {\n 2\n}\nelse {\n 3\n}\n}"), + "{\n if (x) {\n 1\n } else if (y) {\n 2\n } else {\n 3\n }\n}\n" +) + # Trailing whitespace removal expect_false( grepl(" \n", rformat("x <- 1 \ny <- 2 ")), diff --git a/man/format_pipeline.Rd b/man/format_pipeline.Rd index def14e7..0f2a419 100644 --- a/man/format_pipeline.Rd +++ b/man/format_pipeline.Rd @@ -4,7 +4,7 @@ \title{AST-Based Format Pipeline} \usage{ format_pipeline(code, indent, wrap, expand_if, brace_style, line_limit, - function_space = FALSE, control_braces = FALSE) + function_space = FALSE, control_braces = FALSE, join_else = TRUE) } \arguments{ \item{code}{Code string for one top-level expression.} @@ -22,6 +22,8 @@ format_pipeline(code, indent, wrap, expand_if, brace_style, line_limit, \item{function_space}{Add space after `function`.} \item{control_braces}{Control brace mode.} + +\item{join_else}{If TRUE, move else to same line as preceding `\}`.} } \value{ Formatted code string. diff --git a/man/format_tokens.Rd b/man/format_tokens.Rd index abe879e..e64bfcd 100644 --- a/man/format_tokens.Rd +++ b/man/format_tokens.Rd @@ -5,7 +5,7 @@ \usage{ format_tokens(code, indent = 4L, wrap = "paren", expand_if = FALSE, brace_style = "kr", line_limit = 80L, function_space = FALSE, - control_braces = FALSE) + control_braces = FALSE, join_else = TRUE) } \arguments{ \item{code}{Character string of R code.} @@ -25,6 +25,8 @@ parenthesis, `"fixed"` uses 8-space indent.} \item{function_space}{If TRUE, add space before `(` in function definitions.} \item{control_braces}{If TRUE, add braces to bare one-line control flow bodies.} + +\item{join_else}{If TRUE, move else to same line as preceding `\}`.} } \value{ Formatted code as character string. diff --git a/man/join_else_transform.Rd b/man/join_else_transform.Rd new file mode 100644 index 0000000..f3559cf --- /dev/null +++ b/man/join_else_transform.Rd @@ -0,0 +1,19 @@ +% tinyrox says don't edit this manually, but it can't stop you! +\name{join_else_transform} +\alias{join_else_transform} +\title{Join Else to Preceding Close Brace} +\usage{ +join_else_transform(terms) +} +\arguments{ +\item{terms}{Enriched terminal DataFrame.} +} +\value{ +Updated DataFrame. +} +\description{ +AST transform that moves ELSE tokens (and any following opening brace) +to the same output line as the preceding `\}`. Skips if a COMMENT +exists between `\}` and `else`. +} +\keyword{internal} diff --git a/man/rformat.Rd b/man/rformat.Rd index 3aabef4..b14166c 100644 --- a/man/rformat.Rd +++ b/man/rformat.Rd @@ -5,7 +5,7 @@ \usage{ rformat(code, indent = 4L, line_limit = 80L, wrap = "paren", brace_style = "kr", control_braces = FALSE, expand_if = FALSE, - else_same_line = TRUE, function_space = FALSE) + else_same_line = TRUE, function_space = FALSE, join_else = TRUE) } \arguments{ \item{code}{Character string of R code to format.} @@ -27,12 +27,17 @@ matches R Core source code where 59\% of control flow bodies are bare.} \item{expand_if}{Expand inline if-else to multi-line (default FALSE).} -\item{else_same_line}{If TRUE (default), fix `\}\\nelse` to `\} else \{` on -the same line. Matches R Core source code where 70\% use same-line else.} +\item{else_same_line}{If TRUE (default), repair top-level `\}\\nelse` +(which is a parse error in R) by joining to `\} else` before formatting. +When FALSE, unparseable input is returned unchanged with a warning.} \item{function_space}{If TRUE, add space before `(` in function definitions: `function (x)` instead of `function(x)`. Default FALSE matches 96\% of R Core source code.} + +\item{join_else}{If TRUE (default), move `else` to the same line as the +preceding `\}`: `\} else \{`. Matches R Core source code where 70\% +use same-line else. When FALSE, `\}\\nelse` on separate lines is preserved.} } \value{ Formatted code as a character string. diff --git a/man/rformat_dir.Rd b/man/rformat_dir.Rd index 601ecb3..e6872dd 100644 --- a/man/rformat_dir.Rd +++ b/man/rformat_dir.Rd @@ -6,7 +6,7 @@ rformat_dir(path = ".", recursive = TRUE, dry_run = FALSE, indent = 4L, line_limit = 80L, wrap = "paren", brace_style = "kr", control_braces = FALSE, expand_if = FALSE, else_same_line = TRUE, - function_space = FALSE) + function_space = FALSE, join_else = TRUE) } \arguments{ \item{path}{Path to directory.} @@ -31,11 +31,15 @@ bodies. Default FALSE matches R Core majority style.} \item{expand_if}{Expand inline if-else to multi-line (default FALSE).} -\item{else_same_line}{If TRUE (default), fix `\}\\nelse` to `\} else \{`.} +\item{else_same_line}{If TRUE (default), repair top-level `\}\\nelse` +(which is a parse error in R) by joining to `\} else` before formatting.} \item{function_space}{If TRUE, add space before `(` in function definitions: `function (x)` instead of `function(x)`. Default FALSE matches 96\% of R Core source code.} + +\item{join_else}{If TRUE (default), move `else` to the same line as the +preceding `\}`.} } \value{ Invisibly returns vector of modified file paths. diff --git a/man/rformat_file.Rd b/man/rformat_file.Rd index 31156bd..9ac4202 100644 --- a/man/rformat_file.Rd +++ b/man/rformat_file.Rd @@ -6,7 +6,7 @@ rformat_file(path, output = NULL, dry_run = FALSE, indent = 4L, line_limit = 80L, wrap = "paren", brace_style = "kr", control_braces = FALSE, expand_if = FALSE, else_same_line = TRUE, - function_space = FALSE) + function_space = FALSE, join_else = TRUE) } \arguments{ \item{path}{Path to R file.} @@ -31,11 +31,15 @@ bodies. Default FALSE matches R Core majority style.} \item{expand_if}{Expand inline if-else to multi-line (default FALSE).} -\item{else_same_line}{If TRUE (default), fix `\}\\nelse` to `\} else \{`.} +\item{else_same_line}{If TRUE (default), repair top-level `\}\\nelse` +(which is a parse error in R) by joining to `\} else` before formatting.} \item{function_space}{If TRUE, add space before `(` in function definitions: `function (x)` instead of `function(x)`. Default FALSE matches 96\% of R Core source code.} + +\item{join_else}{If TRUE (default), move `else` to the same line as the +preceding `\}`.} } \value{ Invisibly returns formatted code. diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index a98e139..fb63277 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -11,8 +11,8 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // cpp_format_pipeline -Rcpp::String cpp_format_pipeline(std::string code, std::string indent_str, std::string wrap, bool expand_if, std::string brace_style, int line_limit, bool function_space, std::string control_braces); -RcppExport SEXP _rformat_cpp_format_pipeline(SEXP codeSEXP, SEXP indent_strSEXP, SEXP wrapSEXP, SEXP expand_ifSEXP, SEXP brace_styleSEXP, SEXP line_limitSEXP, SEXP function_spaceSEXP, SEXP control_bracesSEXP) { +Rcpp::String cpp_format_pipeline(std::string code, std::string indent_str, std::string wrap, bool expand_if, std::string brace_style, int line_limit, bool function_space, std::string control_braces, bool join_else); +RcppExport SEXP _rformat_cpp_format_pipeline(SEXP codeSEXP, SEXP indent_strSEXP, SEXP wrapSEXP, SEXP expand_ifSEXP, SEXP brace_styleSEXP, SEXP line_limitSEXP, SEXP function_spaceSEXP, SEXP control_bracesSEXP, SEXP join_elseSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -24,13 +24,14 @@ BEGIN_RCPP Rcpp::traits::input_parameter< int >::type line_limit(line_limitSEXP); Rcpp::traits::input_parameter< bool >::type function_space(function_spaceSEXP); Rcpp::traits::input_parameter< std::string >::type control_braces(control_bracesSEXP); - rcpp_result_gen = Rcpp::wrap(cpp_format_pipeline(code, indent_str, wrap, expand_if, brace_style, line_limit, function_space, control_braces)); + Rcpp::traits::input_parameter< bool >::type join_else(join_elseSEXP); + rcpp_result_gen = Rcpp::wrap(cpp_format_pipeline(code, indent_str, wrap, expand_if, brace_style, line_limit, function_space, control_braces, join_else)); return rcpp_result_gen; END_RCPP } // cpp_format_all -Rcpp::String cpp_format_all(std::string code, std::string indent_str, std::string wrap, bool expand_if, std::string brace_style, int line_limit, bool function_space, std::string control_braces); -RcppExport SEXP _rformat_cpp_format_all(SEXP codeSEXP, SEXP indent_strSEXP, SEXP wrapSEXP, SEXP expand_ifSEXP, SEXP brace_styleSEXP, SEXP line_limitSEXP, SEXP function_spaceSEXP, SEXP control_bracesSEXP) { +Rcpp::String cpp_format_all(std::string code, std::string indent_str, std::string wrap, bool expand_if, std::string brace_style, int line_limit, bool function_space, std::string control_braces, bool join_else); +RcppExport SEXP _rformat_cpp_format_all(SEXP codeSEXP, SEXP indent_strSEXP, SEXP wrapSEXP, SEXP expand_ifSEXP, SEXP brace_styleSEXP, SEXP line_limitSEXP, SEXP function_spaceSEXP, SEXP control_bracesSEXP, SEXP join_elseSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -42,7 +43,8 @@ BEGIN_RCPP Rcpp::traits::input_parameter< int >::type line_limit(line_limitSEXP); Rcpp::traits::input_parameter< bool >::type function_space(function_spaceSEXP); Rcpp::traits::input_parameter< std::string >::type control_braces(control_bracesSEXP); - rcpp_result_gen = Rcpp::wrap(cpp_format_all(code, indent_str, wrap, expand_if, brace_style, line_limit, function_space, control_braces)); + Rcpp::traits::input_parameter< bool >::type join_else(join_elseSEXP); + rcpp_result_gen = Rcpp::wrap(cpp_format_all(code, indent_str, wrap, expand_if, brace_style, line_limit, function_space, control_braces, join_else)); return rcpp_result_gen; END_RCPP } @@ -58,8 +60,8 @@ END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_rformat_cpp_format_pipeline", (DL_FUNC) &_rformat_cpp_format_pipeline, 8}, - {"_rformat_cpp_format_all", (DL_FUNC) &_rformat_cpp_format_all, 8}, + {"_rformat_cpp_format_pipeline", (DL_FUNC) &_rformat_cpp_format_pipeline, 9}, + {"_rformat_cpp_format_all", (DL_FUNC) &_rformat_cpp_format_all, 9}, {"_rformat_cpp_set_trace", (DL_FUNC) &_rformat_cpp_set_trace, 1}, {NULL, NULL, 0} }; diff --git a/src/else.cpp b/src/else.cpp new file mode 100644 index 0000000..0ab109d --- /dev/null +++ b/src/else.cpp @@ -0,0 +1,52 @@ +#include "token.h" + +void join_else_transform(std::vector& tokens, + const FormatOptions& opts) { + reorder_tokens(tokens); + int n = static_cast(tokens.size()); + if (n < 2) return; + + LineIndex lidx; + lidx.build(tokens); + + for (int ei = 0; ei < n; ei++) { + if (tokens[ei].token != "ELSE") continue; + + int else_line = tokens[ei].out_line; + + // Walk backwards to find preceding non-comment token + int rbrace_idx = -1; + bool has_comment = false; + for (int j = ei - 1; j >= 0; j--) { + if (tokens[j].token == "COMMENT") { + has_comment = true; + continue; + } + if (tokens[j].token == "'}'") { + rbrace_idx = j; + } + break; + } + + // Skip if no preceding }, or comment between } and else + if (rbrace_idx < 0 || has_comment) continue; + + int rbrace_line = tokens[rbrace_idx].out_line; + if (rbrace_line == else_line) continue; // already on same line + + // Check if joining would exceed line_limit + int rbrace_width = lidx.width(tokens, rbrace_line, + opts.indent_str, opts.function_space); + int else_width = lidx.width(tokens, else_line, + opts.indent_str, opts.function_space); + if (rbrace_width + 1 + else_width > opts.line_limit) continue; + + // Move ELSE and all following tokens on the same line to the } line + for (int k = ei; k < n && tokens[k].out_line == else_line; k++) { + tokens[k].out_line = rbrace_line; + } + + // Rebuild line index since lines changed + lidx.build(tokens); + } +} diff --git a/src/format_pipeline.cpp b/src/format_pipeline.cpp index 9f75e9f..94484f6 100644 --- a/src/format_pipeline.cpp +++ b/src/format_pipeline.cpp @@ -125,6 +125,10 @@ static void run_pipeline_transforms(std::vector& tokens, TIMED("collapse_calls_3", collapse_calls(tokens, opts)); TIMED("recompute_3", recompute_nesting(tokens)); } + + if (opts.join_else) { + TIMED("join_else", join_else_transform(tokens, opts)); + } } // [[Rcpp::export]] @@ -135,7 +139,8 @@ Rcpp::String cpp_format_pipeline(std::string code, std::string brace_style, int line_limit, bool function_space, - std::string control_braces) { + std::string control_braces, + bool join_else) { // Parse code via R Rcpp::Function parse_fn("parse"); Rcpp::Function getParseData_fn("getParseData"); @@ -177,6 +182,7 @@ Rcpp::String cpp_format_pipeline(std::string code, opts.line_limit = line_limit; opts.function_space = function_space; opts.control_braces = control_braces; + opts.join_else = join_else; run_pipeline_transforms(tokens, opts); @@ -192,7 +198,8 @@ Rcpp::String cpp_format_all(std::string code, std::string brace_style, int line_limit, bool function_space, - std::string control_braces) { + std::string control_braces, + bool join_else) { // Parse full code once Rcpp::Function parse_fn("parse"); Rcpp::Function getParseData_fn("getParseData"); @@ -275,6 +282,7 @@ Rcpp::String cpp_format_all(std::string code, opts.line_limit = line_limit; opts.function_space = function_space; opts.control_braces = control_braces; + opts.join_else = join_else; // Collect terminal tokens per expression, with adjusted line numbers std::vector> expr_tokens(merged.size()); diff --git a/src/token.h b/src/token.h index c04f5de..c96e948 100644 --- a/src/token.h +++ b/src/token.h @@ -27,6 +27,7 @@ struct FormatOptions { int line_limit; bool function_space; std::string control_braces; // "" for FALSE, "single", "multi", "next_line", "same_line" + bool join_else; }; // Sort tokens by (out_line, out_order) @@ -145,5 +146,7 @@ void add_control_braces(std::vector& tokens, const FormatOptions& opts); void expand_call_if_args(std::vector& tokens, const FormatOptions& opts); void reformat_inline_if(std::vector& tokens, const FormatOptions& opts); +void join_else_transform(std::vector& tokens, + const FormatOptions& opts); #endif diff --git a/vignettes/r-core-style.md b/vignettes/r-core-style.md index 502cd2a..8442daa 100644 --- a/vignettes/r-core-style.md +++ b/vignettes/r-core-style.md @@ -111,7 +111,7 @@ default (`control_braces = FALSE`). Use `control_braces = TRUE` to add braces. | `}\nelse` (new line) | 1,987 | 30.3 | Same-line `} else` is the majority practice. rformat enforces this by default -(`else_same_line = TRUE`). Use `else_same_line = FALSE` to preserve `}\nelse`. +(`join_else = TRUE`). Use `join_else = FALSE` to preserve `}\nelse`. ## Source examples @@ -174,7 +174,7 @@ choices where R Core is mixed: |------------|---------------|-----------------|------------------| | Assignment | 100% `<-` | `<-` | — | | Control flow braces | 59% bare, 41% braced | bare | `control_braces = TRUE` | -| `} else` placement | 70% same line | same line | `else_same_line = FALSE` | +| `} else` placement | 70% same line | same line | `join_else = FALSE` | | Brace style | ~53% K&R, ~47% Allman | K&R | `brace_style = "allman"` | | `function` spacing | 96% no space | no space | `function_space = TRUE` | | Indentation | 89% spaces, 11% tabs | 4 spaces | `indent = "\t"` | diff --git a/vignettes/rformat-options.md b/vignettes/rformat-options.md index 115ccb2..9f79100 100644 --- a/vignettes/rformat-options.md +++ b/vignettes/rformat-options.md @@ -134,18 +134,45 @@ cat(rformat(code, expand_if = TRUE)) ## `else_same_line` -When TRUE (default), joins `} else` on the same line. Matches R Core -source code where 70% use same-line else. This also repairs a common -problem: `else` on its own line is a parse error at top level in R, -so this option makes such code formattable. +In R, `else` on its own line is a parse error at top level (though +it's valid inside braces). When TRUE (default), this option repairs +top-level `}\nelse` by joining them to `} else` before formatting. +When FALSE, unparseable input is returned unchanged with a warning. ```{.R} -# } and else on separate lines -- normally a top-level parse error. -# else_same_line (the default) joins them and formats: +# Top-level } else on separate lines is a parse error. +# else_same_line (the default) repairs it: code <- "if (x) {\n 1\n}\nelse {\n 2\n}" cat(rformat(code)) ``` +## `join_else` + +When TRUE (default), moves `else` to the same line as the preceding `}`. +Matches R Core source code where 70% use same-line else. When FALSE, +`}\nelse` on separate lines is preserved. + +This is an AST-level transform that works on already-valid code inside +braces, unlike `else_same_line` which is a text-level parse repair. + +```{.R} +code <- "f <- function(x) { +if (x > 0) { + y <- log(x) +} +else { + y <- NA +} +y +}" + +# Default: else joins the } line +cat(rformat(code)) + +# Preserve } / else on separate lines +cat(rformat(code, join_else = FALSE)) +``` + ## `function_space` When TRUE, adds a space before `(` in function definitions: From dba1090d6d2fa0343d2950f7f37af85e7f7d3575 Mon Sep 17 00:00:00 2001 From: TroyHernandez Date: Wed, 4 Mar 2026 01:25:36 -0600 Subject: [PATCH 2/5] Add tinyelf shared library audit to CI Runs tinyelf::audit_so() on the rformat .so after tests pass (Linux only). Catches GLIBC version regressions, unexpected system deps, and bad rpaths. --- .github/workflows/ci.yaml | 6 ++++++ DESCRIPTION | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ad19176..2301035 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -34,3 +34,9 @@ jobs: - name: Test run: ./run.sh run_tests + + - name: Audit shared libraries + if: runner.os == 'Linux' + run: | + Rscript -e 'remotes::install_github("cornball-ai/tinyelf")' + Rscript -e 'tinyelf::audit_so(system.file("libs", package = "rformat"))' diff --git a/DESCRIPTION b/DESCRIPTION index 64c9f79..840f107 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rformat Type: Package Title: Base R Code Formatter -Version: 0.1.0 +Version: 0.2.0 Authors@R: c(person("Troy", "Hernandez", role = c("aut", "cre"), email = "troy@cornball.ai", comment = c(ORCID = "0009-0005-4248-604X")), From 9f41a4e76f86558193cbccf6f7bba0ca5c7d5c31 Mon Sep 17 00:00:00 2001 From: TroyHernandez Date: Wed, 4 Mar 2026 01:32:00 -0600 Subject: [PATCH 3/5] Fix tinyelf install: set GITHUB_PAT for remotes The bundled PAT in remotes couldn't reach the GitHub API. Use the workflow's automatic token instead. --- .github/workflows/ci.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2301035..9b23f5e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -37,6 +37,8 @@ jobs: - name: Audit shared libraries if: runner.os == 'Linux' + env: + GITHUB_PAT: ${{ github.token }} run: | Rscript -e 'remotes::install_github("cornball-ai/tinyelf")' Rscript -e 'tinyelf::audit_so(system.file("libs", package = "rformat"))' From 835237a146803a321da5a82ce66cb268c3eae1e7 Mon Sep 17 00:00:00 2001 From: TroyHernandez Date: Wed, 4 Mar 2026 13:06:15 -0600 Subject: [PATCH 4/5] Install rformat before auditing its .so R CMD check doesn't leave the package in the default library, so system.file() returned empty. Install from source first. --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9b23f5e..b18108d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -41,4 +41,5 @@ jobs: GITHUB_PAT: ${{ github.token }} run: | Rscript -e 'remotes::install_github("cornball-ai/tinyelf")' + Rscript -e 'install.packages(".", repos = NULL, type = "source")' Rscript -e 'tinyelf::audit_so(system.file("libs", package = "rformat"))' From 30a17bd76b92c6c3474be2f8ece2a0c7c7d33d27 Mon Sep 17 00:00:00 2001 From: TroyHernandez Date: Wed, 4 Mar 2026 13:09:18 -0600 Subject: [PATCH 5/5] Revert version bump --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 840f107..64c9f79 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rformat Type: Package Title: Base R Code Formatter -Version: 0.2.0 +Version: 0.1.0 Authors@R: c(person("Troy", "Hernandez", role = c("aut", "cre"), email = "troy@cornball.ai", comment = c(ORCID = "0009-0005-4248-604X")),