diff --git a/.gitignore b/.gitignore index 4acec74..2e3c8cd 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ bin/ .idea/ .vscode/ coverage.out +.audit-cli.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index feaacdd..dd43514 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,11 +5,28 @@ All notable changes to audit-cli will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +#### Resolve Commands + +- `resolve url` - Resolve documentation source files to production URLs + - Takes a source file path (.txt) from the docs monorepo + - Returns the corresponding production URL on mongodb.com/docs + - Uses table-of-contents data as the source of truth for URL mappings + - Supports all projects in the monorepo (Atlas, Drivers, Manual, Tools, etc.) + - Handles versioned and non-versioned projects correctly + - Supports custom base URL for staging environments + - Flags: + - `--base-url` - Override the default base URL (default: `https://www.mongodb.com/docs`) + ## [0.3.0] - 2025-01-07 ### Added #### Report Commands + - `report testable-code` - Analyze testable code examples on pages from analytics data - Takes a CSV file with page rankings and URLs from analytics - Resolves URLs to source files using the Snooty Data API @@ -23,6 +40,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `--details` - Show detailed per-product breakdown #### Internal Packages + - `internal/language` - Programming language utilities (refactored from code-examples) - Language normalization (e.g., "ts" → "typescript", "py" → "python") - File extension mapping for all supported languages @@ -64,6 +82,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added #### Analyze Commands + - `analyze composables` - Analyze composable definitions in 
snooty.toml files - Inventory all composables across projects and versions - Identify identical composables (same ID, title, and options) across different projects/versions @@ -79,6 +98,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `--with-rstspec` - Show canonical composable definitions from rstspec.toml #### Configuration System + - Monorepo path configuration via three methods (priority order): 1. Command-line argument (highest priority) 2. Environment variable `AUDIT_CLI_MONOREPO_PATH` @@ -90,6 +110,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Applies to commands: `analyze composables`, `count tested-examples`, `count pages` #### File Path Resolution + - Flexible path resolution for all file-based commands - Supports three path types (priority order): 1. Absolute paths - Used as-is @@ -99,6 +120,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Eliminates need to type full paths when working with monorepo files #### Internal Packages + - `internal/config` - Configuration management - Config file loading from `.audit-cli.yaml` - Environment variable support @@ -115,6 +137,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Initial release after splitting from the MongoDB code-example-tooling monorepo. #### Extract Commands + - `extract code-examples` - Extract code examples from RST files - Supports `literalinclude`, `code-block`, and `io-code-block` directives - Handles partial file extraction with `:start-after:`, `:end-before:`, `:lines:` options @@ -129,6 +152,7 @@ Initial release after splitting from the MongoDB code-example-tooling monorepo. 
- Optional include directive expansion #### Search Commands + - `search find-string` - Search for substrings in documentation files - Case-sensitive and case-insensitive search modes - Exact word matching or partial matching @@ -137,6 +161,7 @@ Initial release after splitting from the MongoDB code-example-tooling monorepo. - Language breakdown in verbose mode #### Analyze Commands + - `analyze includes` - Analyze include directive relationships - Tree view of include dependencies - Flat list of all included files @@ -155,6 +180,7 @@ Initial release after splitting from the MongoDB code-example-tooling monorepo. - Variation listing (composable tutorial selections and tabids) #### Compare Commands + - `compare file-contents` - Compare file contents across versions - Direct comparison between two files - Version comparison mode with auto-discovery @@ -162,6 +188,7 @@ Initial release after splitting from the MongoDB code-example-tooling monorepo. - Progressive detail levels (summary, paths, diffs) #### Count Commands + - `count tested-examples` - Count tested code examples in the monorepo - Total count across all products - Per-product breakdown @@ -176,6 +203,7 @@ Initial release after splitting from the MongoDB code-example-tooling monorepo. - Current version only mode #### Internal Packages + - `internal/rst` - RST parsing utilities - Directive parsing (literalinclude, code-block, io-code-block, procedure, step, tabs, composable-tutorial) - Include directive following with circular detection @@ -187,13 +215,14 @@ Initial release after splitting from the MongoDB code-example-tooling monorepo. 
- Version path resolution #### Documentation + - Comprehensive README.md with usage examples - PROCEDURE_PARSING.md with detailed procedure parsing business logic - AGENTS.md for LLM development assistance ### Technical Details + - Built with Go 1.24 - Uses spf13/cobra v1.10.1 for CLI framework - Uses aymanbagabas/go-udiff v0.3.1 for diff generation - Comprehensive test coverage with deterministic testing for procedure parsing - diff --git a/README.md b/README.md index d3f1679..5513f37 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ A Go CLI tool for performing audit-related tasks in the MongoDB documentation mo - [Compare Commands](#compare-commands) - [Count Commands](#count-commands) - [Report Commands](#report-commands) + - [Resolve Commands](#resolve-commands) - [Development](#development) - [Project Structure](#project-structure) - [Adding New Commands](#adding-new-commands) @@ -56,7 +57,7 @@ go run main.go [command] [flags] ### Monorepo Path Configuration -Some commands require a monorepo path (e.g., `analyze composables`, `count tested-examples`, `count pages`). You can configure the monorepo path in three ways, listed in order of priority: +Some commands require a monorepo path (e.g., `analyze composables`, `count tested-examples`, `count pages`, `report testable-code --for-docs-set`). You can configure the monorepo path in three ways, listed in order of priority: ### 1. Command-Line Argument (Highest Priority) @@ -66,6 +67,7 @@ Pass the path directly to the command: ./audit-cli analyze composables /path/to/docs-monorepo ./audit-cli count tested-examples /path/to/docs-monorepo ./audit-cli count pages /path/to/docs-monorepo +./audit-cli report testable-code --for-docs-set manual /path/to/docs-monorepo ``` ### 2. Environment Variable @@ -82,6 +84,7 @@ export AUDIT_CLI_MONOREPO_PATH=/path/to/docs-monorepo ### 3. 
Config File (Lowest Priority) Create a `.audit-cli.yaml` file in either: + - Current directory: `./.audit-cli.yaml` - Home directory: `~/.audit-cli.yaml` @@ -150,6 +153,7 @@ If the path doesn't exist relative to the monorepo, it falls back to the current ``` **Priority Order:** + 1. If path is absolute → use as-is 2. If monorepo is configured and path exists relative to monorepo → use monorepo-relative path 3. Otherwise → resolve relative to current directory @@ -177,8 +181,10 @@ audit-cli ├── count # Count code examples and documentation pages │ ├── tested-examples │ └── pages -└── report # Generate reports from documentation data - └── testable-code +├── report # Generate reports from documentation data +│ └── testable-code +└── resolve # Resolve documentation paths and URLs + └── url ``` ### Extract Commands @@ -192,6 +198,7 @@ section below. **Use Cases:** This command helps writers: + - Examine all the code examples that make up a specific page or section - Split out code examples into individual files for migration to test infrastructure - Report on the number of code examples by language @@ -241,7 +248,7 @@ This command helps writers: will only extract code examples from the top-level RST file. If you do provide this flag, the tool will follow any `.. include::` directives in the RST file and extract code examples from all included files. When combined with `-r`, the tool will recursively scan all subdirectories for RST files and follow `.. include::` directives in all files. If - an include filepath is *outside* the input directory, the `-r` flag would not parse it, but the `-f` flag would + an include filepath is _outside_ the input directory, the `-r` flag would not parse it, but the `-f` flag would follow the include directive and parse the included file. This effectively lets you parse all the files that make up a single page, if you start from the page's root `.txt` file. 
- `--dry-run` - Show what would be extracted without writing files @@ -252,6 +259,7 @@ This command helps writers: Extracted files are named: `{source-base}.{directive-type}.{index}.{ext}` Examples: + - `my-doc.code-block.1.js` - First code-block from my-doc.rst - `my-doc.literalinclude.2.py` - Second literalinclude from my-doc.rst - `my-doc.io-code-block.1.input.js` - Input from first io-code-block @@ -260,6 +268,7 @@ Examples: **Report:** After extraction, the code extraction report shows: + - Number of files traversed - Number of output files written - Code examples by language @@ -274,6 +283,7 @@ even if it appears in multiple selections or variations. **Use Cases:** This command helps writers: + - Extract all unique procedures from a page for testing or migration - Generate individual procedure files for each distinct procedure - Understand how many different procedures exist in a document @@ -312,11 +322,13 @@ This command helps writers: Extracted files are named: `{heading}_{first-step-title}_{hash}.rst` The filename includes: + - **Heading**: The section heading above the procedure - **First step title**: The title of the first step (for readability) - **Hash**: A short 6-character hash of the content (for uniqueness) Examples: + - `before-you-begin_pull-the-mongodb-docker-image_e8eeec.rst` - `install-mongodb-community-edition_download-the-tarball_44c437.rst` - `configuration_create-the-data-and-log-directories_f1d35b.rst` @@ -345,6 +357,7 @@ Found 36 unique procedures: **Supported Procedure Types:** The command recognizes and extracts: + - `.. procedure::` directives with `.. step::` directives - Ordered lists (numbered or lettered) as procedures - `.. tabs::` directives with `:tabid:` options for variations @@ -355,10 +368,12 @@ The command recognizes and extracts: **How Uniqueness is Determined:** Procedures are grouped by: + 1. **Heading**: The section heading above the procedure 2. 
**Content hash**: A hash of the procedure's steps and content This means: + - Procedures with the same heading but different content are treated as separate unique procedures - Procedures with identical content that appear in multiple selections are extracted once - The output file shows all selections where that procedure appears (visible with `-v` flag) @@ -366,6 +381,7 @@ This means: **Report:** After extraction, the report shows: + - Number of unique procedures extracted - Number of files written - Detailed list of procedures with step counts and selections (with `-v` flag) @@ -378,6 +394,7 @@ Search through files for a specific substring. Can search through extracted code source files. **Default Behavior:** + - **Case-insensitive** search (matches "curl", "CURL", "Curl", etc.) - **Exact word matching** (excludes partial matches like "curl" in "libcurl") @@ -387,6 +404,7 @@ of larger words. **Use Cases:** This command helps writers: + - Find specific strings across documentation files or pages - Search for product names, command names, API methods, or other strings that may need to be updated - Understand the number of references and impact of changes across documentation files or pages @@ -433,7 +451,7 @@ This command helps writers: will search only the top-level RST file or directory. If you do provide this flag, the tool will follow any `.. include::` directives in any RST file in the input path and search across all included files. When combined with `-r`, the tool will recursively scan all subdirectories for RST files and follow `.. include::` directives - in all files. If an include filepath is *outside* the input directory, the `-r` flag would not parse it, but the `-f` + in all files. If an include filepath is _outside_ the input directory, the `-r` flag would not parse it, but the `-f` flag would follow the include directive and search the included file. 
This effectively lets you parse all the files that make up a single page, if you start from the page's root `.txt` file. - `-v, --verbose` - Show file paths and language breakdown @@ -443,10 +461,12 @@ This command helps writers: **Report:** The search report shows: + - Number of files scanned - Number of files containing the substring (each file counted once) With `-v` flag, also shows: + - List of file paths where substring appears - Count broken down by language (file extension) @@ -462,6 +482,7 @@ This helps you understand which content is transcluded into a page. **Use Cases:** This command helps writers: + - Understand the impact of changes to widely-included files - Identify files included multiple times - Document file relationships for maintenance @@ -496,6 +517,7 @@ This command helps writers: **Output Formats:** **Summary** (default - no flags): + ``` ============================================================ INCLUDE ANALYSIS SUMMARY @@ -509,6 +531,7 @@ Max Depth: 2 Use --tree to see the hierarchical structure Use --list to see a flat list of all files ``` + - Root file path - Number of unique files discovered - Total number of include directive instances (counting duplicates) @@ -516,6 +539,7 @@ Use --list to see a flat list of all files - Hints to use --tree or --list for more details **Tree** (--tree flag): + - Hierarchical tree structure showing include relationships - Uses box-drawing characters for visual clarity - Shows which files include which other files @@ -524,17 +548,20 @@ Use --list to see a flat list of all files - Files outside `includes`: `path/from/source/filename.rst` **List** (--list flag): + - Flat numbered list of all unique files - Files listed in depth-first traversal order - Shows absolute paths to all files **Verbose** (-v flag): + - Shows complete dependency tree with all nodes (including duplicates) - Each file displays the number of include directives it contains - Uses visual indicators to show duplicate includes: - `•` 
(filled bullet) - First occurrence of a file - `◦` (hollow bullet) - Subsequent occurrences (duplicates) - Example output: + ``` • get-started.txt (24 include directives) • get-started/node/language-connection-steps.rst (3 include directives) @@ -585,6 +612,7 @@ io-code-block) that transclude content into pages. Use `--include-toctree` to al for toctree entries, which are navigation links rather than content transclusion. This command helps writers: + - Understand the impact of changes to a file (what pages will be affected) - Find all usages of an include file across the documentation - Track where code examples are referenced @@ -623,10 +651,12 @@ This command helps writers: **Understanding the Counts:** The command shows two metrics: + - **Total Files**: Number of unique files that use the target (deduplicated) - **Total Usages**: Total number of directive occurrences (includes duplicates) When a file includes the target multiple times, it counts as: + - 1 file (in Total Files) - Multiple usages (in Total Usages) @@ -637,17 +667,20 @@ This helps identify both the impact scope (how many files) and duplicate include By default, the command tracks content inclusion directives: 1. **`.. include::`** - RST content includes (transcluded) + ```rst .. include:: /includes/intro.rst ``` 2. **`.. literalinclude::`** - Code file references (transcluded) + ```rst .. literalinclude:: /code-examples/example.py :language: python ``` 3. **`.. io-code-block::`** - Input/output examples with file arguments (transcluded) + ```rst .. io-code-block:: @@ -661,6 +694,7 @@ By default, the command tracks content inclusion directives: With `--include-toctree`, also tracks: 4. **`.. toctree::`** - Table of contents entries (navigation links, not transcluded) + ```rst .. toctree:: :maxdepth: 2 @@ -675,6 +709,7 @@ is not tracked since it doesn't reference external files. 
**Output Formats:** **Text** (default): + ``` ============================================================ USAGE ANALYSIS @@ -693,6 +728,7 @@ include : 3 files, 4 usages ``` **Text with --verbose:** + ``` ============================================================ USAGE ANALYSIS @@ -715,6 +751,7 @@ include : 3 files, 4 usages ``` **JSON** (--format json): + ```json { "target_file": "/path/to/includes/intro.rst", @@ -802,6 +839,7 @@ Analyze procedures in reStructuredText files to understand procedure complexity, different selections. This command parses procedures from RST files and provides statistics about: + - Total number of unique procedures (grouped by heading and content) - Total number of procedure appearances across all selections - Implementation types (procedure directive vs ordered list) @@ -812,6 +850,7 @@ This command parses procedures from RST files and provides statistics about: **Use Cases:** This command helps writers: + - Understand the complexity of procedures in a document - Count how many unique procedures exist vs. 
how many times they appear - Identify procedures that use different implementation approaches @@ -844,6 +883,7 @@ This command helps writers: **Output:** **Default output (summary only):** + ``` File: path/to/file.rst Total unique procedures: 36 @@ -851,6 +891,7 @@ Total procedure appearances: 93 ``` **With `--list-summary`:** + ``` File: path/to/file.rst Total unique procedures: 36 @@ -865,6 +906,7 @@ Unique Procedures: ``` **With `--list-all`:** + ``` File: path/to/file.rst Total unique procedures: 36 @@ -920,12 +962,14 @@ The command reports two key metrics: - This represents the total number of procedure instances a user might encounter **Example:** + - A file might have **36 unique procedures** that appear a total of **93 times** across different selections - This means some procedures appear in multiple selections (e.g., a "Before You Begin" procedure that's the same for Docker with and without search) **Supported Procedure Types:** The command recognizes: + - `.. procedure::` directives with `.. step::` directives - Ordered lists (numbered or lettered) as procedures - `.. 
tabs::` directives with `:tabid:` options for variations @@ -936,6 +980,7 @@ The command recognizes: **Deterministic Parsing:** The parser ensures deterministic results by: + - Sorting all map iterations to ensure consistent ordering - Sorting procedures by line number - Computing content hashes in a consistent manner @@ -952,6 +997,7 @@ Composables are configuration elements in `snooty.toml` that define content vari **Use Cases:** This command helps writers: + - Inventory all composables across projects and versions - Identify identical composables that could be consolidated across projects - Find similar composables with different IDs but overlapping options (potential consolidation candidates) @@ -1002,6 +1048,7 @@ This command helps writers: **Output:** **Default output (summary and table):** + ``` Composables Analysis ==================== @@ -1029,6 +1076,7 @@ atlas (none) language Language Shows two types of consolidation opportunities: 1. **Identical Composables** - Same ID, title, and options across different projects/versions + ``` Identical Composables (Consolidation Candidates) ================================================ @@ -1047,6 +1095,7 @@ Shows two types of consolidation opportunities: ``` 2. **Similar Composables** - Different IDs but similar option sets (60%+ overlap) + ``` Similar Composables (Review Recommended) ======================================== @@ -1109,6 +1158,7 @@ Total usages: 1 **Understanding Composables:** Composables are defined in `snooty.toml` files: + ```toml [[composables]] id = "language" @@ -1125,6 +1175,7 @@ title = "Node.js" ``` They're used in RST files with `.. composable-tutorial::` directives: + ```rst .. composable-tutorial:: :options: language, interface @@ -1145,6 +1196,7 @@ They're used in RST files with `.. composable-tutorial::` directives: The command uses Jaccard similarity (intersection / union) to compare option sets between composables with different IDs. 
A 60% similarity threshold is used to identify potential consolidation candidates. For example, if you have: + - `language` with 15 options - `language-atlas-only` with 14 options (13 in common with `language`) - `language-local-only` with 14 options (13 in common with `language`) @@ -1156,12 +1208,14 @@ These would be flagged as similar composables (93.3% similarity) and potential c #### `compare file-contents` Compare file contents to identify differences between files. Supports two modes: + 1. **Direct comparison** - Compare two specific files 2. **Version comparison** - Compare the same file across multiple documentation versions **Use Cases:** This command helps writers: + - Identify content drift across documentation versions - Verify that updates have been applied consistently - Scope maintenance work when updating shared content @@ -1219,6 +1273,7 @@ Provide two file paths as arguments: ``` This mode: + - Compares exactly two files - Reports whether they are identical or different - Can show unified diff with `--show-diff` @@ -1239,6 +1294,7 @@ Provide one file path. 
The product directory and versions are automatically dete ``` This mode: + - Automatically detects the product directory from the file path - Auto-discovers all available versions (unless `--versions` is specified) - Extracts the relative path from the reference file @@ -1249,6 +1305,7 @@ This mode: **Version Directory Structure:** The tool expects MongoDB documentation to be organized as: + ``` product-dir/ ├── manual/ @@ -1268,17 +1325,20 @@ product-dir/ **Output Formats:** **Summary** (default - no flags): + - Total number of versions compared - Count of matching, differing, and missing files - Hints to use `--show-paths` or `--show-diff` for more details **With --show-paths:** + - Summary (as above) - List of files that match (with ✓) - List of files that differ (with ✗) - List of files not found (with -) **With --show-diff:** + - Summary and paths (as above) - Unified diff output for each differing file - Shows added lines (prefixed with +) @@ -1330,6 +1390,7 @@ This command navigates to the `content/code-examples/tested` directory from the **Use Cases:** This command helps writers and maintainers: + - Track the total number of tested code examples - Monitor code example coverage by product - Identify products with few or many examples @@ -1383,6 +1444,7 @@ This command navigates to the `content` directory and recursively counts all `.t **Use Cases:** This command helps writers and maintainers: + - Track the total number of documentation pages across the monorepo - Monitor documentation coverage by product/project - Identify projects with extensive or minimal documentation @@ -1393,6 +1455,7 @@ This command helps writers and maintainers: **Automatic Exclusions:** The command automatically excludes: + - Files in `code-examples` directories at the root of `content` or `source` (these contain plain text examples, not pages) - Files in the following directories at the root of `content`: - `404` - Error pages @@ -1444,10 +1507,12 @@ By default, prints a single 
integer (total count) for use in CI or scripting. Wi **Versioned Documentation:** Some MongoDB documentation projects contain multiple versions, represented as distinct directories between the project directory and the `source` directory: + - **Versioned project structure**: `content/{project}/{version}/source/...` - **Non-versioned project structure**: `content/{project}/source/...` Version directory names follow these patterns: + - `current` or `manual` - The current/latest version - `upcoming` - Pre-release version - `v{number}` - Specific version (e.g., `v8.0`, `v7.0`) @@ -1511,18 +1576,25 @@ echo "Total documentation pages: $TOTAL_PAGES" #### `report testable-code` -Analyze testable code examples on documentation pages based on analytics CSV data. +Analyze testable code examples on documentation pages based on analytics CSV data or by scanning documentation sets directly. -This command takes a CSV file with page rankings and URLs, resolves each URL to its source file in the monorepo, collects code examples (literalinclude, code-block, io-code-block), and generates a report with testability information. +This command resolves URLs to source files in the monorepo, collects code examples (literalinclude, code-block, io-code-block), and generates a report with testability information. + +**Input Modes:** + +1. **CSV Mode** (default): Takes a CSV file with page rankings and URLs +2. 
**Docs Set Mode**: Scan all pages in specified content directories using `--for-docs-set` **Use Cases:** This command helps writers and maintainers: + - Identify high-traffic pages with untested code examples - Prioritize which pages to add test coverage to - Track the ratio of tested vs testable code examples - Understand code example distribution by product/language - Find "maybe testable" examples that need manual review +- Scan entire documentation sets for testability analysis **Key Concepts:** @@ -1533,12 +1605,21 @@ This command helps writers and maintainers: **Examples:** ```bash -# Analyze pages from a CSV file (specify monorepo path) +# CSV Mode: Analyze pages from a CSV file (specify monorepo path) ./audit-cli report testable-code analytics.csv /path/to/docs-monorepo -# Use configured monorepo path (from config file or environment variable) +# CSV Mode: Use configured monorepo path (from config file or environment variable) ./audit-cli report testable-code analytics.csv +# Docs Set Mode: Scan all pages in specified docs sets +./audit-cli report testable-code --for-docs-set cloud-docs,golang,node + +# Docs Set Mode: Scan only a specific version +./audit-cli report testable-code --for-docs-set manual --version v8.0 + +# Docs Set Mode: Scan all versions (not just current) +./audit-cli report testable-code --for-docs-set manual --current-only=false + # Output as JSON to a file ./audit-cli report testable-code analytics.csv --format json --output report.json @@ -1562,9 +1643,24 @@ rank,url **Flags:** +Input mode flags: + +- `--for-docs-set ` - Scan all pages in specified docs sets (content directory names, comma-separated). Enables Docs Set Mode instead of CSV Mode. Requires a configured monorepo path (see [Monorepo Path Configuration](#monorepo-path-configuration)). + +Version filtering flags (only apply in Docs Set Mode): + +- `--version ` - Only include pages from specified version (e.g., `v8.0`, `current`, `upcoming`). Overrides `--current-only`. 
+- `--current-only` - Only include current version pages (default: `true`). Use `--current-only=false` to scan all versions. +- `--base-url ` - Base URL for resolving page URLs (default: `https://www.mongodb.com/docs`) + +Output flags: + - `--format, -f ` - Output format: `text` (default), `json`, or `csv` - `--output, -o ` - Output file path (default: stdout) - `--details` - Show detailed per-product breakdown (for CSV output, includes per-product columns) + +Filter flags: + - `--filter ` - Filter pages by product area (can be specified multiple times) - `--list-drivers` - List all available driver filter options from the Snooty Data API @@ -1573,6 +1669,7 @@ rank,url Use the `--filter` flag to focus on specific product areas. Multiple filters can be specified to include pages matching any filter. Available filters: + - `search` - Pages with "atlas-search" or "search" in URL (excludes vector-search) - `vector-search` - Pages with "vector-search" in URL - `drivers` - All MongoDB driver documentation pages @@ -1598,6 +1695,7 @@ The `--list-drivers` flag queries the Snooty Data API to show all available driv **Testable Products:** Products with test infrastructure (code examples for these products are marked as "testable"): + - C# - Go - Java (Sync) @@ -1650,6 +1748,90 @@ Source: content/node/current/source/quick-start.txt TOTAL 8 4 4 2 6 0 ``` +### Resolve Commands + +#### `resolve url` + +Resolve a documentation source file (.txt) to its production URL. + +This command takes a source file path from the docs monorepo and resolves it to the corresponding production URL on `mongodb.com/docs`. The URL mapping is derived from the table-of-contents data, which is the source of truth for production URLs. 
+ +**Use Cases:** + +This command helps writers: + +- Quickly find the production URL for any source file +- Generate URLs for reports and documentation +- Verify URL structure for new or moved pages +- Include production links in code example reports + +**Basic Usage:** + +```bash +# Resolve a non-versioned project file (e.g., Atlas) +./audit-cli resolve url content/atlas/source/manage-clusters.txt +# Output: https://www.mongodb.com/docs/atlas/manage-clusters/ + +# Resolve a versioned project file (e.g., Go Driver) +./audit-cli resolve url content/golang/current/source/atlas-search.txt +# Output: https://www.mongodb.com/docs/drivers/go/current/atlas-search/ + +# Resolve the MongoDB Manual +./audit-cli resolve url content/manual/manual/source/indexes.txt +# Output: https://www.mongodb.com/docs/manual/indexes/ + +# Resolve an index file (results in trailing slash) +./audit-cli resolve url content/compass/source/index.txt +# Output: https://www.mongodb.com/docs/compass/ + +# Use a different base URL (e.g., for staging) +./audit-cli resolve url content/atlas/source/index.txt --base-url https://docs-staging.mongodb.com +# Output: https://docs-staging.mongodb.com/atlas/ +``` + +**Flags:** + +- `--base-url ` - Base URL for production documentation (default: `https://www.mongodb.com/docs`) + +**Path Resolution:** + +The command supports flexible path input: + +1. **Absolute path** - Full path to the source file +2. **Relative to monorepo root** - If monorepo is configured (via config file or environment variable) +3. **Relative to current directory** - Fallback + +**URL Construction:** + +The production URL is constructed from: + +1. **Base URL** - `https://www.mongodb.com/docs` by default +2. **URL slug** - Project-specific path segment (e.g., `atlas`, `drivers/go`) +3. **Version** - For versioned projects (e.g., `current`, `v8.0`, `manual`) +4. 
**Page path** - The file's path relative to the `source` directory (without `.txt` extension) + +**Examples:** + +| Source File | Production URL | +| ------------------------------------------------ | --------------------------------------------------------------- | +| `content/atlas/source/manage-clusters.txt` | `https://www.mongodb.com/docs/atlas/manage-clusters/` | +| `content/golang/current/source/atlas-search.txt` | `https://www.mongodb.com/docs/drivers/go/current/atlas-search/` | +| `content/app-services/source/logs.txt` | `https://www.mongodb.com/docs/atlas/app-services/logs/` | +| `content/manual/manual/source/index.txt` | `https://www.mongodb.com/docs/manual/` | +| `content/manual/v8.0/source/indexes.txt` | `https://www.mongodb.com/docs/v8.0/indexes/` | +| `content/compass/source/index.txt` | `https://www.mongodb.com/docs/compass/` | + +**Supported Projects:** + +The command supports all projects defined in the documentation monorepo's table-of-contents, including: + +- Atlas products (Atlas, Atlas CLI, Atlas Operator, App Services) +- MongoDB Server documentation (Manual, all versions) +- Driver documentation (Go, Python, Java, Node.js, C#, Rust, etc.) +- Tools (Compass, Database Tools, MongoDB Shell, etc.) +- Connectors (Kafka, Spark, BI Connector) +- And many more + ## Development ### Project Structure @@ -1804,11 +1986,13 @@ audit-cli/ Example: Adding `extract tables` subcommand 1. **Create the subcommand directory:** + ```bash mkdir -p commands/extract/tables ``` 2. **Create the command file** (`commands/extract/tables/tables.go`): + ```go package tables @@ -1835,6 +2019,7 @@ Example: Adding `extract tables` subcommand ``` 3. **Register the subcommand** in `commands/extract/extract.go`: + ```go import ( "github.com/grove-platform/audit-cli/commands/extract/tables" @@ -1855,11 +2040,13 @@ Example: Adding `extract tables` subcommand Example: Adding `analyze` parent command 1. **Create the parent directory:** + ```bash mkdir -p commands/analyze ``` 2. 
**Create the parent command** (`commands/analyze/analyze.go`): + ```go package analyze @@ -1880,6 +2067,7 @@ Example: Adding `analyze` parent command ``` 3. **Register in main.go:** + ```go import ( "github.com/grove-platform/audit-cli/commands/analyze" @@ -1925,6 +2113,7 @@ since it contains non-Go files (`.cpp`, `.rst`, etc.). #### Adding New Tests 1. **Create test input files** in `testdata/input-files/source/`: + ```bash # Create a new test RST file cat > testdata/input-files/source/my-test.rst << 'EOF' @@ -1935,6 +2124,7 @@ since it contains non-Go files (`.cpp`, `.rst`, etc.). ``` 2. **Generate expected output**: + ```bash ./audit-cli extract code-examples testdata/input-files/source/my-test.rst \ -o testdata/expected-output @@ -1943,6 +2133,7 @@ since it contains non-Go files (`.cpp`, `.rst`, etc.). 3. **Verify the output** is correct before committing 4. **Add test case** in the appropriate `*_test.go` file: + ```go func TestMyNewFeature(t *testing.T) { testDataDir := filepath.Join("..", "..", "..", "testdata") @@ -2040,6 +2231,7 @@ func RunMyCommand(arg string, flagValue string) error { ``` **Why this pattern?** + - Separates command definition from logic - Makes logic testable without Cobra - Consistent across all commands @@ -2153,6 +2345,7 @@ RunE: func(cmd *cobra.Command, args []string) error { ``` This allows users to specify paths as: + - Absolute: `/full/path/to/file.rst` - Monorepo-relative: `manual/manual/source/file.rst` (if monorepo configured) - Current directory-relative: `./file.rst` @@ -2216,6 +2409,7 @@ The tool extracts code examples from the following reStructuredText directives: Extracts code from external files with support for partial extraction and dedenting. **Syntax:** + ```rst .. 
literalinclude:: /path/to/file.py :language: python @@ -2225,6 +2419,7 @@ Extracts code from external files with support for partial extraction and dedent ``` **Supported Options:** + - `:language:` - Specifies the programming language (normalized: `ts` → `typescript`, `c++` → `cpp`, `golang` → `go`) - `:start-after:` - Extract content after this tag (skips the entire line containing the tag) - `:end-before:` - Extract content before this tag (cuts before the entire line containing the tag) @@ -2233,6 +2428,7 @@ Extracts code from external files with support for partial extraction and dedent **Example:** Given `code-examples/example.py`: + ```python def main(): # start-example @@ -2242,6 +2438,7 @@ def main(): ``` And RST: + ```rst .. literalinclude:: /code-examples/example.py :language: python @@ -2251,6 +2448,7 @@ And RST: ``` Extracts: + ```python result = calculate(42) print(result) @@ -2261,6 +2459,7 @@ print(result) Inline code blocks with automatic dedenting based on the first line's indentation. **Syntax:** + ```rst .. code-block:: javascript :copyable: false @@ -2271,6 +2470,7 @@ Inline code blocks with automatic dedenting based on the first line's indentatio ``` **Supported Options:** + - Language argument - `.. code-block:: javascript` (optional, defaults to `txt`) - `:language:` - Alternative way to specify language - `:copyable:` - Parsed but not used for extraction @@ -2302,6 +2502,7 @@ def hello(): Input/output code blocks for interactive examples with nested sub-directives. **Syntax:** + ```rst .. io-code-block:: :copyable: true @@ -2322,6 +2523,7 @@ Input/output code blocks for interactive examples with nested sub-directives. ``` **Supported Options:** + - `:copyable:` - Parsed but not used for extraction - Nested `.. input::` sub-directive (required) - Can have filepath argument: `.. input:: /path/to/file.js` @@ -2331,6 +2533,7 @@ Input/output code blocks for interactive examples with nested sub-directives. 
- Or inline content with `:language:` option **File-based Content:** + ```rst .. io-code-block:: @@ -2344,6 +2547,7 @@ Input/output code blocks for interactive examples with nested sub-directives. **Output Files:** Generates two files: + - `{source}.io-code-block.{index}.input.{ext}` - The input code - `{source}.io-code-block.{index}.output.{ext}` - The output (if present) @@ -2356,6 +2560,7 @@ Example: `my-doc.io-code-block.1.input.js` and `my-doc.io-code-block.1.output.js Follows include directives to process entire documentation trees (when `-f` flag is used). **Syntax:** + ```rst .. include:: /includes/intro.rst ``` @@ -2365,22 +2570,29 @@ Follows include directives to process entire documentation trees (when `-f` flag The tool handles several MongoDB-specific include patterns: ##### Steps Files + Converts directory-based paths to filename-based paths: + - Input: `/includes/steps/run-mongodb-on-linux.rst` - Resolves to: `/includes/steps-run-mongodb-on-linux.yaml` ##### Extracts and Release Files + Resolves ref-based includes by searching YAML files: + - Input: `/includes/extracts/install-mongodb.rst` - Searches: `/includes/extracts-*.yaml` for `ref: install-mongodb` - Resolves to: The YAML file containing that ref ##### Template Variables + Resolves template variables from YAML replacement sections: + ```yaml replacement: release_specification_default: "/includes/release/install-windows-default.rst" ``` + - Input: `{{release_specification_default}}` - Resolves to: `/includes/release/install-windows-default.rst` @@ -2401,16 +2613,19 @@ Provides configuration management for the CLI tool: - **File path resolution** - Resolves file paths as absolute, monorepo-relative, or cwd-relative **Key Functions:** + - `LoadConfig()` - Loads configuration from file or environment - `GetMonorepoPath(cmdLineArg string)` - Resolves monorepo path with priority order - `ResolveFilePath(pathArg string)` - Resolves file paths with flexible resolution **Priority Order for Monorepo 
Path:** + 1. Command-line argument (highest priority) 2. Environment variable `AUDIT_CLI_MONOREPO_PATH` 3. Config file `.audit-cli.yaml` (lowest priority) **Priority Order for File Paths:** + 1. Absolute path (used as-is) 2. Relative to monorepo root (if monorepo configured and file exists there) 3. Relative to current directory (fallback) @@ -2428,6 +2643,7 @@ Provides centralized utilities for understanding MongoDB documentation project s - **Relative path resolution** - Resolves paths relative to the source directory **Key Functions:** + - `FindSourceDirectory(filePath string)` - Finds the source directory for a given file - `DetectProjectInfo(filePath string)` - Detects project structure information - `DiscoverAllVersions(productDir string)` - Discovers all available versions in a product @@ -2450,6 +2666,7 @@ Provides reusable utilities for parsing and processing RST files: - **Source directory detection** - Finds the documentation root **Key Functions:** + - `ParseFileWithIncludes(filePath string)` - Parses RST file with include expansion - `ParseDirectives(content string)` - Extracts directive information from RST content - `ParseProcedures(filePath string, expandIncludes bool)` - Parses procedures from RST file @@ -2458,6 +2675,7 @@ Provides reusable utilities for parsing and processing RST files: **Rstspec.toml Support:** The `FetchRstspec()` function retrieves the canonical composable definitions from the snooty-parser repository. This provides: + - Standard composable IDs (e.g., `interface`, `language`, `deployment-type`) - Composable titles and descriptions - Default values for each composable @@ -2472,7 +2690,7 @@ See the code in `internal/rst/` for implementation details. 
The tool normalizes language identifiers to standard file extensions: | Input | Normalized | Extension | -|----------------|--------------|-----------| +| -------------- | ------------ | --------- | | `bash` | `bash` | `.sh` | | `c` | `c` | `.c` | | `c++` | `cpp` | `.cpp` | @@ -2511,6 +2729,7 @@ The tool normalizes language identifiers to standard file extensions: | (unknown) | (unchanged) | `.txt` | **Notes:** + - Language identifiers are case-insensitive - Unknown languages are returned unchanged by `NormalizeLanguage()` but map to `.txt` extension - The normalization handles common aliases (e.g., `ts` → `typescript`, `golang` → `go`, `c++` → `cpp`) diff --git a/commands/report/testable-code/docs_set_scanner.go b/commands/report/testable-code/docs_set_scanner.go new file mode 100644 index 0000000..596c656 --- /dev/null +++ b/commands/report/testable-code/docs_set_scanner.go @@ -0,0 +1,207 @@ +// Package testablecode provides the testable-code subcommand for the report command. +// This file handles scanning documentation sets to find all pages. +package testablecode + +import ( + "fmt" + "os" + "path/filepath" + "sort" + "strings" + + resolveurl "github.com/grove-platform/audit-cli/commands/resolve/url" + "github.com/grove-platform/audit-cli/internal/projectinfo" +) + +// DocsSetError represents an error that occurred while processing a specific docs set. +type DocsSetError struct { + DocsSet string + Err error +} + +// ScanResult contains the results of scanning docs sets, including any errors encountered. +type ScanResult struct { + Entries []PageEntry + Errors []DocsSetError +} + +// HasErrors returns true if any errors occurred during scanning. +func (r *ScanResult) HasErrors() bool { + return len(r.Errors) > 0 +} + +// PrintErrorReport prints a summary of all errors encountered during scanning. 
+func (r *ScanResult) PrintErrorReport() { + if !r.HasErrors() { + return + } + + fmt.Fprintf(os.Stderr, "\n=== Docs Set Scanning Errors ===\n") + fmt.Fprintf(os.Stderr, "%d docs set(s) had errors:\n\n", len(r.Errors)) + for _, e := range r.Errors { + fmt.Fprintf(os.Stderr, " • %s: %v\n", e.DocsSet, e.Err) + } + fmt.Fprintf(os.Stderr, "================================\n\n") +} + +// ScanDocsSets scans one or more documentation sets (content directories) and returns +// PageEntry structs for all pages found, with URLs resolved using the resolve url logic. +// +// The function continues scanning even if individual docs sets fail, accumulating errors +// in the ScanResult. Check ScanResult.HasErrors() and use PrintErrorReport() to display +// any errors that occurred. +// +// Parameters: +// - monorepoPath: Path to the docs monorepo root +// - docsSets: List of content directory names to scan (e.g., "cloud-docs", "golang") +// - versionFilter: Version filter - "" for all versions, "current" for current only, or specific version like "v8.0" +// - baseURL: Base URL for production documentation (e.g., https://www.mongodb.com/docs) +// +// Returns: +// - *ScanResult: Contains entries from successful scans and any errors encountered +// - error: Only returned for fatal errors (e.g., no docs sets specified, content dir missing) +func ScanDocsSets(monorepoPath string, docsSets []string, versionFilter string, baseURL string) (*ScanResult, error) { + if len(docsSets) == 0 { + return nil, fmt.Errorf("at least one docs set must be specified") + } + + contentDir := filepath.Join(monorepoPath, "content") + if _, err := os.Stat(contentDir); os.IsNotExist(err) { + return nil, fmt.Errorf("content directory not found: %s", contentDir) + } + + result := &ScanResult{ + Entries: []PageEntry{}, + Errors: []DocsSetError{}, + } + rank := 1 + + for _, docsSet := range docsSets { + docsSetPath := filepath.Join(contentDir, docsSet) + if _, err := os.Stat(docsSetPath); os.IsNotExist(err) { + 
result.Errors = append(result.Errors, DocsSetError{ + DocsSet: docsSet, + Err: fmt.Errorf("couldn't find docs set at path %q", docsSetPath), + }) + continue + } + + entries, err := scanSingleDocsSet(monorepoPath, docsSet, versionFilter, baseURL, &rank) + if err != nil { + result.Errors = append(result.Errors, DocsSetError{ + DocsSet: docsSet, + Err: err, + }) + continue + } + result.Entries = append(result.Entries, entries...) + } + + return result, nil +} + +// scanSingleDocsSet scans a single docs set and returns PageEntry structs. +// The rank parameter is a pointer so it can be incremented across multiple doc sets. +// versionFilter can be: "" (all versions), "current" (only current), or a specific version like "v8.0" +func scanSingleDocsSet(monorepoPath, docsSet string, versionFilter string, baseURL string, rank *int) ([]PageEntry, error) { + contentDir := filepath.Join(monorepoPath, "content") + docsSetPath := filepath.Join(contentDir, docsSet) + + // Collect all .txt files + var txtFiles []string + + err := filepath.Walk(docsSetPath, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Skip directories we shouldn't scan + if info.IsDir() { + dirName := info.Name() + // Skip code-examples directories + if dirName == "code-examples" { + return filepath.SkipDir + } + return nil + } + + // Only process .txt files + if filepath.Ext(path) != ".txt" { + return nil + } + + // Check if this is in a source directory + relPath, _ := filepath.Rel(docsSetPath, path) + if !strings.Contains(relPath, "source"+string(filepath.Separator)) { + return nil + } + + // Apply version filter if specified + if versionFilter != "" { + version := extractVersionFromFilePath(relPath) + if versionFilter == "current" { + // Filter to only current version + if version != "" && !projectinfo.IsCurrentVersion(version) { + return nil + } + } else { + // Filter to specific version + if version != versionFilter { + return nil + } + } + } + + txtFiles = 
append(txtFiles, path) + return nil + }) + + if err != nil { + return nil, err + } + + // Sort files for deterministic output + sort.Strings(txtFiles) + + // Convert each file to a PageEntry with resolved URL + var entries []PageEntry + for _, filePath := range txtFiles { + url, err := resolveurl.ResolveFileToURL(filePath, baseURL) + if err != nil { + // Log warning but continue with other files + fmt.Fprintf(os.Stderr, "Warning: Could not resolve URL for %s: %v\n", filePath, err) + continue + } + + entries = append(entries, PageEntry{ + Rank: *rank, + URL: url, + DocsSet: docsSet, + }) + *rank++ + } + + return entries, nil +} + +// extractVersionFromFilePath extracts the version from a relative file path. +// For versioned projects: version/source/file.txt -> "version" +// For non-versioned projects: source/file.txt -> "" +func extractVersionFromFilePath(relPath string) string { + parts := strings.Split(relPath, string(filepath.Separator)) + if len(parts) < 2 { + return "" + } + + // If first part is "source", this is non-versioned + if parts[0] == "source" { + return "" + } + + // Check if first part looks like a version + if projectinfo.IsVersionDirectory(parts[0]) { + return parts[0] + } + + return "" +} diff --git a/commands/report/testable-code/output.go b/commands/report/testable-code/output.go index e1b8753..1e45503 100644 --- a/commands/report/testable-code/output.go +++ b/commands/report/testable-code/output.go @@ -9,10 +9,11 @@ import ( ) // BuildPageReport builds a PageReport from a PageAnalysis. 
-func BuildPageReport(analysis *PageAnalysis) PageReport { +func BuildPageReport(analysis *PageAnalysis, docsSet string) PageReport { report := PageReport{ Rank: analysis.Rank, URL: analysis.URL, + DocsSet: docsSet, SourcePath: analysis.SourcePath, ContentDir: analysis.ContentDir, Error: analysis.Error, @@ -65,6 +66,10 @@ func BuildPageReport(analysis *PageAnalysis) PageReport { } } + // Compute derived fields + report.TotalUntested = report.TotalExamples - report.TotalTested + report.NeedsToBeTested = report.TotalTestable > report.TotalTested + return report } @@ -166,7 +171,7 @@ func outputCSVSummary(w io.Writer, reports []PageReport) error { for _, report := range reports { // Escape fields that might contain commas or quotes url := escapeCSV(report.URL) - sourcePath := escapeCSV(report.SourcePath) + sourcePath := escapeCSV(makeSourcePathRelative(report.SourcePath)) contentDir := escapeCSV(report.ContentDir) errorMsg := escapeCSV(report.Error) @@ -189,7 +194,7 @@ func outputCSVDetails(w io.Writer, reports []PageReport) error { for _, report := range reports { // Escape fields that might contain commas or quotes url := escapeCSV(report.URL) - sourcePath := escapeCSV(report.SourcePath) + sourcePath := escapeCSV(makeSourcePathRelative(report.SourcePath)) contentDir := escapeCSV(report.ContentDir) errorMsg := escapeCSV(report.Error) @@ -238,6 +243,24 @@ func outputCSVDetails(w io.Writer, reports []PageReport) error { return nil } +// makeSourcePathRelative converts an absolute source path to be relative to the content directory. 
+// For example: /path/to/monorepo/content/cloud-docs/source/page.txt -> cloud-docs/source/page.txt +func makeSourcePathRelative(sourcePath string) string { + if sourcePath == "" { + return "" + } + + // Find the content directory marker and return everything after it + const contentMarker = "/content/" + idx := strings.Index(sourcePath, contentMarker) + if idx >= 0 { + return sourcePath[idx+len(contentMarker):] + } + + // Fallback: return the original path if content marker not found + return sourcePath +} + // escapeCSV escapes a string for CSV output. // If the string contains commas, quotes, or newlines, it wraps in quotes and escapes internal quotes. func escapeCSV(s string) string { diff --git a/commands/report/testable-code/testable_code.go b/commands/report/testable-code/testable_code.go index 594a503..a394ce6 100644 --- a/commands/report/testable-code/testable_code.go +++ b/commands/report/testable-code/testable_code.go @@ -52,21 +52,35 @@ func NewTestableCodeCommand() *cobra.Command { var outputFile string var filters []string var listDrivers bool + var forDocsSets []string + var currentOnly bool + var versionFilter string + var baseURL string cmd := &cobra.Command{ - Use: "testable-code [monorepo-path]", - Short: "Analyze testable code examples on pages from analytics data", - Long: `Analyze testable code examples on documentation pages based on analytics CSV data. + Use: "testable-code [csv-file] [monorepo-path]", + Short: "Analyze testable code examples on pages from analytics data or docs sets", + Long: `Analyze testable code examples on documentation pages based on analytics CSV data +or by scanning specified documentation sets directly. -Takes a CSV file with page rankings and URLs, resolves each URL to its source file -in the monorepo, collects code examples (literalinclude, code-block, io-code-block), -and generates a report with: +INPUT MODES: + +1. CSV Mode (default): Takes a CSV file with page rankings and URLs. 
+ Example: audit-cli report testable-code analytics.csv /path/to/monorepo + +2. Docs Set Mode: Scan all pages in specified content directories. + Example: audit-cli report testable-code --for-docs-set cloud-docs,golang,node + +For each page, the command resolves the URL to its source file in the monorepo, +collects code examples (literalinclude, code-block, io-code-block), and generates +a report with: - Total code examples per page - Breakdown by product/language - Input vs output counts (for io-code-block) - - Tested vs untested counts + - Tested vs untested counts (Untested = Total - Tested) - Testable count (examples that could be tested based on product) - Maybe testable count (javascript/shell examples without clear context) + - NeedsToBeTested flag (Yes if Testable > Tested) The CSV file should have columns for rank and URL. The first row is treated as a header. @@ -102,9 +116,31 @@ Output formats: return runListDrivers() } - // Require CSV file if not listing drivers + // Determine input mode: docs set scan or CSV + if len(forDocsSets) > 0 { + // Docs set mode - scan specified content directories + var cmdLineArg string + if len(args) > 0 { + cmdLineArg = args[0] + } + monorepoPath, err := config.GetMonorepoPath(cmdLineArg) + if err != nil { + return err + } + + // Determine effective version filter + // --version takes precedence over --current-only + effectiveVersionFilter := versionFilter + if effectiveVersionFilter == "" && currentOnly { + effectiveVersionFilter = "current" + } + + return runTestableCodeForDocsSets(forDocsSets, monorepoPath, baseURL, effectiveVersionFilter, outputFormat, showDetails, outputFile, filters) + } + + // CSV mode - require CSV file if len(args) < 1 { - return fmt.Errorf("requires at least 1 arg(s), only received 0") + return fmt.Errorf("requires a CSV file or use --for-docs-set to scan docs sets directly") } csvPath := args[0] @@ -128,6 +164,10 @@ Output formats: cmd.Flags().StringVarP(&outputFile, "output", "o", "", "Output 
file path (default: stdout)")
 	cmd.Flags().StringSliceVar(&filters, "filter", nil, "Filter pages by product area (search, vector-search, drivers, driver:, mongosh)")
 	cmd.Flags().BoolVar(&listDrivers, "list-drivers", false, "List all drivers from the Snooty Data API")
+	cmd.Flags().StringSliceVar(&forDocsSets, "for-docs-set", nil, "Scan all pages in specified docs sets (content directory names)")
+	cmd.Flags().BoolVar(&currentOnly, "current-only", true, "When scanning docs sets, only include current version pages (default: true)")
+	cmd.Flags().StringVar(&versionFilter, "version", "", "Only include pages from specified version (e.g., v8.0, current, upcoming). Overrides --current-only")
+	cmd.Flags().StringVar(&baseURL, "base-url", "https://www.mongodb.com/docs", "Base URL for resolving page URLs")

 	return cmd
 }

@@ -148,8 +188,8 @@ func runListDrivers() error {

 	// Build a list of driver info and sort by project name (the filter value)
 	type driverInfo struct {
-		projectName string
-		slug string
+		projectName  string
+		slug         string
 		hasTestInfra bool
 	}
 	drivers := make([]driverInfo, 0, len(driverSlugs))
@@ -187,6 +227,107 @@ func runListDrivers() error {
 	return nil
 }

+// runTestableCodeForDocsSets runs the testable-code analysis by scanning specified docs sets.
+// versionFilter can be: "" (all versions), "current" (only current), or a specific version like "v8.0" +func runTestableCodeForDocsSets(docsSets []string, monorepoPath, baseURL string, versionFilter string, outputFormat string, showDetails bool, outputFile string, filters []string) error { + fmt.Fprintf(os.Stderr, "Scanning docs sets: %v\n", docsSets) + if versionFilter != "" { + fmt.Fprintf(os.Stderr, "Version filter: %s\n", versionFilter) + } + + // Scan docs sets to get page entries + scanResult, err := ScanDocsSets(monorepoPath, docsSets, versionFilter, baseURL) + if err != nil { + return fmt.Errorf("failed to scan docs sets: %w", err) + } + + // Print any errors that occurred during scanning + if scanResult.HasErrors() { + scanResult.PrintErrorReport() + } + + entries := scanResult.Entries + fmt.Fprintf(os.Stderr, "Found %d pages in docs sets\n", len(entries)) + + if len(entries) == 0 { + return fmt.Errorf("no pages found in specified docs sets") + } + + // Get URL mapping - needed for driver filters and page analysis + urlMapping, err := config.GetURLMapping(monorepoPath) + if err != nil { + return fmt.Errorf("failed to get URL mapping: %w", err) + } + + // Validate filters before applying + if err := validateFilters(filters); err != nil { + return err + } + + // Apply URL filters if specified + if len(filters) > 0 { + originalCount := len(entries) + entries = filterEntries(entries, filters, urlMapping) + fmt.Fprintf(os.Stderr, "Filtered to %d pages matching filter(s): %v\n", len(entries), filters) + if len(entries) == 0 { + fmt.Fprintf(os.Stderr, "Warning: No pages matched the specified filter(s). 
Original count: %d\n", originalCount) + } + } + + // Load product mappings from rstspec.toml + fmt.Fprintf(os.Stderr, "Loading product mappings from rstspec.toml...\n") + mappings, err := LoadProductMappings() + if err != nil { + return fmt.Errorf("failed to load product mappings: %w", err) + } + + // Analyze each page + var reports []PageReport + for i, entry := range entries { + fmt.Fprintf(os.Stderr, "Analyzing page %d/%d: %s\n", i+1, len(entries), entry.URL) + + analysis, err := AnalyzePage(entry, urlMapping, mappings) + if err != nil { + // Log error but continue with other pages + fmt.Fprintf(os.Stderr, " Warning: %v\n", err) + reports = append(reports, PageReport{ + Rank: entry.Rank, + URL: entry.URL, + DocsSet: entry.DocsSet, + Error: err.Error(), + }) + continue + } + + report := BuildPageReport(analysis, entry.DocsSet) + reports = append(reports, report) + } + + // Determine output writer + var writer *os.File + if outputFile != "" { + f, err := os.Create(outputFile) + if err != nil { + return fmt.Errorf("failed to create output file: %w", err) + } + defer f.Close() + writer = f + fmt.Fprintf(os.Stderr, "Writing output to %s\n", outputFile) + } else { + writer = os.Stdout + } + + // Output results + switch outputFormat { + case "json": + return OutputJSON(writer, reports) + case "csv": + return OutputCSV(writer, reports, showDetails) + default: + return OutputText(writer, reports) + } +} + // runTestableCode is the main entry point for the testable-code command. 
func runTestableCode(csvPath, monorepoPath, outputFormat string, showDetails bool, outputFile string, filters []string) error { // Parse CSV file @@ -235,14 +376,15 @@ func runTestableCode(csvPath, monorepoPath, outputFormat string, showDetails boo // Log error but continue with other pages fmt.Fprintf(os.Stderr, " Warning: %v\n", err) reports = append(reports, PageReport{ - Rank: entry.Rank, - URL: entry.URL, - Error: err.Error(), + Rank: entry.Rank, + URL: entry.URL, + DocsSet: entry.DocsSet, + Error: err.Error(), }) continue } - report := BuildPageReport(analysis) + report := BuildPageReport(analysis, entry.DocsSet) reports = append(reports, report) } diff --git a/commands/report/testable-code/testable_code_test.go b/commands/report/testable-code/testable_code_test.go index a4b0b99..3b166a3 100644 --- a/commands/report/testable-code/testable_code_test.go +++ b/commands/report/testable-code/testable_code_test.go @@ -14,14 +14,14 @@ import ( func createMockURLMapping() *config.URLMapping { return &config.URLMapping{ URLSlugToProject: map[string]string{ - "drivers/go": "golang", - "drivers/node": "node", - "drivers/csharp": "csharp", + "drivers/go": "golang", + "drivers/node": "node", + "drivers/csharp": "csharp", "languages/python/pymongo-driver": "pymongo", - "drivers/java/sync": "java", - "mongodb-shell": "mongodb-shell", - "mongoid": "mongoid", - "ruby-driver": "ruby-driver", + "drivers/java/sync": "java", + "mongodb-shell": "mongodb-shell", + "mongoid": "mongoid", + "ruby-driver": "ruby-driver", }, DriverSlugs: []string{ "drivers/csharp", @@ -446,8 +446,8 @@ func TestIsMaybeTestable(t *testing.T) { // TestParseComposableOptions tests the parseComposableOptions function. 
func TestParseComposableOptions(t *testing.T) { testCases := []struct { - options string - expectedLanguage string + options string + expectedLanguage string expectedInterface string }{ {"language=python; interface=driver", "python", "driver"}, @@ -490,7 +490,7 @@ func TestBuildPageReport(t *testing.T) { }, } - report := BuildPageReport(analysis) + report := BuildPageReport(analysis, "") if report.Rank != 1 { t.Errorf("Expected Rank 1, got %d", report.Rank) @@ -507,6 +507,13 @@ func TestBuildPageReport(t *testing.T) { if report.TotalTested != 1 { t.Errorf("Expected TotalTested 1, got %d", report.TotalTested) } + // Verify new computed fields + if report.TotalUntested != 4 { + t.Errorf("Expected TotalUntested 4 (5-1), got %d", report.TotalUntested) + } + if !report.NeedsToBeTested { + t.Errorf("Expected NeedsToBeTested true (4 testable > 1 tested), got false") + } if report.TotalTestable != 4 { t.Errorf("Expected TotalTestable 4, got %d", report.TotalTestable) } @@ -600,9 +607,9 @@ func TestDetermineProduct(t *testing.T) { "go": "Go", }, ComposableInterfaceToProduct: map[string]string{ - "mongosh": "MongoDB Shell", - "driver": "Driver", - "compass": "Compass", + "mongosh": "MongoDB Shell", + "driver": "Driver", + "compass": "Compass", }, } @@ -1171,4 +1178,3 @@ func TestAnalyzePage(t *testing.T) { } }) } - diff --git a/commands/report/testable-code/types.go b/commands/report/testable-code/types.go index 801e2d2..133ebb8 100644 --- a/commands/report/testable-code/types.go +++ b/commands/report/testable-code/types.go @@ -9,10 +9,11 @@ import ( "github.com/grove-platform/audit-cli/internal/snooty" ) -// PageEntry represents a single page from the analytics CSV. +// PageEntry represents a single page from the analytics CSV or docs set scan. type PageEntry struct { - Rank int - URL string + Rank int + URL string + DocsSet string // Content directory name (populated when scanning docs sets) } // CodeExample represents a single code example found in a page. 
@@ -66,6 +67,7 @@ type ProductStats struct { type PageReport struct { Rank int URL string + DocsSet string // Content directory name (populated when scanning docs sets) SourcePath string ContentDir string Error string @@ -73,8 +75,10 @@ type PageReport struct { TotalInput int TotalOutput int TotalTested int + TotalUntested int // TotalExamples - TotalTested TotalTestable int TotalMaybeTestable int + NeedsToBeTested bool // True if TotalTestable > TotalTested ByProduct map[string]*ProductStats } diff --git a/commands/resolve/resolve.go b/commands/resolve/resolve.go new file mode 100644 index 0000000..4e01d59 --- /dev/null +++ b/commands/resolve/resolve.go @@ -0,0 +1,37 @@ +// Package resolve provides the parent command for resolving documentation paths and URLs. +// +// This package serves as the parent command for resolution operations. +// Currently supports: +// - url: Resolve source .txt files to their production URLs +// +// These commands help writers and tools map between source files and live documentation. +package resolve + +import ( + "github.com/grove-platform/audit-cli/commands/resolve/url" + "github.com/spf13/cobra" +) + +// NewResolveCommand creates the resolve parent command. +// +// This command serves as a parent for various resolution operations. +// It doesn't perform any operations itself but provides a namespace for subcommands. +func NewResolveCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "resolve", + Short: "Resolve documentation paths and URLs", + Long: `Resolve mappings between source files and production URLs. + +Helps writers and tools understand the relationship between source .txt files +in the documentation monorepo and their corresponding live URLs. 
+ +Currently supports: + - url: Resolve source .txt files to their production URLs`, + } + + // Add subcommands + cmd.AddCommand(url.NewURLCommand()) + + return cmd +} + diff --git a/commands/resolve/url/resolver.go b/commands/resolve/url/resolver.go new file mode 100644 index 0000000..f6a84f2 --- /dev/null +++ b/commands/resolve/url/resolver.go @@ -0,0 +1,267 @@ +// Package url provides URL resolution functionality. +package url + +import ( + "fmt" + "path/filepath" + "strings" + + "github.com/grove-platform/audit-cli/internal/config" +) + +// ResolveFileToURL resolves a source file path to a production URL. +// +// The function identifies the project and version from the file path and +// constructs the appropriate production URL. +// +// Parameters: +// - filePath: Absolute path to the source .txt file +// - baseURL: Base production URL (e.g., https://www.mongodb.com/docs) +// +// Returns: +// - string: The production URL for the file +// - error: Error if resolution fails +func ResolveFileToURL(filePath string, baseURL string) (string, error) { + // Validate file extension + if !strings.HasSuffix(filePath, ".txt") { + return "", fmt.Errorf("file must have .txt extension: %s", filePath) + } + + // Parse the file path to extract project, version, and page path + info, err := parseFilePath(filePath) + if err != nil { + return "", err + } + + // Get the URL slug for this project + urlSlug, err := getURLSlugForProject(info.projectName) + if err != nil { + return "", err + } + + // Build the URL + return buildURL(baseURL, urlSlug, info.version, info.pagePath), nil +} + +// filePathInfo holds parsed information from a source file path. 
+type filePathInfo struct { + projectName string // Snooty project name (from snooty.toml) + version string // Version slug (e.g., "manual", "v8.0", "current") + pagePath string // Page path relative to source directory + contentDir string // Content directory name +} + +// parseFilePath extracts project, version, and page path from a source file path. +// +// Expected path patterns: +// - content/{project}/source/{page}.txt (non-versioned) +// - content/{project}/{version}/source/{page}.txt (versioned) +func parseFilePath(filePath string) (*filePathInfo, error) { + // Normalize path separators + filePath = filepath.ToSlash(filePath) + + // Find the content directory in the path + contentIdx := strings.Index(filePath, "/content/") + if contentIdx == -1 { + return nil, fmt.Errorf("file path must contain /content/ directory: %s", filePath) + } + + // Get the path starting from content/ + relativePath := filePath[contentIdx+9:] // Skip "/content/" + + // Split into parts + parts := strings.Split(relativePath, "/") + if len(parts) < 3 { + return nil, fmt.Errorf("invalid path structure: %s", filePath) + } + + info := &filePathInfo{ + contentDir: parts[0], + } + + // Find "source" directory to determine structure + sourceIdx := -1 + for i, part := range parts { + if part == "source" { + sourceIdx = i + break + } + } + + if sourceIdx == -1 { + return nil, fmt.Errorf("file path must contain /source/ directory: %s", filePath) + } + + // Determine if versioned or non-versioned + switch sourceIdx { + case 1: + // Non-versioned: content/{project}/source/{page}.txt + info.version = "" + case 2: + // Versioned: content/{project}/{version}/source/{page}.txt + info.version = parts[1] + default: + return nil, fmt.Errorf("unexpected path structure: %s", filePath) + } + + // Get the page path (everything after source/) + pagePathParts := parts[sourceIdx+1:] + pagePath := strings.Join(pagePathParts, "/") + + // Remove .txt extension + pagePath = strings.TrimSuffix(pagePath, ".txt") + 
+ // Handle index.txt -> empty page path (will render as trailing slash) + if pagePath == "index" { + pagePath = "" + } + + info.pagePath = pagePath + + // Get project name from snooty.toml + monorepoPath := extractMonorepoPath(filePath, contentIdx) + projectName, err := getProjectName(monorepoPath, info.contentDir, info.version) + if err != nil { + return nil, err + } + info.projectName = projectName + + return info, nil +} + +// extractMonorepoPath extracts the monorepo root path from a file path. +func extractMonorepoPath(filePath string, contentIdx int) string { + return filePath[:contentIdx] +} + +// getProjectName reads the snooty.toml to get the project name. +func getProjectName(monorepoPath, contentDir, _ string) (string, error) { + urlMapping, err := config.GetURLMapping(monorepoPath) + if err != nil { + return "", fmt.Errorf("failed to get URL mapping: %w", err) + } + + // Reverse lookup: find project name for this content directory + for project, dir := range urlMapping.ProjectToContentDir { + if dir == contentDir { + return project, nil + } + } + + return "", fmt.Errorf("could not find project name for content directory: %s", contentDir) +} + +// projectToURLSlug maps Snooty project names (contentSite) to their URL base slugs. +// This mapping is derived from the table-of-contents data in the docs-mongodb-internal +// monorepo, which is the source of truth for production URLs. +// +// The URL slug is the path component after /docs/ in the production URL. 
+// For example, "golang" maps to "drivers/go", so the URL would be: +// https://www.mongodb.com/docs/drivers/go/current/ +var projectToURLSlug = map[string]string{ + // Atlas products + "atlas-architecture": "atlas/architecture", + "atlas-cli": "atlas/cli", + "atlas-operator": "atlas/operator", + "cloud-docs": "atlas", + "cloudgov": "atlas/government", + + // Atlas App Services (deprecated but still in monorepo) + "atlas-app-services": "atlas/app-services", + "realm": "atlas/device-sdks", + + // MongoDB Server + "docs": "", // MongoDB Manual uses empty slug (e.g., /docs/manual/) + + // Drivers and Languages + "c": "languages/c/c-driver", + "cpp-driver": "languages/cpp/cpp-driver/read", + "csharp": "drivers/csharp", + "django": "languages/python/django-mongodb", + "golang": "drivers/go", + "hibernate": "languages/java/mongodb-hibernate", + "java": "drivers/java/sync", + "java-rs": "languages/java/reactive-streams-driver", + "kotlin": "drivers/kotlin/coroutine", + "kotlin-sync": "languages/kotlin/kotlin-sync-driver", + "laravel": "drivers/php/laravel-mongodb", + "node": "drivers/node", + "php-library": "php-library", + "pymongo": "languages/python/pymongo-driver", + "pymongo-arrow": "languages/python/pymongo-arrow-driver", + "ruby-driver": "ruby-driver", + "rust": "drivers/rust", + "scala": "languages/scala/scala-driver", + + // Tools and utilities + "bi-connector": "bi-connector", + "charts": "charts", + "cloud-manager": "cloud-manager", + "compass": "compass", + "database-tools": "database-tools", + "docs-k8s-operator": "kubernetes-operator", + "docs-relational-migrator": "relational-migrator", + "drivers": "drivers", + "entity-framework": "entity-framework", + "intellij": "mongodb-intellij", + "kafka-connector": "kafka-connector", + "landing": "management", + "mck": "kubernetes", + "mcp-server": "mcp-server", + "meta": "meta", + "mongocli": "mongocli", + "mongodb-shell": "mongodb-shell", + "mongodb-vscode": "mongodb-vscode", + "mongoid": "mongoid", + "mongosync": 
"mongosync", + "ops-manager": "ops-manager", + "spark-connector": "spark-connector", + "sql-interface": "sql-interface", + "visual-studio-extension": "mongodb-analyzer", + "voyageai": "voyageai", +} + +// getURLSlugForProject returns the base URL slug for a given project name. +// The mapping is derived from the table-of-contents data which is the source +// of truth for production URLs. +func getURLSlugForProject(projectName string) (string, error) { + if slug, ok := projectToURLSlug[projectName]; ok { + return slug, nil + } + + return "", fmt.Errorf("could not find URL slug for project: %s", projectName) +} + +// buildURL constructs the production URL from components. +// +// Parameters: +// - baseURL: Base URL (e.g., https://www.mongodb.com/docs) +// - urlSlug: Project URL slug (e.g., "atlas", "drivers/go") or empty for MongoDB Manual +// - version: Version slug (e.g., "current", "v8.0", "manual") or empty for non-versioned +// - pagePath: Page path (e.g., "tutorial/install") or empty for index +// +// Returns the full production URL with trailing slash. 
+func buildURL(baseURL, urlSlug, version, pagePath string) string { + // Ensure base URL doesn't have trailing slash + baseURL = strings.TrimSuffix(baseURL, "/") + + // Build path components (only include non-empty parts) + var pathParts []string + + if urlSlug != "" { + pathParts = append(pathParts, urlSlug) + } + + // Add version if present (for versioned projects) + if version != "" { + pathParts = append(pathParts, version) + } + + // Add page path if present + if pagePath != "" { + pathParts = append(pathParts, pagePath) + } + + // Join with slashes and add trailing slash + return baseURL + "/" + strings.Join(pathParts, "/") + "/" +} diff --git a/commands/resolve/url/resolver_test.go b/commands/resolve/url/resolver_test.go new file mode 100644 index 0000000..b9f0e5d --- /dev/null +++ b/commands/resolve/url/resolver_test.go @@ -0,0 +1,333 @@ +package url + +import ( + "testing" +) + +// TestBuildURL tests the URL construction logic with various combinations +// of URL slug, version, and page path. 
+func TestBuildURL(t *testing.T) { + tests := []struct { + name string + baseURL string + urlSlug string + version string + pagePath string + expected string + }{ + { + name: "non-versioned project with page", + baseURL: "https://www.mongodb.com/docs", + urlSlug: "atlas", + version: "", + pagePath: "manage-clusters", + expected: "https://www.mongodb.com/docs/atlas/manage-clusters/", + }, + { + name: "non-versioned project index", + baseURL: "https://www.mongodb.com/docs", + urlSlug: "compass", + version: "", + pagePath: "", + expected: "https://www.mongodb.com/docs/compass/", + }, + { + name: "versioned project with page", + baseURL: "https://www.mongodb.com/docs", + urlSlug: "drivers/go", + version: "current", + pagePath: "atlas-search", + expected: "https://www.mongodb.com/docs/drivers/go/current/atlas-search/", + }, + { + name: "versioned project index", + baseURL: "https://www.mongodb.com/docs", + urlSlug: "drivers/node", + version: "current", + pagePath: "", + expected: "https://www.mongodb.com/docs/drivers/node/current/", + }, + { + name: "MongoDB Manual (empty slug) with version", + baseURL: "https://www.mongodb.com/docs", + urlSlug: "", + version: "manual", + pagePath: "indexes", + expected: "https://www.mongodb.com/docs/manual/indexes/", + }, + { + name: "MongoDB Manual index", + baseURL: "https://www.mongodb.com/docs", + urlSlug: "", + version: "manual", + pagePath: "", + expected: "https://www.mongodb.com/docs/manual/", + }, + { + name: "MongoDB Manual specific version", + baseURL: "https://www.mongodb.com/docs", + urlSlug: "", + version: "v8.0", + pagePath: "tutorial/install-mongodb-on-os-x", + expected: "https://www.mongodb.com/docs/v8.0/tutorial/install-mongodb-on-os-x/", + }, + { + name: "nested URL slug", + baseURL: "https://www.mongodb.com/docs", + urlSlug: "atlas/app-services", + version: "", + pagePath: "logs", + expected: "https://www.mongodb.com/docs/atlas/app-services/logs/", + }, + { + name: "base URL with trailing slash", + baseURL: 
"https://www.mongodb.com/docs/", + urlSlug: "atlas", + version: "", + pagePath: "clusters", + expected: "https://www.mongodb.com/docs/atlas/clusters/", + }, + { + name: "custom base URL (staging)", + baseURL: "https://docs-staging.mongodb.com", + urlSlug: "atlas", + version: "", + pagePath: "index", + expected: "https://docs-staging.mongodb.com/atlas/index/", + }, + { + name: "deeply nested page path", + baseURL: "https://www.mongodb.com/docs", + urlSlug: "drivers/java/sync", + version: "current", + pagePath: "fundamentals/connection/connection-options", + expected: "https://www.mongodb.com/docs/drivers/java/sync/current/fundamentals/connection/connection-options/", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := buildURL(tt.baseURL, tt.urlSlug, tt.version, tt.pagePath) + if result != tt.expected { + t.Errorf("buildURL() = %q, want %q", result, tt.expected) + } + }) + } +} + +// TestGetURLSlugForProject tests the project name to URL slug lookup. 
+func TestGetURLSlugForProject(t *testing.T) { + tests := []struct { + name string + projectName string + expected string + expectError bool + }{ + { + name: "Atlas project", + projectName: "cloud-docs", + expected: "atlas", + expectError: false, + }, + { + name: "Go driver", + projectName: "golang", + expected: "drivers/go", + expectError: false, + }, + { + name: "MongoDB Manual (empty slug)", + projectName: "docs", + expected: "", + expectError: false, + }, + { + name: "Compass", + projectName: "compass", + expected: "compass", + expectError: false, + }, + { + name: "App Services", + projectName: "atlas-app-services", + expected: "atlas/app-services", + expectError: false, + }, + { + name: "unknown project", + projectName: "nonexistent-project", + expected: "", + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := getURLSlugForProject(tt.projectName) + + if tt.expectError { + if err == nil { + t.Errorf("getURLSlugForProject() expected error, got nil") + } + } else { + if err != nil { + t.Errorf("getURLSlugForProject() unexpected error: %v", err) + } + if result != tt.expected { + t.Errorf("getURLSlugForProject() = %q, want %q", result, tt.expected) + } + } + }) + } +} + +// TestProjectToURLSlugCoverage verifies important projects are mapped. 
+func TestProjectToURLSlugCoverage(t *testing.T) { + // These are critical projects that must always have mappings + criticalProjects := []string{ + // Atlas products + "cloud-docs", + "atlas-cli", + "atlas-operator", + "atlas-app-services", + // Server + "docs", + // Key drivers + "golang", + "node", + "java", + "csharp", + "pymongo", + "rust", + // Tools + "compass", + "mongodb-shell", + "database-tools", + } + + for _, project := range criticalProjects { + t.Run(project, func(t *testing.T) { + _, exists := projectToURLSlug[project] + if !exists { + t.Errorf("critical project %q is missing from projectToURLSlug map", project) + } + }) + } +} + +// TestExtractMonorepoPath tests extraction of the monorepo path. +func TestExtractMonorepoPath(t *testing.T) { + tests := []struct { + name string + filePath string + contentIdx int + expected string + }{ + { + name: "typical path", + filePath: "/Users/user/docs-mongodb-internal/content/atlas/source/index.txt", + contentIdx: 33, // Position where /content/ starts + expected: "/Users/user/docs-mongodb-internal", + }, + { + name: "root path", + filePath: "/content/atlas/source/index.txt", + contentIdx: 0, + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := extractMonorepoPath(tt.filePath, tt.contentIdx) + if result != tt.expected { + t.Errorf("extractMonorepoPath() = %q, want %q", result, tt.expected) + } + }) + } +} + +// TestBuildURLEdgeCases tests edge cases in URL construction. 
+func TestBuildURLEdgeCases(t *testing.T) { + tests := []struct { + name string + baseURL string + urlSlug string + version string + pagePath string + expected string + }{ + { + name: "all empty except base produces double slash", + baseURL: "https://www.mongodb.com/docs", + urlSlug: "", + version: "", + pagePath: "", + // Note: This edge case produces a double slash, but it never occurs in practice + // because there's always either a slug or version for any real project + expected: "https://www.mongodb.com/docs//", + }, + { + name: "only page path", + baseURL: "https://www.mongodb.com/docs", + urlSlug: "", + version: "", + pagePath: "some-page", + expected: "https://www.mongodb.com/docs/some-page/", + }, + { + name: "multiple trailing slashes in base - trims only one", + baseURL: "https://www.mongodb.com/docs///", + urlSlug: "atlas", + version: "", + pagePath: "", + // TrimSuffix only removes one trailing slash + expected: "https://www.mongodb.com/docs///atlas/", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := buildURL(tt.baseURL, tt.urlSlug, tt.version, tt.pagePath) + if result != tt.expected { + t.Errorf("buildURL() = %q, want %q", result, tt.expected) + } + }) + } +} + +// TestURLSlugNeverDoubleSlash verifies that no URL slug produces double slashes. 
+func TestURLSlugNeverDoubleSlash(t *testing.T) {
+	baseURL := "https://www.mongodb.com/docs"
+
+	for project, slug := range projectToURLSlug {
+		t.Run(project, func(t *testing.T) {
+			// Test with version
+			url := buildURL(baseURL, slug, "current", "page")
+			if containsDoubleSlash(url) {
+				t.Errorf("URL for project %q contains double slash: %s", project, url)
+			}
+
+			// Test without version
+			url = buildURL(baseURL, slug, "", "page")
+			if containsDoubleSlash(url) {
+				t.Errorf("URL for project %q (no version) contains double slash: %s", project, url)
+			}
+		})
+	}
+}
+
+// containsDoubleSlash checks if the URL path contains //
+func containsDoubleSlash(url string) bool {
+	// Skip the protocol part (https://)
+	if len(url) < 8 {
+		return false
+	}
+	pathPart := url[8:] // Skip "https://"
+	for i := 0; i < len(pathPart)-1; i++ {
+		if pathPart[i] == '/' && pathPart[i+1] == '/' {
+			return true
+		}
+	}
+	return false
+}
diff --git a/commands/resolve/url/url.go b/commands/resolve/url/url.go
new file mode 100644
index 0000000..313c91a
--- /dev/null
+++ b/commands/resolve/url/url.go
@@ -0,0 +1,84 @@
+// Package url implements the url subcommand for resolving source files to production URLs.
+package url
+
+import (
+	"fmt"
+
+	"github.com/grove-platform/audit-cli/internal/config"
+	"github.com/spf13/cobra"
+)
+
+// NewURLCommand creates the url subcommand.
+//
+// This command resolves source .txt files to their production URLs.
+//
+// Usage:
+//
+//	resolve url <file-path>
+//	resolve url content/manual/manual/source/tutorial/install.txt
+//
+// Flags:
+//   - --base-url: Override the base URL for resolution
+func NewURLCommand() *cobra.Command {
+	var baseURL string
+
+	cmd := &cobra.Command{
+		Use:   "url <file-path>",
+		Short: "Resolve a source .txt file to its production URL",
+		Long: `Resolve a source .txt file from the documentation monorepo to its production URL.
+
+This command takes a path to a .txt source file and outputs the corresponding
+production URL on mongodb.com.
+ +How It Works: + 1. Identifies the project from the file path (e.g., content/manual/...) + 2. Extracts the page path relative to the source directory + 3. Constructs the production URL based on the project's URL slug + +File Path Resolution: + Paths can be specified as: + 1. Absolute path: /full/path/to/file.txt + 2. Relative to monorepo root (if configured): content/manual/manual/source/index.txt + 3. Relative to current directory: ./source/index.txt + +Examples: + # Resolve a file to its production URL + resolve url content/manual/manual/source/tutorial/install.txt + # Output: https://www.mongodb.com/docs/manual/tutorial/install/ + + # Resolve an index file + resolve url content/atlas/source/index.txt + # Output: https://www.mongodb.com/docs/atlas/ + + # Override the base URL + resolve url content/manual/manual/source/reference/method.txt --base-url https://docs-staging.mongodb.com`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + // Resolve file path (supports absolute, monorepo-relative, or cwd-relative) + filePath, err := config.ResolveFilePath(args[0]) + if err != nil { + return err + } + return runResolveURL(filePath, baseURL) + }, + } + + cmd.Flags().StringVar(&baseURL, "base-url", "https://www.mongodb.com/docs", "Base URL for production documentation") + + return cmd +} + +// runResolveURL executes the URL resolution operation. 
+func runResolveURL(filePath string, baseURL string) error { + // Resolve the file to a URL + productionURL, err := ResolveFileToURL(filePath, baseURL) + if err != nil { + return fmt.Errorf("failed to resolve URL: %w", err) + } + + // Print the result + fmt.Println(productionURL) + + return nil +} + diff --git a/main.go b/main.go index d8cbd03..f374df0 100644 --- a/main.go +++ b/main.go @@ -10,6 +10,7 @@ // - analyze: Analyze RST file structures and relationships // - compare: Compare files across different versions // - count: Count documentation content (code examples, pages) +// - resolve: Resolve paths and URLs between source files and production package main import ( @@ -20,6 +21,7 @@ import ( "github.com/grove-platform/audit-cli/commands/count" "github.com/grove-platform/audit-cli/commands/extract" "github.com/grove-platform/audit-cli/commands/report" + "github.com/grove-platform/audit-cli/commands/resolve" "github.com/grove-platform/audit-cli/commands/search" "github.com/spf13/cobra" ) @@ -55,6 +57,7 @@ Designed for maintenance tasks, scoping work, and reporting to stakeholders.`, rootCmd.AddCommand(compare.NewCompareCommand()) rootCmd.AddCommand(count.NewCountCommand()) rootCmd.AddCommand(report.NewReportCommand()) + rootCmd.AddCommand(resolve.NewResolveCommand()) err := rootCmd.Execute() if err != nil {