diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..af3d6f2
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,47 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  lint:
+    name: Lint
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install dependencies
+        run: pip install -e ".[dev]"
+
+      - name: Ruff
+        run: ruff check src/
+
+      - name: Mypy
+        run: mypy src/
+
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install dependencies
+        run: pip install -e ".[dev]"
+
+      - name: Run tests with coverage
+        run: pytest --cov=ter_calculator --cov-report=term-missing
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..e430342
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,15 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.8.6
+    hooks:
+      - id: ruff
+        args: [--fix]
+      - id: ruff-format
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..13f6809
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,42 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, caste, color, religion, or sexual
+identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment:
+
+- Using welcoming and inclusive language
+- Being respectful of differing viewpoints and experiences
+- Gracefully accepting constructive criticism
+- Focusing on what is best for the community
+- Showing empathy towards other community members
+
+Examples of unacceptable behavior:
+
+- The use of sexualized language or imagery, and sexual attention or advances
+- Trolling, insulting or derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others' private information without explicit permission
+- Other conduct which could reasonably be considered inappropriate
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by opening an issue on GitHub or contacting the project maintainers.
+
+All complaints will be reviewed and investigated promptly and fairly. Project
+maintainers are obligated to respect the privacy and security of the reporter.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org),
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html).
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..7325931
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,114 @@
+# Contributing to TER Calculator
+
+Thanks for your interest in contributing! This guide covers everything you need to get started.
+
+## Getting Started
+
+```bash
+# Fork and clone the repo
+git clone https://github.com/<your-username>/TER.git
+cd TER
+
+# Install in development mode
+pip install -e ".[dev]"
+
+# Install pre-commit hooks (pre-commit is also pulled in by the dev extras above)
+pip install pre-commit
+pre-commit install
+```
+
+Requires Python 3.11+.
+
+## Development Workflow
+
+### Running Tests
+
+```bash
+pytest                                    # All tests
+pytest tests/unit/test_classifier.py -v   # Specific module
+pytest --cov=ter_calculator               # With coverage
+```
+
+### Linting and Type Checking
+
+```bash
+ruff check src/                           # Lint
+ruff format src/ tests/                   # Format
+mypy src/                                 # Type check
+```
+
+Pre-commit hooks run ruff (lint with `--fix`, then format) automatically on staged files; mypy is not part of the hooks, so run it manually.
+
+### Branch Naming
+
+- `feature/<description>` -- new functionality
+- `fix/<description>` -- bug fixes
+- `docs/<description>` -- documentation changes
+- `refactor/<description>` -- code restructuring
+- `test/<description>` -- test additions or fixes
+
+### Commit Messages
+
+Use [Conventional Commits](https://www.conventionalcommits.org/):
+
+```
+feat: add rolling window size option to watch command
+fix: correct token count for merged reasoning spans
+docs: add context orchestrator usage examples
+test: add unit tests for waste_detectors module
+refactor: extract shared CLI argument definitions
+```
+
+## Pull Request Process
+
+1. Create a feature branch from `main`
+2. Make your changes with tests
+3. Ensure all checks pass: `pytest && ruff check src/ && mypy src/`
+4. Open a PR against `main` with a clear description
+5. One approval is required before merging
+
+### PR Guidelines
+
+- One feature or fix per PR
+- Include tests for new functionality
+- Update documentation if behavior changes
+- Keep PRs focused -- separate unrelated changes into different PRs
+
+## Code Style
+
+- Python 3.11+ -- use modern syntax (type unions with `|`, match statements where appropriate)
+- Dataclasses for models (see `models.py`)
+- Lazy imports in CLI handlers for fast startup
+- Ruff handles formatting and linting -- don't fight the formatter
+
+## Project Structure
+
+```
+src/ter_calculator/    # Source modules
+tests/unit/            # Unit tests
+tests/features/        # BDD feature files
+tests/integration/     # Integration tests
+docs/                  # Architecture and user documentation
+sample_sessions/       # Sample JSONL files for testing
+```
+
+## Reporting Bugs
+
+Open a [GitHub Issue](https://github.com/lgriffin/TER/issues) with:
+
+- Steps to reproduce
+- Expected vs actual behavior
+- Python version and OS
+- Sample session file (if applicable, redact sensitive content)
+
+## Requesting Features
+
+Open a [GitHub Issue](https://github.com/lgriffin/TER/issues) with the `enhancement` label describing:
+
+- The problem you're trying to solve
+- Your proposed solution
+- Any alternatives you've considered
+
+## License
+
+By contributing, you agree that your contributions will be licensed under the [Apache License 2.0](LICENSE).
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..6622082
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,191 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to the Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by the Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding any notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   Copyright 2026 Leigh Griffin
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.md b/README.md
index 0c21153..4e1e3b0 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
 # TER Calculator
 
+[![CI](https://github.com/lgriffin/TER/actions/workflows/ci.yml/badge.svg)](https://github.com/lgriffin/TER/actions/workflows/ci.yml)
+[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](LICENSE)
+
 Token Efficiency Ratio (TER) calculator for Claude Code sessions. Measures how efficiently an AI coding agent uses its token budget by classifying output token spans as **aligned** (contributing to intent) or **waste** (redundant reasoning, unnecessary tool calls, over-explanation), and surfaces session economics, context optimization, and cross-session consistency.
 
 ## Features
@@ -349,15 +352,24 @@ See [docs/architecture.md](docs/architecture.md) for detailed diagrams and data
 - [Context Orchestrator](docs/context-orchestrator.md) -- patent implementation reference
 - [User Guide](docs/user-guide.md) -- installation, workflows, troubleshooting
 
+## Contributing
+
+We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on setting up your development environment, running tests, and submitting pull requests.
+
+This project follows the [Contributor Covenant Code of Conduct](CODE_OF_CONDUCT.md).
+
 ## Development
 
 ```bash
-# Run tests (93 context orchestrator + 538 BDD + existing unit tests)
+# Run tests
 pytest
 
 # Lint
 ruff check src/
 
+# Type check
+mypy src/
+
 # Run specific test modules
 pytest tests/unit/test_fragment_store.py -v
 pytest tests/unit/test_budget_optimizer.py -v
diff --git a/pyproject.toml b/pyproject.toml
index b73a769..fbc39aa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,6 +6,7 @@ build-backend = "setuptools.build_meta"
 name = "ter-calculator"
 version = "0.1.0"
 description = "Token Efficiency Ratio calculator for Claude Code sessions"
+license = "Apache-2.0"
 requires-python = ">=3.11"
 dependencies = [
     "sentence-transformers>=2.2.0",
@@ -21,6 +22,7 @@ dev = [
     "pytest-bdd>=7.0.0",
     "mypy>=1.0.0",
     "ruff>=0.1.0",
+    "pre-commit>=3.0.0",
 ]
 
 [project.scripts]
diff --git a/src/ter_calculator/cli.py b/src/ter_calculator/cli.py
index 1f6c865..171f9db 100644
--- a/src/ter_calculator/cli.py
+++ b/src/ter_calculator/cli.py
@@ -18,129 +18,95 @@ def _setup_stdout_encoding():
         )
 
 
-def main(argv: list[str] | None = None) -> int:
-    parser = argparse.ArgumentParser(
-        prog="ter",
-        description="Token Efficiency Ratio calculator for Claude Code sessions",
-    )
-    parser.add_argument(
-        "--version", action="version", version=f"%(prog)s {__version__}"
-    )
+def _add_analysis_args(parser: argparse.ArgumentParser) -> None:
+    """Add the shared analysis arguments used by both analyze and report."""
     parser.add_argument(
-        "--verbose", action="store_true", help="Enable verbose output"
-    )
-    parser.add_argument(
-        "--quiet", action="store_true", help="Suppress non-essential output"
-    )
-
-    subparsers = parser.add_subparsers(dest="command")
-
-    # analyze subcommand
-    analyze_parser = subparsers.add_parser(
-        "analyze", help="Analyze a Claude Code session"
-    )
-    analyze_parser.add_argument(
         "session_path", nargs="?", default=None,
         help="Path to a JSONL session file (optional if --latest is used)"
     )
-    analyze_parser.add_argument(
+    parser.add_argument(
         "--latest", action="store_true",
-        help="Analyze the most recent session (based on file modification time)"
+        help="Use the most recent session (based on file modification time)"
     )
-    analyze_parser.add_argument(
-        "--format", dest="output_format", choices=["text", "json"],
-        default="text", help="Output format (default: text)"
-    )
-    analyze_parser.add_argument(
+    parser.add_argument(
         "--similarity-threshold", type=float, default=0.40,
         help="Cosine similarity threshold for alignment (default: 0.40)"
     )
-    analyze_parser.add_argument(
+    parser.add_argument(
         "--confidence-threshold", type=float, default=0.75,
         help="Classifier confidence threshold (default: 0.75)"
     )
-    analyze_parser.add_argument(
+    parser.add_argument(
         "--restatement-threshold", type=float, default=0.85,
         help="Similarity threshold for context restatement (default: 0.85)"
     )
-    analyze_parser.add_argument(
+    parser.add_argument(
         "--phase-weights", type=str, default="0.3,0.4,0.3",
         help="Phase weights as r,t,g (default: 0.3,0.4,0.3)"
     )
-    analyze_parser.add_argument(
+    parser.add_argument(
         "--no-waste-patterns", action="store_true",
         help="Disable waste pattern detection"
     )
-    analyze_parser.add_argument(
+    parser.add_argument(
         "--cost-model", type=str, default="sonnet",
         help="Cost model: 'sonnet' (default) or custom 'input,output,cache_read,cache_write' rates per MTok"
     )
-    analyze_parser.add_argument(
+    parser.add_argument(
         "--no-input-analysis", action="store_true",
         help="Disable input analysis (user/model token breakdown, drift, and alignment)"
     )
-    analyze_parser.add_argument(
+    parser.add_argument(
         "--prompt-similarity-threshold", type=float, default=0.75,
         help="Cosine similarity threshold for flagging redundant prompts (default: 0.75)"
     )
-    analyze_parser.add_argument(
-        "--group", action="store_true",
-        help="Include subagent sessions in grouped analysis"
-    )
-    analyze_parser.add_argument(
+    parser.add_argument(
         "--cost-weighted", action="store_true",
         help="Include cost-weighted TER analysis"
     )
-    analyze_parser.add_argument(
+    parser.add_argument(
         "--check-overthinking", action="store_true",
         help="Analyze reasoning efficiency and detect overthinking"
     )
 
-    # report — Markdown summary (same analysis pipeline as analyze)
-    report_parser = subparsers.add_parser(
-        "report",
-        help="Print a Markdown summary (headline metrics, calibration, top waste, next steps)",
-    )
-    report_parser.add_argument(
-        "session_path", nargs="?", default=None,
-        help="Path to a JSONL session file (optional if --latest is used)"
-    )
-    report_parser.add_argument(
-        "--latest", action="store_true",
-        help="Report on the most recent session (based on file modification time)"
-    )
-    report_parser.add_argument(
-        "--similarity-threshold", type=float, default=0.40,
-        help="Cosine similarity threshold for alignment (default: 0.40)"
+
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(
+        prog="ter",
+        description="Token Efficiency Ratio calculator for Claude Code sessions",
     )
-    report_parser.add_argument(
-        "--confidence-threshold", type=float, default=0.75,
-        help="Classifier confidence threshold (default: 0.75)"
+    parser.add_argument(
+        "--version", action="version", version=f"%(prog)s {__version__}"
     )
-    report_parser.add_argument(
-        "--restatement-threshold", type=float, default=0.85,
-        help="Similarity threshold for context restatement (default: 0.85)"
+    parser.add_argument(
+        "--verbose", action="store_true", help="Enable verbose output"
     )
-    report_parser.add_argument(
-        "--phase-weights", type=str, default="0.3,0.4,0.3",
-        help="Phase weights as r,t,g (default: 0.3,0.4,0.3)"
+    parser.add_argument(
+        "--quiet", action="store_true", help="Suppress non-essential output"
     )
-    report_parser.add_argument(
-        "--no-waste-patterns", action="store_true",
-        help="Disable waste pattern detection"
+
+    subparsers = parser.add_subparsers(dest="command")
+
+    # analyze subcommand
+    analyze_parser = subparsers.add_parser(
+        "analyze", help="Analyze a Claude Code session"
     )
-    report_parser.add_argument(
-        "--cost-model", type=str, default="sonnet",
-        help="Cost model: 'sonnet' (default) or custom rates per MTok"
+    _add_analysis_args(analyze_parser)
+    analyze_parser.add_argument(
+        "--format", dest="output_format", choices=["text", "json"],
+        default="text", help="Output format (default: text)"
     )
-    report_parser.add_argument(
-        "--no-input-analysis", action="store_true",
-        help="Disable input analysis"
+    analyze_parser.add_argument(
+        "--group", action="store_true",
+        help="Include subagent sessions in grouped analysis"
     )
-    report_parser.add_argument(
-        "--prompt-similarity-threshold", type=float, default=0.75,
-        help="Cosine similarity threshold for redundant prompts (default: 0.75)"
+
+    # report — Markdown summary (same analysis pipeline as analyze)
+    report_parser = subparsers.add_parser(
+        "report",
+        help="Print a Markdown summary (headline metrics, calibration, top waste, next steps)",
     )
+    _add_analysis_args(report_parser)
     report_parser.add_argument(
         "-o",
         "--output",
@@ -149,14 +115,6 @@ def main(argv: list[str] | None = None) -> int:
         default=None,
         help="Write Markdown to FILE instead of stdout (e.g. report.md)",
     )
-    report_parser.add_argument(
-        "--cost-weighted", action="store_true",
-        help="Include cost-weighted TER analysis"
-    )
-    report_parser.add_argument(
-        "--check-overthinking", action="store_true",
-        help="Analyze reasoning efficiency and detect overthinking"
-    )
 
     # compare subcommand
     compare_parser = subparsers.add_parser(
diff --git a/src/ter_calculator/formatter.py b/src/ter_calculator/formatter.py
index fd18706..51d827e 100644
--- a/src/ter_calculator/formatter.py
+++ b/src/ter_calculator/formatter.py
@@ -1,40 +1,44 @@
-"""Output formatting for TER results."""
+"""Output formatting for TER results.
 
-from __future__ import annotations
+Public API: format_ter_result, format_comparison, format_grouped_analysis.
+Format-specific rendering lives in formatter_rich, formatter_text, formatter_json.
+"""
 
-import json
-import io
+from __future__ import annotations
 
-from .models import CostModel, InputAnalysis, TERResult, WastePattern
-from .waste import summarize_waste
+from .models import CostModel, TERResult, WastePattern
 
 
 def format_ter_result(
     result: TERResult, fmt: str = "text", use_rich: bool = True,
 ) -> str:
-    """Format a TER result for output."""
     if fmt == "json":
-        return _format_json(result)
+        from .formatter_json import format_json
+        return format_json(result)
     if use_rich:
         try:
-            return _format_rich(result)
+            from .formatter_rich import format_rich
+            return format_rich(result)
         except (ImportError, UnicodeEncodeError):
             pass
-    return _format_text(result)
+    from .formatter_text import format_text
+    return format_text(result)
 
 
 def format_comparison(
     results: list[TERResult], fmt: str = "text", use_rich: bool = True,
 ) -> str:
-    """Format multiple TER results as a comparison."""
     if fmt == "json":
-        return _format_comparison_json(results)
+        from .formatter_json import format_comparison_json
+        return format_comparison_json(results)
     if use_rich:
         try:
-            return _format_comparison_rich(results)
+            from .formatter_rich import format_comparison_rich
+            return format_comparison_rich(results)
         except (ImportError, UnicodeEncodeError):
             pass
-    return _format_comparison_text(results)
+    from .formatter_text import format_comparison_text
+    return format_comparison_text(results)
 
 
 def format_grouped_analysis(
@@ -43,19 +47,25 @@ def format_grouped_analysis(
     fmt: str = "text",
     use_rich: bool = True,
 ) -> str:
-    """Format a grouped parent + subagent analysis."""
     if fmt == "json":
-        return _format_grouped_json(parent_result, subagent_results)
+        from .formatter_json import format_grouped_json
+        return format_grouped_json(parent_result, subagent_results)
     if use_rich:
         try:
-            return _format_grouped_rich(parent_result, subagent_results)
+            from .formatter_rich import format_grouped_rich
+            return format_grouped_rich(parent_result, subagent_results)
         except (ImportError, UnicodeEncodeError):
             pass
-    return _format_grouped_text(parent_result, subagent_results)
+    from .formatter_text import format_grouped_text
+    return format_grouped_text(parent_result, subagent_results)
+
+
+# ---------------------------------------------------------------------------
+# Shared helpers used by formatter_rich, formatter_text, formatter_json
+# ---------------------------------------------------------------------------
 
 
 def _compute_group_aggregates(all_results: list[TERResult]) -> dict:
-    """Compute aggregate metrics across a group of sessions."""
     total_tokens = sum(r.total_tokens for r in all_results)
     total_waste = sum(r.waste_tokens for r in all_results)
     weighted_ter = (
@@ -78,172 +88,7 @@ def _compute_group_aggregates(all_results: list[TERResult]) -> dict:
     }
 
 
-# --- Rich formatting ---
-
-# Import shared components
-from .rich_components import ter_color as _ter_color
-
-
-def _format_rich(result: TERResult) -> str:
-    """Format TER result using Rich library."""
-    from rich.console import Console
-    from rich.panel import Panel
-    from rich.table import Table
-    from rich.text import Text
-
-    buf = io.StringIO()
-    console = Console(file=buf, force_terminal=True, width=72)
-
-    # --- Header panel ---
-    ter_text = Text(f"{result.aggregate_ter:.2f}", style=_ter_color(result.aggregate_ter))
-    waste_pct = (result.waste_tokens / result.total_tokens * 100) if result.total_tokens else 0
-    sid = result.session_id
-    if len(sid) > 20:
-        sid = sid[:8] + "..."
-
-    # Line 1: TER | Waste | Cost
-    line1_parts: list = [("TER: ", "bold"), ter_text]
-    line1_parts.append(("  |  ", ""))
-    line1_parts.append((f"Waste: {waste_pct:.1f}%", "red" if waste_pct > 10 else ""))
-    if result.economics:
-        line1_parts.append(("  |  ", ""))
-        line1_parts.append((f"Cost: ${result.economics.estimated_cost_usd:.2f}", ""))
-        waste_cost = _compute_waste_cost(result)
-        if waste_cost > 0:
-            line1_parts.append(("  |  ", ""))
-            line1_parts.append((f"Waste $: ${waste_cost:.2f}", "red"))
-
-    # Line 2: Input analysis headline (if available)
-    ia = result.input_analysis
-    if ia is not None:
-        drift = ia.intent_drift
-        pra = ia.prompt_response_alignment
-        ps = ia.prompt_similarity
-        bd = ia.token_breakdown
-
-        drift_colors = {
-            "convergent": "red", "divergent": "green",
-            "stable": "green", "mixed": "yellow",
-        }
-        d_color = drift_colors.get(drift.overall_trajectory, "")
-        a_color = "red" if pra.average_alignment < 0.3 else (
-            "yellow" if pra.average_alignment < 0.5 else "green"
-        )
-        r_color = "red" if ps.prompt_redundancy_score > 0.5 else (
-            "yellow" if ps.prompt_redundancy_score > 0 else "green"
-        )
-
-        line2_parts: list = [
-            ("Drift: ", "bold"),
-            (f"{drift.overall_trajectory}", d_color),
-        ]
-        if pra.pairs:
-            line2_parts.append(("  |  ", ""))
-            line2_parts.append(("Alignment: ", "bold"))
-            line2_parts.append((f"{pra.average_alignment:.2f}", a_color))
-        if ps.prompt_count >= 2:
-            line2_parts.append(("  |  ", ""))
-            line2_parts.append(("Redundancy: ", "bold"))
-            line2_parts.append((f"{ps.prompt_redundancy_score:.0%}", r_color))
-        line2_parts.append(("  |  ", ""))
-        line2_parts.append((f"User: {bd.user_ratio:.0%}", "dim"))
-
-        header = Text.assemble(
-            *line1_parts, ("\n", ""), *line2_parts,
-        )
-    else:
-        header = Text.assemble(*line1_parts)
-
-    console.print(Panel(header, title=sid, expand=False))
-
-    # --- Combined scores table (phases + tokens in one) ---
-    table = Table(show_header=True, show_edge=True)
-    table.add_column("Phase", style="bold", width=12)
-    table.add_column("TER", justify="right", width=6)
-    table.add_column("", width=3)
-    table.add_column("Metric", style="bold", width=14)
-    table.add_column("Value", justify="right", width=12)
-
-    phases = [
-        ("Reasoning", result.phase_scores.get("reasoning", 0)),
-        ("Tool Use", result.phase_scores.get("tool_use", 0)),
-        ("Generation", result.phase_scores.get("generation", 0)),
-    ]
-    right_rows = [
-        ("Output Tokens", f"{result.total_tokens:,}"),
-        ("Aligned", f"{result.aligned_tokens:,}"),
-        ("Waste", f"{result.waste_tokens:,}"),
-    ]
-
-    for i in range(3):
-        p_name, p_score = phases[i]
-        p_color = _ter_color(p_score)
-        r_label, r_value = right_rows[i]
-        table.add_row(
-            p_name,
-            f"[{p_color}]{p_score:.2f}[/{p_color}]",
-            "",
-            r_label,
-            r_value,
-        )
-    console.print(table)
-
-    # --- Session economics (compact) ---
-    if result.economics is not None:
-        econ = result.economics
-        cache_pct = econ.cache_hit_rate * 100
-        cache_color = "green" if cache_pct >= 50 else "yellow" if cache_pct >= 20 else "red"
-
-        econ_table = Table(show_header=True, show_edge=True)
-        econ_table.add_column("Economics", style="bold", width=18)
-        econ_table.add_column("", justify="right", width=12)
-        econ_table.add_column("", width=3)
-        econ_table.add_column("Context", style="bold", width=14)
-        econ_table.add_column("", justify="right", width=12)
-
-        pos = econ.positional
-        g = econ.input_growth
-        bloat_str = "[red]YES[/red]" if g.context_bloat_detected else (
-            "[yellow]WATCH[/yellow]" if g.is_superlinear else "[green]NO[/green]"
-        )
-
-        left_rows = [
-            ("Input Tokens", f"{econ.total_input_tokens:,}"),
-            ("Cache Read", f"{econ.total_cache_read_tokens:,}"),
-            ("Cache Hit Rate", f"[{cache_color}]{cache_pct:.1f}%[/{cache_color}]"),
-        ]
-        right_rows_e = [
-            ("Growth", f"{g.growth_rate:.1f}x ({len(g.turn_input_tokens)} turns)"),
-            ("Bloat", bloat_str),
-            ("Positional", f"{pos.early_ter:.2f} / {pos.mid_ter:.2f} / {pos.late_ter:.2f}"),
-        ]
-
-        for i in range(3):
-            l_label, l_value = left_rows[i]
-            r_label, r_value = right_rows_e[i]
-            econ_table.add_row(l_label, l_value, "", r_label, r_value)
-        console.print(econ_table)
-
-    # --- Waste breakdown table ---
-    _format_waste_breakdown_rich(console, result)
-
-    # --- Input analysis ---
-    if result.input_analysis is not None:
-        _format_input_analysis_rich(console, result.input_analysis)
-
-    # --- Cost report ---
-    if result.cost_report is not None:
-        _format_cost_report_rich(console, result.cost_report)
-
-    # --- Overthinking analysis ---
-    if result.overthinking_result is not None:
-        _format_overthinking_rich(console, result.overthinking_result)
-
-    return buf.getvalue().rstrip()
-
-
 def _compute_waste_cost(result: TERResult) -> float:
-    """Compute total waste $ from breakdown rows (mixed input/output pricing)."""
     rows = _build_waste_breakdown(result)
     if not rows:
         return 0.0
@@ -256,7 +101,6 @@ def _compute_waste_cost(result: TERResult) -> float:
 
 
 def _pattern_pricing(pattern_type: str) -> str:
-    """Structural patterns whose token cost is mostly input-side context."""
     if pattern_type in (
         "repetitive_read",
         "bash_antipattern",
@@ -270,19 +114,11 @@ def _pattern_pricing(pattern_type: str) -> str:
 def _build_waste_breakdown(
     result: TERResult,
 ) -> list[tuple[str, int, int, str]]:
-    """Build rows: (label, tokens, instance_count, pricing_kind).
-
-    ``pricing_kind`` is ``output`` (billed as generation / assistant tool
-    JSON) or ``input`` (tool results re-sent as context). Output-priced
-    rows are scaled elsewhere to match API ``output_tokens`` when known.
-
-    Skips pattern rows that duplicate classified span categories.
-    """
+    """Build waste rows: (label, tokens, instance_count, pricing_kind: "output" | "input")."""
     from .models import ALIGNED_LABELS
 
     rows: list[tuple[str, int, int, str]] = []
 
-    # Classified waste: assistant spans priced as output; rare user spans as input.
     category_map = {
         "redundant_reasoning": "Redundant Reasoning",
         "unnecessary_tool_call": "Unnecessary Tool Calls",
@@ -348,791 +184,7 @@ def _build_waste_breakdown(
     return rows
 
 
-def _format_waste_breakdown_rich(console, result: TERResult) -> None:
-    """Render waste breakdown as a Rich table."""
-    from rich.table import Table
-
-    rows = _build_waste_breakdown(result)
-    if not rows:
-        return
-
-    total_waste = sum(t for _, t, _, _ in rows)
-    cm = result.economics.cost_model if result.economics else CostModel()
-
-    table = Table(show_header=True, show_edge=True, title="Waste Breakdown")
-    table.add_column("Source", style="bold", width=22)
-    table.add_column("Tokens", justify="right", width=10)
-    table.add_column("%", justify="right", width=6)
-    table.add_column("Cost", justify="right", width=8)
-    table.add_column("Count", justify="right", width=6, style="dim")
-
-    for label, tokens, count, kind in rows:
-        pct = (tokens / total_waste * 100) if total_waste > 0 else 0
-        rate = cm.output_rate if kind == "output" else cm.input_rate
-        row_cost = float(tokens) * rate / 1_000_000
-        table.add_row(
-            label,
-            f"{tokens:,}",
-            f"{pct:.0f}%",
-            f"${row_cost:.4f}",
-            str(count),
-        )
-
-    table.add_section()
-    total_cost = _compute_waste_cost(result)
-    table.add_row(
-        "[bold]Total[/bold]",
-        f"[bold]{total_waste:,}[/bold]",
-        "[bold]100%[/bold]",
-        f"[bold]${total_cost:.4f}[/bold]",
-        "",
-    )
-    console.print(table)
-
-
-def _collapse_waste_patterns(patterns: list[WastePattern]) -> list[str]:
-    """Collapse waste patterns into a summary by type."""
-    by_type: dict[str, list[WastePattern]] = {}
-    for wp in patterns:
-        by_type.setdefault(wp.pattern_type, []).append(wp)
-
-    lines: list[str] = []
-    for ptype, wps in by_type.items():
-        label = ptype.replace("_", " ").title()
-        total_tokens = sum(wp.tokens_wasted for wp in wps)
-        count = len(wps)
-        if count == 1:
-            lines.append(f"{label}: {wps[0].description} ({total_tokens:,} tokens)")
-        else:
-            lines.append(f"{label}: {count} instances ({total_tokens:,} tokens)")
-    return lines
-
-
-def _format_comparison_rich(results: list[TERResult]) -> str:
-    """Format comparison using Rich table."""
-    from rich.console import Console
-    from rich.table import Table
-
-    buf = io.StringIO()
-    console = Console(file=buf, force_terminal=True, width=90)
-
-    table = Table(title="TER Comparison", show_header=True)
-    table.add_column("#", justify="right", style="dim")
-    table.add_column("Session", style="bold")
-    table.add_column("TER", justify="right")
-    table.add_column("Waste%", justify="right")
-    table.add_column("Cache%", justify="right")
-    table.add_column("Cost", justify="right")
-    table.add_column("Waste $", justify="right")
-    table.add_column("Patterns", justify="right")
-
-    for i, r in enumerate(results, 1):
-        color = _ter_color(r.aggregate_ter)
-        pattern_count = len(r.waste_patterns) if r.waste_patterns else 0
-        waste_pct = (r.waste_tokens / r.total_tokens * 100) if r.total_tokens else 0
-        cache_str = ""
-        cost_str = ""
-        waste_cost_str = ""
-        if r.economics:
-            cache_pct = r.economics.cache_hit_rate * 100
-            cache_str = f"{cache_pct:.0f}%"
-            cost_str = f"${r.economics.estimated_cost_usd:.2f}"
-            wc = _compute_waste_cost(r)
-            waste_cost_str = f"[red]${wc:.2f}[/red]"
-        sid = r.session_id
-        if len(sid) > 20:
-            sid = sid[:8] + "..."
-        table.add_row(
-            str(i),
-            sid,
-            f"[{color}]{r.aggregate_ter:.2f}[/{color}]",
-            f"{waste_pct:.1f}%",
-            cache_str,
-            cost_str,
-            waste_cost_str,
-            str(pattern_count),
-        )
-
-    console.print(table)
-
-    if results:
-        avg_ter = sum(r.aggregate_ter for r in results) / len(results)
-        total_cost = sum(r.economics.estimated_cost_usd for r in results if r.economics)
-        total_waste_cost = sum(_compute_waste_cost(r) for r in results)
-        color = _ter_color(avg_ter)
-        console.print(f"\nAverage TER: [{color}]{avg_ter:.2f}[/{color}]  |  Total Cost: ${total_cost:.2f}  |  Total Waste: [red]${total_waste_cost:.2f}[/red]")
-
-    return buf.getvalue().rstrip()
-
-
-def _format_grouped_rich(
-    parent_result: TERResult,
-    subagent_results: list[TERResult],
-) -> str:
-    """Format grouped analysis using Rich."""
-    from rich.console import Console
-    from rich.panel import Panel
-    from rich.table import Table
-    from rich.text import Text
-
-    all_results = [parent_result] + subagent_results
-    agg = _compute_group_aggregates(all_results)
-
-    buf = io.StringIO()
-    console = Console(file=buf, force_terminal=True, width=90)
-
-    # Header panel.
-    sid = parent_result.session_id
-    if len(sid) > 20:
-        sid = sid[:8] + "..."
-    ter_text = Text(f"{agg['weighted_ter']:.2f}", style=_ter_color(agg["weighted_ter"]))
-
-    header = Text.assemble(
-        ("TER: ", "bold"), ter_text,
-        ("  |  ", ""),
-        (f"Waste: {agg['waste_pct']:.1f}%", "red" if agg["waste_pct"] > 10 else ""),
-        ("  |  ", ""),
-        (f"Cost: ${agg['total_cost_usd']:.2f}", ""),
-        ("  |  ", ""),
-        (f"Waste $: ${agg['total_waste_cost_usd']:.2f}", "red"),
-        ("\n", ""),
-        (f"Sessions: 1 parent + {len(subagent_results)} subagent(s)", "dim"),
-        ("  |  ", ""),
-        (f"Tokens: {agg['total_tokens']:,}", "dim"),
-    )
-    console.print(Panel(header, title=f"Group: {sid}", expand=False))
-
-    # Per-session table.
-    table = Table(show_header=True, title="Session Breakdown")
-    table.add_column("Role", width=10)
-    table.add_column("Session", width=14)
-    table.add_column("TER", justify="right", width=6)
-    table.add_column("Waste%", justify="right", width=7)
-    table.add_column("Tokens", justify="right", width=10)
-    table.add_column("Cost", justify="right", width=8)
-    table.add_column("Waste $", justify="right", width=8)
-    table.add_column("Patterns", justify="right", width=8)
-
-    def _add_session_row(r: TERResult, role: str):
-        color = _ter_color(r.aggregate_ter)
-        waste_pct = (r.waste_tokens / r.total_tokens * 100) if r.total_tokens else 0
-        cost_str = f"${r.economics.estimated_cost_usd:.2f}" if r.economics else ""
-        wc = _compute_waste_cost(r)
-        waste_str = f"[red]${wc:.2f}[/red]" if wc > 0 else ""
-        pattern_count = len(r.waste_patterns) if r.waste_patterns else 0
-        rsid = r.session_id
-        if len(rsid) > 14:
-            rsid = rsid[:8] + "..."
-        table.add_row(
-            role, rsid,
-            f"[{color}]{r.aggregate_ter:.2f}[/{color}]",
-            f"{waste_pct:.1f}%",
-            f"{r.total_tokens:,}",
-            cost_str, waste_str, str(pattern_count),
-        )
-
-    _add_session_row(parent_result, "parent")
-    for r in subagent_results:
-        _add_session_row(r, "agent")
-
-    # Total row.
-    table.add_section()
-    color = _ter_color(agg["weighted_ter"])
-    table.add_row(
-        "[bold]Total[/bold]", "",
-        f"[bold][{color}]{agg['weighted_ter']:.2f}[/{color}][/bold]",
-        f"[bold]{agg['waste_pct']:.1f}%[/bold]",
-        f"[bold]{agg['total_tokens']:,}[/bold]",
-        f"[bold]${agg['total_cost_usd']:.2f}[/bold]",
-        f"[bold][red]${agg['total_waste_cost_usd']:.2f}[/red][/bold]",
-        "",
-    )
-    console.print(table)
-
-    return buf.getvalue().rstrip()
-
-
-def _format_grouped_text(
-    parent_result: TERResult,
-    subagent_results: list[TERResult],
-) -> str:
-    """Format grouped analysis as plain text."""
-    all_results = [parent_result] + subagent_results
-    agg = _compute_group_aggregates(all_results)
-
-    sid = parent_result.session_id
-    if len(sid) > 20:
-        sid = sid[:8] + "..."
-
-    lines = [
-        f"Group Analysis: {sid}",
-        "\u2550" * 50,
-        "",
-        f"TER: {agg['weighted_ter']:.2f}  |  Waste: {agg['waste_pct']:.1f}%"
-        f"  |  Cost: ${agg['total_cost_usd']:.2f}"
-        f"  |  Waste $: ${agg['total_waste_cost_usd']:.2f}",
-        f"Sessions: 1 parent + {len(subagent_results)} subagent(s)  |  Tokens: {agg['total_tokens']:,}",
-        "",
-        f"  {'Role':<10} {'Session':<14} {'TER':<6} {'Waste%':<8} {'Tokens':<10} {'Cost':<10} {'Waste $':<10} {'Patterns':<8}",
-    ]
-
-    def _add_row(r: TERResult, role: str):
-        rsid = r.session_id[:14] if len(r.session_id) <= 14 else r.session_id[:8] + "..."
-        waste_pct = (r.waste_tokens / r.total_tokens * 100) if r.total_tokens else 0
-        cost_str = f"${r.economics.estimated_cost_usd:.2f}" if r.economics else ""
-        wc = _compute_waste_cost(r)
-        waste_str = f"${wc:.2f}" if wc > 0 else ""
-        pattern_count = len(r.waste_patterns) if r.waste_patterns else 0
-        lines.append(
-            f"  {role:<10} {rsid:<14} {r.aggregate_ter:<6.2f} "
-            f"{waste_pct:<8.1f} {r.total_tokens:<10,} {cost_str:<10} {waste_str:<10} {pattern_count:<8}"
-        )
-
-    _add_row(parent_result, "[parent]")
-    for r in subagent_results:
-        _add_row(r, "[agent]")
-
-    lines.extend([
-        "",
-        f"  {'Total':<10} {'':<14} {agg['weighted_ter']:<6.2f} "
-        f"{agg['waste_pct']:<8.1f} {agg['total_tokens']:<10,} "
-        f"${agg['total_cost_usd']:<9.2f} ${agg['total_waste_cost_usd']:<9.2f}",
-    ])
-
-    return "\n".join(lines)
-
-
-def _format_grouped_json(
-    parent_result: TERResult,
-    subagent_results: list[TERResult],
-) -> str:
-    """Format grouped analysis as JSON."""
-    all_results = [parent_result] + subagent_results
-    agg = _compute_group_aggregates(all_results)
-
-    data = {
-        "group": {
-            "parent_session_id": parent_result.session_id,
-            "subagent_count": len(subagent_results),
-            **agg,
-        },
-        "parent": _ter_result_to_dict(parent_result),
-        "subagents": [_ter_result_to_dict(r) for r in subagent_results],
-    }
-    return json.dumps(data, indent=2)
-
-
-def _format_input_analysis_rich(console, ia: InputAnalysis) -> None:
-    """Render input analysis section using Rich."""
-    from rich.table import Table
-
-    bd = ia.token_breakdown
-    ps = ia.prompt_similarity
-
-    # Token breakdown table.
-    console.print("\n[bold]Input Analysis[/bold]")
-    tb = Table(show_header=True, show_edge=True)
-    tb.add_column("Origin", style="bold", width=14)
-    tb.add_column("Category", width=16)
-    tb.add_column("Tokens", justify="right", width=10)
-
-    tb.add_row("User", "Prompt Text", f"{bd.user_input_tokens:,}")
-    tb.add_row("User", "Tool Results", f"{bd.user_result_tokens:,}")
-    tb.add_row("Model", "Reasoning", f"{bd.model_reasoning_tokens:,}")
-    tb.add_row("Model", "Tool Calls", f"{bd.model_tool_tokens:,}")
-    tb.add_row("Model", "Generation", f"{bd.model_generation_tokens:,}")
-    tb.add_section()
-    tb.add_row("[bold]User Total[/bold]", "", f"[bold]{bd.total_user_tokens:,}[/bold]")
-    tb.add_row("[bold]Model Total[/bold]", "", f"[bold]{bd.total_model_tokens:,}[/bold]")
-    tb.add_row("User Ratio", "", f"{bd.user_ratio:.1%}")
-    console.print(tb)
-
-    # Prompt similarity.
-    if ps.prompt_count >= 2:
-        r_color = "red" if ps.prompt_redundancy_score > 0.5 else (
-            "yellow" if ps.prompt_redundancy_score > 0 else "green"
-        )
-        console.print(
-            f"\nPrompt Redundancy: [{r_color}]{ps.prompt_redundancy_score:.0%}[/{r_color}]"
-            f"  ({ps.prompt_count} prompts, {len(ps.similar_pairs)} similar pair(s))"
-        )
-        for pair in ps.similar_pairs[:5]:
-            a_text = pair.prompt_a_text[:40] + "..." if len(pair.prompt_a_text) > 40 else pair.prompt_a_text
-            b_text = pair.prompt_b_text[:40] + "..." if len(pair.prompt_b_text) > 40 else pair.prompt_b_text
-            console.print(
-                f'  [dim]#{pair.prompt_a_index+1}[/dim] "{a_text}" '
-                f'[dim]~[/dim] [dim]#{pair.prompt_b_index+1}[/dim] "{b_text}" '
-                f'[yellow]({pair.similarity:.2f})[/yellow]'
-            )
-
-    # Intent drift.
-    drift = ia.intent_drift
-    if drift.steps:
-        _drift_colors = {
-            "convergent": "red", "divergent": "green",
-            "stable": "green", "mixed": "yellow",
-        }
-        t_color = _drift_colors.get(drift.overall_trajectory, "")
-        console.print(
-            f"\nIntent Drift: [{t_color}]{drift.overall_trajectory}[/{t_color}]"
-            f"  (avg similarity: {drift.average_drift:.2f})"
-        )
-        for step in drift.steps:
-            s_color = "red" if step.drift_type == "convergent" else (
-                "green" if step.drift_type == "divergent" else "yellow"
-            )
-            console.print(
-                f"  #{step.from_index+1} -> #{step.to_index+1}: "
-                f"[{s_color}]{step.drift_type}[/{s_color}] ({step.similarity:.2f})"
-            )
-
-    # Prompt-response alignment.
-    pra = ia.prompt_response_alignment
-    if pra.pairs:
-        a_color = "red" if pra.average_alignment < 0.3 else (
-            "yellow" if pra.average_alignment < 0.5 else "green"
-        )
-        console.print(
-            f"\nPrompt-Response Alignment: [{a_color}]{pra.average_alignment:.2f}[/{a_color}]"
-            f"  ({len(pra.pairs)} pair(s), {pra.low_alignment_count} low)"
-        )
-        for pair in pra.pairs:
-            p_color = "red" if pair.alignment < 0.3 else (
-                "yellow" if pair.alignment < 0.5 else "green"
-            )
-            prompt_short = pair.prompt_text[:50] + "..." if len(pair.prompt_text) > 50 else pair.prompt_text
-            console.print(
-                f'  [dim]#{pair.prompt_index+1}[/dim] "{prompt_short}" '
-                f'-> [{p_color}]{pair.alignment:.2f}[/{p_color}]'
-            )
-
-
-def _format_cost_report_rich(console, cost_report) -> None:
-    """Render cost report section using Rich."""
-    from rich.table import Table
-
-    console.print("\n[bold]Cost Analysis[/bold]")
-
-    # Cost-weighted TER table
-    cost_table = Table(show_header=True, show_edge=True)
-    cost_table.add_column("Metric", style="cyan", width=20)
-    cost_table.add_column("Value", justify="right", width=16)
-
-    cwter = cost_report.cost_ter
-    cost_table.add_row("Cost-Weighted TER", f"{cwter.cost_weighted_ter:.4f}")
-    cost_table.add_row("Raw TER", f"{cwter.raw_ter:.4f}")
-    cost_table.add_row("Total Cost", f"${cwter.total_cost_usd:.4f}")
-    cost_table.add_row("Waste Cost", f"${cwter.waste_cost_usd:.4f}")
-    waste_pct = (cwter.waste_cost_usd / cwter.total_cost_usd * 100) if cwter.total_cost_usd > 0 else 0
-    cost_table.add_row("Waste %", f"{waste_pct:.1f}%")
-    cost_table.add_row("Semantic Density", f"{cost_report.session_density.density_score:.2%}")
-    cost_table.add_row("Redundancy", f"{cost_report.session_density.redundancy_ratio:.2%}")
-
-    console.print(cost_table)
-
-    # Recommendations
-    if cost_report.recommendations:
-        console.print("\n[bold]Recommendations:[/bold]")
-        for rec in cost_report.recommendations:
-            console.print(f"  • {rec}")
-
-
-def _format_overthinking_rich(console, ot) -> None:
-    """Render overthinking analysis section using Rich."""
-    from rich.table import Table
-
-    console.print("\n[bold]Overthinking Analysis[/bold]")
-
-    # Status
-    status_color = "red" if ot.is_overthinking else "green"
-    status_text = "OVERTHINKING DETECTED" if ot.is_overthinking else "Efficient Reasoning"
-    console.print(f"Status: [{status_color}]{status_text}[/{status_color}]")
-
-    # Overthinking metrics table
-    ot_table = Table(show_header=True, show_edge=True)
-    ot_table.add_column("Metric", style="cyan", width=20)
-    ot_table.add_column("Value", justify="right", width=16)
-
-    ot_table.add_row("Total Reasoning", f"{ot.total_reasoning_tokens:,} tokens")
-    ot_table.add_row("Useful", f"{ot.useful_reasoning_tokens:,} tokens")
-    ot_table.add_row("Efficiency", f"{ot.reasoning_efficiency:.0%}")
-    ot_table.add_row("Wasted", f"{ot.wasted_reasoning_tokens:,} tokens")
-
-    if ot.optimal_cutoff_index is not None:
-        ot_table.add_row("Optimal Cutoff", f"Span {ot.optimal_cutoff_index} (of {len(ot.segments)})")
-
-    ot_table.add_row("Recommended Budget", f"{ot.recommended_budget:,} tokens")
-
-    console.print(ot_table)
-
-    # Explanation
-    console.print(f"\n{ot.explanation}")
-
-
-# --- Plain text formatting ---
-
-
-def _format_text(result: TERResult) -> str:
-    """Format TER result as plain text."""
-    waste_pct = (result.waste_tokens / result.total_tokens * 100) if result.total_tokens else 0
-    sid = result.session_id
-    if len(sid) > 20:
-        sid = sid[:8] + "..."
-
-    lines = [
-        f"TER Report: {sid}",
-        "\u2550" * 40,
-        "",
-    ]
-
-    # Headline.
-    cost_str = ""
-    if result.economics:
-        cost_str = f"  |  Cost: ${result.economics.estimated_cost_usd:.2f}"
-        waste_cost = _compute_waste_cost(result)
-        if waste_cost > 0:
-            cost_str += f"  |  Waste $: ${waste_cost:.2f}"
-    lines.append(f"TER: {result.aggregate_ter:.2f}  |  Waste: {waste_pct:.1f}%{cost_str}")
-
-    # Input analysis headline.
-    ia = result.input_analysis
-    if ia is not None:
-        drift = ia.intent_drift
-        pra = ia.prompt_response_alignment
-        ps = ia.prompt_similarity
-        parts = [f"Drift: {drift.overall_trajectory}"]
-        if pra.pairs:
-            parts.append(f"Alignment: {pra.average_alignment:.2f}")
-        if ps.prompt_count >= 2:
-            parts.append(f"Redundancy: {ps.prompt_redundancy_score:.0%}")
-        parts.append(f"User: {ia.token_breakdown.user_ratio:.0%}")
-        lines.append("  |  ".join(parts))
-
-    lines.append("")
-
-    # Phases.
-    lines.append("Phases:     Reasoning  Tool Use  Generation")
-    lines.append(
-        f"            {result.phase_scores.get('reasoning', 0):.2f}"
-        f"       {result.phase_scores.get('tool_use', 0):.2f}"
-        f"      {result.phase_scores.get('generation', 0):.2f}"
-    )
-    lines.append("")
-
-    # Tokens.
-    lines.append(f"Output Tokens: {result.total_tokens:,}  (aligned: {result.aligned_tokens:,}  waste: {result.waste_tokens:,})")
-
-    # Economics.
-    if result.economics is not None:
-        econ = result.economics
-        cache_pct = econ.cache_hit_rate * 100
-        pos = econ.positional
-        g = econ.input_growth
-
-        lines.extend([
-            "",
-            f"Input: {econ.total_input_tokens:,}  Cache Read: {econ.total_cache_read_tokens:,}  Cache Hit: {cache_pct:.1f}%",
-            f"Context Growth: {g.growth_rate:.1f}x over {len(g.turn_input_tokens)} turns"
-            + (" [BLOAT]" if g.context_bloat_detected else (" [WATCH]" if g.is_superlinear else "")),
-            f"Positional TER: {pos.early_ter:.2f} (early) / {pos.mid_ter:.2f} (mid) / {pos.late_ter:.2f} (late)",
-        ])
-
-    # Waste breakdown.
-    rows = _build_waste_breakdown(result)
-    if rows:
-        total_waste = sum(t for _, t, _, _ in rows)
-        cm = result.economics.cost_model if result.economics else CostModel()
-        lines.extend(["", "Waste Breakdown:"])
-        lines.append(f"  {'Source':<24} {'Tokens':>10} {'%':>5} {'Cost':>10} {'Count':>6}")
-        for label, tokens, count, kind in rows:
-            pct = (tokens / total_waste * 100) if total_waste > 0 else 0
-            rate = cm.output_rate if kind == "output" else cm.input_rate
-            row_cost = float(tokens) * rate / 1_000_000
-            lines.append(
-                f"  {label:<24} {tokens:>10,} {pct:>4.0f}% ${row_cost:>8.4f} {count:>6}"
-            )
-        total_cost = _compute_waste_cost(result)
-        lines.append(f"  {'Total':<24} {total_waste:>10,}  100% ${total_cost:>8.4f}")
-
-    # Input analysis.
-    if result.input_analysis is not None:
-        lines.extend(_format_input_analysis_text(result.input_analysis))
-
-    return "\n".join(lines)
-
-
-def _format_comparison_text(results: list[TERResult]) -> str:
-    """Format comparison as a plain text table."""
-    lines = [
-        "TER Comparison",
-        "\u2550" * 40,
-        "",
-        f"  {'#':<3} {'Session':<12} {'TER':<6} {'Waste%':<8} {'Cache%':<8} {'Cost':<10} {'Waste $':<10} {'Patterns':<8}",
-    ]
-
-    for i, r in enumerate(results, 1):
-        sid = r.session_id[:12] if len(r.session_id) <= 12 else r.session_id[:8] + "..."
-        pattern_count = len(r.waste_patterns) if r.waste_patterns else 0
-        waste_pct = (r.waste_tokens / r.total_tokens * 100) if r.total_tokens else 0
-        cache_str = ""
-        cost_str = ""
-        waste_cost_str = ""
-        if r.economics:
-            cache_pct = r.economics.cache_hit_rate * 100
-            cache_str = f"{cache_pct:.0f}%"
-            cost_str = f"${r.economics.estimated_cost_usd:.2f}"
-            wc = _compute_waste_cost(r)
-            waste_cost_str = f"${wc:.2f}"
-        lines.append(
-            f"  {i:<3} {sid:<12} {r.aggregate_ter:<6.2f} "
-            f"{waste_pct:<8.1f} {cache_str:<8} {cost_str:<10} {waste_cost_str:<10} {pattern_count:<8}"
-        )
-
-    if results:
-        avg_ter = sum(r.aggregate_ter for r in results) / len(results)
-        total_cost = sum(r.economics.estimated_cost_usd for r in results if r.economics)
-        total_waste_cost = sum(_compute_waste_cost(r) for r in results)
-        lines.extend(["", f"Average TER: {avg_ter:.2f}  |  Total Cost: ${total_cost:.2f}  |  Total Waste: ${total_waste_cost:.2f}"])
-
-    return "\n".join(lines)
-
-
-def _format_input_analysis_text(ia: InputAnalysis) -> list[str]:
-    """Format input analysis as plain text lines."""
-    bd = ia.token_breakdown
-    ps = ia.prompt_similarity
-
-    lines = [
-        "",
-        "Input Analysis:",
-        f"  User Tokens:   {bd.total_user_tokens:,} (prompt: {bd.user_input_tokens:,}, tool results: {bd.user_result_tokens:,})",
-        f"  Model Tokens:  {bd.total_model_tokens:,} (reasoning: {bd.model_reasoning_tokens:,}, tool: {bd.model_tool_tokens:,}, generation: {bd.model_generation_tokens:,})",
-        f"  User Ratio:    {bd.user_ratio:.1%}",
-    ]
-
-    if ps.prompt_count >= 2:
-        lines.append(f"  Prompt Redundancy: {ps.prompt_redundancy_score:.0%} ({ps.prompt_count} prompts, {len(ps.similar_pairs)} similar pair(s))")
-        for pair in ps.similar_pairs[:5]:
-            a_text = pair.prompt_a_text[:40] + "..." if len(pair.prompt_a_text) > 40 else pair.prompt_a_text
-            b_text = pair.prompt_b_text[:40] + "..." if len(pair.prompt_b_text) > 40 else pair.prompt_b_text
-            lines.append(f'    #{pair.prompt_a_index+1} "{a_text}" ~ #{pair.prompt_b_index+1} "{b_text}" ({pair.similarity:.2f})')
-
-    # Intent drift.
-    drift = ia.intent_drift
-    if drift.steps:
-        lines.append(f"  Intent Drift: {drift.overall_trajectory} (avg similarity: {drift.average_drift:.2f})")
-        for step in drift.steps:
-            lines.append(f"    #{step.from_index+1} -> #{step.to_index+1}: {step.drift_type} ({step.similarity:.2f})")
-
-    # Prompt-response alignment.
-    pra = ia.prompt_response_alignment
-    if pra.pairs:
-        lines.append(f"  Prompt-Response Alignment: {pra.average_alignment:.2f} ({len(pra.pairs)} pair(s), {pra.low_alignment_count} low)")
-        for pair in pra.pairs:
-            prompt_short = pair.prompt_text[:50] + "..." if len(pair.prompt_text) > 50 else pair.prompt_text
-            marker = " [LOW]" if pair.alignment < 0.3 else ""
-            lines.append(f'    #{pair.prompt_index+1} "{prompt_short}" -> {pair.alignment:.2f}{marker}')
-
-    return lines
-
-
-# --- JSON formatting ---
-
-
-def _format_json(result: TERResult) -> str:
-    """Format TER result as JSON."""
-    data = _ter_result_to_dict(result)
-    return json.dumps(data, indent=2)
-
-
-def _format_comparison_json(results: list[TERResult]) -> str:
-    """Format comparison as JSON."""
-    data = {
-        "sessions": [_ter_result_to_dict(r) for r in results],
-        "average_ter": round(
-            sum(r.aggregate_ter for r in results) / len(results), 4
-        ) if results else 0.0,
-    }
-    return json.dumps(data, indent=2)
-
-
-def _ter_result_to_dict(result: TERResult) -> dict:
-    """Convert TERResult to a JSON-serializable dict."""
-    data: dict = {
-        "session_id": result.session_id,
-        "aggregate_ter": result.aggregate_ter,
-        "raw_ratio": result.raw_ratio,
-        "phase_scores": result.phase_scores,
-        "total_tokens": result.total_tokens,
-        "aligned_tokens": result.aligned_tokens,
-        "waste_tokens": result.waste_tokens,
-    }
-    if result.intent:
-        data["intent_confidence"] = result.intent.confidence
-    if result.waste_patterns:
-        data["waste_patterns"] = [
-            {
-                "type": wp.pattern_type,
-                "start_position": wp.start_position,
-                "end_position": wp.end_position,
-                "spans_involved": wp.spans_involved,
-                "tokens_wasted": wp.tokens_wasted,
-                "description": wp.description,
-            }
-            for wp in result.waste_patterns
-        ]
-    if result.classified_spans:
-        summary = summarize_waste(result.classified_spans, result.waste_patterns or [])
-        data["waste_summary"] = {
-            "total_waste_tokens": summary["total_waste_tokens"],
-            "waste_by_category": summary["waste_by_category"],
-            "waste_by_phase": summary["waste_by_phase"],
-            "top_patterns": summary["top_patterns"],
-            "explanation": summary["explanation"],
-        }
-    rows = _build_waste_breakdown(result)
-    if rows:
-        total_waste = sum(t for _, t, _, _ in rows)
-        cm = result.economics.cost_model if result.economics else CostModel()
-        sources = []
-        for label, tokens, count, kind in rows:
-            rate = cm.output_rate if kind == "output" else cm.input_rate
-            row_cost = float(tokens) * rate / 1_000_000
-            sources.append({
-                "source": label,
-                "tokens": tokens,
-                "percentage": round(tokens / total_waste * 100, 1) if total_waste > 0 else 0,
-                "cost_usd": round(row_cost, 6),
-                "count": count,
-                "pricing": kind,
-            })
-        data["waste_breakdown"] = {
-            "sources": sources,
-            "total_tokens": total_waste,
-            "total_cost_usd": round(_compute_waste_cost(result), 6),
-        }
-    if result.economics is not None:
-        econ = result.economics
-        data["economics"] = {
-            "total_input_tokens": econ.total_input_tokens,
-            "total_output_tokens": econ.total_output_tokens,
-            "total_cache_creation_tokens": econ.total_cache_creation_tokens,
-            "total_cache_read_tokens": econ.total_cache_read_tokens,
-            "input_output_ratio": econ.input_output_ratio,
-            "cache_hit_rate": econ.cache_hit_rate,
-            "estimated_cost_usd": econ.estimated_cost_usd,
-            "estimated_waste_cost_usd": econ.estimated_waste_cost_usd,
-            "cost_model": {
-                "input_rate": econ.cost_model.input_rate,
-                "output_rate": econ.cost_model.output_rate,
-                "cache_read_rate": econ.cost_model.cache_read_rate,
-                "cache_write_rate": econ.cost_model.cache_write_rate,
-            },
-            "positional": {
-                "early_ter": econ.positional.early_ter,
-                "mid_ter": econ.positional.mid_ter,
-                "late_ter": econ.positional.late_ter,
-                "early_span_count": econ.positional.early_span_count,
-                "mid_span_count": econ.positional.mid_span_count,
-                "late_span_count": econ.positional.late_span_count,
-            },
-            "input_growth": {
-                "turn_input_tokens": econ.input_growth.turn_input_tokens,
-                "growth_rate": econ.input_growth.growth_rate,
-                "is_superlinear": econ.input_growth.is_superlinear,
-                "context_bloat_detected": econ.input_growth.context_bloat_detected,
-            },
-        }
-    if result.input_analysis is not None:
-        ia = result.input_analysis
-        bd = ia.token_breakdown
-        ps = ia.prompt_similarity
-        data["input_analysis"] = {
-            "token_breakdown": {
-                "user_input_tokens": bd.user_input_tokens,
-                "user_result_tokens": bd.user_result_tokens,
-                "model_reasoning_tokens": bd.model_reasoning_tokens,
-                "model_tool_tokens": bd.model_tool_tokens,
-                "model_generation_tokens": bd.model_generation_tokens,
-                "total_user_tokens": bd.total_user_tokens,
-                "total_model_tokens": bd.total_model_tokens,
-                "user_ratio": bd.user_ratio,
-            },
-            "prompt_similarity": {
-                "prompt_count": ps.prompt_count,
-                "prompt_redundancy_score": ps.prompt_redundancy_score,
-                "similar_pairs": [
-                    {
-                        "prompt_a_index": p.prompt_a_index,
-                        "prompt_b_index": p.prompt_b_index,
-                        "similarity": p.similarity,
-                        "prompt_a_text": p.prompt_a_text,
-                        "prompt_b_text": p.prompt_b_text,
-                    }
-                    for p in ps.similar_pairs
-                ],
-            },
-            "intent_drift": {
-                "overall_trajectory": ia.intent_drift.overall_trajectory,
-                "average_drift": ia.intent_drift.average_drift,
-                "steps": [
-                    {
-                        "from_index": s.from_index,
-                        "to_index": s.to_index,
-                        "similarity": s.similarity,
-                        "drift_type": s.drift_type,
-                    }
-                    for s in ia.intent_drift.steps
-                ],
-            },
-            "prompt_response_alignment": {
-                "average_alignment": ia.prompt_response_alignment.average_alignment,
-                "low_alignment_count": ia.prompt_response_alignment.low_alignment_count,
-                "pairs": [
-                    {
-                        "prompt_index": p.prompt_index,
-                        "prompt_text": p.prompt_text,
-                        "response_text": p.response_text,
-                        "alignment": p.alignment,
-                    }
-                    for p in ia.prompt_response_alignment.pairs
-                ],
-            },
-        }
-    if result.cost_report is not None:
-        cr = result.cost_report
-        data["cost_report"] = {
-            "cost_weighted_ter": cr.cost_ter.cost_weighted_ter,
-            "raw_ter": cr.cost_ter.raw_ter,
-            "total_cost_usd": cr.cost_ter.total_cost_usd,
-            "waste_cost_usd": cr.cost_ter.waste_cost_usd,
-            "savings_if_perfect": cr.cost_ter.savings_if_perfect,
-            "semantic_density": {
-                "density_score": cr.session_density.density_score,
-                "vocabulary_richness": cr.session_density.vocabulary_richness,
-                "information_entropy": cr.session_density.information_entropy,
-                "redundancy_ratio": cr.session_density.redundancy_ratio,
-            },
-            "recommendations": cr.recommendations,
-            "model_tier": cr.model_tier,
-        }
-    if result.overthinking_result is not None:
-        ot = result.overthinking_result
-        data["overthinking_analysis"] = {
-            "is_overthinking": ot.is_overthinking,
-            "total_reasoning_tokens": ot.total_reasoning_tokens,
-            "useful_reasoning_tokens": ot.useful_reasoning_tokens,
-            "wasted_reasoning_tokens": ot.wasted_reasoning_tokens,
-            "reasoning_efficiency": ot.reasoning_efficiency,
-            "optimal_cutoff_index": ot.optimal_cutoff_index,
-            "recommended_budget": ot.recommended_budget,
-            "explanation": ot.explanation,
-        }
-    return data
-
-
 def _format_waste_pattern(wp: WastePattern) -> str:
-    """Format a single waste pattern for text display."""
     pos = (
         f"spans {wp.start_position}-{wp.end_position}"
         if wp.start_position != wp.end_position
diff --git a/src/ter_calculator/formatter_json.py b/src/ter_calculator/formatter_json.py
new file mode 100644
index 0000000..b02a388
--- /dev/null
+++ b/src/ter_calculator/formatter_json.py
@@ -0,0 +1,221 @@
+"""JSON formatting for TER results."""
+
+from __future__ import annotations
+
+import json
+
+from .models import TERResult
+from .waste import summarize_waste
+
+
+def format_json(result: TERResult) -> str:
+    """Serialize a single TER result as 2-space-indented JSON."""
+    return json.dumps(ter_result_to_dict(result), indent=2)
+
+
+def format_comparison_json(results: list[TERResult]) -> str:
+    """Serialize several TER results plus their mean aggregate TER as JSON."""
+    sessions = [ter_result_to_dict(r) for r in results]
+    # An empty comparison reports 0.0 rather than dividing by zero.
+    average_ter = 0.0
+    if results:
+        average_ter = round(sum(r.aggregate_ter for r in results) / len(results), 4)
+    return json.dumps({"sessions": sessions, "average_ter": average_ter}, indent=2)
+
+
+def format_grouped_json(
+    parent_result: TERResult,
+    subagent_results: list[TERResult],
+) -> str:
+    """Serialize a parent session, its subagents and the group totals as JSON."""
+    from .formatter import _compute_group_aggregates
+
+    totals = _compute_group_aggregates([parent_result] + subagent_results)
+    group_info = {
+        "parent_session_id": parent_result.session_id,
+        "subagent_count": len(subagent_results),
+    }
+    # Merged last, so aggregate values win on any key collision.
+    group_info.update(totals)
+
+    parent_doc = ter_result_to_dict(parent_result)
+    subagent_docs = [ter_result_to_dict(r) for r in subagent_results]
+    payload = {"group": group_info, "parent": parent_doc, "subagents": subagent_docs}
+    return json.dumps(payload, indent=2)
+
+
+def ter_result_to_dict(result: TERResult) -> dict:
+    """Build the plain dict that the JSON formatters dump for one TERResult."""
+    from .formatter import _build_waste_breakdown, _compute_waste_cost
+    from .models import CostModel
+    data: dict = {
+        "session_id": result.session_id,
+        "aggregate_ter": result.aggregate_ter,
+        "raw_ratio": result.raw_ratio,
+        "phase_scores": result.phase_scores,
+        "total_tokens": result.total_tokens,
+        "aligned_tokens": result.aligned_tokens,
+        "waste_tokens": result.waste_tokens,
+    }
+    if result.intent:
+        data["intent_confidence"] = result.intent.confidence
+    if result.waste_patterns:
+        data["waste_patterns"] = [
+            {
+                "type": wp.pattern_type,
+                "start_position": wp.start_position,
+                "end_position": wp.end_position,
+                "spans_involved": wp.spans_involved,
+                "tokens_wasted": wp.tokens_wasted,
+                "description": wp.description,
+            }
+            for wp in result.waste_patterns
+        ]
+    if result.classified_spans:
+        summary = summarize_waste(result.classified_spans, result.waste_patterns or [])
+        data["waste_summary"] = {
+            "total_waste_tokens": summary["total_waste_tokens"],
+            "waste_by_category": summary["waste_by_category"],
+            "waste_by_phase": summary["waste_by_phase"],
+            "top_patterns": summary["top_patterns"],
+            "explanation": summary["explanation"],
+        }
+    rows = _build_waste_breakdown(result)  # iterable of (label, tokens, count, kind)
+    if rows:
+        total_waste = sum(t for _, t, _, _ in rows)
+        cm = result.economics.cost_model if result.economics else CostModel()  # default model when economics is missing
+        sources = []
+        for label, tokens, count, kind in rows:
+            rate = cm.output_rate if kind == "output" else cm.input_rate  # every non-"output" kind uses the input rate
+            row_cost = float(tokens) * rate / 1_000_000  # rate is presumably USD per million tokens -- TODO confirm
+            sources.append({
+                "source": label,
+                "tokens": tokens,
+                "percentage": round(tokens / total_waste * 100, 1) if total_waste > 0 else 0,
+                "cost_usd": round(row_cost, 6),
+                "count": count,
+                "pricing": kind,
+            })
+        data["waste_breakdown"] = {
+            "sources": sources,
+            "total_tokens": total_waste,
+            "total_cost_usd": round(_compute_waste_cost(result), 6),
+        }
+    if result.economics is not None:
+        econ = result.economics
+        data["economics"] = {
+            "total_input_tokens": econ.total_input_tokens,
+            "total_output_tokens": econ.total_output_tokens,
+            "total_cache_creation_tokens": econ.total_cache_creation_tokens,
+            "total_cache_read_tokens": econ.total_cache_read_tokens,
+            "input_output_ratio": econ.input_output_ratio,
+            "cache_hit_rate": econ.cache_hit_rate,
+            "estimated_cost_usd": econ.estimated_cost_usd,
+            "estimated_waste_cost_usd": econ.estimated_waste_cost_usd,
+            "cost_model": {
+                "input_rate": econ.cost_model.input_rate,
+                "output_rate": econ.cost_model.output_rate,
+                "cache_read_rate": econ.cost_model.cache_read_rate,
+                "cache_write_rate": econ.cost_model.cache_write_rate,
+            },
+            "positional": {
+                "early_ter": econ.positional.early_ter,
+                "mid_ter": econ.positional.mid_ter,
+                "late_ter": econ.positional.late_ter,
+                "early_span_count": econ.positional.early_span_count,
+                "mid_span_count": econ.positional.mid_span_count,
+                "late_span_count": econ.positional.late_span_count,
+            },
+            "input_growth": {
+                "turn_input_tokens": econ.input_growth.turn_input_tokens,
+                "growth_rate": econ.input_growth.growth_rate,
+                "is_superlinear": econ.input_growth.is_superlinear,
+                "context_bloat_detected": econ.input_growth.context_bloat_detected,
+            },
+        }
+    if result.input_analysis is not None:
+        ia = result.input_analysis
+        bd = ia.token_breakdown
+        ps = ia.prompt_similarity
+        data["input_analysis"] = {
+            "token_breakdown": {
+                "user_input_tokens": bd.user_input_tokens,
+                "user_result_tokens": bd.user_result_tokens,
+                "model_reasoning_tokens": bd.model_reasoning_tokens,
+                "model_tool_tokens": bd.model_tool_tokens,
+                "model_generation_tokens": bd.model_generation_tokens,
+                "total_user_tokens": bd.total_user_tokens,
+                "total_model_tokens": bd.total_model_tokens,
+                "user_ratio": bd.user_ratio,
+            },
+            "prompt_similarity": {
+                "prompt_count": ps.prompt_count,
+                "prompt_redundancy_score": ps.prompt_redundancy_score,
+                "similar_pairs": [
+                    {
+                        "prompt_a_index": p.prompt_a_index,
+                        "prompt_b_index": p.prompt_b_index,
+                        "similarity": p.similarity,
+                        "prompt_a_text": p.prompt_a_text,
+                        "prompt_b_text": p.prompt_b_text,
+                    }
+                    for p in ps.similar_pairs
+                ],
+            },
+            "intent_drift": {
+                "overall_trajectory": ia.intent_drift.overall_trajectory,
+                "average_drift": ia.intent_drift.average_drift,
+                "steps": [
+                    {
+                        "from_index": s.from_index,
+                        "to_index": s.to_index,
+                        "similarity": s.similarity,
+                        "drift_type": s.drift_type,
+                    }
+                    for s in ia.intent_drift.steps
+                ],
+            },
+            "prompt_response_alignment": {
+                "average_alignment": ia.prompt_response_alignment.average_alignment,
+                "low_alignment_count": ia.prompt_response_alignment.low_alignment_count,
+                "pairs": [
+                    {
+                        "prompt_index": p.prompt_index,
+                        "prompt_text": p.prompt_text,
+                        "response_text": p.response_text,
+                        "alignment": p.alignment,
+                    }
+                    for p in ia.prompt_response_alignment.pairs
+                ],
+            },
+        }
+    if result.cost_report is not None:
+        cr = result.cost_report
+        data["cost_report"] = {
+            "cost_weighted_ter": cr.cost_ter.cost_weighted_ter,
+            "raw_ter": cr.cost_ter.raw_ter,
+            "total_cost_usd": cr.cost_ter.total_cost_usd,
+            "waste_cost_usd": cr.cost_ter.waste_cost_usd,
+            "savings_if_perfect": cr.cost_ter.savings_if_perfect,
+            "semantic_density": {
+                "density_score": cr.session_density.density_score,
+                "vocabulary_richness": cr.session_density.vocabulary_richness,
+                "information_entropy": cr.session_density.information_entropy,
+                "redundancy_ratio": cr.session_density.redundancy_ratio,
+            },
+            "recommendations": cr.recommendations,
+            "model_tier": cr.model_tier,
+        }
+    if result.overthinking_result is not None:
+        ot = result.overthinking_result
+        data["overthinking_analysis"] = {
+            "is_overthinking": ot.is_overthinking,
+            "total_reasoning_tokens": ot.total_reasoning_tokens,
+            "useful_reasoning_tokens": ot.useful_reasoning_tokens,
+            "wasted_reasoning_tokens": ot.wasted_reasoning_tokens,
+            "reasoning_efficiency": ot.reasoning_efficiency,
+            "optimal_cutoff_index": ot.optimal_cutoff_index,
+            "recommended_budget": ot.recommended_budget,
+            "explanation": ot.explanation,
+        }
+    return data
diff --git a/src/ter_calculator/formatter_rich.py b/src/ter_calculator/formatter_rich.py
new file mode 100644
index 0000000..8db3464
--- /dev/null
+++ b/src/ter_calculator/formatter_rich.py
@@ -0,0 +1,476 @@
+"""Rich terminal formatting for TER results."""
+
+from __future__ import annotations
+
+import io
+
+from .models import CostModel, InputAnalysis, TERResult
+from .rich_components import ter_color as _ter_color
+
+
+def format_rich(result: TERResult) -> str:
+    """Render one TER result as Rich-styled terminal text and return it as a string."""
+    from rich.console import Console
+    from rich.panel import Panel
+    from rich.table import Table
+    from rich.text import Text
+
+    from .formatter import _compute_waste_cost  # the only .formatter helper this function uses
+
+    buf = io.StringIO()
+    console = Console(file=buf, force_terminal=True, width=72)  # output is captured in buf
+
+    ter_text = Text(f"{result.aggregate_ter:.2f}", style=_ter_color(result.aggregate_ter))
+    waste_pct = (result.waste_tokens / result.total_tokens * 100) if result.total_tokens else 0
+    sid = result.session_id
+    if len(sid) > 20:  # abbreviate long ids; sid becomes the panel title below
+        sid = sid[:8] + "..."
+
+    line1_parts: list = [("TER: ", "bold"), ter_text]
+    line1_parts.append(("  |  ", ""))
+    line1_parts.append((f"Waste: {waste_pct:.1f}%", "red" if waste_pct > 10 else ""))
+    if result.economics:
+        line1_parts.append(("  |  ", ""))
+        line1_parts.append((f"Cost: ${result.economics.estimated_cost_usd:.2f}", ""))
+        waste_cost = _compute_waste_cost(result)
+        if waste_cost > 0:
+            line1_parts.append(("  |  ", ""))
+            line1_parts.append((f"Waste $: ${waste_cost:.2f}", "red"))
+
+    ia = result.input_analysis
+    if ia is not None:
+        drift = ia.intent_drift
+        pra = ia.prompt_response_alignment
+        ps = ia.prompt_similarity
+        bd = ia.token_breakdown
+
+        drift_colors = {
+            "convergent": "red", "divergent": "green",
+            "stable": "green", "mixed": "yellow",
+        }
+        d_color = drift_colors.get(drift.overall_trajectory, "")  # unknown trajectories are left unstyled
+        a_color = "red" if pra.average_alignment < 0.3 else (
+            "yellow" if pra.average_alignment < 0.5 else "green"
+        )
+        r_color = "red" if ps.prompt_redundancy_score > 0.5 else (
+            "yellow" if ps.prompt_redundancy_score > 0 else "green"
+        )
+
+        line2_parts: list = [
+            ("Drift: ", "bold"),
+            (f"{drift.overall_trajectory}", d_color),
+        ]
+        if pra.pairs:
+            line2_parts.append(("  |  ", ""))
+            line2_parts.append(("Alignment: ", "bold"))
+            line2_parts.append((f"{pra.average_alignment:.2f}", a_color))
+        if ps.prompt_count >= 2:
+            line2_parts.append(("  |  ", ""))
+            line2_parts.append(("Redundancy: ", "bold"))
+            line2_parts.append((f"{ps.prompt_redundancy_score:.0%}", r_color))
+        line2_parts.append(("  |  ", ""))
+        line2_parts.append((f"User: {bd.user_ratio:.0%}", "dim"))
+
+        header = Text.assemble(
+            *line1_parts, ("\n", ""), *line2_parts,
+        )
+    else:  # no input analysis: the header is the single summary line
+        header = Text.assemble(*line1_parts)
+
+    console.print(Panel(header, title=sid, expand=False))
+
+    table = Table(show_header=True, show_edge=True)
+    table.add_column("Phase", style="bold", width=12)
+    table.add_column("TER", justify="right", width=6)
+    table.add_column("", width=3)
+    table.add_column("Metric", style="bold", width=14)
+    table.add_column("Value", justify="right", width=12)
+
+    phases = [
+        ("Reasoning", result.phase_scores.get("reasoning", 0)),
+        ("Tool Use", result.phase_scores.get("tool_use", 0)),
+        ("Generation", result.phase_scores.get("generation", 0)),
+    ]
+    right_rows = [
+        ("Output Tokens", f"{result.total_tokens:,}"),
+        ("Aligned", f"{result.aligned_tokens:,}"),
+        ("Waste", f"{result.waste_tokens:,}"),
+    ]
+
+    for i in range(3):  # each table row pairs one phase score with one token metric
+        p_name, p_score = phases[i]
+        p_color = _ter_color(p_score)
+        r_label, r_value = right_rows[i]
+        table.add_row(
+            p_name,
+            f"[{p_color}]{p_score:.2f}[/{p_color}]",
+            "",
+            r_label,
+            r_value,
+        )
+    console.print(table)
+
+    if result.economics is not None:
+        econ = result.economics
+        cache_pct = econ.cache_hit_rate * 100
+        cache_color = "green" if cache_pct >= 50 else "yellow" if cache_pct >= 20 else "red"
+
+        econ_table = Table(show_header=True, show_edge=True)
+        econ_table.add_column("Economics", style="bold", width=18)
+        econ_table.add_column("", justify="right", width=12)
+        econ_table.add_column("", width=3)
+        econ_table.add_column("Context", style="bold", width=14)
+        econ_table.add_column("", justify="right", width=12)
+
+        pos = econ.positional
+        g = econ.input_growth
+        bloat_str = "[red]YES[/red]" if g.context_bloat_detected else (
+            "[yellow]WATCH[/yellow]" if g.is_superlinear else "[green]NO[/green]"
+        )
+
+        left_rows = [
+            ("Input Tokens", f"{econ.total_input_tokens:,}"),
+            ("Cache Read", f"{econ.total_cache_read_tokens:,}"),
+            ("Cache Hit Rate", f"[{cache_color}]{cache_pct:.1f}%[/{cache_color}]"),
+        ]
+        right_rows_e = [
+            ("Growth", f"{g.growth_rate:.1f}x ({len(g.turn_input_tokens)} turns)"),
+            ("Bloat", bloat_str),
+            ("Positional", f"{pos.early_ter:.2f} / {pos.mid_ter:.2f} / {pos.late_ter:.2f}"),
+        ]
+
+        for i in range(3):  # economics on the left, context metrics on the right
+            l_label, l_value = left_rows[i]
+            r_label, r_value = right_rows_e[i]
+            econ_table.add_row(l_label, l_value, "", r_label, r_value)
+        console.print(econ_table)
+
+    _format_waste_breakdown_rich(console, result)
+
+    if result.input_analysis is not None:
+        _format_input_analysis_rich(console, result.input_analysis)
+
+    if result.cost_report is not None:
+        _format_cost_report_rich(console, result.cost_report)
+
+    if result.overthinking_result is not None:
+        _format_overthinking_rich(console, result.overthinking_result)
+
+    return buf.getvalue().rstrip()  # trailing whitespace/newlines are stripped
+
+
+def _format_waste_breakdown_rich(console, result: TERResult) -> None:
+    """Print the waste-by-source table to ``console``; print nothing if there are no rows."""
+    from rich.table import Table
+    from .formatter import _build_waste_breakdown, _compute_waste_cost
+
+    breakdown = _build_waste_breakdown(result)
+    if not breakdown:
+        return
+
+    waste_total = sum(tokens for _, tokens, _, _ in breakdown)
+    # Without economics, pricing comes from a default-constructed CostModel.
+    pricing = result.economics.cost_model if result.economics else CostModel()
+
+    table = Table(show_header=True, show_edge=True, title="Waste Breakdown")
+    table.add_column("Source", style="bold", width=22)
+    for heading, col_width in (("Tokens", 10), ("%", 6), ("Cost", 8)):
+        table.add_column(heading, justify="right", width=col_width)
+    table.add_column("Count", justify="right", width=6, style="dim")
+
+    for label, tokens, count, kind in breakdown:
+        share = (tokens / waste_total * 100) if waste_total > 0 else 0
+        # Only "output" rows use the output rate; every other kind uses the input rate.
+        per_million = pricing.output_rate if kind == "output" else pricing.input_rate
+        cost = float(tokens) * per_million / 1_000_000
+        cells = (label, f"{tokens:,}", f"{share:.0f}%", f"${cost:.4f}", str(count))
+        table.add_row(*cells)
+
+    table.add_section()
+    grand_cost = _compute_waste_cost(result)
+    footer = (
+        "[bold]Total[/bold]",
+        f"[bold]{waste_total:,}[/bold]",
+        "[bold]100%[/bold]",
+        f"[bold]${grand_cost:.4f}[/bold]",
+        "",
+    )
+    table.add_row(*footer)
+
+    # The table goes to the caller's console; nothing is returned.
+    console.print(table)
+
+
+def format_comparison_rich(results: list[TERResult]) -> str:
+    """Render a one-row-per-session comparison table as Rich-styled text."""
+    from rich.console import Console
+    from rich.table import Table
+    from .formatter import _compute_waste_cost
+
+    out = io.StringIO()
+    console = Console(file=out, force_terminal=True, width=90)
+
+    table = Table(title="TER Comparison", show_header=True)
+    table.add_column("#", justify="right", style="dim")
+    table.add_column("Session", style="bold")
+    for heading in ("TER", "Waste%", "Cache%", "Cost", "Waste $", "Patterns"):
+        table.add_column(heading, justify="right")
+
+    for row_no, res in enumerate(results, 1):
+        ter_style = _ter_color(res.aggregate_ter)
+        n_patterns = len(res.waste_patterns) if res.waste_patterns else 0
+        wasted_share = (res.waste_tokens / res.total_tokens * 100) if res.total_tokens else 0
+        # The three economics cells stay blank when the session has no economics.
+        cache_cell = cost_cell = waste_cost_cell = ""
+        econ = res.economics
+        if econ:
+            cache_cell = f"{econ.cache_hit_rate * 100:.0f}%"
+            cost_cell = f"${econ.estimated_cost_usd:.2f}"
+            waste_cost_cell = f"[red]${_compute_waste_cost(res):.2f}[/red]"
+        # Ids longer than 20 characters are cut to their first 8 plus "...".
+        label = res.session_id if len(res.session_id) <= 20 else res.session_id[:8] + "..."
+        table.add_row(
+            str(row_no),
+            label,
+            f"[{ter_style}]{res.aggregate_ter:.2f}[/{ter_style}]",
+            f"{wasted_share:.1f}%",
+            cache_cell,
+            cost_cell,
+            waste_cost_cell,
+            str(n_patterns),
+        )
+
+    console.print(table)
+
+    if not results:
+        return out.getvalue().rstrip()
+
+    mean_ter = sum(res.aggregate_ter for res in results) / len(results)
+    spend = sum(res.economics.estimated_cost_usd for res in results if res.economics)
+    wasted_spend = sum(_compute_waste_cost(res) for res in results)
+    mean_style = _ter_color(mean_ter)
+    console.print(
+        f"\nAverage TER: [{mean_style}]{mean_ter:.2f}[/{mean_style}]"
+        f"  |  Total Cost: ${spend:.2f}"
+        f"  |  Total Waste: [red]${wasted_spend:.2f}[/red]"
+    )
+
+    return out.getvalue().rstrip()
+
+
+def format_grouped_rich(
+    parent_result: TERResult,
+    subagent_results: list[TERResult],
+) -> str:
+    """Render a parent session and its subagents as a Rich panel plus table."""
+    from rich.console import Console
+    from rich.panel import Panel
+    from rich.table import Table
+    from rich.text import Text
+    from .formatter import _compute_group_aggregates, _compute_waste_cost
+
+    totals = _compute_group_aggregates([parent_result] + subagent_results)
+    group_ter = totals["weighted_ter"]
+    group_waste_pct = totals["waste_pct"]
+
+    out = io.StringIO()
+    console = Console(file=out, force_terminal=True, width=90)
+
+    # Ids longer than 20 characters are cut to their first 8 plus "...".
+    group_id = parent_result.session_id
+    if len(group_id) > 20:
+        group_id = group_id[:8] + "..."
+
+    sep = ("  |  ", "")
+    header = Text.assemble(
+        ("TER: ", "bold"),
+        Text(f"{group_ter:.2f}", style=_ter_color(group_ter)),
+        sep,
+        (f"Waste: {group_waste_pct:.1f}%", "red" if group_waste_pct > 10 else ""),
+        sep,
+        (f"Cost: ${totals['total_cost_usd']:.2f}", ""),
+        sep,
+        (f"Waste $: ${totals['total_waste_cost_usd']:.2f}", "red"),
+        ("\n", ""),
+        (f"Sessions: 1 parent + {len(subagent_results)} subagent(s)", "dim"),
+        sep,
+        (f"Tokens: {totals['total_tokens']:,}", "dim"),
+    )
+    console.print(Panel(header, title=f"Group: {group_id}", expand=False))
+
+    table = Table(show_header=True, title="Session Breakdown")
+    table.add_column("Role", width=10)
+    table.add_column("Session", width=14)
+    for heading, col_width in (
+        ("TER", 6), ("Waste%", 7), ("Tokens", 10),
+        ("Cost", 8), ("Waste $", 8), ("Patterns", 8),
+    ):
+        table.add_column(heading, justify="right", width=col_width)
+
+    # Parent first, then each subagent, one row per session.
+    for role, res in [("parent", parent_result)] + [("agent", r) for r in subagent_results]:
+        style = _ter_color(res.aggregate_ter)
+        wasted_share = (res.waste_tokens / res.total_tokens * 100) if res.total_tokens else 0
+        cost_cell = f"${res.economics.estimated_cost_usd:.2f}" if res.economics else ""
+        waste_cost = _compute_waste_cost(res)
+        waste_cell = f"[red]${waste_cost:.2f}[/red]" if waste_cost > 0 else ""
+        n_patterns = len(res.waste_patterns) if res.waste_patterns else 0
+        short_id = res.session_id
+        if len(short_id) > 14:
+            short_id = short_id[:8] + "..."
+        table.add_row(
+            role, short_id,
+            f"[{style}]{res.aggregate_ter:.2f}[/{style}]",
+            f"{wasted_share:.1f}%",
+            f"{res.total_tokens:,}",
+            cost_cell, waste_cell, str(n_patterns),
+        )
+
+    table.add_section()
+    total_style = _ter_color(group_ter)
+    table.add_row(
+        "[bold]Total[/bold]", "",
+        f"[bold][{total_style}]{group_ter:.2f}[/{total_style}][/bold]",
+        f"[bold]{group_waste_pct:.1f}%[/bold]",
+        f"[bold]{totals['total_tokens']:,}[/bold]",
+        f"[bold]${totals['total_cost_usd']:.2f}[/bold]",
+        f"[bold][red]${totals['total_waste_cost_usd']:.2f}[/red][/bold]",
+        "",
+    )
+    console.print(table)
+
+    return out.getvalue().rstrip()
+
+
+def _format_input_analysis_rich(console, ia: InputAnalysis) -> None:
+    """Print the token-origin table and the prompt redundancy, intent drift and alignment sections to ``console``."""
+    from rich.markup import escape  # prompt text is user input and may itself contain "[...]" sequences
+    from rich.table import Table
+
+    bd = ia.token_breakdown
+    ps = ia.prompt_similarity
+
+    console.print("\n[bold]Input Analysis[/bold]")
+    tb = Table(show_header=True, show_edge=True)
+    tb.add_column("Origin", style="bold", width=14)
+    tb.add_column("Category", width=16)
+    tb.add_column("Tokens", justify="right", width=10)
+
+    tb.add_row("User", "Prompt Text", f"{bd.user_input_tokens:,}")
+    tb.add_row("User", "Tool Results", f"{bd.user_result_tokens:,}")
+    tb.add_row("Model", "Reasoning", f"{bd.model_reasoning_tokens:,}")
+    tb.add_row("Model", "Tool Calls", f"{bd.model_tool_tokens:,}")
+    tb.add_row("Model", "Generation", f"{bd.model_generation_tokens:,}")
+    tb.add_section()
+    tb.add_row("[bold]User Total[/bold]", "", f"[bold]{bd.total_user_tokens:,}[/bold]")
+    tb.add_row("[bold]Model Total[/bold]", "", f"[bold]{bd.total_model_tokens:,}[/bold]")
+    tb.add_row("User Ratio", "", f"{bd.user_ratio:.1%}")
+    console.print(tb)
+
+    if ps.prompt_count >= 2:
+        r_color = "red" if ps.prompt_redundancy_score > 0.5 else (
+            "yellow" if ps.prompt_redundancy_score > 0 else "green"
+        )
+        console.print(
+            f"\nPrompt Redundancy: [{r_color}]{ps.prompt_redundancy_score:.0%}[/{r_color}]"
+            f"  ({ps.prompt_count} prompts, {len(ps.similar_pairs)} similar pair(s))"
+        )
+        for pair in ps.similar_pairs[:5]:
+            a_text = escape(pair.prompt_a_text[:40] + "..." if len(pair.prompt_a_text) > 40 else pair.prompt_a_text)
+            b_text = escape(pair.prompt_b_text[:40] + "..." if len(pair.prompt_b_text) > 40 else pair.prompt_b_text)
+            console.print(
+                f'  [dim]#{pair.prompt_a_index+1}[/dim] "{a_text}" '
+                f'[dim]~[/dim] [dim]#{pair.prompt_b_index+1}[/dim] "{b_text}" '
+                f'[yellow]({pair.similarity:.2f})[/yellow]'
+            )
+
+    drift = ia.intent_drift
+    if drift.steps:
+        # Fall back to "default" for unmapped trajectories: an empty style would emit "[/]" with no open tag.
+        _drift_colors = {"convergent": "red", "divergent": "green", "stable": "green", "mixed": "yellow"}
+        t_color = _drift_colors.get(drift.overall_trajectory, "default")
+        console.print(
+            f"\nIntent Drift: [{t_color}]{drift.overall_trajectory}[/{t_color}]"
+            f"  (avg similarity: {drift.average_drift:.2f})"
+        )
+        for step in drift.steps:
+            s_color = "red" if step.drift_type == "convergent" else (
+                "green" if step.drift_type == "divergent" else "yellow"
+            )
+            console.print(
+                f"  #{step.from_index+1} -> #{step.to_index+1}: "
+                f"[{s_color}]{step.drift_type}[/{s_color}] ({step.similarity:.2f})"
+            )
+
+    pra = ia.prompt_response_alignment
+    if pra.pairs:
+        a_color = "red" if pra.average_alignment < 0.3 else (
+            "yellow" if pra.average_alignment < 0.5 else "green"
+        )
+        console.print(
+            f"\nPrompt-Response Alignment: [{a_color}]{pra.average_alignment:.2f}[/{a_color}]"
+            f"  ({len(pra.pairs)} pair(s), {pra.low_alignment_count} low)"
+        )
+        for pair in pra.pairs:
+            p_color = "red" if pair.alignment < 0.3 else (
+                "yellow" if pair.alignment < 0.5 else "green"
+            )
+            prompt_short = escape(pair.prompt_text[:50] + "..." if len(pair.prompt_text) > 50 else pair.prompt_text)
+            console.print(
+                f'  [dim]#{pair.prompt_index+1}[/dim] "{prompt_short}" '
+                f'-> [{p_color}]{pair.alignment:.2f}[/{p_color}]'
+            )
+
+
+def _format_cost_report_rich(console, cost_report) -> None:
+    """Print the "Cost Analysis" metric table and any recommendations to ``console``."""
+    from rich.table import Table
+
+    console.print("\n[bold]Cost Analysis[/bold]")
+
+    cost_table = Table(show_header=True, show_edge=True)
+    cost_table.add_column("Metric", style="cyan", width=20)
+    cost_table.add_column("Value", justify="right", width=16)
+
+    cwter = cost_report.cost_ter
+    cost_table.add_row("Cost-Weighted TER", f"{cwter.cost_weighted_ter:.4f}")
+    cost_table.add_row("Raw TER", f"{cwter.raw_ter:.4f}")
+    cost_table.add_row("Total Cost", f"${cwter.total_cost_usd:.4f}")
+    cost_table.add_row("Waste Cost", f"${cwter.waste_cost_usd:.4f}")
+    waste_pct = (cwter.waste_cost_usd / cwter.total_cost_usd * 100) if cwter.total_cost_usd > 0 else 0  # 0 when nothing was spent
+    cost_table.add_row("Waste %", f"{waste_pct:.1f}%")
+    cost_table.add_row("Semantic Density", f"{cost_report.session_density.density_score:.2%}")
+    cost_table.add_row("Redundancy", f"{cost_report.session_density.redundancy_ratio:.2%}")
+    console.print(cost_table)
+
+    if cost_report.recommendations:
+        console.print("\n[bold]Recommendations:[/bold]")
+        for rec in cost_report.recommendations:
+            console.print(f"  • {rec}")
+
+
+def _format_overthinking_rich(console, ot) -> None:
+    """Print the overthinking status line, the reasoning-token table and the explanation to ``console``."""
+    from rich.table import Table
+
+    console.print("\n[bold]Overthinking Analysis[/bold]")
+
+    status_color = "red" if ot.is_overthinking else "green"
+    status_text = "OVERTHINKING DETECTED" if ot.is_overthinking else "Efficient Reasoning"
+    console.print(f"Status: [{status_color}]{status_text}[/{status_color}]")
+
+    ot_table = Table(show_header=True, show_edge=True)
+    ot_table.add_column("Metric", style="cyan", width=20)
+    ot_table.add_column("Value", justify="right", width=16)
+
+    ot_table.add_row("Total Reasoning", f"{ot.total_reasoning_tokens:,} tokens")
+    ot_table.add_row("Useful", f"{ot.useful_reasoning_tokens:,} tokens")
+    ot_table.add_row("Efficiency", f"{ot.reasoning_efficiency:.0%}")
+    ot_table.add_row("Wasted", f"{ot.wasted_reasoning_tokens:,} tokens")
+
+    if ot.optimal_cutoff_index is not None:  # explicit None test, so an index of 0 is still reported
+        ot_table.add_row("Optimal Cutoff", f"Span {ot.optimal_cutoff_index} (of {len(ot.segments)})")
+
+    ot_table.add_row("Recommended Budget", f"{ot.recommended_budget:,} tokens")
+    console.print(ot_table)
+    console.print(f"\n{ot.explanation}")
diff --git a/src/ter_calculator/formatter_text.py b/src/ter_calculator/formatter_text.py
new file mode 100644
index 0000000..bd2026b
--- /dev/null
+++ b/src/ter_calculator/formatter_text.py
@@ -0,0 +1,212 @@
+"""Plain text formatting for TER results."""
+
+from __future__ import annotations
+
+from .models import CostModel, InputAnalysis, TERResult
+
+
+def format_text(result: TERResult) -> str:
+    """Render one session's TER report as plain text; optional sections appear only when their data exists."""
+    from .formatter import _build_waste_breakdown, _compute_waste_cost
+
+    waste_pct = (result.waste_tokens / result.total_tokens * 100) if result.total_tokens else 0
+    sid = result.session_id
+    if len(sid) > 20:
+        sid = sid[:8] + "..."
+    lines = [
+        f"TER Report: {sid}",
+        "═" * 40,
+        "",
+    ]
+
+    cost_str = ""
+    if result.economics:
+        cost_str = f"  |  Cost: ${result.economics.estimated_cost_usd:.2f}"
+        waste_cost = _compute_waste_cost(result)
+        if waste_cost > 0:
+            cost_str += f"  |  Waste $: ${waste_cost:.2f}"
+    lines.append(f"TER: {result.aggregate_ter:.2f}  |  Waste: {waste_pct:.1f}%{cost_str}")
+    # One-line input-analysis summary, emitted only when ``result.input_analysis`` is set.
+    ia = result.input_analysis
+    if ia is not None:
+        drift = ia.intent_drift
+        pra = ia.prompt_response_alignment
+        ps = ia.prompt_similarity
+        parts = [f"Drift: {drift.overall_trajectory}"]
+        if pra.pairs:
+            parts.append(f"Alignment: {pra.average_alignment:.2f}")
+        if ps.prompt_count >= 2:
+            parts.append(f"Redundancy: {ps.prompt_redundancy_score:.0%}")
+        parts.append(f"User: {ia.token_breakdown.user_ratio:.0%}")
+        lines.append("  |  ".join(parts))
+
+    lines.append("")
+
+    lines.append("Phases:     Reasoning  Tool Use  Generation")
+    lines.append(
+        f"            {result.phase_scores.get('reasoning', 0):.2f}"
+        f"       {result.phase_scores.get('tool_use', 0):.2f}"
+        f"      {result.phase_scores.get('generation', 0):.2f}"
+    )
+    lines.append("")
+
+    lines.append(f"Output Tokens: {result.total_tokens:,}  (aligned: {result.aligned_tokens:,}  waste: {result.waste_tokens:,})")
+    # Input, cache and positional statistics require economics data.
+    if result.economics is not None:
+        econ = result.economics
+        cache_pct = econ.cache_hit_rate * 100
+        pos = econ.positional
+        g = econ.input_growth
+
+        lines.extend([
+            "",
+            f"Input: {econ.total_input_tokens:,}  Cache Read: {econ.total_cache_read_tokens:,}  Cache Hit: {cache_pct:.1f}%",
+            f"Context Growth: {g.growth_rate:.1f}x over {len(g.turn_input_tokens)} turns"
+            + (" [BLOAT]" if g.context_bloat_detected else (" [WATCH]" if g.is_superlinear else "")),
+            f"Positional TER: {pos.early_ter:.2f} (early) / {pos.mid_ter:.2f} (mid) / {pos.late_ter:.2f} (late)",
+        ])
+    # Waste table: each row unpacks as (label, tokens, count, kind).
+    rows = _build_waste_breakdown(result)
+    if rows:
+        total_waste = sum(t for _, t, _, _ in rows)
+        cm = result.economics.cost_model if result.economics else CostModel()
+        lines.extend(["", "Waste Breakdown:"])
+        lines.append(f"  {'Source':<24} {'Tokens':>10} {'%':>5} {'Cost':>10} {'Count':>6}")
+        for label, tokens, count, kind in rows:
+            pct = (tokens / total_waste * 100) if total_waste > 0 else 0
+            rate = cm.output_rate if kind == "output" else cm.input_rate
+            row_cost = float(tokens) * rate / 1_000_000  # rate is presumably USD per million tokens — confirm against CostModel
+            lines.append(
+                f"  {label:<24} {tokens:>10,} {pct:>4.0f}% ${row_cost:>8.4f} {count:>6}"
+            )
+        total_cost = _compute_waste_cost(result)
+        lines.append(f"  {'Total':<24} {total_waste:>10,}  100% ${total_cost:>8.4f}")
+
+    if result.input_analysis is not None:
+        lines.extend(_format_input_analysis_text(result.input_analysis))
+
+    return "\n".join(lines)
+
+
+def format_comparison_text(results: list[TERResult]) -> str:
+    """Render a plain-text table with one row per session, plus an average/total footer when non-empty."""
+    from .formatter import _compute_waste_cost
+
+    header = f"  {'#':<3} {'Session':<12} {'TER':<6} {'Waste%':<8} {'Cache%':<8} {'Cost':<10} {'Waste $':<10} {'Patterns':<8}"
+    out = [
+        "TER Comparison",
+        "═" * 40,
+        "",
+        header,
+    ]
+
+    for idx, res in enumerate(results, 1):
+        # Ids wider than the 12-character column are cut to 8 characters plus "...".
+        label = res.session_id[:12] if len(res.session_id) <= 12 else res.session_id[:8] + "..."
+        n_patterns = len(res.waste_patterns) if res.waste_patterns else 0
+        waste_share = (res.waste_tokens / res.total_tokens * 100) if res.total_tokens else 0
+        cache_col = cost_col = waste_col = ""
+        if res.economics:
+            cache_col = f"{res.economics.cache_hit_rate * 100:.0f}%"
+            cost_col = f"${res.economics.estimated_cost_usd:.2f}"
+            waste_col = f"${_compute_waste_cost(res):.2f}"
+        out.append(
+            f"  {idx:<3} {label:<12} {res.aggregate_ter:<6.2f} "
+            f"{waste_share:<8.1f} {cache_col:<8} {cost_col:<10} {waste_col:<10} {n_patterns:<8}"
+        )
+
+    if results:
+        mean_ter = sum(res.aggregate_ter for res in results) / len(results)
+        spend = sum(res.economics.estimated_cost_usd for res in results if res.economics)
+        wasted = sum(_compute_waste_cost(res) for res in results)
+        out.append("")
+        out.append(f"Average TER: {mean_ter:.2f}  |  Total Cost: ${spend:.2f}  |  Total Waste: ${wasted:.2f}")
+
+    return "\n".join(out)
+
+
+def format_grouped_text(
+    parent_result: TERResult,
+    subagent_results: list[TERResult],
+) -> str:
+    """Render a parent session and its subagents as one plain-text table ending in a totals row."""
+    from .formatter import _compute_group_aggregates, _compute_waste_cost
+
+    totals = _compute_group_aggregates([parent_result] + subagent_results)
+
+    group_id = parent_result.session_id
+    if len(group_id) > 20:
+        group_id = group_id[:8] + "..."
+
+    out = [
+        f"Group Analysis: {group_id}",
+        "═" * 50,
+        "",
+        f"TER: {totals['weighted_ter']:.2f}  |  Waste: {totals['waste_pct']:.1f}%"
+        f"  |  Cost: ${totals['total_cost_usd']:.2f}"
+        f"  |  Waste $: ${totals['total_waste_cost_usd']:.2f}",
+        f"Sessions: 1 parent + {len(subagent_results)} subagent(s)  |  Tokens: {totals['total_tokens']:,}",
+        "",
+        f"  {'Role':<10} {'Session':<14} {'TER':<6} {'Waste%':<8} {'Tokens':<10} {'Cost':<10} {'Waste $':<10} {'Patterns':<8}",
+    ]
+
+    def _emit(res: TERResult, role: str):
+        """Append one table row for ``res`` under the given role label."""
+        short_id = res.session_id[:14] if len(res.session_id) <= 14 else res.session_id[:8] + "..."
+        waste_share = (res.waste_tokens / res.total_tokens * 100) if res.total_tokens else 0
+        cost_col = f"${res.economics.estimated_cost_usd:.2f}" if res.economics else ""
+        waste_cost = _compute_waste_cost(res)
+        waste_col = f"${waste_cost:.2f}" if waste_cost > 0 else ""
+        n_patterns = len(res.waste_patterns) if res.waste_patterns else 0
+        out.append(
+            f"  {role:<10} {short_id:<14} {res.aggregate_ter:<6.2f} "
+            f"{waste_share:<8.1f} {res.total_tokens:<10,} {cost_col:<10} {waste_col:<10} {n_patterns:<8}"
+        )
+
+    for role, res in [("[parent]", parent_result)] + [("[agent]", sub) for sub in subagent_results]:
+        _emit(res, role)
+
+    out.append("")
+    out.append(
+        f"  {'Total':<10} {'':<14} {totals['weighted_ter']:<6.2f} "
+        f"{totals['waste_pct']:<8.1f} {totals['total_tokens']:<10,} "
+        f"${totals['total_cost_usd']:<9.2f} ${totals['total_waste_cost_usd']:<9.2f}"
+    )
+
+    return "\n".join(out)
+
+
+def _format_input_analysis_text(ia: InputAnalysis) -> list[str]:
+    """Build the indented "Input Analysis" section of the text report as a list of lines."""
+    def _clip(text, limit):
+        return text[:limit] + "..." if len(text) > limit else text
+
+    breakdown = ia.token_breakdown
+    out = [
+        "",
+        "Input Analysis:",
+        f"  User Tokens:   {breakdown.total_user_tokens:,} (prompt: {breakdown.user_input_tokens:,}, tool results: {breakdown.user_result_tokens:,})",
+        f"  Model Tokens:  {breakdown.total_model_tokens:,} (reasoning: {breakdown.model_reasoning_tokens:,}, tool: {breakdown.model_tool_tokens:,}, generation: {breakdown.model_generation_tokens:,})",
+        f"  User Ratio:    {breakdown.user_ratio:.1%}",
+    ]
+
+    similarity = ia.prompt_similarity
+    if similarity.prompt_count >= 2:
+        out.append(f"  Prompt Redundancy: {similarity.prompt_redundancy_score:.0%} ({similarity.prompt_count} prompts, {len(similarity.similar_pairs)} similar pair(s))")
+        for match in similarity.similar_pairs[:5]:
+            a_text, b_text = _clip(match.prompt_a_text, 40), _clip(match.prompt_b_text, 40)
+            out.append(f'    #{match.prompt_a_index+1} "{a_text}" ~ #{match.prompt_b_index+1} "{b_text}" ({match.similarity:.2f})')
+
+    drift = ia.intent_drift
+    if drift.steps:
+        out.append(f"  Intent Drift: {drift.overall_trajectory} (avg similarity: {drift.average_drift:.2f})")
+        out.extend(f"    #{step.from_index+1} -> #{step.to_index+1}: {step.drift_type} ({step.similarity:.2f})" for step in drift.steps)
+
+    alignment = ia.prompt_response_alignment
+    if alignment.pairs:
+        out.append(f"  Prompt-Response Alignment: {alignment.average_alignment:.2f} ({len(alignment.pairs)} pair(s), {alignment.low_alignment_count} low)")
+        for item in alignment.pairs:
+            flag = " [LOW]" if item.alignment < 0.3 else ""
+            out.append(f'    #{item.prompt_index+1} "{_clip(item.prompt_text, 50)}" -> {item.alignment:.2f}{flag}')
+
+    return out
diff --git a/tests/unit/test_token_counting.py b/tests/unit/test_token_counting.py
new file mode 100644
index 0000000..d23adf2
--- /dev/null
+++ b/tests/unit/test_token_counting.py
@@ -0,0 +1,305 @@
+"""Unit tests for ter_calculator.token_counting module."""
+
+from __future__ import annotations
+
+import pytest
+
+from ter_calculator.token_counting import (
+    CountMethod,
+    PhaseMultipliers,
+    TokenCountResult,
+    _code_density,
+    calibrate_multiplier,
+    count_tokens,
+    estimate_tokens_heuristic,
+    token_count_confidence,
+)
+
+
+# ── _code_density ──────────────────────────────────────────────────────────
+
+
+class TestCodeDensity:
+    def test_empty_text_returns_zero(self):
+        assert _code_density("") == 0.0
+
+    def test_natural_text_low_density(self):
+        """Plain prose scores below 0.05."""
+        prose = "This is a simple natural language sentence with no code."
+        assert _code_density(prose) < 0.05
+
+    def test_code_text_higher_density(self):
+        """A C-style snippet scores above 0.1."""
+        snippet = "if (x > 0) { return arr[i]; }"
+        assert _code_density(snippet) > 0.1
+
+    def test_pure_punctuation_capped_at_one(self):
+        """Input made only of bracket and operator characters never exceeds 1.0."""
+        symbols = "{}[]();=<>"
+        assert _code_density(symbols) <= 1.0
+
+    def test_json_like_text(self):
+        """A JSON object literal scores above 0.05."""
+        payload = '{"key": "value", "list": [1, 2, 3]}'
+        assert _code_density(payload) > 0.05
+
+
+# ── estimate_tokens_heuristic ──────────────────────────────────────────────
+
+
+class TestEstimateTokensHeuristic:
+    def test_empty_text_returns_zero(self):
+        assert estimate_tokens_heuristic("") == 0
+
+    def test_normal_text_default_ratio(self):
+        """40 characters at the default ratio of 4.0 estimate to 10 tokens."""
+        tokens = estimate_tokens_heuristic("a" * 40)
+        assert tokens == 10
+
+    def test_with_reasoning_phase(self):
+        """The reasoning phase uses a ratio of 4.0, so 40 characters give 10 tokens."""
+        tokens = estimate_tokens_heuristic("a" * 40, phase="reasoning")
+        assert tokens == 10
+
+    def test_with_tool_use_phase(self):
+        """The tool-use phase uses a ratio of 3.2, so 32 characters give 10 tokens."""
+        tokens = estimate_tokens_heuristic("a" * 32, phase="tool_use")
+        assert tokens == 10
+
+    def test_with_custom_multipliers(self):
+        """Caller-supplied multipliers replace the defaults: 20 / 2.0 = 10."""
+        halved = PhaseMultipliers(reasoning=2.0, generation=2.0, tool_use=2.0)
+        tokens = estimate_tokens_heuristic(
+            "a" * 20, phase="reasoning", multipliers=halved
+        )
+        assert tokens == 10
+
+    def test_unknown_phase_falls_back_to_default(self):
+        """An unrecognised phase name is treated like the default ratio of 4.0."""
+        tokens = estimate_tokens_heuristic("a" * 40, phase="unknown_phase")
+        assert tokens == 10
+
+    def test_no_phase_uses_default_ratio(self):
+        """100 characters with no phase give 100 / 4.0 = 25 tokens."""
+        tokens = estimate_tokens_heuristic("a" * 100)
+        assert tokens == 25
+
+    def test_result_is_non_negative(self):
+        """Even a one-character input must not yield a negative estimate."""
+        tokens = estimate_tokens_heuristic("a")
+        assert tokens >= 0
+
+    def test_rounding(self):
+        """Estimates are rounded: 1.25 becomes 1 and 1.5 becomes 2."""
+        # 5 / 4.0 = 1.25 and 6 / 4.0 = 1.5
+        for length, expected in ((5, 1), (6, 2)):
+            assert estimate_tokens_heuristic("a" * length) == expected
+
+
+# ── calibrate_multiplier ──────────────────────────────────────────────────
+
+
+class TestCalibrateMultiplier:
+    def test_normal_samples(self):
+        """One 40-character sample counted as 10 tokens calibrates to 4.0."""
+        # Least-squares fit through the origin: m = sum(c*t) / sum(t*t) = (40*10) / (10*10) = 4.0
+        pairs = [("a" * 40, 10)]
+        multiplier = calibrate_multiplier(pairs)
+        assert multiplier == pytest.approx(4.0)
+
+    def test_multiple_samples(self):
+        """Two samples that share the same 4.0 ratio calibrate to 4.0."""
+        pairs = [
+            ("a" * 40, 10),  # c=40, t=10
+            ("b" * 80, 20),  # c=80, t=20
+        ]
+        # sum(c*t) = 400 + 1600 = 2000; sum(t*t) = 100 + 400 = 500
+        # m = 2000 / 500 = 4.0
+        multiplier = calibrate_multiplier(pairs)
+        assert multiplier == pytest.approx(4.0)
+
+    def test_different_ratios(self):
+        """Samples with ratios 3.0 and 5.0 and equal token counts calibrate to 4.0."""
+        pairs = [
+            ("a" * 30, 10),  # ratio 3.0
+            ("b" * 50, 10),  # ratio 5.0
+        ]
+        # sum(c*t) = 300 + 500 = 800; sum(t*t) = 100 + 100 = 200
+        # m = 800 / 200 = 4.0
+        multiplier = calibrate_multiplier(pairs)
+        assert multiplier == pytest.approx(4.0)
+
+    def test_empty_samples_raises_value_error(self):
+        with pytest.raises(ValueError, match="non-empty"):
+            calibrate_multiplier([])
+
+    def test_all_zero_token_counts_raises_value_error(self):
+        """Samples whose token counts are all zero leave nothing to fit."""
+        with pytest.raises(ValueError, match="zero or negative"):
+            calibrate_multiplier([("some text", 0), ("more text", 0)])
+
+    def test_negative_token_counts_skipped(self):
+        """Negative counts are skipped, so an all-negative input is an error."""
+        pairs = [("text", -5)]
+        with pytest.raises(ValueError, match="zero or negative"):
+            calibrate_multiplier(pairs)
+
+    def test_mixed_valid_and_invalid(self):
+        """Only the valid sample contributes: m = (40*10) / (10*10) = 4.0."""
+        pairs = [
+            ("a" * 40, 10),  # valid
+            ("ignored", 0),  # skipped (zero)
+            ("also ignored", -1),  # skipped (negative)
+        ]
+        multiplier = calibrate_multiplier(pairs)
+        assert multiplier == pytest.approx(4.0)
+
+
+# ── token_count_confidence ─────────────────────────────────────────────────
+
+
+class TestTokenCountConfidence:
+    def test_api_method_always_1_0(self):
+        assert token_count_confidence("any text", CountMethod.API) == 1.0
+
+    def test_api_method_with_code_still_1_0(self):
+        """Code-like input does not lower the confidence of an API count."""
+        assert token_count_confidence("if (x) { return arr[i]; }", CountMethod.API) == 1.0
+
+    def test_heuristic_normal_text(self):
+        """Prose counted heuristically is expected in [0.75, 0.80]: base 0.8 less a minimal penalty."""
+        prose = "This is a normal English sentence without code."
+        confidence = token_count_confidence(prose, CountMethod.HEURISTIC)
+        assert 0.75 <= confidence <= 0.80
+
+    def test_heuristic_code_heavy_lower_confidence(self):
+        """Dense punctuation is expected to pull the heuristic score below its 0.8 base."""
+        dense = "{[()];=<>{[()];=<>}"
+        confidence = token_count_confidence(dense, CountMethod.HEURISTIC)
+        assert confidence < 0.80
+
+    def test_calibrated_normal_text(self):
+        """Prose counted with calibration is expected in [0.85, 0.90]: base 0.9 less a minimal penalty."""
+        prose = "Normal text for calibrated counting."
+        confidence = token_count_confidence(prose, CountMethod.CALIBRATED)
+        assert 0.85 <= confidence <= 0.90
+
+    def test_calibrated_code_text_penalized(self):
+        """Code-like input scores below 0.90 under calibrated counting."""
+        snippet = "function() { return {}; }"
+        assert token_count_confidence(snippet, CountMethod.CALIBRATED) < 0.90
+
+    def test_confidence_never_exceeds_one(self):
+        """The score is bounded above by 1.0."""
+        assert token_count_confidence("hello", CountMethod.API) <= 1.0
+
+    def test_confidence_never_below_zero(self):
+        """Even input made solely of braces keeps the score at or above 0."""
+        extreme = "{" * 1000
+        confidence = token_count_confidence(extreme, CountMethod.HEURISTIC)
+        assert confidence >= 0.0
+
+    def test_empty_text_heuristic(self):
+        """Empty text has density 0.0, so the heuristic base of 0.8 is returned unpenalised."""
+        confidence = token_count_confidence("", CountMethod.HEURISTIC)
+        assert confidence == pytest.approx(0.8)
+
+
+# ── count_tokens ───────────────────────────────────────────────────────────
+
+
+class TestCountTokens:
+    def test_empty_text(self):
+        """Empty input yields zero tokens at full confidence via the heuristic method."""
+        outcome = count_tokens("")
+        assert (outcome.estimated_tokens, outcome.confidence) == (0, 1.0)
+        assert outcome.method_used is CountMethod.HEURISTIC
+
+    def test_default_heuristic(self):
+        """With no options, 40 characters are counted heuristically as 10 tokens."""
+        outcome = count_tokens("a" * 40)
+        assert outcome.estimated_tokens == 10
+        assert outcome.method_used is CountMethod.HEURISTIC
+        assert 0.0 <= outcome.confidence <= 1.0
+
+    def test_with_phase(self):
+        """The phase is forwarded to the heuristic: 32 tool-use characters give 10 tokens."""
+        outcome = count_tokens("a" * 32, phase="tool_use")
+        assert outcome.estimated_tokens == 10
+        assert outcome.method_used is CountMethod.HEURISTIC
+
+    def test_with_calibrated_multiplier(self):
+        """A positive calibrated multiplier selects the calibrated method."""
+        outcome = count_tokens("a" * 50, calibrated_multiplier=5.0)
+        assert outcome.estimated_tokens == 10  # 50 / 5.0
+        assert outcome.method_used is CountMethod.CALIBRATED
+        assert outcome.confidence <= 0.9  # calibrated base
+
+    def test_calibrated_takes_precedence_over_heuristic(self):
+        """When both a phase and a multiplier are given, the calibrated path is chosen."""
+        sample = "a" * 40
+        outcome = count_tokens(
+            sample, phase="reasoning", calibrated_multiplier=4.0
+        )
+        assert outcome.method_used is CountMethod.CALIBRATED
+
+    def test_zero_calibrated_multiplier_falls_to_heuristic(self):
+        """A multiplier of 0.0 is ignored in favour of the heuristic."""
+        outcome = count_tokens("a" * 40, calibrated_multiplier=0.0)
+        assert outcome.method_used is CountMethod.HEURISTIC
+
+    def test_negative_calibrated_multiplier_falls_to_heuristic(self):
+        """A negative multiplier is ignored in favour of the heuristic."""
+        outcome = count_tokens("a" * 40, calibrated_multiplier=-1.0)
+        assert outcome.method_used is CountMethod.HEURISTIC
+
+    def test_use_api_false_skips_api(self):
+        """With ``use_api=False`` the result comes from one of the offline methods."""
+        outcome = count_tokens("some text", use_api=False)
+        assert outcome.method_used in (CountMethod.HEURISTIC, CountMethod.CALIBRATED)
+
+    def test_custom_multipliers_passed_through(self):
+        """Custom phase multipliers reach the heuristic: 20 / 2.0 = 10."""
+        halved = PhaseMultipliers(reasoning=2.0, generation=2.0, tool_use=2.0)
+        outcome = count_tokens("a" * 20, phase="reasoning", multipliers=halved)
+        assert outcome.estimated_tokens == 10
+        assert outcome.method_used is CountMethod.HEURISTIC
+
+    def test_result_is_token_count_result(self):
+        """The return value is a ``TokenCountResult``."""
+        assert isinstance(count_tokens("hello world"), TokenCountResult)
+
+    def test_result_is_frozen(self):
+        outcome = count_tokens("hello world")
+        with pytest.raises(AttributeError):
+            outcome.estimated_tokens = 999  # type: ignore[misc]
+
+
+# ── PhaseMultipliers defaults ──────────────────────────────────────────────
+
+
+class TestPhaseMultipliers:
+    def test_default_values(self):
+        """Defaults are 4.0 for reasoning and generation and 3.2 for tool use."""
+        defaults = PhaseMultipliers()
+        observed = (defaults.reasoning, defaults.generation, defaults.tool_use)
+        assert observed == (4.0, 4.0, 3.2)
+
+    def test_custom_values(self):
+        """Explicit constructor arguments are stored unchanged."""
+        custom = PhaseMultipliers(reasoning=3.0, generation=5.0, tool_use=2.5)
+        observed = (custom.reasoning, custom.generation, custom.tool_use)
+        assert observed == (3.0, 5.0, 2.5)
+
+
+# ── CountMethod enum ──────────────────────────────────────────────────────
+
+
+class TestCountMethod:
+    def test_values(self):
+        """Each member's value is its lower-cased name."""
+        expected = {"API": "api", "CALIBRATED": "calibrated", "HEURISTIC": "heuristic"}
+        assert {name: CountMethod[name].value for name in expected} == expected
+
+    def test_members_count(self):
+        assert len(CountMethod) == 3
diff --git a/tests/unit/test_validation.py b/tests/unit/test_validation.py
new file mode 100644
index 0000000..e3f2c81
--- /dev/null
+++ b/tests/unit/test_validation.py
@@ -0,0 +1,935 @@
+"""Tests for JSONL session validation."""
+
+import json
+
+import pytest
+
+from ter_calculator.validation import (
+    CompletenessAssessment,
+    ContentDistribution,
+    FileValidationResult,
+    HealthReport,
+    SessionValidationResult,
+    ValidationResult,
+    assess_completeness,
+    generate_health_report,
+    validate_jsonl_file,
+    validate_jsonl_line,
+    validate_session,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_line(
+    *,
+    line_type="user",
+    role="user",
+    content="Hello",
+    uuid="u1",
+    session_id="s1",
+    extra_top=None,
+    extra_msg=None,
+    omit_top=None,
+    omit_msg=None,
+):
+    """Serialise one JSONL entry built from the defaults, applying ``extra_*`` merges before ``omit_*`` removals."""
+    record = {
+        "type": line_type,
+        "uuid": uuid,
+        "sessionId": session_id,
+        "message": {
+            "role": role,
+            "content": content,
+        },
+    }
+    # Merge overrides first; a falsy mapping means "no overrides".
+    record.update(extra_top or {})
+    if extra_msg:
+        record["message"].update(extra_msg)
+    # Then drop keys, ignoring any that are already absent.
+    for key in omit_top or ():
+        record.pop(key, None)
+    # ``record["message"]`` is looked up on every use because ``extra_top`` may have replaced it.
+    for key in omit_msg or ():
+        record["message"].pop(key, None)
+    return json.dumps(record)
+
+
+def _make_assistant_line(
+    content=None,
+    uuid="a1",
+    stop_reason="end_turn",
+    usage=None,
+    timestamp=None,
+):
+    """Return an assistant JSONL entry as a dict; ``usage`` and ``timestamp`` are added only when truthy."""
+    # Default body: a single text block.
+    blocks = [{"type": "text", "text": "Hi there!"}] if content is None else content
+    message = {"role": "assistant", "content": blocks, "stop_reason": stop_reason}
+    if usage:
+        message["usage"] = usage
+    record = {
+        "type": "assistant",
+        "uuid": uuid,
+        "sessionId": "s1",
+        "message": message,
+    }
+    if timestamp:
+        record["timestamp"] = timestamp
+    return record
+
+
+def _make_user_line(content="Hello", uuid="u1", timestamp=None):
+    """Return a user JSONL entry as a dict; ``timestamp`` is added only when truthy."""
+    record = dict(
+        type="user",
+        uuid=uuid,
+        sessionId="s1",
+        message=dict(role="user", content=content),
+    )
+    if timestamp:
+        record["timestamp"] = timestamp
+    return record
+
+
+# ---------------------------------------------------------------------------
+# 1. validate_jsonl_line
+# ---------------------------------------------------------------------------
+
+
+class TestValidateJsonlLine:
+    """Tests for single-line JSONL validation."""
+
+    def test_valid_user_line(self):
+        line = _make_line(role="user", content="Hello")
+        result = validate_jsonl_line(line, line_number=1)
+        assert result.valid is True
+        assert result.errors == []
+
+    def test_valid_assistant_line_with_text_block(self):
+        content = [{"type": "text", "text": "Response"}]
+        line = _make_line(
+            line_type="assistant", role="assistant", content=content
+        )
+        result = validate_jsonl_line(line, line_number=1)
+        assert result.valid is True
+        assert result.errors == []
+
+    def test_invalid_json(self):
+        result = validate_jsonl_line("{not valid json", line_number=5)
+        assert result.valid is False
+        assert len(result.errors) == 1
+        assert "Invalid JSON" in result.errors[0]
+        assert result.line_number == 5
+
+    def test_non_object_json(self):
+        result = validate_jsonl_line(json.dumps([1, 2, 3]), line_number=2)
+        assert result.valid is False
+        assert "Expected a JSON object" in result.errors[0]
+
+    def test_empty_line(self):
+        result = validate_jsonl_line("", line_number=1)
+        assert result.valid is True
+        assert any("Empty line" in w for w in result.warnings)
+
+    def test_whitespace_only_line(self):
+        result = validate_jsonl_line("   \t  ", line_number=1)
+        assert result.valid is True
+        assert any("Empty line" in w for w in result.warnings)
+
+    def test_missing_required_top_level_fields(self):
+        line = _make_line(omit_top=["type", "uuid"])
+        result = validate_jsonl_line(line, line_number=3)
+        assert result.valid is False
+        assert any("Missing required top-level fields" in e for e in result.errors)
+        assert "type" in result.errors[0]
+        assert "uuid" in result.errors[0]
+
+    def test_missing_message_field(self):
+        line = _make_line(omit_top=["message"])
+        result = validate_jsonl_line(line, line_number=1)
+        assert result.valid is False
+        assert any("Missing required top-level fields" in e for e in result.errors)
+
+    def test_missing_required_message_fields(self):
+        line = _make_line(omit_msg=["role", "content"])
+        result = validate_jsonl_line(line, line_number=4)
+        assert result.valid is False
+        assert any("Missing required message fields" in e for e in result.errors)
+
+    def test_message_not_dict(self):
+        raw = json.dumps({
+            "type": "user",
+            "uuid": "u1",
+            "sessionId": "s1",
+            "message": "not a dict",
+        })
+        result = validate_jsonl_line(raw, line_number=1)
+        assert result.valid is False
+        assert any("'message' must be a dict" in e for e in result.errors)
+
+    def test_unexpected_role_warning(self):
+        line = _make_line(role="system")
+        result = validate_jsonl_line(line, line_number=1)
+        assert result.valid is True
+        assert any("Unexpected role" in w for w in result.warnings)
+
+    def test_unknown_block_type_warning(self):
+        content = [{"type": "image_url", "url": "http://example.com"}]
+        line = _make_line(role="assistant", content=content)
+        result = validate_jsonl_line(line, line_number=1)
+        assert result.valid is True
+        assert any("unknown block type" in w for w in result.warnings)
+
+    def test_content_block_missing_type(self):
+        """A content block with no 'type' key is a hard error."""
+        record = _make_line(role="assistant", content=[{"text": "no type field here"}])
+        res = validate_jsonl_line(record, line_number=1)
+        assert res.valid is False
+        assert any("missing 'type' field" in msg for msg in res.errors)
+
+    def test_text_block_missing_text_field(self):
+        """A 'text' block that carries no 'text' key is a hard error."""
+        record = _make_line(role="assistant", content=[{"type": "text"}])
+        res = validate_jsonl_line(record, line_number=1)
+        assert res.valid is False
+        assert any("missing 'text' field" in msg for msg in res.errors)
+
+    def test_text_block_text_not_string(self):
+        """A non-string 'text' value (here an int) is a hard error."""
+        record = _make_line(role="assistant", content=[{"type": "text", "text": 42}])
+        res = validate_jsonl_line(record, line_number=1)
+        assert res.valid is False
+        assert any("'text' must be a string" in msg for msg in res.errors)
+
+    def test_thinking_block_valid(self):
+        blocks = [{"type": "thinking", "thinking": "Let me think..."}]
+        record = _make_line(role="assistant", content=blocks)
+        res = validate_jsonl_line(record, line_number=1)
+        assert res.valid is True  # text supplied under the 'thinking' key
+
+    def test_thinking_block_with_text_key(self):
+        blocks = [{"type": "thinking", "text": "Thinking via text key"}]
+        record = _make_line(role="assistant", content=blocks)
+        res = validate_jsonl_line(record, line_number=1)
+        assert res.valid is True  # 'text' is accepted in place of 'thinking'
+        assert res.warnings == []
+
+    def test_thinking_block_missing_both_fields(self):
+        """A thinking block with neither 'thinking' nor 'text' only warns."""
+        record = _make_line(role="assistant", content=[{"type": "thinking"}])
+        res = validate_jsonl_line(record, line_number=1)
+        assert res.valid is True  # warning, not an error
+        assert any("missing" in note and "thinking" in note for note in res.warnings)
+
+    def test_tool_use_block_valid(self):
+        """A tool_use block carrying id, name and input passes validation."""
+        call = {"type": "tool_use", "id": "t1", "name": "bash", "input": {"cmd": "ls"}}
+        record = _make_line(role="assistant", content=[call])
+        res = validate_jsonl_line(record, line_number=1)
+        # Only validity is checked; warnings are not inspected here.
+        assert res.valid is True
+
+    def test_tool_use_block_missing_name(self):
+        blocks = [{"type": "tool_use", "id": "t1", "input": {}}]  # no 'name'
+        record = _make_line(role="assistant", content=blocks)
+        res = validate_jsonl_line(record, line_number=1)
+        assert res.valid is False
+        assert any("missing 'name'" in msg for msg in res.errors)
+
+    def test_tool_use_block_missing_id(self):
+        blocks = [{"type": "tool_use", "name": "bash", "input": {}}]  # no 'id'
+        record = _make_line(role="assistant", content=blocks)
+        res = validate_jsonl_line(record, line_number=1)
+        assert res.valid is False
+        assert any("missing 'id'" in msg for msg in res.errors)
+
+    def test_tool_result_block_valid(self):
+        """A tool_result block with tool_use_id and content passes validation."""
+        block = {"type": "tool_result", "tool_use_id": "t1", "content": "OK"}
+        record = _make_line(role="user", content=[block])
+        res = validate_jsonl_line(record, line_number=1)
+        # Only validity is checked; warnings are not inspected here.
+        assert res.valid is True
+
+    def test_tool_result_block_missing_tool_use_id(self):
+        blocks = [{"type": "tool_result", "content": "OK"}]  # no 'tool_use_id'
+        record = _make_line(role="user", content=blocks)
+        res = validate_jsonl_line(record, line_number=1)
+        assert res.valid is False
+        assert any("missing 'tool_use_id'" in msg for msg in res.errors)
+
+    def test_content_not_string_or_list(self):
+        """Content that is neither str nor list (here an int) is rejected."""
+        res = validate_jsonl_line(_make_line(content=42), line_number=1)
+        assert res.valid is False
+        assert any("'content' must be a string or list" in msg for msg in res.errors)
+
+    def test_content_block_not_dict_warning(self):
+        """A non-dict item inside the content list warns but stays valid."""
+        record = _make_line(role="assistant", content=["just a string in the list"])
+        res = validate_jsonl_line(record, line_number=1)
+        assert res.valid is True
+        assert any("is not a dict" in note for note in res.warnings)
+
+    @pytest.mark.parametrize(
+        "meta_type",
+        (
+            "attachment",
+            "file-history-snapshot",
+            "last-prompt",
+            "permission-mode",
+            "progress",
+            "queue-operation",
+            "summary",
+            "system",
+        ),
+    )
+    def test_meta_line_types_are_valid(self, meta_type):
+        """A record of a meta type passes with no errors and no warnings."""
+        raw = json.dumps({"type": meta_type, "data": "something"})
+        res = validate_jsonl_line(raw, line_number=1)
+        assert res.valid is True
+        assert (res.errors, res.warnings) == ([], [])
+
+    def test_line_number_is_preserved(self):
+        """The supplied line number is echoed back, even for malformed JSON."""
+        assert validate_jsonl_line("{bad json", line_number=99).line_number == 99
+
+    def test_string_content_is_valid(self):
+        """Plain-string message content is accepted."""
+        record = _make_line(role="user", content="plain text content")
+        assert validate_jsonl_line(record, line_number=1).valid is True
+
+
+# ---------------------------------------------------------------------------
+# 2. validate_session
+# ---------------------------------------------------------------------------
+
+
+class TestValidateSession:
+    """Session-level checks: role presence, ordering, token counts, tool pairing."""
+
+    def test_empty_session(self):
+        res = validate_session([])
+        assert res.valid is False
+        assert res.message_count == 0
+        for needle in ("no user messages", "no assistant messages"):
+            assert any(needle in msg for msg in res.errors)
+
+    def test_single_user_message(self):
+        """A lone user entry fails because no assistant message exists."""
+        res = validate_session([_make_user_line()])
+        assert res.valid is False
+        assert res.message_count == 1
+        assert any("no assistant messages" in msg for msg in res.errors)
+
+    def test_single_assistant_message(self):
+        """A lone assistant entry fails because no user message exists."""
+        res = validate_session([_make_assistant_line()])
+        assert res.valid is False
+        assert res.message_count == 1
+        assert any("no user messages" in msg for msg in res.errors)
+
+    def test_valid_user_assistant_pair(self):
+        """A user entry followed by an assistant entry validates with no errors."""
+        # ISO-8601 timestamps, one second apart and already in order.
+        asked = _make_user_line(timestamp="2026-04-01T10:00:00.000Z")
+        answered = _make_assistant_line(timestamp="2026-04-01T10:00:01.000Z")
+        res = validate_session([asked, answered])
+        assert res.valid is True
+        assert res.message_count == 2
+        assert res.errors == []
+
+    def test_timestamp_out_of_order(self):
+        """An assistant entry stamped before the preceding user entry is an error."""
+        # The user entry is stamped 10:00:05 and the assistant entry 10:00:01.
+        later = "2026-04-01T10:00:05.000Z"
+        earlier = "2026-04-01T10:00:01.000Z"
+        session = [
+            _make_user_line(uuid="u1", timestamp=later),
+            _make_assistant_line(uuid="a1", timestamp=earlier),
+        ]
+        res = validate_session(session)
+        assert res.valid is False
+        assert any("Timestamp out of order" in msg for msg in res.errors)
+
+    def test_negative_token_count(self):
+        """A negative input_tokens value in the usage data is an error."""
+        usage = {
+            "input_tokens": -5,
+            "output_tokens": 10,
+            "cache_creation_input_tokens": 0,
+            "cache_read_input_tokens": 0,
+        }
+        session = [
+            _make_user_line(),
+            _make_assistant_line(usage=usage),
+        ]
+        res = validate_session(session)
+        assert res.valid is False
+        assert any("Negative token count" in msg for msg in res.errors)
+
+    def test_tool_result_without_matching_tool_use(self):
+        """A tool_result citing an id that no tool_use declared is an error."""
+        orphan = {
+            "type": "tool_result",
+            "tool_use_id": "orphan_id",
+            "content": "ok",
+        }
+        # "orphan_id" is never introduced by a tool_use block in this session.
+        session = [_make_user_line(content=[orphan]), _make_assistant_line()]
+        res = validate_session(session)
+        assert res.valid is False
+        assert any("non-existent tool_use ids" in msg for msg in res.errors)
+
+    def test_tool_use_without_matching_tool_result_warning(self):
+        """A tool_use that never receives a tool_result produces a warning."""
+        dangling = {
+            "type": "tool_use",
+            "id": "t_orphan",
+            "name": "bash",
+            "input": {},
+        }
+        closing = {"type": "text", "text": "done"}
+        session = [
+            _make_user_line(),
+            _make_assistant_line(content=[dangling, closing]),
+        ]
+
+        res = validate_session(session)
+        # Only the warning is checked; validity is not asserted here.
+        assert any("without matching tool_result" in note for note in res.warnings)
+
+    def test_matched_tool_use_and_result(self):
+        """A tool_use answered by its tool_result validates with no findings."""
+        call = {
+            "type": "tool_use",
+            "id": "t1",
+            "name": "bash",
+            "input": {"cmd": "ls"},
+        }
+        # The result refers back to the call through the shared id "t1".
+        reply = {
+            "type": "tool_result",
+            "tool_use_id": "t1",
+            "content": "file.txt",
+        }
+        session = [
+            _make_user_line(),
+            _make_assistant_line(content=[call]),
+            _make_user_line(uuid="u2", content=[reply]),
+            _make_assistant_line(uuid="a2"),
+        ]
+
+        res = validate_session(session)
+        assert res.valid is True
+        assert res.errors == []
+        assert res.warnings == []
+
+    def test_meta_lines_are_skipped(self):
+        """Meta records do not contribute to the message count."""
+        init_meta = {"type": "system", "data": "init"}
+        summary_meta = {"type": "summary", "text": "summary"}
+        # Two meta records are interleaved with one user and one assistant entry.
+        session = [init_meta, _make_user_line(), summary_meta, _make_assistant_line()]
+
+        res = validate_session(session)
+        assert res.valid is True
+        assert res.message_count == 2
+
+    def test_string_content_counts_as_block(self):
+        """Expect at least two content blocks when user content is a plain string."""
+        user_turn = _make_user_line(content="simple string")
+        # The assistant entry keeps whatever default content the helper provides.
+        session = [user_turn, _make_assistant_line()]
+        res = validate_session(session)
+        assert res.content_block_count >= 2
+
+    def test_non_dict_entries_skipped(self):
+        """Non-dict entries neither invalidate the session nor count as messages."""
+        junk = [
+            "not a dict",
+            42,
+            None,
+        ]
+        session = [*junk, _make_user_line(), _make_assistant_line()]
+        res = validate_session(session)
+        assert res.valid is True
+        assert res.message_count == 2
+
+
+# ---------------------------------------------------------------------------
+# 3. validate_jsonl_file
+# ---------------------------------------------------------------------------
+
+
+class TestValidateJsonlFile:
+    """Tests for whole-file JSONL validation, using files created under tmp_path."""
+
+    def test_valid_file(self, tmp_path):
+        """A file holding one user and one assistant record is fully valid."""
+        user_record = _make_line(role="user", content="Hi")
+        assistant_record = _make_line(
+            line_type="assistant",
+            role="assistant",
+            content=[{"type": "text", "text": "Hello"}],
+            uuid="a1",
+        )
+        jsonl_path = tmp_path / "valid.jsonl"
+        jsonl_path.write_text(f"{user_record}\n{assistant_record}", encoding="utf-8")
+        res = validate_jsonl_file(str(jsonl_path))
+        assert res.valid is True
+        assert res.total_lines == 2
+        assert res.valid_lines == 2
+        assert res.error_lines == []
+        assert res.errors == []
+
+    def test_nonexistent_file(self, tmp_path):
+        """A path with no file behind it raises FileNotFoundError."""
+        # Built under tmp_path: a fixed /tmp name is POSIX-only and not isolated.
+        with pytest.raises(FileNotFoundError, match="JSONL file not found"):
+            validate_jsonl_file(str(tmp_path / "does_not_exist_abc123.jsonl"))
+
+    def test_file_with_invalid_lines(self, tmp_path):
+        """One good record plus one malformed line: line 2 gets the single error."""
+        jsonl_path = tmp_path / "mixed.jsonl"
+        good = _make_line(role="user", content="Hello")
+        jsonl_path.write_text(good + "\n{bad json\n", encoding="utf-8")
+
+        res = validate_jsonl_file(str(jsonl_path))
+        assert res.valid is False
+        assert res.total_lines == 2
+        assert res.valid_lines == 1
+        assert 2 in res.error_lines
+        assert len(res.errors) == 1
+        assert "Invalid JSON" in res.errors[0]
+
+    def test_file_with_empty_lines(self, tmp_path):
+        """Blank lines after a valid record warn without failing the file."""
+        jsonl_path = tmp_path / "empties.jsonl"
+        record = _make_line(role="user", content="Hello")
+        jsonl_path.write_text(record + "\n" * 3, encoding="utf-8")
+        res = validate_jsonl_file(str(jsonl_path))
+        assert res.valid is True
+        assert any("Empty line" in note for note in res.warnings)
+
+    def test_file_with_meta_lines(self, tmp_path):
+        """A meta record is counted both as a line and as a valid line."""
+        meta = json.dumps({"type": "permission-mode", "mode": "default"})
+        record = _make_line(role="user", content="Hello")
+        jsonl_path = tmp_path / "meta.jsonl"
+        jsonl_path.write_text(f"{meta}\n{record}\n", encoding="utf-8")
+        res = validate_jsonl_file(str(jsonl_path))
+        assert res.valid is True
+        assert res.total_lines == 2
+        assert res.valid_lines == 2
+
+    def test_file_all_invalid(self, tmp_path):
+        """Two malformed lines give two errors, reported for lines 1 and 2."""
+        jsonl_path = tmp_path / "bad.jsonl"
+        jsonl_path.write_text("{bad\n{also bad\n", encoding="utf-8")
+        res = validate_jsonl_file(str(jsonl_path))
+        assert res.valid is False
+        assert res.total_lines == 2
+        assert res.valid_lines == 0
+        assert res.error_lines == [1, 2]
+        assert len(res.errors) == 2
+
+    def test_file_accepts_path_object(self, tmp_path):
+        """A pathlib.Path is accepted directly, without conversion to str."""
+        jsonl_path = tmp_path / "path_obj.jsonl"
+        jsonl_path.write_text(_make_line(role="user", content="Hi"), encoding="utf-8")
+        assert validate_jsonl_file(jsonl_path).valid is True
+
+
+# ---------------------------------------------------------------------------
+# 4. assess_completeness
+# ---------------------------------------------------------------------------
+
+
+class TestAssessCompleteness:
+    """Checks on how assess_completeness judges the way a session ends."""
+
+    def test_complete_session(self):
+        """A session whose assistant entry stops with 'end_turn' is complete."""
+        prompt = _make_user_line()
+        final_turn = _make_assistant_line(stop_reason="end_turn")
+        assessment = assess_completeness([prompt, final_turn])
+        assert assessment.is_complete is True
+        assert assessment.completeness_score == 1.0
+        # A complete session reports no issues at all.
+        assert assessment.issues == []
+
+    def test_no_assistant_messages(self):
+        """A session holding only a user entry is incomplete."""
+        assessment = assess_completeness([_make_user_line()])
+        assert assessment.is_complete is False
+        assert assessment.completeness_score < 1.0
+        assert any("No assistant messages" in issue for issue in assessment.issues)
+
+    def test_wrong_stop_reason(self):
+        """Stopping on 'max_tokens' scores below 1.0 and names stop_reason."""
+        prompt = _make_user_line()
+        cut_off = _make_assistant_line(stop_reason="max_tokens")
+        assessment = assess_completeness([prompt, cut_off])
+        assert assessment.is_complete is False
+        assert assessment.completeness_score < 1.0
+        # The reported issue mentions the stop_reason field.
+        assert any("stop_reason" in issue for issue in assessment.issues)
+
+    def test_none_stop_reason(self):
+        """An assistant entry whose stop_reason is None is incomplete."""
+        prompt = _make_user_line()
+        unfinished = _make_assistant_line(stop_reason=None)
+        assessment = assess_completeness([prompt, unfinished])
+        # Only the flag is checked here; score and issues are not.
+        assert assessment.is_complete is False
+
+    def test_unresolved_tool_use(self):
+        """A tool_use with no tool_result is an issue even after 'end_turn'."""
+        call = {
+            "type": "tool_use",
+            "id": "t1",
+            "name": "bash",
+            "input": {},
+        }
+        trailing_text = {"type": "text", "text": "done"}
+        # No later entry supplies a tool_result for "t1".
+        session = [
+            _make_user_line(),
+            _make_assistant_line(
+                content=[call, trailing_text], stop_reason="end_turn"
+            ),
+        ]
+        assessment = assess_completeness(session)
+        assert assessment.is_complete is False
+        assert any("unresolved tool_use" in issue for issue in assessment.issues)
+
+    def test_session_ends_mid_tool_use(self):
+        """A final assistant entry that stops on 'tool_use' ends mid-tool-use."""
+        call = {
+            "type": "tool_use",
+            "id": "t1",
+            "name": "bash",
+            "input": {},
+        }
+        session = [
+            _make_user_line(),
+            _make_assistant_line(
+                content=[call],
+                stop_reason="tool_use",
+            ),
+        ]
+        assessment = assess_completeness(session)
+        assert assessment.is_complete is False
+        assert any("ends mid-tool-use" in issue for issue in assessment.issues)
+
+    def test_completeness_score_clamped_to_zero(self):
+        """The score for an empty session is never negative."""
+        assert assess_completeness([]).completeness_score >= 0.0
+
+    def test_resolved_tool_use_is_complete(self):
+        """A tool call that is answered and followed by 'end_turn' is complete."""
+        call = {
+            "type": "tool_use",
+            "id": "t1",
+            "name": "bash",
+            "input": {},
+        }
+        # The result refers back to the call through the shared id "t1".
+        reply = {"type": "tool_result", "tool_use_id": "t1", "content": "done"}
+        wrap_up = {"type": "text", "text": "Finished."}
+        # Four entries: prompt, tool call, tool result, closing answer.
+        session = [
+            _make_user_line(),
+            _make_assistant_line(
+                uuid="a1",
+                content=[call],
+                stop_reason="tool_use",
+            ),
+            _make_user_line(uuid="u2", content=[reply]),
+            _make_assistant_line(
+                uuid="a2",
+                content=[wrap_up],
+                stop_reason="end_turn",
+            ),
+        ]
+
+        assessment = assess_completeness(session)
+        assert assessment.is_complete is True
+        assert assessment.completeness_score == 1.0
+
+    def test_meta_lines_ignored(self):
+        """A leading meta record does not stop the session from being complete."""
+        # The meta record comes first, ahead of the user and assistant entries.
+        init_meta = {"type": "system", "data": "init"}
+        prompt = _make_user_line()
+        final_turn = _make_assistant_line(stop_reason="end_turn")
+        assessment = assess_completeness([init_meta, prompt, final_turn])
+        assert assessment.is_complete is True
+
+
+# ---------------------------------------------------------------------------
+# 5. generate_health_report
+# ---------------------------------------------------------------------------
+
+
+class TestGenerateHealthReport:
+    """Checks on the counts and token figures in the pre-analysis health report."""
+
+    def test_basic_health_report(self):
+        """One text exchange yields one message per role and no warnings."""
+        session = [
+            _make_user_line(content="Hello"),
+            _make_assistant_line(content=[{"type": "text", "text": "Hi there!"}]),
+        ]
+        health = generate_health_report(session)
+        assert health.user_message_count == 1
+        assert health.assistant_message_count == 1
+        # Both messages are text-only: expect at least two text blocks.
+        assert health.content_distribution.text_count >= 2
+        assert health.generation_tokens > 0
+        assert health.parsing_warnings == []
+
+    def test_empty_session_report(self):
+        health = generate_health_report([])  # no entries at all
+        assert health.user_message_count == 0
+        assert health.assistant_message_count == 0
+        assert health.estimated_total_tokens == 0
+        assert health.content_distribution.total == 0
+
+    def test_content_distribution_counts(self):
+        """Each block type is tallied under its own counter."""
+        assistant_blocks = [
+            {"type": "thinking", "thinking": "Let me think..."},
+            {"type": "text", "text": "Here is the answer"},
+            {
+                "type": "tool_use",
+                "id": "t1",
+                "name": "bash",
+                "input": {"cmd": "ls"},
+            },
+        ]
+        tool_reply = {
+            "type": "tool_result",
+            "tool_use_id": "t1",
+            "content": "file.txt",
+        }
+        # The first user entry keeps the helper's default content.
+        session = [
+            _make_user_line(),
+            _make_assistant_line(content=assistant_blocks),
+            _make_user_line(uuid="u2", content=[tool_reply]),
+        ]
+        tally = generate_health_report(session).content_distribution
+        assert tally.thinking_count == 1
+        assert tally.text_count >= 1
+        assert tally.tool_use_count == 1
+        assert tally.tool_result_count == 1
+        assert tally.total >= 4
+
+    def test_reasoning_tokens_from_thinking_blocks(self):
+        """A thinking block gives positive reasoning_tokens; generation is too."""
+        # The thinking text is repeated 20 times to give it some length.
+        musing = {"type": "thinking", "thinking": "Deep reasoning here " * 20}
+        reply = {"type": "text", "text": "Answer"}
+        session = [
+            _make_user_line(),
+            _make_assistant_line(content=[musing, reply]),
+        ]
+
+        health = generate_health_report(session)
+        assert health.reasoning_tokens > 0
+        assert health.generation_tokens > 0
+
+    def test_tool_use_tokens(self):
+        """A tool call and its result give a positive tool_use_tokens count."""
+        call = {
+            "type": "tool_use",
+            "id": "t1",
+            "name": "bash",
+            "input": {"cmd": "echo hello world"},
+        }
+        outcome = {
+            "type": "tool_result",
+            "tool_use_id": "t1",
+            "content": "hello world",
+        }
+        closing = {"type": "text", "text": "Done"}
+        session = [
+            _make_user_line(),
+            _make_assistant_line(
+                content=[call, closing],
+            ),
+            _make_user_line(
+                uuid="u2",
+                content=[outcome],
+            ),
+        ]
+
+        health = generate_health_report(session)
+        assert health.tool_use_tokens > 0
+
+    def test_api_tokens_used_when_available(self):
+        """Usage figures supplied on the assistant entry are summed into the total."""
+        usage = {
+            "input_tokens": 100,
+            "output_tokens": 50,
+            "cache_creation_input_tokens": 10,
+            "cache_read_input_tokens": 5,
+        }
+        session = [
+            _make_user_line(),
+            _make_assistant_line(usage=usage),
+        ]
+        health = generate_health_report(session)
+        assert health.estimated_total_tokens == 165  # 100 + 50 + 10 + 5
+
+    def test_fallback_to_estimated_tokens(self):
+        """With no usage passed in, the total equals the three per-phase figures."""
+        session = [
+            _make_user_line(content="Hello world"),
+            _make_assistant_line(content=[{"type": "text", "text": "Goodbye world"}]),
+        ]
+        health = generate_health_report(session)
+        per_phase = (
+            health.reasoning_tokens + health.tool_use_tokens + health.generation_tokens
+        )
+        assert health.estimated_total_tokens > 0
+        assert health.estimated_total_tokens == per_phase
+
+    def test_non_dict_entry_produces_warning(self):
+        session = ["not a dict", _make_user_line(), _make_assistant_line()]
+        notes = generate_health_report(session).parsing_warnings
+        assert len(notes) == 1  # exactly one warning for the single string entry
+        assert "Non-dict" in notes[0]
+
+    def test_meta_lines_skipped(self):
+        """Meta records are excluded from the per-role message counts."""
+        meta_records = [
+            {"type": "system", "data": "init"},
+            {"type": "summary", "text": "blah"},
+        ]
+        session = [*meta_records, _make_user_line(), _make_assistant_line()]
+        health = generate_health_report(session)
+        assert health.user_message_count == 1
+        assert health.assistant_message_count == 1
+
+    def test_estimated_analysis_seconds(self):
+        """The time estimate comes to 0.002 s for a four-span session."""
+        text_blocks = [
+            {"type": "text", "text": letter}
+            for letter in ("A", "B", "C")
+        ]
+        session = [
+            _make_user_line(content="Hello"),
+            _make_assistant_line(content=text_blocks),
+        ]
+
+        health = generate_health_report(session)
+        # Spans: 1 for the user's string + 3 for the assistant's blocks = 4.
+        # Expected estimate: 4 * 0.0005 = 0.002 seconds.
+        assert health.estimated_analysis_seconds == pytest.approx(0.002)
+
+    def test_string_content_counted_as_text(self):
+        """Expect a non-zero text count when user content is a plain string."""
+        user_turn = _make_user_line(content="Plain text user message")
+        # The assistant entry keeps the helper's default content.
+        session = [user_turn, _make_assistant_line()]
+        health = generate_health_report(session)
+        assert health.content_distribution.text_count >= 1
+        assert health.generation_tokens > 0
+
+
+# ---------------------------------------------------------------------------
+# 6. ContentDistribution properties
+# ---------------------------------------------------------------------------
+
+
+class TestContentDistribution:
+    """Checks on ContentDistribution's total and per-type percentage values."""
+
+    def test_total(self):
+        counts = {
+            "text_count": 5,
+            "tool_use_count": 3,
+            "tool_result_count": 3,
+            "thinking_count": 2,
+            "other_count": 1,
+        }
+        assert ContentDistribution(**counts).total == 14  # 5 + 3 + 3 + 2 + 1
+
+    def test_percentages(self):
+        mix = ContentDistribution(
+            text_count=50,
+            tool_use_count=25,
+            tool_result_count=15,
+            thinking_count=10,
+            other_count=0,
+        )
+        assert mix.text_pct == 50.0  # counts sum to 100, so each pct == count
+        assert mix.tool_use_pct == 25.0
+        assert mix.tool_result_pct == 15.0
+        assert mix.thinking_pct == 10.0
+        assert mix.other_pct == 0.0
+
+    def test_zero_total_returns_zero_pct(self):
+        empty = ContentDistribution()  # no counts supplied
+        assert empty.total == 0
+        assert empty.text_pct == 0.0
+        assert empty.tool_use_pct == 0.0
+        assert empty.tool_result_pct == 0.0
+        assert empty.thinking_pct == 0.0
+        assert empty.other_pct == 0.0
+
+
+# ---------------------------------------------------------------------------
+# 7. Dataclass defaults
+# ---------------------------------------------------------------------------
+
+
+class TestDataclassDefaults:
+    """Checks the values optional fields take when they are not supplied."""
+
+    def test_validation_result_defaults(self):
+        """errors and warnings default to empty lists, line_number to 0."""
+        blank = ValidationResult(valid=True)
+        assert (blank.errors, blank.warnings) == ([], [])
+        assert blank.line_number == 0
+
+    def test_session_validation_result_defaults(self):
+        """Both lists default to empty and both counters default to 0."""
+        blank = SessionValidationResult(valid=True)
+        assert (blank.errors, blank.warnings) == ([], [])
+        assert blank.message_count == 0
+        assert blank.content_block_count == 0
+
+    def test_file_validation_result_defaults(self):
+        """error_lines, errors and warnings all default to empty lists."""
+        blank = FileValidationResult(valid=True, total_lines=0, valid_lines=0)
+        assert blank.error_lines == []
+        assert (blank.errors, blank.warnings) == ([], [])
+
+    def test_health_report_defaults(self):
+        """parsing_warnings defaults to [] and estimated_analysis_seconds to 0.0."""
+        blank = HealthReport(
+            user_message_count=0,
+            assistant_message_count=0,
+            estimated_total_tokens=0,
+            content_distribution=ContentDistribution(),
+            reasoning_tokens=0,
+            tool_use_tokens=0,
+            generation_tokens=0,
+        )
+        assert (blank.parsing_warnings, blank.estimated_analysis_seconds) == ([], 0.0)
+
+    def test_completeness_assessment_defaults(self):
+        """issues defaults to an empty list."""
+        # Only is_complete and completeness_score are supplied.
+        blank = CompletenessAssessment(is_complete=True, completeness_score=1.0)
+        assert blank.issues == []
diff --git a/tests/unit/test_waste_detectors.py b/tests/unit/test_waste_detectors.py
new file mode 100644
index 0000000..cd2a9cb
--- /dev/null
+++ b/tests/unit/test_waste_detectors.py
@@ -0,0 +1,954 @@
+"""Tests for extended waste pattern detectors (waste_detectors.py)."""
+
+import pytest
+
+from ter_calculator.models import (
+    ClassifiedSpan,
+    SpanLabel,
+    SpanPhase,
+    TokenSpan,
+)
+from ter_calculator.waste_detectors import (
+    ExtendedWasteType,
+    detect_abandoned_approaches,
+    detect_all_extended,
+    detect_error_retry_spirals,
+    detect_over_reading,
+    detect_permission_loops,
+    detect_verbose_thinking,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helper factories
+# ---------------------------------------------------------------------------
+
+
+def _make_cs(
+    phase: SpanPhase,
+    text: str = "test",
+    position: int = 0,
+    token_count: int = 50,
+    block_type: str = "",
+    label: SpanLabel = SpanLabel.ALIGNED_TOOL_CALL,
+) -> ClassifiedSpan:
+    """Create a ClassifiedSpan fixture; an empty block_type is inferred from phase."""
+    if block_type:
+        resolved_type = block_type
+    elif phase == SpanPhase.TOOL_USE:
+        resolved_type = "tool_use"
+    elif phase == SpanPhase.REASONING:
+        resolved_type = "thinking"
+    else:
+        resolved_type = "text"
+    return ClassifiedSpan(
+        span=TokenSpan(
+            text=text,
+            phase=phase,
+            position=position,
+            token_count=token_count,
+            source_message_uuid="msg-1",
+            block_type=resolved_type,
+        ),
+        label=label,
+        confidence=0.9,
+        cosine_similarity=0.5,
+    )
+
+
+def _tool_use(text: str, position: int, token_count: int = 50) -> ClassifiedSpan:
+    """Return a TOOL_USE-phase span whose block type is "tool_use"."""
+    return _make_cs(
+        phase=SpanPhase.TOOL_USE,
+        block_type="tool_use",
+        token_count=token_count,
+        position=position,
+        text=text,
+    )
+
+
+def _tool_result(text: str, position: int, token_count: int = 20) -> ClassifiedSpan:
+    """Return a TOOL_USE-phase span whose block type is "tool_result"."""
+    return _make_cs(
+        phase=SpanPhase.TOOL_USE,
+        block_type="tool_result",
+        token_count=token_count,
+        position=position,
+        text=text,
+    )
+
+
+def _reasoning(text: str, position: int, token_count: int = 100) -> ClassifiedSpan:
+    """Return a REASONING-phase span labelled ALIGNED_REASONING."""
+    return _make_cs(
+        phase=SpanPhase.REASONING,
+        label=SpanLabel.ALIGNED_REASONING,
+        token_count=token_count,
+        position=position,
+        text=text,
+    )
+
+
+def _generation(text: str, position: int, token_count: int = 50) -> ClassifiedSpan:
+    """Return a GENERATION-phase span labelled ALIGNED_RESPONSE."""
+    return _make_cs(
+        phase=SpanPhase.GENERATION,
+        label=SpanLabel.ALIGNED_RESPONSE,
+        token_count=token_count,
+        position=position,
+        text=text,
+    )
+
+
+# ===================================================================
+# 1. detect_permission_loops
+# ===================================================================
+
+
+class TestDetectPermissionLoops:
+    """Tests for detect_permission_loops on hand-built span sequences."""
+
+    def test_empty_input(self):
+        assert detect_permission_loops([]) == []
+
+    def test_no_tool_spans_returns_empty(self):
+        """Non-tool spans should produce no permission-loop patterns."""
+        spans = [
+            _reasoning("thinking about it", position=0),
+            _generation("some output", position=1),
+        ]
+        assert detect_permission_loops(spans) == []
+
+    def test_no_permission_issues(self):
+        """Tool calls that succeed should produce no patterns."""
+        spans = [
+            _tool_use('Bash {"command":"ls"}', position=0),
+            _tool_result("file1.py file2.py", position=1),
+            _tool_use('Bash {"command":"cat file1.py"}', position=2),
+            _tool_result("contents...", position=3),
+        ]
+        assert detect_permission_loops(spans) == []
+
+    def test_detects_permission_loop_default_min_retries(self):
+        """Three identical calls with denial results between them = 2 retries."""
+        spans = [
+            _tool_use('Bash {"command":"rm /etc/passwd"}', position=0),
+            _tool_result("permission denied", position=1),
+            _tool_use('Bash {"command":"rm /etc/passwd"}', position=2),
+            _tool_result("permission denied", position=3),
+            _tool_use('Bash {"command":"rm /etc/passwd"}', position=4),
+        ]
+        patterns = detect_permission_loops(spans)
+        assert len(patterns) == 1
+        p = patterns[0]
+        assert p.pattern_type == ExtendedWasteType.PERMISSION_LOOP.value
+        assert p.details["tool_name"] == "Bash"
+        assert p.details["retries"] == 2
+        assert p.start_position == 0
+        assert p.end_position == 4
+        assert p.spans_involved == 3
+        assert p.tokens_wasted == 100  # 2 retries x 50 tokens (positions 2 and 4)
+
+    def test_below_min_retries_threshold(self):
+        """Only 1 retry (2 calls total) with default min_retries=2 -- not flagged."""
+        spans = [
+            _tool_use('Bash {"command":"rm /root/x"}', position=0),
+            _tool_result("access denied", position=1),
+            _tool_use('Bash {"command":"rm /root/x"}', position=2),
+        ]
+        assert detect_permission_loops(spans) == []
+
+    def test_custom_min_retries_1(self):
+        """Lowering min_retries=1 should flag a single retry."""
+        spans = [
+            _tool_use('Bash {"command":"rm /root/x"}', position=0),
+            _tool_result("access denied", position=1),
+            _tool_use('Bash {"command":"rm /root/x"}', position=2),
+        ]
+        patterns = detect_permission_loops(spans, min_retries=1)
+        assert len(patterns) == 1
+        assert patterns[0].details["retries"] == 1
+
+    def test_high_min_retries_not_flagged(self):
+        """Raising min_retries above actual retries prevents detection."""
+        spans = [
+            _tool_use('Write {"file_path":"/etc/secret"}', position=0),
+            _tool_result("permission denied", position=1),
+            _tool_use('Write {"file_path":"/etc/secret"}', position=2),
+            _tool_result("permission denied", position=3),
+            _tool_use('Write {"file_path":"/etc/secret"}', position=4),
+        ]
+        assert detect_permission_loops(spans, min_retries=3) == []  # only 2 retries
+
+    def test_different_tool_breaks_chain(self):
+        """An intervening Read tool_use between denied Bash calls breaks the chain."""
+        spans = [
+            _tool_use('Bash {"command":"rm /root/x"}', position=0),
+            _tool_result("permission denied", position=1),
+            _tool_use('Read {"file_path":"/root/x"}', position=2),
+            _tool_result("permission denied", position=3),
+            _tool_use('Bash {"command":"rm /root/x"}', position=4),
+        ]
+        assert detect_permission_loops(spans) == []
+
+    @pytest.mark.parametrize(
+        "keyword",
+        [
+            "permission denied",
+            "not allowed",
+            "access denied",
+            "EACCES: operation not permitted",
+            "unauthorized request",
+        ],
+    )
+    def test_all_permission_keywords(self, keyword):
+        """Each denial phrase is recognised; one parametrized case per phrase."""
+        spans = [
+            _tool_use('Write {"file_path":"/etc/secret"}', position=0),
+            _tool_result(keyword, position=1),
+            _tool_use('Write {"file_path":"/etc/secret"}', position=2),
+            _tool_result(keyword, position=3),
+            _tool_use('Write {"file_path":"/etc/secret"}', position=4),
+        ]
+        assert len(detect_permission_loops(spans)) == 1
+
+    def test_intervening_reasoning_does_not_break_chain(self):
+        """Reasoning spans between tool_use spans should not affect detection."""
+        spans = [
+            _tool_use('Bash {"command":"sudo rm"}', position=0),
+            _tool_result("permission denied", position=1),
+            _reasoning("Let me try again", position=2),
+            _tool_use('Bash {"command":"sudo rm"}', position=3),
+            _tool_result("permission denied", position=4),
+            _reasoning("Still denied, trying once more", position=5),
+            _tool_use('Bash {"command":"sudo rm"}', position=6),
+        ]
+        patterns = detect_permission_loops(spans)
+        assert len(patterns) == 1
+        assert patterns[0].details["retries"] == 2
+
+    def test_no_denial_result_between_calls(self):
+        """If the result between two identical calls is not a denial, no pattern."""
+        spans = [
+            _tool_use('Bash {"command":"make"}', position=0),
+            _tool_result("build succeeded", position=1),
+            _tool_use('Bash {"command":"make"}', position=2),
+            _tool_result("build succeeded", position=3),
+            _tool_use('Bash {"command":"make"}', position=4),
+        ]
+        assert detect_permission_loops(spans) == []
+
+
+# ===================================================================
+# 2. detect_error_retry_spirals
+# ===================================================================
+
+
+class TestDetectErrorRetrySpirals:
+    """Tests for detect_error_retry_spirals on hand-built span sequences."""
+
+    def test_empty_input(self):
+        assert detect_error_retry_spirals([]) == []
+
+    def test_no_errors(self):
+        """Successful tool calls should produce no patterns."""
+        spans = [
+            _tool_use('Bash {"command":"ls"}', position=0),
+            _tool_result("file1.py", position=1),
+            _tool_use('Bash {"command":"cat file1.py"}', position=2),
+            _tool_result("content", position=3),
+        ]
+        assert detect_error_retry_spirals(spans) == []
+
+    def test_detects_error_spiral_default_min_3(self):
+        """4 identical calls with error results between them = 3 retries."""
+        spans = [
+            _tool_use('Bash {"command":"python run.py --flag=val"}', position=0),
+            _tool_result("error: ModuleNotFoundError", position=1),
+            _tool_use('Bash {"command":"python run.py --flag=val"}', position=2),
+            _tool_result("error: ModuleNotFoundError", position=3),
+            _tool_use('Bash {"command":"python run.py --flag=val"}', position=4),
+            _tool_result("error: ModuleNotFoundError", position=5),
+            _tool_use('Bash {"command":"python run.py --flag=val"}', position=6),
+        ]
+        patterns = detect_error_retry_spirals(spans)
+        assert len(patterns) == 1
+        p = patterns[0]
+        assert p.pattern_type == ExtendedWasteType.ERROR_RETRY_SPIRAL.value
+        assert p.details["tool_name"] == "Bash"
+        assert p.details["retries"] == 3
+        assert p.spans_involved == 4
+
+    def test_below_min_retries_threshold(self):
+        """2 retries with default min_retries=3 should not be flagged."""
+        spans = [
+            _tool_use('Bash {"command":"make build"}', position=0),
+            _tool_result("error: compilation failed", position=1),
+            _tool_use('Bash {"command":"make build"}', position=2),
+            _tool_result("error: compilation failed", position=3),
+            _tool_use('Bash {"command":"make build"}', position=4),
+        ]
+        assert detect_error_retry_spirals(spans) == []
+
+    def test_custom_min_retries_lower(self):
+        """With min_retries=2, two retries should be flagged."""
+        spans = [
+            _tool_use('Bash {"command":"make build"}', position=0),
+            _tool_result("error: compilation failed", position=1),
+            _tool_use('Bash {"command":"make build"}', position=2),
+            _tool_result("error: compilation failed", position=3),
+            _tool_use('Bash {"command":"make build"}', position=4),
+        ]
+        patterns = detect_error_retry_spirals(spans, min_retries=2)
+        assert len(patterns) == 1
+        assert patterns[0].details["retries"] == 2
+
+    def test_significantly_different_params_break_chain(self):
+        """Completely different params should have low similarity and break chain."""
+        spans = [
+            _tool_use(
+                'Bash {"command":"python run.py --mode=fast --verbose"}',
+                position=0,
+            ),
+            _tool_result("error: failed to parse", position=1),
+            _tool_use(
+                'Bash {"command":"node server.js --port=3000 --host=localhost"}',
+                position=2,
+            ),
+            _tool_result("error: failed to start", position=3),
+            _tool_use(
+                'Bash {"command":"cargo build --release --target=x86_64"}',
+                position=4,
+            ),
+            _tool_result("error: missing dependency", position=5),
+            _tool_use(
+                'Bash {"command":"go run main.go --config=/etc/app.yaml"}',
+                position=6,
+            ),
+        ]
+        # Same tool name but very different params -> low similarity -> no chain
+        assert detect_error_retry_spirals(spans, min_retries=2) == []
+
+    def test_different_tool_breaks_chain(self):
+        """Switching tool names should break the chain."""
+        spans = [
+            _tool_use('Bash {"command":"ls"}', position=0),
+            _tool_result("error: no such file", position=1),
+            _tool_use('Read {"file_path":"x.py"}', position=2),
+            _tool_result("error: file not found", position=3),
+            _tool_use('Bash {"command":"ls"}', position=4),
+        ]
+        assert detect_error_retry_spirals(spans, min_retries=1) == []
+
+    @pytest.mark.parametrize(
+        "keyword",
+        [
+            "Error occurred",
+            "FAILED to execute",
+            "Exception raised",
+            "Traceback (most recent call last)",
+        ],
+    )
+    def test_error_keywords_case_insensitive(self, keyword):
+        """Mixed-case error text is detected; one parametrized case per keyword."""
+        spans = [
+            _tool_use('Bash {"command":"test"}', position=0),
+            _tool_result(keyword, position=1),
+            _tool_use('Bash {"command":"test"}', position=2),
+            _tool_result(keyword, position=3),
+            _tool_use('Bash {"command":"test"}', position=4),
+            _tool_result(keyword, position=5),
+            _tool_use('Bash {"command":"test"}', position=6),
+        ]
+        assert len(detect_error_retry_spirals(spans)) == 1
+
+    def test_custom_similarity_threshold(self):
+        """A lower similarity threshold allows more variation in params."""
+        # Params vary slightly each time
+        spans = [
+            _tool_use('Bash {"command":"python test.py --flag=a"}', position=0),
+            _tool_result("error: test failed", position=1),
+            _tool_use('Bash {"command":"python test.py --flag=b"}', position=2),
+            _tool_result("error: test failed", position=3),
+            _tool_use('Bash {"command":"python test.py --flag=c"}', position=4),
+            _tool_result("error: test failed", position=5),
+            _tool_use('Bash {"command":"python test.py --flag=d"}', position=6),
+        ]
+        # With a very strict threshold these may not chain; with relaxed they will
+        patterns_strict = detect_error_retry_spirals(spans, similarity_threshold=0.99)
+        patterns_relaxed = detect_error_retry_spirals(spans, similarity_threshold=0.50)
+        # Relaxed should find at least as many patterns as strict
+        # NOTE(review): this also holds when both runs find nothing; pin exact
+        # counts once the detector's similarity metric is confirmed.
+        assert len(patterns_relaxed) >= len(patterns_strict)
+
+    def test_wasted_tokens_excludes_first_call(self):
+        """tokens_wasted should only count retry calls, not the original."""
+        spans = [
+            _tool_use('Bash {"command":"test"}', position=0, token_count=100),
+            _tool_result("error: fail", position=1),
+            _tool_use('Bash {"command":"test"}', position=2, token_count=100),
+            _tool_result("error: fail", position=3),
+            _tool_use('Bash {"command":"test"}', position=4, token_count=100),
+            _tool_result("error: fail", position=5),
+            _tool_use('Bash {"command":"test"}', position=6, token_count=100),
+        ]
+        patterns = detect_error_retry_spirals(spans)
+        assert len(patterns) == 1
+        # 3 retries x 100 tokens = 300 wasted (first call excluded)
+        assert patterns[0].tokens_wasted == 300
+
+
+# ===================================================================
+# 3. detect_over_reading
+# ===================================================================
+
+
+class TestDetectOverReading:
+    """Behaviour of detect_over_reading on repeated reads of the same path."""
+
+    def test_empty_input(self):
+        assert detect_over_reading([]) == []
+
+    def test_single_read_no_pattern(self):
+        """One read of a file can never count as over-reading."""
+        session = [
+            _tool_use('Read {"file_path":"src/main.py"}', position=0),
+            _tool_result("def main(): pass", position=1),
+        ]
+        assert detect_over_reading(session) == []
+
+    def test_two_reads_no_pattern_default_min(self):
+        """A second read is 1 redundant read -- under the default min_reads=2."""
+        session = [
+            _tool_use('Read {"file_path":"src/main.py"}', position=0),
+            _tool_result("content", position=1),
+            _tool_use('Read {"file_path":"src/main.py"}', position=2),
+            _tool_result("content", position=3),
+        ]
+        assert detect_over_reading(session) == []
+
+    def test_three_reads_detected(self):
+        """A third read makes 2 redundant reads, meeting the default min_reads=2."""
+        session = [
+            _tool_use('Read {"file_path":"src/main.py"}', position=0),
+            _tool_result("content", position=1),
+            _tool_use('Read {"file_path":"src/main.py"}', position=2),
+            _tool_result("content", position=3),
+            _tool_use('Read {"file_path":"src/main.py"}', position=4),
+            _tool_result("content", position=5),
+        ]
+        found = detect_over_reading(session)
+        assert len(found) == 1
+        hit = found[0]
+        assert hit.pattern_type == ExtendedWasteType.OVER_READING.value
+        # The two redundant reads cost 50 tokens each
+        assert hit.tokens_wasted == 100  # 2 x 50
+        assert hit.details["redundant_reads"] == 2
+        assert hit.details["read_count"] == 3
+        assert hit.details["file_path"] == "src/main.py"
+
+    def test_edit_resets_read_count(self):
+        """Editing the file in between starts the read count over."""
+        session = [
+            _tool_use('Read {"file_path":"src/main.py"}', position=0),
+            _tool_result("content", position=1),
+            _tool_use('Read {"file_path":"src/main.py"}', position=2),
+            _tool_result("content", position=3),
+            _tool_use('Edit {"file_path":"src/main.py"}', position=4),
+            _tool_result("ok", position=5),
+            _tool_use('Read {"file_path":"src/main.py"}', position=6),
+            _tool_result("content", position=7),
+        ]
+        # The Edit at position 4 resets tracking, leaving a single read after it.
+        assert detect_over_reading(session) == []
+
+    def test_write_resets_read_count(self):
+        """A Write to the file starts the read count over, just like Edit."""
+        session = [
+            _tool_use('Read {"file_path":"x.py"}', position=0),
+            _tool_use('Read {"file_path":"x.py"}', position=1),
+            _tool_use('Read {"file_path":"x.py"}', position=2),
+            _tool_use('Write {"file_path":"x.py"}', position=3),
+            _tool_use('Read {"file_path":"x.py"}', position=4),
+        ]
+        # The Write at position 3 resets tracking, leaving a single read after it.
+        assert detect_over_reading(session) == []
+
+    def test_different_files_tracked_independently(self):
+        """Read counts are kept per file path, not across paths."""
+        session = [
+            _tool_use('Read {"file_path":"a.py"}', position=0),
+            _tool_use('Read {"file_path":"b.py"}', position=1),
+            _tool_use('Read {"file_path":"a.py"}', position=2),
+            _tool_use('Read {"file_path":"b.py"}', position=3),
+        ]
+        # Two reads per file is 1 redundant read each -- under min_reads=2.
+        assert detect_over_reading(session) == []
+
+    def test_custom_min_reads_1(self):
+        """With min_reads=1 a file read only twice is already flagged."""
+        session = [
+            _tool_use('Read {"file_path":"src/main.py"}', position=0),
+            _tool_result("content", position=1),
+            _tool_use('Read {"file_path":"src/main.py"}', position=2),
+            _tool_result("content", position=3),
+        ]
+        found = detect_over_reading(session, min_reads=1)
+        assert len(found) == 1
+        assert found[0].details["redundant_reads"] == 1
+
+    def test_cat_tool_recognised_as_read(self):
+        """Calls to a tool named 'cat' count as reads too."""
+        session = [
+            _tool_use('cat {"file_path":"src/main.py"}', position=0),
+            _tool_use('cat {"file_path":"src/main.py"}', position=1),
+            _tool_use('cat {"file_path":"src/main.py"}', position=2),
+        ]
+        assert len(detect_over_reading(session)) == 1
+
+    def test_results_sorted_by_wasted_tokens_descending(self):
+        """With several over-read files, the most wasteful one is listed first."""
+        session = [
+            # a.py read 3 times at 50 tokens each => 100 wasted
+            _tool_use('Read {"file_path":"a.py"}', position=0, token_count=50),
+            _tool_use('Read {"file_path":"a.py"}', position=1, token_count=50),
+            _tool_use('Read {"file_path":"a.py"}', position=2, token_count=50),
+            # b.py read 3 times at 200 tokens each => 400 wasted
+            _tool_use('Read {"file_path":"b.py"}', position=3, token_count=200),
+            _tool_use('Read {"file_path":"b.py"}', position=4, token_count=200),
+            _tool_use('Read {"file_path":"b.py"}', position=5, token_count=200),
+        ]
+        found = detect_over_reading(session)
+        # Exactly two patterns, b.py (400 wasted) ahead of a.py (100 wasted).
+        ranked_paths = [hit.details["file_path"] for hit in found]
+        assert ranked_paths == ["b.py", "a.py"]
+
+    def test_path_key_fallback(self):
+        """A 'path' key is used for the file path when 'file_path' is missing."""
+        session = [
+            _tool_use('Read {"path":"src/utils.py"}', position=0),
+            _tool_use('Read {"path":"src/utils.py"}', position=1),
+            _tool_use('Read {"path":"src/utils.py"}', position=2),
+        ]
+        found = detect_over_reading(session)
+        assert len(found) == 1
+        assert found[0].details["file_path"] == "src/utils.py"
+
+    def test_no_file_path_spans_ignored(self):
+        """Tool calls carrying no file path at all produce no patterns."""
+        session = [
+            _tool_use("Bash {}", position=0),
+            _tool_use("Bash {}", position=1),
+            _tool_use("Bash {}", position=2),
+        ]
+        assert detect_over_reading(session) == []
+
+    def test_non_tool_use_spans_ignored(self):
+        """Reasoning and generation spans between reads do not hide the pattern."""
+        session = [
+            _tool_use('Read {"file_path":"x.py"}', position=0),
+            _reasoning("thinking", position=1),
+            _tool_use('Read {"file_path":"x.py"}', position=2),
+            _generation("output", position=3),
+            _tool_use('Read {"file_path":"x.py"}', position=4),
+        ]
+        assert len(detect_over_reading(session)) == 1
+
+
+# ===================================================================
+# 4. detect_abandoned_approaches
+# ===================================================================
+
+
+class TestDetectAbandonedApproaches:
+    """Behaviour of detect_abandoned_approaches on Edit/Write/Read sequences."""
+
+    def test_empty_input(self):
+        assert detect_abandoned_approaches([]) == []
+
+    def test_no_abandonment_when_file_revisited(self):
+        """An edited file that is touched again later is not abandoned."""
+        events = [
+            _tool_use('Edit {"file_path":"src/a.py"}', position=0),
+            _tool_result("ok", position=1),
+            _tool_use('Read {"file_path":"src/b.py"}', position=2),
+            _tool_result("content", position=3),
+            _tool_use('Read {"file_path":"src/a.py"}', position=4),
+            _tool_result("content", position=5),
+        ]
+        assert detect_abandoned_approaches(events) == []
+
+    def test_detects_abandoned_file(self):
+        """An edited file is flagged once work moves elsewhere for good."""
+        events = [
+            _tool_use(
+                'Edit {"file_path":"src/attempt1.py"}',
+                position=0,
+                token_count=80,
+            ),
+            _tool_result("ok", position=1),
+            _tool_use(
+                'Edit {"file_path":"src/attempt2.py"}',
+                position=2,
+                token_count=60,
+            ),
+            _tool_result("ok", position=3),
+        ]
+        found = detect_abandoned_approaches(events)
+        assert len(found) == 1
+        hit = found[0]
+        assert hit.pattern_type == ExtendedWasteType.ABANDONED_APPROACH.value
+        assert "attempt1.py" in hit.description
+        assert hit.details["file_path"] == "src/attempt1.py"
+
+    def test_last_file_not_abandoned(self):
+        """With nothing after it, the final edited file is never flagged."""
+        events = [
+            _tool_use('Edit {"file_path":"src/only.py"}', position=0),
+            _tool_result("ok", position=1),
+        ]
+        assert detect_abandoned_approaches(events) == []
+
+    def test_file_revisited_later_not_abandoned(self):
+        """Returning to a file after other work clears it of abandonment."""
+        events = [
+            _tool_use('Edit {"file_path":"src/a.py"}', position=0),
+            _tool_use('Edit {"file_path":"src/b.py"}', position=2),
+            _tool_use('Edit {"file_path":"src/a.py"}', position=4),
+        ]
+        # a.py is edited again at position 4, so it is not abandoned.
+        # b.py is last touched at 2 and work resumes on a.py at 4 -> abandoned.
+        flagged = {p.details["file_path"] for p in detect_abandoned_approaches(events)}
+        assert "src/b.py" in flagged
+        assert "src/a.py" not in flagged
+
+    def test_write_tool_also_counts(self):
+        """A file created with Write can be abandoned just like an edited one."""
+        events = [
+            _tool_use('Write {"file_path":"src/temp.py"}', position=0),
+            _tool_result("ok", position=1),
+            _tool_use('Edit {"file_path":"src/main.py"}', position=2),
+            _tool_result("ok", position=3),
+        ]
+        found = detect_abandoned_approaches(events)
+        assert len(found) == 1
+        assert found[0].details["file_path"] == "src/temp.py"
+
+    def test_only_reads_no_abandonment(self):
+        """Files that were only read, never edited, are not reported."""
+        events = [
+            _tool_use('Read {"file_path":"src/a.py"}', position=0),
+            _tool_result("content", position=1),
+            _tool_use('Read {"file_path":"src/b.py"}', position=2),
+            _tool_result("content", position=3),
+        ]
+        assert detect_abandoned_approaches(events) == []
+
+    def test_no_file_path_spans_ignored(self):
+        """Tool calls carrying no file path at all produce no patterns."""
+        events = [
+            _tool_use("Bash {}", position=0),
+            _tool_result("ok", position=1),
+        ]
+        assert detect_abandoned_approaches(events) == []
+
+    def test_multiple_abandoned_files(self):
+        """More than one file can be reported from the same session."""
+        events = [
+            _tool_use('Edit {"file_path":"src/a.py"}', position=0, token_count=100),
+            _tool_use('Edit {"file_path":"src/b.py"}', position=1, token_count=200),
+            _tool_use('Edit {"file_path":"src/final.py"}', position=2, token_count=50),
+        ]
+        flagged = {p.details["file_path"] for p in detect_abandoned_approaches(events)}
+        assert "src/a.py" in flagged
+        assert "src/b.py" in flagged
+        # final.py is the last file touched, so it must not be reported
+        assert "src/final.py" not in flagged
+
+    def test_results_sorted_by_wasted_tokens_descending(self):
+        """The first pattern wastes at least as many tokens as the second."""
+        events = [
+            _tool_use('Edit {"file_path":"src/small.py"}', position=0, token_count=50),
+            _tool_use('Edit {"file_path":"src/large.py"}', position=1, token_count=500),
+            _tool_use('Edit {"file_path":"src/final.py"}', position=2, token_count=10),
+        ]
+        found = detect_abandoned_approaches(events)
+        assert len(found) == 2
+        assert found[0].tokens_wasted >= found[1].tokens_wasted
+
+    def test_duplicate_file_not_reported_twice(self):
+        """Several edits to one abandoned file still give a single pattern."""
+        events = [
+            _tool_use('Edit {"file_path":"src/dup.py"}', position=0, token_count=100),
+            _tool_use('Edit {"file_path":"src/dup.py"}', position=1, token_count=100),
+            _tool_use('Edit {"file_path":"src/other.py"}', position=2, token_count=50),
+        ]
+        paths = [p.details["file_path"] for p in detect_abandoned_approaches(events)]
+        assert paths.count("src/dup.py") == 1
+
+
+# ===================================================================
+# 5. detect_verbose_thinking
+# ===================================================================
+
+
+class TestDetectVerboseThinking:
+    def test_empty_input(self):
+        assert detect_verbose_thinking([]) == []  # no spans in, no patterns out
+
+    def test_no_thinking_spans(self):
+        """A session without any reasoning span yields no patterns."""
+        session = [
+            _generation("output text", position=0),
+            _tool_use('Bash {"command":"ls"}', position=1),
+        ]
+        assert detect_verbose_thinking(session) == []
+
+    def test_proportional_thinking_not_flagged(self):
+        """Thinking that is only 4x the size of its action is not flagged."""
+        # ratio = 200/50 = 4.0 < default 10.0
+        thought = _reasoning("Let me think about this...", position=0, token_count=200)
+        action = _tool_use('Bash {"command":"ls"}', position=1, token_count=50)
+        session = [thought, action]
+
+        assert detect_verbose_thinking(session) == []
+
+    def test_below_min_thinking_tokens_not_flagged(self):
+        """A high ratio alone is not enough when the thought itself is small."""
+        # ratio = 100/5 = 20.0 > 10.0 but 100 < 500 default min
+        thought = _reasoning("Short thought", position=0, token_count=100)
+        action = _tool_use('Bash {"command":"ls"}', position=1, token_count=5)
+        session = [thought, action]
+
+        assert detect_verbose_thinking(session) == []
+
+    def test_detects_verbose_thinking(self):
+        """A 6000-token thought ahead of a 50-token action is reported."""
+        session = [
+            _reasoning("Very long reasoning...", position=0, token_count=6000),
+            _tool_use('Bash {"command":"ls"}', position=1, token_count=50),
+        ]
+        # ratio = 6000/50 = 120.0 > 10.0, and 6000 > 500
+        found = detect_verbose_thinking(session)
+        assert len(found) == 1
+        hit = found[0]
+        assert hit.pattern_type == ExtendedWasteType.VERBOSE_THINKING.value
+        assert hit.start_position == 0
+        assert hit.end_position == 1
+        assert hit.details["ratio"] == 120.0
+        assert hit.details["action_tokens"] == 50
+        assert hit.details["thinking_tokens"] == 6000
+        # Excess = 6000 - (50 * 10) = 5500
+        assert hit.tokens_wasted == 5500
+
+    def test_thinking_with_no_subsequent_action(self):
+        """Thinking block at end of session with no action is flagged."""
+        spans = [
+            _reasoning("Final rumination...", position=0, token_count=1000),
+        ]
+        patterns = detect_verbose_thinking(spans)
+        assert len(patterns) == 1
+        p = patterns[0]
+        assert p.details["action_tokens"] == 0
+        assert p.tokens_wasted == 1000
+
+    def test_thinking_followed_by_zero_token_action(self):
+        """Action with 0 tokens should produce infinite ratio and be flagged."""
+        spans = [
+            _reasoning("Thinking...", position=0, token_count=600),
+            _tool_use('Bash {"command":""}', position=1, token_count=0),
+        ]
+        patterns = detect_verbose_thinking(spans)
+        assert len(patterns) == 1
+        assert patterns[0].details["ratio"] == float("inf")
+
+    def test_custom_ratio_threshold(self):
+        """Custom ratio_threshold should change what gets flagged."""
+        spans = [
+            _reasoning("Moderate thinking", position=0, token_count=600),
+            _tool_use('Bash {"command":"ls"}', position=1, token_count=50),
+        ]
+        # ratio = 12.0
+        assert detect_verbose_thinking(spans, ratio_threshold=15.0) == []
+        patterns = detect_verbose_thinking(spans, ratio_threshold=5.0)
+        assert len(patterns) == 1
+
+    def test_custom_min_thinking_tokens(self):
+        """Custom min_thinking_tokens should change what gets flagged."""
+        spans = [
+            _reasoning("Some thinking", position=0, token_count=200),
+            _tool_use('Bash {"command":"ls"}', position=1, token_count=10),
+        ]
+        # ratio = 20.0 > 10.0, but 200 < 500 default -> not flagged
+        assert detect_verbose_thinking(spans) == []
+        # Lower min to 100
+        patterns = detect_verbose_thinking(spans, min_thinking_tokens=100)
+        assert len(patterns) == 1
+
+    def test_skips_reasoning_to_find_next_action(self):
+        """The detector looks past consecutive reasoning spans for the action."""
+        spans = [
+            _reasoning("First thought", position=0, token_count=2000),
+            _reasoning("Second thought", position=1, token_count=1000),
+            _tool_use('Bash {"command":"ls"}', position=2, token_count=50),
+        ]
+        # First reasoning: next non-reasoning = tool_use at position 2
+        #   ratio = 2000/50 = 40.0
+        # Second reasoning: next non-reasoning = tool_use at position 2
+        #   ratio = 1000/50 = 20.0
+        patterns = detect_verbose_thinking(spans)
+        assert len(patterns) == 2
+
+    def test_generation_as_action(self):
+        """A generation span is a valid action target."""
+        spans = [
+            _reasoning("Deep thinking...", position=0, token_count=600),
+            _generation("Here is the answer", position=1, token_count=50),
+        ]
+        # ratio = 600/50 = 12.0 > 10.0
+        patterns = detect_verbose_thinking(spans)
+        assert len(patterns) == 1
+        assert patterns[0].details["action_tokens"] == 50
+
+    def test_excess_calculation(self):
+        """tokens_wasted should be thinking_tokens - (action_tokens * threshold)."""
+        spans = [
+            _reasoning("Long thought", position=0, token_count=1000),
+            _tool_use("Bash {}", position=1, token_count=50),
+        ]
+        # ratio = 1000/50 = 20.0, excess = 1000 - (50 * 10) = 500
+        patterns = detect_verbose_thinking(spans)
+        assert len(patterns) == 1
+        assert patterns[0].tokens_wasted == 500
+
+
+# ===================================================================
+# 6. detect_all_extended
+# ===================================================================
+
+
+class TestDetectAllExtended:
+    def test_empty_input(self):
+        assert detect_all_extended([]) == []  # empty input must give an empty list
+
+    def test_returns_list(self):
+        """The return value is a list, including when nothing is detectable."""
+        trace = [
+            _reasoning("thinking", position=0),
+            _generation("output", position=1),
+        ]
+        outcome = detect_all_extended(trace)
+        assert isinstance(outcome, list)
+
+    def test_combines_multiple_detectors(self):
+        """detect_all_extended merges the findings of its individual detectors."""
+        trace = [
+            # Three identical calls with denials in between: permission loop
+            _tool_use('Bash {"command":"rm /root"}', position=0, token_count=30),
+            _tool_result("permission denied", position=1),
+            _tool_use('Bash {"command":"rm /root"}', position=2, token_count=30),
+            _tool_result("permission denied", position=3),
+            _tool_use('Bash {"command":"rm /root"}', position=4, token_count=30),
+            # Three reads of one file: over-reading
+            _tool_use(
+                'Read {"file_path":"config.yaml"}', position=10, token_count=40
+            ),
+            _tool_use(
+                'Read {"file_path":"config.yaml"}', position=11, token_count=40
+            ),
+            _tool_use(
+                'Read {"file_path":"config.yaml"}', position=12, token_count=40
+            ),
+            # Large reasoning span before a tiny action: verbose thinking
+            _reasoning("Lots of thinking...", position=20, token_count=5000),
+            _tool_use('Bash {"command":"echo hi"}', position=21, token_count=10),
+        ]
+
+        found = detect_all_extended(trace)
+        seen_types = {hit.pattern_type for hit in found}
+        assert ExtendedWasteType.PERMISSION_LOOP.value in seen_types
+        assert ExtendedWasteType.OVER_READING.value in seen_types
+        assert ExtendedWasteType.VERBOSE_THINKING.value in seen_types
+
+    def test_sorted_by_start_position(self):
+        """The combined result is ordered by ascending start_position."""
+        trace = [
+            # Listed first although it sits at position 20: verbose thinking
+            _reasoning("Lots of thinking...", position=20, token_count=5000),
+            _tool_use('Bash {"command":"echo hi"}', position=21, token_count=10),
+            # Listed second although it starts at position 0: permission loop
+            _tool_use('Bash {"command":"rm /root"}', position=0, token_count=30),
+            _tool_result("permission denied", position=1),
+            _tool_use('Bash {"command":"rm /root"}', position=2, token_count=30),
+            _tool_result("permission denied", position=3),
+            _tool_use('Bash {"command":"rm /root"}', position=4, token_count=30),
+        ]
+        found = detect_all_extended(trace)
+        starts = [hit.start_position for hit in found]
+        assert starts == sorted(starts)
+
+    def test_forwards_permission_min_retries(self):
+        """Changing permission_min_retries changes permission-loop detection."""
+        trace = [
+            _tool_use('Bash {"command":"rm /root"}', position=0, token_count=30),
+            _tool_result("permission denied", position=1),
+            _tool_use('Bash {"command":"rm /root"}', position=2, token_count=30),
+            _tool_result("permission denied", position=3),
+            _tool_use('Bash {"command":"rm /root"}', position=4, token_count=30),
+        ]
+        # Three identical calls are 2 retries, which meets the default of 2
+        with_defaults = detect_all_extended(trace)
+        assert any(
+            hit.pattern_type == ExtendedWasteType.PERMISSION_LOOP.value
+            for hit in with_defaults
+        )
+
+        # A minimum of 3 retries is no longer met
+        with_strict = detect_all_extended(trace, permission_min_retries=3)
+        assert not any(
+            hit.pattern_type == ExtendedWasteType.PERMISSION_LOOP.value
+            for hit in with_strict
+        )
+
+    def test_forwards_verbose_thinking_params(self):
+        """verbose_min_thinking_tokens should be forwarded to the detector."""
+        trace = [
+            _reasoning("Thinking...", position=0, token_count=300),
+            _tool_use("Bash {}", position=1, token_count=10),
+        ]
+        # 300 is under the default min_thinking_tokens of 500, so no report
+        with_defaults = detect_all_extended(trace)
+        assert not any(
+            hit.pattern_type == ExtendedWasteType.VERBOSE_THINKING.value
+            for hit in with_defaults
+        )
+
+        # With the minimum dropped to 100 the same span is reported
+        with_low_min = detect_all_extended(
+            trace, verbose_min_thinking_tokens=100
+        )
+        assert any(
+            hit.pattern_type == ExtendedWasteType.VERBOSE_THINKING.value
+            for hit in with_low_min
+        )
+
+    def test_forwards_over_reading_min_reads(self):
+        """Changing over_reading_min_reads changes over-reading detection."""
+        trace = [
+            _tool_use('Read {"file_path":"x.py"}', position=0),
+            _tool_use('Read {"file_path":"x.py"}', position=1),
+        ]
+        # The default min_reads=2 needs 3 reads in total, so 2 reads pass
+        with_defaults = detect_all_extended(trace)
+        assert not any(
+            hit.pattern_type == ExtendedWasteType.OVER_READING.value
+            for hit in with_defaults
+        )
+
+        # With min_reads=1, 2 reads in total are enough to be reported
+        with_low_min = detect_all_extended(trace, over_reading_min_reads=1)
+        assert any(
+            hit.pattern_type == ExtendedWasteType.OVER_READING.value
+            for hit in with_low_min
+        )