From 2e19059e429d8bd302e7dd076c18f510b56bf325 Mon Sep 17 00:00:00 2001
From: Carlos Ruiz <carlos.ruiz@elembio.com>
Date: Sat, 28 Mar 2026 08:09:24 -0700
Subject: [PATCH 1/3] Addressing main PR comments

---
 multiqc/modules/bases2fastq/bases2fastq.py  | 249 +++++++++++---------
 multiqc/modules/bases2fastq/plot_runs.py    |  28 +--
 multiqc/modules/bases2fastq/plot_samples.py |  20 +-
 3 files changed, 164 insertions(+), 133 deletions(-)

diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py
index d170241de1..f2c9504634 100644
--- a/multiqc/modules/bases2fastq/bases2fastq.py
+++ b/multiqc/modules/bases2fastq/bases2fastq.py
@@ -1,39 +1,41 @@
-from collections import defaultdict
 import copy
-from itertools import chain
-import re
 import json
 import logging
 import random
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+import re
 import uuid
+from collections import defaultdict
+from itertools import chain
 from pathlib import Path
+from typing import Any, Callable
+
 from natsort import natsorted
+
 from multiqc import config
 from multiqc.base_module import BaseMultiqcModule, ModuleNoSamplesFound
-from multiqc.types import LoadedFileDict
-from multiqc.utils import mqc_colour
-
 from multiqc.modules.bases2fastq.plot_runs import (
+    plot_base_quality_by_cycle,
+    plot_base_quality_hist,
     plot_run_stats,
-    tabulate_manifest_stats,
     tabulate_index_assignment_stats,
-    tabulate_unassigned_index_stats,
-    tabulate_run_stats,
+    tabulate_manifest_stats,
     tabulate_project_stats,
-    plot_base_quality_hist,
-    plot_base_quality_by_cycle,
+    tabulate_run_stats,
+    tabulate_unassigned_index_stats,
 )
 from multiqc.modules.bases2fastq.plot_samples import (
-    tabulate_sample_stats,
-    sequence_content_plot,
-    plot_per_cycle_N_content,
     plot_adapter_content,
+    plot_per_cycle_N_content,
     plot_per_read_gc_hist,
+    sequence_content_plot,
+    tabulate_sample_stats,
 )
+from multiqc.types import LoadedFileDict
+from multiqc.utils import mqc_colour
 
 log = logging.getLogger(__name__)
 
+ELEMBIO_DOCS_URL = "https://docs.elembio.io/docs/bases2fastq/introduction/"
 
 # Default minimum polony threshold - samples below this are skipped
 DEFAULT_MIN_POLONIES = 1000
@@ -121,11 +123,11 @@ class MultiqcModule(BaseMultiqcModule):
 
     Data Structures
     ---------------
-    - `run_level_data`: Dict[run_name, run_stats] - Run-level QC metrics
-    - `run_level_samples`: Dict[sample_id, sample_stats] - Sample metrics from run-level
-    - `project_level_data`: Dict[project_name, project_stats] - Project-level QC metrics
-    - `project_level_samples`: Dict[sample_id, sample_stats] - Sample metrics from project-level
-    - `*_samples_to_project`: Dict[sample_id, project_name] - Maps samples to their projects
+    - `run_level_data`: dict[run_name, run_stats] - Run-level QC metrics
+    - `run_level_samples`: dict[sample_id, sample_stats] - Sample metrics from run-level
+    - `project_level_data`: dict[project_name, project_stats] - Project-level QC metrics
+    - `project_level_samples`: dict[sample_id, sample_stats] - Sample metrics from project-level
+    - `*_samples_to_project`: dict[sample_id, project_name] - Maps samples to their projects
 
     Sample Naming Convention
     ------------------------
@@ -151,7 +153,7 @@ def __init__(self):
         super(MultiqcModule, self).__init__(
             name="Bases2Fastq",
             anchor="bases2fastq",
-            href="https://docs.elembio.io/docs/bases2fastq/introduction/",
+            href=ELEMBIO_DOCS_URL,
             info="Demultiplexes and converts Element AVITI base calls into FASTQ files",
             doi="10.1038/s41587-023-01750-7",
         )
@@ -198,24 +200,24 @@ def _init_data_structures(self) -> None:
         """
         # File cache to avoid reading the same JSON files multiple times
         # Key: resolved file path, Value: parsed JSON data
-        self._file_cache: Dict[str, Any] = {}
+        self._file_cache: dict[str, Any] = {}
 
         # === Run-level data structures ===
         # Populated from <run_output>/RunStats.json
-        self.run_level_data: Dict[str, Any] = {}  # run_name -> full run stats
-        self.run_level_samples: Dict[str, Any] = {}  # sample_id -> sample stats
-        self.run_level_samples_to_project: Dict[str, str] = {}  # sample_id -> project name
+        self.run_level_data: dict[str, Any] = {}  # run_name -> full run stats
+        self.run_level_samples: dict[str, Any] = {}  # sample_id -> sample stats
+        self.run_level_samples_to_project: dict[str, str] = {}  # sample_id -> project name
 
         # === Project-level data structures ===
         # Populated from <run_output>/Samples/<project>/RunStats.json
-        self.project_level_data: Dict[str, Any] = {}  # project_name -> project stats
-        self.project_level_samples: Dict[str, Any] = {}  # sample_id -> sample stats
-        self.project_level_samples_to_project: Dict[str, str] = {}  # sample_id -> project name
+        self.project_level_data: dict[str, Any] = {}  # project_name -> project stats
+        self.project_level_samples: dict[str, Any] = {}  # sample_id -> sample stats
+        self.project_level_samples_to_project: dict[str, str] = {}  # sample_id -> project name
 
         # === Grouping structures for color assignment ===
-        self.group_dict: Dict[str, Any] = {}  # group_name -> list of members
-        self.group_lookup_dict: Dict[str, Any] = {}  # item -> group it belongs to
-        self.project_lookup_dict: Dict[str, Any] = {}  # sample -> project mapping
+        self.group_dict: dict[str, Any] = {}  # group_name -> list of members
+        self.group_lookup_dict: dict[str, Any] = {}  # item -> group it belongs to
+        self.project_lookup_dict: dict[str, Any] = {}  # sample -> project mapping
 
     def _validate_path(self, file_path: Path, base_directory: Path) -> bool:
         """
@@ -241,7 +243,7 @@ def _validate_path(self, file_path: Path, base_directory: Path) -> bool:
             )
             return False
 
-    def _read_json_file(self, file_path: Path, base_directory: Optional[Path] = None) -> Optional[Dict[str, Any]]:
+    def _read_json_file(self, file_path: Path, base_directory: Path | None = None) -> dict[str, Any] | None:
         """
         Read and parse a JSON file with caching.
 
@@ -265,7 +267,7 @@ def _read_json_file(self, file_path: Path, base_directory: Optional[Path] = None
             log.error(
                 f"{file_path.name} does not exist at {file_path}.\n"
                 f"Please visit Elembio online documentation for more information - "
-                f"https://docs.elembio.io/docs/bases2fastq/introduction/"
+                f"{ELEMBIO_DOCS_URL}"
             )
             return None
 
@@ -285,9 +287,12 @@ def _parse_and_validate_data(self) -> str:
         Returns:
             summary_path: The determined summary path ('run_level', 'project_level', or 'combined_level')
         """
-        # Collect log files once per pattern (find_log_files returns a generator)
-        run_level_log_files = list(self.find_log_files("bases2fastq/run"))
-        project_level_log_files = list(self.find_log_files("bases2fastq/project"))
+        # Collect log files once per pattern (find_log_files returns a generator).
+        # Stored as instance vars so downstream parsers can reuse them.
+        self._run_level_log_files = list(self.find_log_files("bases2fastq/run"))
+        self._project_level_log_files = list(self.find_log_files("bases2fastq/project"))
+        run_level_log_files = self._run_level_log_files
+        project_level_log_files = self._project_level_log_files
 
         if len(run_level_log_files) == 0 and len(project_level_log_files) == 0:
             error_msg = "No run- or project-level log files found within the Bases2Fastq results."
@@ -366,8 +371,8 @@ def _determine_summary_path(self) -> str:
 
     def _select_data_by_summary_path(
         self, summary_path: str
-    ) -> Tuple[
-        Dict[str, Any], Dict[str, Any], Dict[str, str], Dict[str, Any], Dict[str, Any], Dict[int, Dict[str, Any]]
+    ) -> tuple[
+        dict[str, Any], dict[str, Any], dict[str, str], dict[str, Any], dict[str, Any], dict[int, dict[str, Any]]
     ]:
         """
         Select the appropriate data sources based on the summary path.
@@ -377,31 +382,35 @@ def _select_data_by_summary_path(
                 index_assignment_data, unassigned_sequences)
         """
         if summary_path == "run_level":
+            manifest_log_files = list(self.find_log_files("bases2fastq/manifest"))
             return (
                 self.run_level_data,
                 self.run_level_samples,
                 self.run_level_samples_to_project,
-                self._parse_run_manifest("bases2fastq/manifest"),
-                self._parse_index_assignment("bases2fastq/manifest"),
-                self._parse_run_unassigned_sequences("bases2fastq/run"),
+                self._parse_run_manifest("bases2fastq/manifest", log_files=manifest_log_files),
+                self._parse_index_assignment("bases2fastq/manifest", log_files=manifest_log_files),
+                self._parse_run_unassigned_sequences("bases2fastq/run", log_files=self._run_level_log_files),
             )
         elif summary_path == "project_level":
             return (
                 self.project_level_data,
                 self.project_level_samples,
                 self.project_level_samples_to_project,
-                self._parse_run_manifest_in_project("bases2fastq/project"),
-                self._parse_index_assignment_in_project("bases2fastq/project"),
+                self._parse_run_manifest_in_project("bases2fastq/project", log_files=self._project_level_log_files),
+                self._parse_index_assignment_in_project("bases2fastq/project", log_files=self._project_level_log_files),
                 {},  # No unassigned sequences for project level
             )
         elif summary_path == "combined_level":
+            # Use run-level stats for the run table (more complete), but
+            # project-level samples for per-sample plots (properly split by project).
+            manifest_log_files = list(self.find_log_files("bases2fastq/manifest"))
             return (
                 self.run_level_data,
                 self.project_level_samples,
                 self.project_level_samples_to_project,
-                self._parse_run_manifest("bases2fastq/manifest"),
-                self._parse_index_assignment("bases2fastq/manifest"),
-                self._parse_run_unassigned_sequences("bases2fastq/run"),
+                self._parse_run_manifest("bases2fastq/manifest", log_files=manifest_log_files),
+                self._parse_index_assignment("bases2fastq/manifest", log_files=manifest_log_files),
+                self._parse_run_unassigned_sequences("bases2fastq/run", log_files=self._run_level_log_files),
             )
         else:
             error_msg = "No run- or project-level data was retained. No report will be generated."
@@ -409,16 +418,16 @@ def _select_data_by_summary_path(
             raise ModuleNoSamplesFound(error_msg)
 
     def _setup_colors(
-        self, sample_data: Dict[str, Any], samples_to_projects: Dict[str, str], summary_path: str
+        self, sample_data: dict[str, Any], samples_to_projects: dict[str, str], summary_path: str
     ) -> None:
         """Set up color schemes for groups and samples."""
         # Create run and project groups
-        run_groups: Dict[str, List] = defaultdict(list)
-        project_groups: Dict[str, List] = defaultdict(list)
-        ind_sample_groups: Dict[str, List] = defaultdict(list)
+        run_groups: dict[str, list] = defaultdict(list)
+        project_groups: dict[str, list] = defaultdict(list)
+        ind_sample_groups: dict[str, list] = defaultdict(list)
 
         for sample in natsorted(sample_data.keys()):
-            run_name, _ = sample.split("__")
+            run_name, _ = sample.split("__", maxsplit=1)
             run_groups[run_name].append(sample)
             sample_project = samples_to_projects.get(sample, "DefaultProject")
             project_groups[sample_project].append(sample)
@@ -448,7 +457,7 @@ def _setup_colors(
         }
 
         # Assign colors to samples
-        self.sample_color: Dict[str, str] = {}
+        self.sample_color: dict[str, str] = {}
         for sample_name in natsorted(samples_to_projects.keys()):
             if summary_path == "project_level" or len(project_groups) == 1:
                 sample_color = self.group_color[sample_name]
@@ -463,12 +472,12 @@ def _setup_colors(
     def _generate_plots(
         self,
         summary_path: str,
-        run_data: Dict[str, Any],
-        sample_data: Dict[str, Any],
-        samples_to_projects: Dict[str, str],
-        manifest_data: Dict[str, Any],
-        index_assignment_data: Dict[str, Any],
-        unassigned_sequences: Dict[int, Dict[str, Any]],
+        run_data: dict[str, Any],
+        sample_data: dict[str, Any],
+        samples_to_projects: dict[str, str],
+        manifest_data: dict[str, Any],
+        index_assignment_data: dict[str, Any],
+        unassigned_sequences: dict[int, dict[str, Any]],
     ) -> None:
         """Generate all plots and add sections to the report."""
         # QC metrics table
@@ -505,9 +514,9 @@ def get_uuid(self) -> str:
 
     def _extract_run_analysis_name(
         self,
-        data: Dict[str, Any],
+        data: dict[str, Any],
         source_info: str = "RunStats.json",
-    ) -> Optional[str]:
+    ) -> str | None:
         """
         Extract and validate run_analysis_name from data dict.
 
@@ -526,15 +535,15 @@ def _extract_run_analysis_name(
                 f"Error with {source_info}. Either RunName or AnalysisID is absent.\n"
                 f"RunName: {run_name}, AnalysisID: {analysis_id}\n"
                 f"Please visit Elembio online documentation for more information - "
-                f"https://docs.elembio.io/docs/bases2fastq/introduction/"
+                f"{ELEMBIO_DOCS_URL}"
             )
             return None
 
         return f"{run_name}-{analysis_id[0:4]}"
 
     def _parse_run_project_data(
-        self, data_source: str, log_files: Optional[List[LoadedFileDict[Any]]] = None
-    ) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, str]]:
+        self, data_source: str, log_files: list[LoadedFileDict[Any]] | None = None
+    ) -> tuple[dict[str, Any], dict[str, Any], dict[str, str]]:
         """
         Parse RunStats.json files to extract run/project and sample-level data.
 
@@ -548,16 +557,16 @@ def _parse_run_project_data(
 
         Returns:
             Tuple of:
-            - runs_global_data: Dict[run_name, run_stats] - Run/project level metrics
-            - runs_sample_data: Dict[sample_id, sample_stats] - Per-sample metrics
-            - sample_to_project: Dict[sample_id, project_name] - Sample-to-project mapping
+            - runs_global_data: dict[run_name, run_stats] - Run/project level metrics
+            - runs_sample_data: dict[sample_id, sample_stats] - Per-sample metrics
+            - sample_to_project: dict[sample_id, project_name] - Sample-to-project mapping
 
         Data Flow:
             RunStats.json -> parse -> filter samples by min_polonies -> populate dicts
         """
-        runs_global_data: Dict[str, Any] = {}
-        runs_sample_data: Dict[str, Any] = {}
-        sample_to_project: Dict[str, str] = {}
+        runs_global_data: dict[str, Any] = {}
+        runs_sample_data: dict[str, Any] = {}
+        sample_to_project: dict[str, str] = {}
         if data_source == "":
             return (runs_global_data, runs_sample_data, sample_to_project)
 
@@ -618,8 +627,8 @@ def _parse_run_project_data(
         return (runs_global_data, runs_sample_data, sample_to_project)
 
     def _extract_manifest_lane_settings(
-        self, run_manifest_data: Dict[str, Any], run_analysis_name: str
-    ) -> Dict[str, Dict[str, Any]]:
+        self, run_manifest_data: dict[str, Any], run_analysis_name: str
+    ) -> dict[str, dict[str, Any]]:
         """
         Extract per-lane settings from a parsed RunManifest.json Settings section.
 
@@ -628,10 +637,10 @@ def _extract_manifest_lane_settings(
             run_analysis_name: Run identifier for building run_lane keys
 
         Returns:
-            Dict[run_lane, settings] where run_lane = "{run_analysis_name} | L{lane_id}"
+            dict[run_lane, settings] where run_lane = "{run_analysis_name} | L{lane_id}"
             and settings contain Indexing, AdapterTrimType, R1/R2AdapterMinimumTrimmedLength
         """
-        result: Dict[str, Dict[str, Any]] = {}
+        result: dict[str, dict[str, Any]] = {}
         if "Settings" not in run_manifest_data:
             return result
         for lane_data in run_manifest_data["Settings"]:
@@ -659,7 +668,9 @@ def _extract_manifest_lane_settings(
             result[run_lane]["R2AdapterMinimumTrimmedLength"] = lane_data.get("R2AdapterMinimumTrimmedLength", "N/A")
         return result
 
-    def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]:
+    def _parse_run_manifest(
+        self, data_source: str, log_files: list[LoadedFileDict[Any]] | None = None
+    ) -> dict[str, Any]:
         """
         Parse RunManifest.json for run-level analysis to extract lane and adapter settings.
 
@@ -670,16 +681,18 @@ def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]:
 
         Args:
             data_source: Search pattern key for RunManifest.json files
+            log_files: Optional pre-collected list of file dicts from find_log_files.
 
         Returns:
-            Dict[run_lane, settings] where run_lane = "{run_name} | L{lane_id}"
+            dict[run_lane, settings] where run_lane = "{run_name} | L{lane_id}"
         """
-        runs_manifest_data: Dict[str, Dict[str, Any]] = {}
+        runs_manifest_data: dict[str, dict[str, Any]] = {}
 
         if data_source == "":
             return runs_manifest_data
 
-        for f in self.find_log_files(data_source):
+        files_to_process = log_files if log_files is not None else list(self.find_log_files(data_source))
+        for f in files_to_process:
             directory = f.get("root")
             if not directory:
                 continue
@@ -706,7 +719,9 @@ def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]:
 
         return runs_manifest_data
 
-    def _parse_run_manifest_in_project(self, data_source: str) -> Dict[str, Any]:
+    def _parse_run_manifest_in_project(
+        self, data_source: str, log_files: list[LoadedFileDict[Any]] | None = None
+    ) -> dict[str, Any]:
         """
         Parse RunManifest.json for project-level analysis.
 
@@ -718,21 +733,22 @@ def _parse_run_manifest_in_project(self, data_source: str) -> Dict[str, Any]:
             + ../../RunManifest.json (run-level manifest)
             -> Extract per-lane settings
         """
-        project_manifest_data: Dict[str, Dict[str, Any]] = {}
+        project_manifest_data: dict[str, dict[str, Any]] = {}
 
         if data_source == "":
             return project_manifest_data
 
-        for f in self.find_log_files(data_source):
+        files_to_process = log_files if log_files is not None else list(self.find_log_files(data_source))
+        for f in files_to_process:
             directory = f.get("root")
             if not directory:
                 continue
 
-            # Get RunManifest.json from run output root (check if it exists in the same directory or try two levels up)
+            # Resolve base_directory to the run output root (not the project subdirectory),
+            # since RunManifest.json lives at the run root. Path validation in _read_json_file
+            # will check the manifest path against this run root directory.
             base_directory = Path(directory).resolve()
-            if (base_directory / "RunManifest.json").exists():
-                base_directory = base_directory
-            else:
+            if not (base_directory / "RunManifest.json").exists():
                 base_directory = base_directory.parent.parent
             run_manifest = base_directory / "RunManifest.json"
             project_stats = json.loads(f["f"])
@@ -768,10 +784,10 @@ def _parse_run_manifest_in_project(self, data_source: str) -> Dict[str, Any]:
 
     def _build_index_assignment_from_stats(
         self,
-        stats_dict: Dict[str, Any],
+        stats_dict: dict[str, Any],
         run_analysis_name: str,
-        project: Optional[str] = None,
-    ) -> Tuple[Dict[str, Dict[str, Any]], int]:
+        project: str | None = None,
+    ) -> tuple[dict[str, dict[str, Any]], int]:
         """
         Build per-run index assignment dict from RunStats SampleStats/Occurrences.
 
@@ -779,7 +795,7 @@ def _build_index_assignment_from_stats(
             Tuple of (run_inner_dict, total_polonies). run_inner_dict is
             { merged_expected_sequence -> { SampleID, SamplePolonyCounts, PercentOfPolonies, Index1, Index2, ... } }
         """
-        run_inner: Dict[str, Dict[str, Any]] = {}
+        run_inner: dict[str, dict[str, Any]] = {}
         total_polonies = stats_dict.get("NumPoloniesBeforeTrimming", 0)
         if "SampleStats" not in stats_dict:
             return (run_inner, total_polonies)
@@ -796,7 +812,7 @@ def _build_index_assignment_from_stats(
                     log.error(f"Missing data needed to extract index assignment for sample {sample_id}. Skipping.")
                     continue
                 if sample_expected_seq not in run_inner:
-                    entry: Dict[str, Any] = {
+                    entry: dict[str, Any] = {
                         "SampleID": sample_id,
                         "SamplePolonyCounts": 0,
                         "PercentOfPolonies": float("nan"),
@@ -814,8 +830,8 @@ def _build_index_assignment_from_stats(
 
     def _merge_manifest_index_sequences(
         self,
-        sample_to_index_assignment: Dict[str, Any],
-        run_manifest_data: Dict[str, Any],
+        sample_to_index_assignment: dict[str, Any],
+        run_manifest_data: dict[str, Any],
         run_analysis_name: str,
     ) -> None:
         """Merge Index1/Index2 from RunManifest Samples into sample_to_index_assignment (mutates)."""
@@ -843,7 +859,9 @@ def _merge_manifest_index_sequences(
                 run_data[merged_indices]["Index1"] = index_1
                 run_data[merged_indices]["Index2"] = index_2
 
-    def _parse_run_unassigned_sequences(self, data_source: str) -> Dict[int, Dict[str, Any]]:
+    def _parse_run_unassigned_sequences(
+        self, data_source: str, log_files: list[LoadedFileDict[Any]] | None = None
+    ) -> dict[int, dict[str, Any]]:
         """
         Parse unassigned/unknown barcode sequences from run-level data.
 
@@ -854,11 +872,12 @@ def _parse_run_unassigned_sequences(self, data_source: str) -> Dict[int, Dict[st
             RunStats.json -> Lanes -> UnassignedSequences
             -> Extract: sequence, count, percentage of total polonies
         """
-        run_unassigned_sequences: Dict[int, Dict[str, Any]] = {}
+        run_unassigned_sequences: dict[int, dict[str, Any]] = {}
         if data_source == "":
             return run_unassigned_sequences
 
-        for f in self.find_log_files(data_source):
+        files_to_process = log_files if log_files is not None else list(self.find_log_files(data_source))
+        for f in files_to_process:
             data = json.loads(f["f"])
 
             # Get RunName and AnalysisID
@@ -902,7 +921,9 @@ def _parse_run_unassigned_sequences(self, data_source: str) -> Dict[int, Dict[st
 
         return run_unassigned_sequences
 
-    def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]:
+    def _parse_index_assignment(
+        self, manifest_data_source: str, log_files: list[LoadedFileDict[Any]] | None = None
+    ) -> dict[str, Any]:
         """
         Parse index assignment statistics for run-level analysis.
 
@@ -914,12 +935,13 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]:
             + RunManifest.json -> Samples -> index sequences (Index1, Index2)
             -> Combined index assignment table
         """
-        sample_to_index_assignment: Dict[str, Dict[str, Dict[str, Any]]] = {}
+        sample_to_index_assignment: dict[str, dict[str, dict[str, Any]]] = {}
 
         if manifest_data_source == "":
             return sample_to_index_assignment
 
-        for f in self.find_log_files(manifest_data_source):
+        files_to_process = log_files if log_files is not None else list(self.find_log_files(manifest_data_source))
+        for f in files_to_process:
             directory = f.get("root")
             if not directory:
                 continue
@@ -944,7 +966,7 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]:
                     f"Error, missing SampleStats in RunStats.json. Skipping index assignment metrics.\n"
                     f"Available keys: {list(run_stats.keys())}\n"
                     f"Please visit Elembio online documentation for more information - "
-                    f"https://docs.elembio.io/docs/bases2fastq/introduction/"
+                    f"{ELEMBIO_DOCS_URL}"
                 )
                 continue
 
@@ -964,7 +986,9 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]:
 
         return sample_to_index_assignment
 
-    def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any]:
+    def _parse_index_assignment_in_project(
+        self, data_source: str, log_files: list[LoadedFileDict[Any]] | None = None
+    ) -> dict[str, Any]:
         """
         Parse index assignment statistics for project-level analysis.
 
@@ -976,12 +1000,13 @@ def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any]
             + ../../RunManifest.json -> Samples -> index sequences
             -> Combined index assignment table
         """
-        sample_to_index_assignment: Dict[str, Dict[str, Dict[str, Any]]] = {}
+        sample_to_index_assignment: dict[str, dict[str, dict[str, Any]]] = {}
 
         if data_source == "":
             return sample_to_index_assignment
 
-        for f in self.find_log_files(data_source):
+        files_to_process = log_files if log_files is not None else list(self.find_log_files(data_source))
+        for f in files_to_process:
             directory = f.get("root")
             if not directory:
                 continue
@@ -1009,7 +1034,7 @@ def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any]
                     f"Error, missing SampleStats in RunStats.json. Skipping index assignment metrics.\n"
                     f"Available keys: {list(project_stats.keys())}\n"
                     f"Please visit Elembio online documentation for more information - "
-                    f"https://docs.elembio.io/docs/bases2fastq/introduction/"
+                    f"{ELEMBIO_DOCS_URL}"
                 )
                 continue
 
@@ -1032,16 +1057,21 @@ def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any]
 
         return sample_to_index_assignment
 
-    def add_run_plots(self, data: Dict[Any, Any], plot_functions: List[Callable]) -> None:
+    def add_run_plots(self, data: dict[Any, Any], plot_functions: list[Callable]) -> None:
+        if not data:
+            return
         for func in plot_functions:
             plot_html, plot_name, anchor, description, helptext, plot_data = func(data, self.run_color)
-            self.add_section(name=plot_name, plot=plot_html, anchor=anchor, description=description, helptext=helptext)
-            self.write_data_file(plot_data, f"base2fastq:{plot_name}")
+            if plot_html is not None:
+                self.add_section(name=plot_name, plot=plot_html, anchor=anchor, description=description, helptext=helptext)
+                self.write_data_file(plot_data, f"base2fastq:{plot_name}")
 
     def add_sample_plots(
-        self, data: Dict[str, Any], group_lookup: Dict[str, str], project_lookup: Dict[str, str]
+        self, data: dict[str, Any], group_lookup: dict[str, str], project_lookup: dict[str, str]
     ) -> None:
-        plot_functions: List[Callable] = [
+        if not data:
+            return
+        plot_functions: list[Callable] = [
             tabulate_sample_stats,
             sequence_content_plot,
             plot_per_cycle_N_content,
@@ -1052,5 +1082,6 @@ def add_sample_plots(
             plot_html, plot_name, anchor, description, helptext, plot_data = func(
                 data, group_lookup, project_lookup, self.sample_color
             )
-            self.add_section(name=plot_name, plot=plot_html, anchor=anchor, description=description, helptext=helptext)
-            self.write_data_file(plot_data, f"base2fastq:{plot_name}")
+            if plot_html is not None:
+                self.add_section(name=plot_name, plot=plot_html, anchor=anchor, description=description, helptext=helptext)
+                self.write_data_file(plot_data, f"base2fastq:{plot_name}")
diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py
index 39499dc723..c8ccbbde20 100644
--- a/multiqc/modules/bases2fastq/plot_runs.py
+++ b/multiqc/modules/bases2fastq/plot_runs.py
@@ -1,5 +1,5 @@
 import math
-from typing import Any, Dict, cast
+from typing import Any, cast
 
 from multiqc.plots import bargraph, linegraph, table
 from multiqc.plots.table_object import ColumnDict, SectionT
@@ -212,7 +212,7 @@ def tabulate_project_stats(run_data, color_dict):
         first_key = run_keys[0]
         project_header = f"{run_data[first_key]['Project']} | "
     plot_name = f"{project_header}Sequencing QC Metrics Table"
-    plot_html = table.plot(plot_content, cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
+    plot_html = table.plot(plot_content, cast(dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "project_run_qc_metrics_table"
     description = "QC metrics per run, per project"
     helptext = """
@@ -339,7 +339,7 @@ def tabulate_run_stats(run_data, color_dict):
     }
 
     plot_name = "Sequencing Run QC Metrics Table"
-    plot_html = table.plot(plot_content, cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
+    plot_html = table.plot(plot_content, cast(dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "run_qc_metrics_table"
     description = "QC metrics per run"
     helptext = """
@@ -398,7 +398,7 @@ def tabulate_manifest_stats(run_data, color_dict):
     }
 
     plot_name = "Run Manifest Table"
-    plot_html = table.plot(plot_content, cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
+    plot_html = table.plot(plot_content, cast(dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "run_manifest_metrics_table"
     description = "Run parameters used."
     helptext = """
@@ -429,7 +429,7 @@ def tabulate_index_assignment_stats(run_data, color_dict):
             if "Project" in sample_data:
                 sample_index_stats.update({"project": sample_data["Project"]})
                 project_present = True
-            sample_index_stats.update({"sample_name": sample_data["SampleID"].split("__")[1]})
+            sample_index_stats.update({"sample_name": sample_data["SampleID"].split("__", maxsplit=1)[1]})
             sample_index_stats.update({"index_1": sample_data["Index1"]})
             sample_index_stats.update({"index_2": sample_data["Index2"]})
             sample_index_stats.update({"assigned_polonies": sample_data["SamplePolonyCounts"]})
@@ -437,7 +437,7 @@ def tabulate_index_assignment_stats(run_data, color_dict):
             plot_content.update({index: sample_index_stats})
             index += 1
 
-    headers: Dict[str, Any] = {}
+    headers: dict[str, Any] = {}
     headers["run_name"] = {
         "title": "Run Name",
         "description": "Run Name.",
@@ -480,7 +480,7 @@ def tabulate_index_assignment_stats(run_data, color_dict):
     }
 
     plot_name = "Index Assignment Metrics"
-    plot_html = table.plot(cast(SectionT, plot_content), cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
+    plot_html = table.plot(cast(SectionT, plot_content), cast(dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "index_assignment_metrics"
     description = "Index assignment metrics."
     helptext = """
@@ -506,7 +506,7 @@ def tabulate_unassigned_index_stats(run_data, color_dict):
         - Polonies
         - % Polonies
     """
-    headers: Dict[str, Any] = {}
+    headers: dict[str, Any] = {}
     headers["Run Name"] = {
         "title": "Run Name",
         "description": "Run Name (Run ID + Analysis ID).",
@@ -544,7 +544,7 @@ def tabulate_unassigned_index_stats(run_data, color_dict):
     }
 
     plot_name = "Unassigned Indices Metrics"
-    plot_html = table.plot(cast(SectionT, run_data), cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
+    plot_html = table.plot(cast(SectionT, run_data), cast(dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "index_unassignment_metrics"
     description = "Index unassignment metrics."
     helptext = """
@@ -572,7 +572,7 @@ def _run_has_reads(run_entry: dict) -> bool:
 
 def plot_base_quality_hist(run_data, color_dict):
     # Prepare plot data for per base BQ histogram (skip runs without Reads)
-    bq_hist_dict: Dict[str, Dict[int, float]] = {}
+    bq_hist_dict: dict[str, dict[int, float]] = {}
     for s_name in natsorted(run_data.keys()):
         if not _run_has_reads(run_data[s_name]):
             continue
@@ -588,7 +588,7 @@ def plot_base_quality_hist(run_data, color_dict):
             bq_hist_dict[s_name].update({quality: R1R2_base_quality_counts[quality] / total_bases * 100})
 
     # Prepare plot data for per read average BQ histogram
-    per_read_quality_hist_dict: Dict[str, Dict[int, float]] = {}
+    per_read_quality_hist_dict: dict[str, dict[int, float]] = {}
     for s_name in natsorted(run_data.keys()):
         if not _run_has_reads(run_data[s_name]):
             continue
@@ -676,10 +676,10 @@ def plot_base_quality_by_cycle(run_data, color_dict):
             R1CycleNum = len(read0["Cycles"])
             r1r2_split = max(r1r2_split, R1CycleNum)
 
-    median_dict: Dict[str, Dict[int, float]] = {}
+    median_dict: dict[str, dict[int, float]] = {}
     for s_name in natsorted(runs_with_reads):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
-        cycle_dict: Dict[int, float] = {}
+        cycle_dict: dict[int, float] = {}
         R1CycleNum = len(run_data[s_name]["Reads"][0]["Cycles"])
         for cycle in run_data[s_name]["Reads"][0]["Cycles"]:
             cycle_no = int(cycle["Cycle"])
@@ -691,7 +691,7 @@ def plot_base_quality_by_cycle(run_data, color_dict):
         median_dict.update({s_name: cycle_dict})
 
     # Prepare plot data for mean BQ of each cycle
-    mean_dict: Dict[str, Dict[int, float]] = {}
+    mean_dict: dict[str, dict[int, float]] = {}
     for s_name in natsorted(runs_with_reads):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         # Update each sample cycle info
diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py
index e7987135bb..4b715c6819 100644
--- a/multiqc/modules/bases2fastq/plot_samples.py
+++ b/multiqc/modules/bases2fastq/plot_samples.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, cast
+from typing import Any, cast
 
 from natsort import natsorted
 
@@ -153,7 +153,7 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
     }
 
     plot_name = "Sample QC Metrics Table"
-    plot_html = table.plot(plot_content, cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
+    plot_html = table.plot(plot_content, cast(dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "sample_qc_metrics_table"
     description = "QC metrics per unique sample"
     helptext = """
@@ -177,7 +177,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
     """Create the epic HTML for the FastQC sequence content heatmap"""
     samples_with_reads = [s for s in sample_data if _sample_has_reads(sample_data[s])]
     if not samples_with_reads:
-        empty_data: Dict[str, Dict[int, Any]] = {}
+        empty_data: dict[str, dict[int, Any]] = {}
         plot_html = linegraph.plot(
             empty_data,
             pconfig={
@@ -190,7 +190,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
         return plot_html, "Per Cycle Base Content", "base_content", "", "", empty_data
 
     # Prep the data
-    data: Dict[str, Dict[int, Any]] = {}
+    data: dict[str, dict[int, Any]] = {}
 
     r1r2_split = 0
     for s_name in natsorted(samples_with_reads):
@@ -259,7 +259,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
 def plot_per_cycle_N_content(sample_data, group_lookup_dict, project_lookup_dict, color_dict):
     samples_with_reads = [s for s in sample_data if _sample_has_reads(sample_data[s])]
     if not samples_with_reads:
-        empty_data: Dict[str, Dict[int, float]] = {}
+        empty_data: dict[str, dict[int, float]] = {}
         plot_html = linegraph.plot(
             empty_data,
             pconfig={
@@ -271,7 +271,7 @@ def plot_per_cycle_N_content(sample_data, group_lookup_dict, project_lookup_dict
         )
         return plot_html, "Per Cycle N Content", "n_content", "", "", empty_data
 
-    data: Dict[str, Dict[int, float]] = {}
+    data: dict[str, dict[int, float]] = {}
     r1r2_split = 0
     for s_name in natsorted(samples_with_reads):
         data[s_name] = {}
@@ -344,7 +344,7 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s
     """
     samples_with_reads = [s for s in sample_data if _sample_has_reads(sample_data[s])]
     if not samples_with_reads:
-        empty_gc_hist: Dict[str, Dict[float, float]] = {}
+        empty_gc_hist: dict[str, dict[float, float]] = {}
         plot_html = linegraph.plot(
             empty_gc_hist,
             pconfig={
@@ -356,7 +356,7 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s
         )
         return plot_html, "Per Sample GC Histogram", "gc_histogram", "", "", empty_gc_hist
 
-    gc_hist_dict: Dict[str, Dict[float, float]] = {}
+    gc_hist_dict: dict[str, dict[float, float]] = {}
     for s_name in natsorted(samples_with_reads):
         r0 = sample_data[s_name]["Reads"][0]
         if "PerReadGCCountHistogram" not in r0:
@@ -421,7 +421,7 @@ def plot_adapter_content(sample_data, group_lookup_dict, project_lookup_dict, sa
     """
     samples_with_reads = [s for s in sample_data if _sample_has_reads(sample_data[s])]
     if not samples_with_reads:
-        empty_content: Dict[str, Dict[int, float]] = {}
+        empty_content: dict[str, dict[int, float]] = {}
         plot_html = linegraph.plot(
             empty_content,
             pconfig={
@@ -433,7 +433,7 @@ def plot_adapter_content(sample_data, group_lookup_dict, project_lookup_dict, sa
         )
         return plot_html, "Per Sample Adapter Content", "adapter_content", "", "", empty_content
 
-    plot_content: Dict[str, Dict[int, float]] = {}
+    plot_content: dict[str, dict[int, float]] = {}
 
     r1r2_split = 0
     for s_name in natsorted(samples_with_reads):

From eee4169c370fbca2e145e25fc05b2626aad7af32 Mon Sep 17 00:00:00 2001
From: Carlos Ruiz <carlos.ruiz@elembio.com>
Date: Sat, 28 Mar 2026 08:13:31 -0700
Subject: [PATCH 2/3] Lint

---
 multiqc/modules/bases2fastq/bases2fastq.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py
index f2c9504634..7fe21a1084 100644
--- a/multiqc/modules/bases2fastq/bases2fastq.py
+++ b/multiqc/modules/bases2fastq/bases2fastq.py
@@ -1063,7 +1063,9 @@ def add_run_plots(self, data: dict[Any, Any], plot_functions: list[Callable]) ->
         for func in plot_functions:
             plot_html, plot_name, anchor, description, helptext, plot_data = func(data, self.run_color)
             if plot_html is not None:
-                self.add_section(name=plot_name, plot=plot_html, anchor=anchor, description=description, helptext=helptext)
+                self.add_section(
+                    name=plot_name, plot=plot_html, anchor=anchor, description=description, helptext=helptext
+                )
                 self.write_data_file(plot_data, f"base2fastq:{plot_name}")
 
     def add_sample_plots(
@@ -1083,5 +1085,7 @@ def add_sample_plots(
                 data, group_lookup, project_lookup, self.sample_color
             )
             if plot_html is not None:
-                self.add_section(name=plot_name, plot=plot_html, anchor=anchor, description=description, helptext=helptext)
+                self.add_section(
+                    name=plot_name, plot=plot_html, anchor=anchor, description=description, helptext=helptext
+                )
                 self.write_data_file(plot_data, f"base2fastq:{plot_name}")

From 3e7bf3ad566172ce0be86e3082cbe289d5f89b0d Mon Sep 17 00:00:00 2001
From: Carlos Ruiz <carlos.ruiz@elembio.com>
Date: Sat, 28 Mar 2026 08:21:32 -0700
Subject: [PATCH 3/3] Reverted new typing syntax

---
 multiqc/modules/bases2fastq/bases2fastq.py  | 140 ++++++++++----------
 multiqc/modules/bases2fastq/plot_runs.py    |  28 ++--
 multiqc/modules/bases2fastq/plot_samples.py |  20 +--
 3 files changed, 94 insertions(+), 94 deletions(-)

diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py
index 7fe21a1084..a42b075bd7 100644
--- a/multiqc/modules/bases2fastq/bases2fastq.py
+++ b/multiqc/modules/bases2fastq/bases2fastq.py
@@ -7,7 +7,7 @@
 from collections import defaultdict
 from itertools import chain
 from pathlib import Path
-from typing import Any, Callable
+from typing import Any, Callable, Dict, List, Optional, Tuple
 
 from natsort import natsorted
 
@@ -123,11 +123,11 @@ class MultiqcModule(BaseMultiqcModule):
 
     Data Structures
     ---------------
-    - `run_level_data`: dict[run_name, run_stats] - Run-level QC metrics
-    - `run_level_samples`: dict[sample_id, sample_stats] - Sample metrics from run-level
-    - `project_level_data`: dict[project_name, project_stats] - Project-level QC metrics
-    - `project_level_samples`: dict[sample_id, sample_stats] - Sample metrics from project-level
-    - `*_samples_to_project`: dict[sample_id, project_name] - Maps samples to their projects
+    - `run_level_data`: Dict[run_name, run_stats] - Run-level QC metrics
+    - `run_level_samples`: Dict[sample_id, sample_stats] - Sample metrics from run-level
+    - `project_level_data`: Dict[project_name, project_stats] - Project-level QC metrics
+    - `project_level_samples`: Dict[sample_id, sample_stats] - Sample metrics from project-level
+    - `*_samples_to_project`: Dict[sample_id, project_name] - Maps samples to their projects
 
     Sample Naming Convention
     ------------------------
@@ -200,24 +200,24 @@ def _init_data_structures(self) -> None:
         """
         # File cache to avoid reading the same JSON files multiple times
         # Key: resolved file path, Value: parsed JSON data
-        self._file_cache: dict[str, Any] = {}
+        self._file_cache: Dict[str, Any] = {}
 
         # === Run-level data structures ===
         # Populated from <run_output>/RunStats.json
-        self.run_level_data: dict[str, Any] = {}  # run_name -> full run stats
-        self.run_level_samples: dict[str, Any] = {}  # sample_id -> sample stats
-        self.run_level_samples_to_project: dict[str, str] = {}  # sample_id -> project name
+        self.run_level_data: Dict[str, Any] = {}  # run_name -> full run stats
+        self.run_level_samples: Dict[str, Any] = {}  # sample_id -> sample stats
+        self.run_level_samples_to_project: Dict[str, str] = {}  # sample_id -> project name
 
         # === Project-level data structures ===
         # Populated from <run_output>/Samples/<project>/RunStats.json
-        self.project_level_data: dict[str, Any] = {}  # project_name -> project stats
-        self.project_level_samples: dict[str, Any] = {}  # sample_id -> sample stats
-        self.project_level_samples_to_project: dict[str, str] = {}  # sample_id -> project name
+        self.project_level_data: Dict[str, Any] = {}  # project_name -> project stats
+        self.project_level_samples: Dict[str, Any] = {}  # sample_id -> sample stats
+        self.project_level_samples_to_project: Dict[str, str] = {}  # sample_id -> project name
 
         # === Grouping structures for color assignment ===
-        self.group_dict: dict[str, Any] = {}  # group_name -> list of members
-        self.group_lookup_dict: dict[str, Any] = {}  # item -> group it belongs to
-        self.project_lookup_dict: dict[str, Any] = {}  # sample -> project mapping
+        self.group_dict: Dict[str, Any] = {}  # group_name -> list of members
+        self.group_lookup_dict: Dict[str, Any] = {}  # item -> group it belongs to
+        self.project_lookup_dict: Dict[str, Any] = {}  # sample -> project mapping
 
     def _validate_path(self, file_path: Path, base_directory: Path) -> bool:
         """
@@ -243,7 +243,7 @@ def _validate_path(self, file_path: Path, base_directory: Path) -> bool:
             )
             return False
 
-    def _read_json_file(self, file_path: Path, base_directory: Path | None = None) -> dict[str, Any] | None:
+    def _read_json_file(self, file_path: Path, base_directory: Optional[Path] = None) -> Optional[Dict[str, Any]]:
         """
         Read and parse a JSON file with caching.
 
@@ -371,8 +371,8 @@ def _determine_summary_path(self) -> str:
 
     def _select_data_by_summary_path(
         self, summary_path: str
-    ) -> tuple[
-        dict[str, Any], dict[str, Any], dict[str, str], dict[str, Any], dict[str, Any], dict[int, dict[str, Any]]
+    ) -> Tuple[
+        Dict[str, Any], Dict[str, Any], Dict[str, str], Dict[str, Any], Dict[str, Any], Dict[int, Dict[str, Any]]
     ]:
         """
         Select the appropriate data sources based on the summary path.
@@ -418,13 +418,13 @@ def _select_data_by_summary_path(
             raise ModuleNoSamplesFound(error_msg)
 
     def _setup_colors(
-        self, sample_data: dict[str, Any], samples_to_projects: dict[str, str], summary_path: str
+        self, sample_data: Dict[str, Any], samples_to_projects: Dict[str, str], summary_path: str
     ) -> None:
         """Set up color schemes for groups and samples."""
         # Create run and project groups
-        run_groups: dict[str, list] = defaultdict(list)
-        project_groups: dict[str, list] = defaultdict(list)
-        ind_sample_groups: dict[str, list] = defaultdict(list)
+        run_groups: Dict[str, List] = defaultdict(list)
+        project_groups: Dict[str, List] = defaultdict(list)
+        ind_sample_groups: Dict[str, List] = defaultdict(list)
 
         for sample in natsorted(sample_data.keys()):
             run_name, _ = sample.split("__", maxsplit=1)
@@ -457,7 +457,7 @@ def _setup_colors(
         }
 
         # Assign colors to samples
-        self.sample_color: dict[str, str] = {}
+        self.sample_color: Dict[str, str] = {}
         for sample_name in natsorted(samples_to_projects.keys()):
             if summary_path == "project_level" or len(project_groups) == 1:
                 sample_color = self.group_color[sample_name]
@@ -472,12 +472,12 @@ def _setup_colors(
     def _generate_plots(
         self,
         summary_path: str,
-        run_data: dict[str, Any],
-        sample_data: dict[str, Any],
-        samples_to_projects: dict[str, str],
-        manifest_data: dict[str, Any],
-        index_assignment_data: dict[str, Any],
-        unassigned_sequences: dict[int, dict[str, Any]],
+        run_data: Dict[str, Any],
+        sample_data: Dict[str, Any],
+        samples_to_projects: Dict[str, str],
+        manifest_data: Dict[str, Any],
+        index_assignment_data: Dict[str, Any],
+        unassigned_sequences: Dict[int, Dict[str, Any]],
     ) -> None:
         """Generate all plots and add sections to the report."""
         # QC metrics table
@@ -514,9 +514,9 @@ def get_uuid(self) -> str:
 
     def _extract_run_analysis_name(
         self,
-        data: dict[str, Any],
+        data: Dict[str, Any],
         source_info: str = "RunStats.json",
-    ) -> str | None:
+    ) -> Optional[str]:
         """
         Extract and validate run_analysis_name from data dict.
 
@@ -542,8 +542,8 @@ def _extract_run_analysis_name(
         return f"{run_name}-{analysis_id[0:4]}"
 
     def _parse_run_project_data(
-        self, data_source: str, log_files: list[LoadedFileDict[Any]] | None = None
-    ) -> tuple[dict[str, Any], dict[str, Any], dict[str, str]]:
+        self, data_source: str, log_files: Optional[List[LoadedFileDict[Any]]] = None
+    ) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, str]]:
         """
         Parse RunStats.json files to extract run/project and sample-level data.
 
@@ -557,16 +557,16 @@ def _parse_run_project_data(
 
         Returns:
             Tuple of:
-            - runs_global_data: dict[run_name, run_stats] - Run/project level metrics
-            - runs_sample_data: dict[sample_id, sample_stats] - Per-sample metrics
-            - sample_to_project: dict[sample_id, project_name] - Sample-to-project mapping
+            - runs_global_data: Dict[run_name, run_stats] - Run/project level metrics
+            - runs_sample_data: Dict[sample_id, sample_stats] - Per-sample metrics
+            - sample_to_project: Dict[sample_id, project_name] - Sample-to-project mapping
 
         Data Flow:
             RunStats.json -> parse -> filter samples by min_polonies -> populate dicts
         """
-        runs_global_data: dict[str, Any] = {}
-        runs_sample_data: dict[str, Any] = {}
-        sample_to_project: dict[str, str] = {}
+        runs_global_data: Dict[str, Any] = {}
+        runs_sample_data: Dict[str, Any] = {}
+        sample_to_project: Dict[str, str] = {}
         if data_source == "":
             return (runs_global_data, runs_sample_data, sample_to_project)
 
@@ -627,8 +627,8 @@ def _parse_run_project_data(
         return (runs_global_data, runs_sample_data, sample_to_project)
 
     def _extract_manifest_lane_settings(
-        self, run_manifest_data: dict[str, Any], run_analysis_name: str
-    ) -> dict[str, dict[str, Any]]:
+        self, run_manifest_data: Dict[str, Any], run_analysis_name: str
+    ) -> Dict[str, Dict[str, Any]]:
         """
         Extract per-lane settings from a parsed RunManifest.json Settings section.
 
@@ -637,10 +637,10 @@ def _extract_manifest_lane_settings(
             run_analysis_name: Run identifier for building run_lane keys
 
         Returns:
-            dict[run_lane, settings] where run_lane = "{run_analysis_name} | L{lane_id}"
+            Dict[run_lane, settings] where run_lane = "{run_analysis_name} | L{lane_id}"
             and settings contain Indexing, AdapterTrimType, R1/R2AdapterMinimumTrimmedLength
         """
-        result: dict[str, dict[str, Any]] = {}
+        result: Dict[str, Dict[str, Any]] = {}
         if "Settings" not in run_manifest_data:
             return result
         for lane_data in run_manifest_data["Settings"]:
@@ -669,8 +669,8 @@ def _extract_manifest_lane_settings(
         return result
 
     def _parse_run_manifest(
-        self, data_source: str, log_files: list[LoadedFileDict[Any]] | None = None
-    ) -> dict[str, Any]:
+        self, data_source: str, log_files: Optional[List[LoadedFileDict[Any]]] = None
+    ) -> Dict[str, Any]:
         """
         Parse RunManifest.json for run-level analysis to extract lane and adapter settings.
 
@@ -684,9 +684,9 @@ def _parse_run_manifest(
             log_files: Optional pre-collected list of file dicts from find_log_files.
 
         Returns:
-            dict[run_lane, settings] where run_lane = "{run_name} | L{lane_id}"
+            Dict[run_lane, settings] where run_lane = "{run_name} | L{lane_id}"
         """
-        runs_manifest_data: dict[str, dict[str, Any]] = {}
+        runs_manifest_data: Dict[str, Dict[str, Any]] = {}
 
         if data_source == "":
             return runs_manifest_data
@@ -720,8 +720,8 @@ def _parse_run_manifest(
         return runs_manifest_data
 
     def _parse_run_manifest_in_project(
-        self, data_source: str, log_files: list[LoadedFileDict[Any]] | None = None
-    ) -> dict[str, Any]:
+        self, data_source: str, log_files: Optional[List[LoadedFileDict[Any]]] = None
+    ) -> Dict[str, Any]:
         """
         Parse RunManifest.json for project-level analysis.
 
@@ -733,7 +733,7 @@ def _parse_run_manifest_in_project(
             + ../../RunManifest.json (run-level manifest)
             -> Extract per-lane settings
         """
-        project_manifest_data: dict[str, dict[str, Any]] = {}
+        project_manifest_data: Dict[str, Dict[str, Any]] = {}
 
         if data_source == "":
             return project_manifest_data
@@ -784,10 +784,10 @@ def _parse_run_manifest_in_project(
 
     def _build_index_assignment_from_stats(
         self,
-        stats_dict: dict[str, Any],
+        stats_dict: Dict[str, Any],
         run_analysis_name: str,
-        project: str | None = None,
-    ) -> tuple[dict[str, dict[str, Any]], int]:
+        project: Optional[str] = None,
+    ) -> Tuple[Dict[str, Dict[str, Any]], int]:
         """
         Build per-run index assignment dict from RunStats SampleStats/Occurrences.
 
@@ -795,7 +795,7 @@ def _build_index_assignment_from_stats(
             Tuple of (run_inner_dict, total_polonies). run_inner_dict is
             { merged_expected_sequence -> { SampleID, SamplePolonyCounts, PercentOfPolonies, Index1, Index2, ... } }
         """
-        run_inner: dict[str, dict[str, Any]] = {}
+        run_inner: Dict[str, Dict[str, Any]] = {}
         total_polonies = stats_dict.get("NumPoloniesBeforeTrimming", 0)
         if "SampleStats" not in stats_dict:
             return (run_inner, total_polonies)
@@ -812,7 +812,7 @@ def _build_index_assignment_from_stats(
                     log.error(f"Missing data needed to extract index assignment for sample {sample_id}. Skipping.")
                     continue
                 if sample_expected_seq not in run_inner:
-                    entry: dict[str, Any] = {
+                    entry: Dict[str, Any] = {
                         "SampleID": sample_id,
                         "SamplePolonyCounts": 0,
                         "PercentOfPolonies": float("nan"),
@@ -830,8 +830,8 @@ def _build_index_assignment_from_stats(
 
     def _merge_manifest_index_sequences(
         self,
-        sample_to_index_assignment: dict[str, Any],
-        run_manifest_data: dict[str, Any],
+        sample_to_index_assignment: Dict[str, Any],
+        run_manifest_data: Dict[str, Any],
         run_analysis_name: str,
     ) -> None:
         """Merge Index1/Index2 from RunManifest Samples into sample_to_index_assignment (mutates)."""
@@ -860,8 +860,8 @@ def _merge_manifest_index_sequences(
                 run_data[merged_indices]["Index2"] = index_2
 
     def _parse_run_unassigned_sequences(
-        self, data_source: str, log_files: list[LoadedFileDict[Any]] | None = None
-    ) -> dict[int, dict[str, Any]]:
+        self, data_source: str, log_files: Optional[List[LoadedFileDict[Any]]] = None
+    ) -> Dict[int, Dict[str, Any]]:
         """
         Parse unassigned/unknown barcode sequences from run-level data.
 
@@ -872,7 +872,7 @@ def _parse_run_unassigned_sequences(
             RunStats.json -> Lanes -> UnassignedSequences
             -> Extract: sequence, count, percentage of total polonies
         """
-        run_unassigned_sequences: dict[int, dict[str, Any]] = {}
+        run_unassigned_sequences: Dict[int, Dict[str, Any]] = {}
         if data_source == "":
             return run_unassigned_sequences
 
@@ -922,8 +922,8 @@ def _parse_run_unassigned_sequences(
         return run_unassigned_sequences
 
     def _parse_index_assignment(
-        self, manifest_data_source: str, log_files: list[LoadedFileDict[Any]] | None = None
-    ) -> dict[str, Any]:
+        self, manifest_data_source: str, log_files: Optional[List[LoadedFileDict[Any]]] = None
+    ) -> Dict[str, Any]:
         """
         Parse index assignment statistics for run-level analysis.
 
@@ -935,7 +935,7 @@ def _parse_index_assignment(
             + RunManifest.json -> Samples -> index sequences (Index1, Index2)
             -> Combined index assignment table
         """
-        sample_to_index_assignment: dict[str, dict[str, dict[str, Any]]] = {}
+        sample_to_index_assignment: Dict[str, Dict[str, Dict[str, Any]]] = {}
 
         if manifest_data_source == "":
             return sample_to_index_assignment
@@ -987,8 +987,8 @@ def _parse_index_assignment(
         return sample_to_index_assignment
 
     def _parse_index_assignment_in_project(
-        self, data_source: str, log_files: list[LoadedFileDict[Any]] | None = None
-    ) -> dict[str, Any]:
+        self, data_source: str, log_files: Optional[List[LoadedFileDict[Any]]] = None
+    ) -> Dict[str, Any]:
         """
         Parse index assignment statistics for project-level analysis.
 
@@ -1000,7 +1000,7 @@ def _parse_index_assignment_in_project(
             + ../../RunManifest.json -> Samples -> index sequences
             -> Combined index assignment table
         """
-        sample_to_index_assignment: dict[str, dict[str, dict[str, Any]]] = {}
+        sample_to_index_assignment: Dict[str, Dict[str, Dict[str, Any]]] = {}
 
         if data_source == "":
             return sample_to_index_assignment
@@ -1057,7 +1057,7 @@ def _parse_index_assignment_in_project(
 
         return sample_to_index_assignment
 
-    def add_run_plots(self, data: dict[Any, Any], plot_functions: list[Callable]) -> None:
+    def add_run_plots(self, data: Dict[Any, Any], plot_functions: List[Callable]) -> None:
         if not data:
             return
         for func in plot_functions:
@@ -1069,11 +1069,11 @@ def add_run_plots(self, data: dict[Any, Any], plot_functions: list[Callable]) ->
                 self.write_data_file(plot_data, f"base2fastq:{plot_name}")
 
     def add_sample_plots(
-        self, data: dict[str, Any], group_lookup: dict[str, str], project_lookup: dict[str, str]
+        self, data: Dict[str, Any], group_lookup: Dict[str, str], project_lookup: Dict[str, str]
     ) -> None:
         if not data:
             return
-        plot_functions: list[Callable] = [
+        plot_functions: List[Callable] = [
             tabulate_sample_stats,
             sequence_content_plot,
             plot_per_cycle_N_content,
diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py
index c8ccbbde20..5aff1739a7 100644
--- a/multiqc/modules/bases2fastq/plot_runs.py
+++ b/multiqc/modules/bases2fastq/plot_runs.py
@@ -1,5 +1,5 @@
 import math
-from typing import Any, cast
+from typing import Any, Dict, List, cast
 
 from multiqc.plots import bargraph, linegraph, table
 from multiqc.plots.table_object import ColumnDict, SectionT
@@ -212,7 +212,7 @@ def tabulate_project_stats(run_data, color_dict):
         first_key = run_keys[0]
         project_header = f"{run_data[first_key]['Project']} | "
     plot_name = f"{project_header}Sequencing QC Metrics Table"
-    plot_html = table.plot(plot_content, cast(dict[Any, ColumnDict], headers), pconfig=pconfig)
+    plot_html = table.plot(plot_content, cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "project_run_qc_metrics_table"
     description = "QC metrics per run, per project"
     helptext = """
@@ -339,7 +339,7 @@ def tabulate_run_stats(run_data, color_dict):
     }
 
     plot_name = "Sequencing Run QC Metrics Table"
-    plot_html = table.plot(plot_content, cast(dict[Any, ColumnDict], headers), pconfig=pconfig)
+    plot_html = table.plot(plot_content, cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "run_qc_metrics_table"
     description = "QC metrics per run"
     helptext = """
@@ -398,7 +398,7 @@ def tabulate_manifest_stats(run_data, color_dict):
     }
 
     plot_name = "Run Manifest Table"
-    plot_html = table.plot(plot_content, cast(dict[Any, ColumnDict], headers), pconfig=pconfig)
+    plot_html = table.plot(plot_content, cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "run_manifest_metrics_table"
     description = "Run parameters used."
     helptext = """
@@ -437,7 +437,7 @@ def tabulate_index_assignment_stats(run_data, color_dict):
             plot_content.update({index: sample_index_stats})
             index += 1
 
-    headers: dict[str, Any] = {}
+    headers: Dict[str, Any] = {}
     headers["run_name"] = {
         "title": "Run Name",
         "description": "Run Name.",
@@ -480,7 +480,7 @@ def tabulate_index_assignment_stats(run_data, color_dict):
     }
 
     plot_name = "Index Assignment Metrics"
-    plot_html = table.plot(cast(SectionT, plot_content), cast(dict[Any, ColumnDict], headers), pconfig=pconfig)
+    plot_html = table.plot(cast(SectionT, plot_content), cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "index_assignment_metrics"
     description = "Index assignment metrics."
     helptext = """
@@ -506,7 +506,7 @@ def tabulate_unassigned_index_stats(run_data, color_dict):
         - Polonies
         - % Polonies
     """
-    headers: dict[str, Any] = {}
+    headers: Dict[str, Any] = {}
     headers["Run Name"] = {
         "title": "Run Name",
         "description": "Run Name (Run ID + Analysis ID).",
@@ -544,7 +544,7 @@ def tabulate_unassigned_index_stats(run_data, color_dict):
     }
 
     plot_name = "Unassigned Indices Metrics"
-    plot_html = table.plot(cast(SectionT, run_data), cast(dict[Any, ColumnDict], headers), pconfig=pconfig)
+    plot_html = table.plot(cast(SectionT, run_data), cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "index_unassignment_metrics"
     description = "Index unassignment metrics."
     helptext = """
@@ -572,7 +572,7 @@ def _run_has_reads(run_entry: dict) -> bool:
 
 def plot_base_quality_hist(run_data, color_dict):
     # Prepare plot data for per base BQ histogram (skip runs without Reads)
-    bq_hist_dict: dict[str, dict[int, float]] = {}
+    bq_hist_dict: Dict[str, Dict[int, float]] = {}
     for s_name in natsorted(run_data.keys()):
         if not _run_has_reads(run_data[s_name]):
             continue
@@ -588,7 +588,7 @@ def plot_base_quality_hist(run_data, color_dict):
             bq_hist_dict[s_name].update({quality: R1R2_base_quality_counts[quality] / total_bases * 100})
 
     # Prepare plot data for per read average BQ histogram
-    per_read_quality_hist_dict: dict[str, dict[int, float]] = {}
+    per_read_quality_hist_dict: Dict[str, Dict[int, float]] = {}
     for s_name in natsorted(run_data.keys()):
         if not _run_has_reads(run_data[s_name]):
             continue
@@ -655,7 +655,7 @@ def plot_base_quality_by_cycle(run_data, color_dict):
     # Prepare plot data for median BQ of each cycle (skip runs without Reads/Cycles)
     runs_with_reads = [s for s in run_data if _run_has_reads(run_data[s]) and run_data[s]["Reads"][0].get("Cycles")]
     if not runs_with_reads:
-        plot_content: list[Any] = []
+        plot_content: List[Any] = []
         plot_html = linegraph.plot(
             plot_content,
             pconfig={"id": "bases2fastq_run_bq_by_cycle", "title": "bases2fastq: Run Base Quality by Cycle"},
@@ -676,10 +676,10 @@ def plot_base_quality_by_cycle(run_data, color_dict):
             R1CycleNum = len(read0["Cycles"])
             r1r2_split = max(r1r2_split, R1CycleNum)
 
-    median_dict: dict[str, dict[int, float]] = {}
+    median_dict: Dict[str, Dict[int, float]] = {}
     for s_name in natsorted(runs_with_reads):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
-        cycle_dict: dict[int, float] = {}
+        cycle_dict: Dict[int, float] = {}
         R1CycleNum = len(run_data[s_name]["Reads"][0]["Cycles"])
         for cycle in run_data[s_name]["Reads"][0]["Cycles"]:
             cycle_no = int(cycle["Cycle"])
@@ -691,7 +691,7 @@ def plot_base_quality_by_cycle(run_data, color_dict):
         median_dict.update({s_name: cycle_dict})
 
     # Prepare plot data for mean BQ of each cycle
-    mean_dict: dict[str, dict[int, float]] = {}
+    mean_dict: Dict[str, Dict[int, float]] = {}
     for s_name in natsorted(runs_with_reads):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         # Update each sample cycle info
diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py
index 4b715c6819..e7987135bb 100644
--- a/multiqc/modules/bases2fastq/plot_samples.py
+++ b/multiqc/modules/bases2fastq/plot_samples.py
@@ -1,4 +1,4 @@
-from typing import Any, cast
+from typing import Any, Dict, cast
 
 from natsort import natsorted
 
@@ -153,7 +153,7 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
     }
 
     plot_name = "Sample QC Metrics Table"
-    plot_html = table.plot(plot_content, cast(dict[Any, ColumnDict], headers), pconfig=pconfig)
+    plot_html = table.plot(plot_content, cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "sample_qc_metrics_table"
     description = "QC metrics per unique sample"
     helptext = """
@@ -177,7 +177,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
     """Create the epic HTML for the FastQC sequence content heatmap"""
     samples_with_reads = [s for s in sample_data if _sample_has_reads(sample_data[s])]
     if not samples_with_reads:
-        empty_data: dict[str, dict[int, Any]] = {}
+        empty_data: Dict[str, Dict[int, Any]] = {}
         plot_html = linegraph.plot(
             empty_data,
             pconfig={
@@ -190,7 +190,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
         return plot_html, "Per Cycle Base Content", "base_content", "", "", empty_data
 
     # Prep the data
-    data: dict[str, dict[int, Any]] = {}
+    data: Dict[str, Dict[int, Any]] = {}
 
     r1r2_split = 0
     for s_name in natsorted(samples_with_reads):
@@ -259,7 +259,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
 def plot_per_cycle_N_content(sample_data, group_lookup_dict, project_lookup_dict, color_dict):
     samples_with_reads = [s for s in sample_data if _sample_has_reads(sample_data[s])]
     if not samples_with_reads:
-        empty_data: dict[str, dict[int, float]] = {}
+        empty_data: Dict[str, Dict[int, float]] = {}
         plot_html = linegraph.plot(
             empty_data,
             pconfig={
@@ -271,7 +271,7 @@ def plot_per_cycle_N_content(sample_data, group_lookup_dict, project_lookup_dict
         )
         return plot_html, "Per Cycle N Content", "n_content", "", "", empty_data
 
-    data: dict[str, dict[int, float]] = {}
+    data: Dict[str, Dict[int, float]] = {}
     r1r2_split = 0
     for s_name in natsorted(samples_with_reads):
         data[s_name] = {}
@@ -344,7 +344,7 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s
     """
     samples_with_reads = [s for s in sample_data if _sample_has_reads(sample_data[s])]
     if not samples_with_reads:
-        empty_gc_hist: dict[str, dict[float, float]] = {}
+        empty_gc_hist: Dict[str, Dict[float, float]] = {}
         plot_html = linegraph.plot(
             empty_gc_hist,
             pconfig={
@@ -356,7 +356,7 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s
         )
         return plot_html, "Per Sample GC Histogram", "gc_histogram", "", "", empty_gc_hist
 
-    gc_hist_dict: dict[str, dict[float, float]] = {}
+    gc_hist_dict: Dict[str, Dict[float, float]] = {}
     for s_name in natsorted(samples_with_reads):
         r0 = sample_data[s_name]["Reads"][0]
         if "PerReadGCCountHistogram" not in r0:
@@ -421,7 +421,7 @@ def plot_adapter_content(sample_data, group_lookup_dict, project_lookup_dict, sa
     """
     samples_with_reads = [s for s in sample_data if _sample_has_reads(sample_data[s])]
     if not samples_with_reads:
-        empty_content: dict[str, dict[int, float]] = {}
+        empty_content: Dict[str, Dict[int, float]] = {}
         plot_html = linegraph.plot(
             empty_content,
             pconfig={
@@ -433,7 +433,7 @@ def plot_adapter_content(sample_data, group_lookup_dict, project_lookup_dict, sa
         )
         return plot_html, "Per Sample Adapter Content", "adapter_content", "", "", empty_content
 
-    plot_content: dict[str, dict[int, float]] = {}
+    plot_content: Dict[str, Dict[int, float]] = {}
 
     r1r2_split = 0
     for s_name in natsorted(samples_with_reads):