From 72b377bee6c3c1a7a1b65446fe79b2572a2b00c2 Mon Sep 17 00:00:00 2001
From: Carlos Ruiz <carlos.ruiz@elembio.com>
Date: Sat, 21 Feb 2026 10:18:12 -0800
Subject: [PATCH 1/6] Addressed review comments

---
 multiqc/modules/bases2fastq/bases2fastq.py    | 401 ++++++++----------
 multiqc/modules/bases2fastq/plot_runs.py      | 102 ++---
 multiqc/modules/bases2fastq/plot_samples.py   |  51 ++-
 .../cells2stats/cells2stats_bar_plots.py      |   1 +
 4 files changed, 250 insertions(+), 305 deletions(-)

diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py
index 3c8aae9d18..3f1f212b20 100644
--- a/multiqc/modules/bases2fastq/bases2fastq.py
+++ b/multiqc/modules/bases2fastq/bases2fastq.py
@@ -5,10 +5,10 @@
 import json
 import logging
 import random
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import uuid
 from pathlib import Path
-
+from natsort import natsorted
 from multiqc import config
 from multiqc.base_module import BaseMultiqcModule, ModuleNoSamplesFound
 from multiqc.types import LoadedFileDict
@@ -217,14 +217,6 @@ def _init_data_structures(self) -> None:
         self.group_lookup_dict: Dict[str, Any] = {}  # item -> group it belongs to
         self.project_lookup_dict: Dict[str, Any] = {}  # sample -> project mapping
 
-        # === Legacy/auxiliary data structures ===
-        self.b2f_sample_data: Dict[str, Any] = {}
-        self.b2f_run_data: Dict[str, Any] = {}
-        self.b2f_run_project_data: Dict[str, Any] = {}
-        self.b2f_run_project_sample_data: Dict[str, Any] = {}
-        self.missing_runs: set = set()  # Runs referenced but not found
-        self.sample_id_to_run: Dict[str, str] = {}  # sample_id -> run_analysis_name
-
     def _validate_path(self, file_path: Path, base_directory: Path) -> bool:
         """
         Validate that a file path doesn't escape outside the expected directory hierarchy.
@@ -293,23 +285,23 @@ def _parse_and_validate_data(self) -> str:
         Returns:
             summary_path: The determined summary path ('run_level', 'project_level', or 'combined_level')
         """
-        # Check for available log files
-        run_level_log_files = len(list(self.find_log_files("bases2fastq/run")))
-        project_level_log_files = len(list(self.find_log_files("bases2fastq/project")))
+        # Collect log files once per pattern (find_log_files returns a generator)
+        run_level_log_files = list(self.find_log_files("bases2fastq/run"))
+        project_level_log_files = list(self.find_log_files("bases2fastq/project"))
 
-        if run_level_log_files == 0 and project_level_log_files == 0:
+        if len(run_level_log_files) == 0 and len(project_level_log_files) == 0:
             error_msg = "No run- or project-level log files found within the Bases2Fastq results."
             log.error(error_msg)
             raise ModuleNoSamplesFound(error_msg)
 
         # Parse data from available sources
-        if run_level_log_files > 0:
+        if len(run_level_log_files) > 0:
             (self.run_level_data, self.run_level_samples, self.run_level_samples_to_project) = (
-                self._parse_run_project_data("bases2fastq/run")
+                self._parse_run_project_data("bases2fastq/run", log_files=run_level_log_files)
             )
-        if project_level_log_files > 0:
+        if len(project_level_log_files) > 0:
             (self.project_level_data, self.project_level_samples, self.project_level_samples_to_project) = (
-                self._parse_run_project_data("bases2fastq/project")
+                self._parse_run_project_data("bases2fastq/project", log_files=project_level_log_files)
             )
 
         # Count samples
@@ -332,6 +324,9 @@ def _parse_and_validate_data(self) -> str:
         # Determine summary path
         summary_path = self._determine_summary_path()
 
+        # Required call to confirm module is used (after confirming data was found)
+        self.add_software_version(None)
+
         # Log what was found
         log.info(f"Found {len(self.run_level_data)} run(s) within the Bases2Fastq results.")
         log.info(f"Found {len(self.project_level_data)} project(s) within the Bases2Fastq results.")
@@ -340,9 +335,6 @@ def _parse_and_validate_data(self) -> str:
         else:
             log.info(f"Found {num_project_level_samples} sample(s) within the Bases2Fastq results.")
 
-        # Required call to confirm module is used
-        self.add_software_version(None)
-
         # Warn if no data found
         if len(self.run_level_data) == 0 and len(self.project_level_data) == 0:
             log.warning("No run/project stats found!")
@@ -374,7 +366,9 @@ def _determine_summary_path(self) -> str:
 
     def _select_data_by_summary_path(
         self, summary_path: str
-    ) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, str], Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
+    ) -> Tuple[
+        Dict[str, Any], Dict[str, Any], Dict[str, str], Dict[str, Any], Dict[str, Any], Dict[int, Dict[str, Any]]
+    ]:
         """
         Select the appropriate data sources based on the summary path.
 
@@ -421,13 +415,14 @@ def _setup_colors(
         # Create run and project groups
         run_groups: Dict[str, List] = defaultdict(list)
         project_groups: Dict[str, List] = defaultdict(list)
+        # Only populated when summary_path == "project_level"; empty for run_level/combined_level
         in_project_sample_groups: Dict[str, List] = defaultdict(list)
         ind_sample_groups: Dict[str, List] = defaultdict(list)
 
-        for sample in sample_data.keys():
+        for sample in natsorted(sample_data.keys()):
             run_name, _ = sample.split("__")
             run_groups[run_name].append(sample)
-            sample_project = samples_to_projects[sample]
+            sample_project = samples_to_projects.get(sample, "DefaultProject")
             project_groups[sample_project].append(sample)
             ind_sample_groups[sample] = [sample]
             if summary_path == "project_level":
@@ -458,7 +453,7 @@ def _setup_colors(
 
         # Assign colors to samples
         self.sample_color: Dict[str, str] = {}
-        for sample_name in samples_to_projects.keys():
+        for sample_name in natsorted(samples_to_projects.keys()):
             if summary_path == "project_level" or len(project_groups) == 1:
                 sample_color = self.group_color[sample_name]
             else:
@@ -477,7 +472,7 @@ def _generate_plots(
         samples_to_projects: Dict[str, str],
         manifest_data: Dict[str, Any],
         index_assignment_data: Dict[str, Any],
-        unassigned_sequences: Dict[str, Any],
+        unassigned_sequences: Dict[int, Dict[str, Any]],
     ) -> None:
         """Generate all plots and add sections to the report."""
         # QC metrics table
@@ -541,7 +536,9 @@ def _extract_run_analysis_name(
 
         return f"{run_name}-{analysis_id[0:4]}"
 
-    def _parse_run_project_data(self, data_source: str) -> List[Dict[str, Any]]:
+    def _parse_run_project_data(
+        self, data_source: str, log_files: Optional[List[LoadedFileDict[Any]]] = None
+    ) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, str]]:
         """
         Parse RunStats.json files to extract run/project and sample-level data.
 
@@ -550,9 +547,11 @@ def _parse_run_project_data(self, data_source: str) -> List[Dict[str, Any]]:
 
         Args:
             data_source: Search pattern key ("bases2fastq/run" or "bases2fastq/project")
+            log_files: Optional pre-collected list of file dicts from find_log_files.
+                When provided, used instead of calling find_log_files again.
 
         Returns:
-            List containing:
+            Tuple of:
             - runs_global_data: Dict[run_name, run_stats] - Run/project level metrics
             - runs_sample_data: Dict[sample_id, sample_stats] - Per-sample metrics
             - sample_to_project: Dict[sample_id, project_name] - Sample-to-project mapping
@@ -560,13 +559,14 @@ def _parse_run_project_data(self, data_source: str) -> List[Dict[str, Any]]:
         Data Flow:
             RunStats.json -> parse -> filter samples by min_polonies -> populate dicts
         """
-        runs_global_data = {}
-        runs_sample_data = {}
-        sample_to_project = {}
+        runs_global_data: Dict[str, Any] = {}
+        runs_sample_data: Dict[str, Any] = {}
+        sample_to_project: Dict[str, str] = {}
         if data_source == "":
-            return [runs_global_data, runs_sample_data, sample_to_project]
+            return (runs_global_data, runs_sample_data, sample_to_project)
 
-        for f in self.find_log_files(data_source):
+        files_to_process = log_files if log_files is not None else list(self.find_log_files(data_source))
+        for f in files_to_process:
             data = json.loads(f["f"])
 
             # Copy incomind data and reset samples to include only desired
@@ -619,7 +619,53 @@ def _parse_run_project_data(self, data_source: str) -> List[Dict[str, Any]]:
 
             self.add_data_source(f=f, s_name=run_analysis_name, module="bases2fastq")
 
-        return [runs_global_data, runs_sample_data, sample_to_project]
+        return (runs_global_data, runs_sample_data, sample_to_project)
+
+    def _extract_manifest_lane_settings(
+        self, run_manifest_data: Dict[str, Any], run_analysis_name: str
+    ) -> Dict[str, Dict[str, Any]]:
+        """
+        Extract per-lane settings from a parsed RunManifest.json Settings section.
+
+        Lanes whose "Lane" value is missing or falsy are logged and skipped.
+
+        Args:
+            run_manifest_data: Parsed RunManifest.json; an empty dict is returned
+                when it has no "Settings" key
+            run_analysis_name: Run identifier for building run_lane keys
+
+        Returns:
+            Dict[run_lane, settings] where run_lane = "{run_analysis_name} | L{lane_id}"
+            and settings contain Indexing, AdapterTrimType, R1/R2AdapterMinimumTrimmedLength
+        """
+        result: Dict[str, Dict[str, Any]] = {}
+        if "Settings" not in run_manifest_data:
+            return result
+        # Compiled once: the index-mask key pattern (I1Mask, I2Mask, ...) does not vary per lane
+        mask_pattern = re.compile(r"^I\d+Mask$")
+        for lane_data in run_manifest_data["Settings"]:
+            lane_id = lane_data.get("Lane")
+            if not lane_id:
+                log.error("<Lane> not found in Settings section of RunManifest. Skipping lanes.")
+                continue
+
+            indices: List[str] = []  # distinct "Read" values, in first-seen order
+            indices_cycles: List[str] = []  # len(Cycles) of every mask entry, as text
+            for key in lane_data:
+                if not mask_pattern.match(key):
+                    continue
+                for mask_info in lane_data[key]:
+                    if mask_info["Read"] not in indices:
+                        indices.append(mask_info["Read"])
+                    indices_cycles.append(str(len(mask_info["Cycles"])))
+
+            result[f"{run_analysis_name} | L{lane_id}"] = {
+                "Indexing": f"{' + '.join(indices_cycles)}<br>{' + '.join(indices)}",
+                "AdapterTrimType": lane_data.get("AdapterTrimType", "N/A"),
+                "R1AdapterMinimumTrimmedLength": lane_data.get("R1AdapterMinimumTrimmedLength", "N/A"),
+                "R2AdapterMinimumTrimmedLength": lane_data.get("R2AdapterMinimumTrimmedLength", "N/A"),
+            }
+        return result
 
     def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]:
         """
@@ -636,7 +682,7 @@ def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]:
         Returns:
             Dict[run_lane, settings] where run_lane = "{run_name} | L{lane_id}"
         """
-        runs_manifest_data = {}
+        runs_manifest_data: Dict[str, Dict[str, Any]] = {}
 
         if data_source == "":
             return runs_manifest_data
@@ -662,34 +708,9 @@ def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]:
                     f"<Settings> section not found in {directory}/RunManifest.json.\nSkipping RunManifest metrics."
                 )
             else:
-                for lane_data in run_manifest["Settings"]:
-                    lane_id = lane_data.get("Lane")
-                    if not lane_id:
-                        log.error("<Lane> not found in Settings section of RunManifest. Skipping lanes.")
-                        continue
-                    lane_name = f"L{lane_id}"
-                    run_lane = f"{run_analysis_name} | {lane_name}"
-                    runs_manifest_data[run_lane] = {}
-
-                    indices = []
-                    indices_cycles = []
-                    mask_pattern = re.compile(r"^I\d+Mask$")
-                    matching_keys = [key for key in lane_data.keys() if mask_pattern.match(key)]
-                    for key in matching_keys:
-                        for mask_info in lane_data[key]:
-                            if mask_info["Read"] not in indices:
-                                indices.append(mask_info["Read"])
-                            indices_cycles.append(str(len(mask_info["Cycles"])))
-                    indexing = f"{' + '.join(indices_cycles)}<br>{' + '.join(indices)}"
-                    runs_manifest_data[run_lane]["Indexing"] = indexing
-
-                    runs_manifest_data[run_lane]["AdapterTrimType"] = lane_data.get("AdapterTrimType", "N/A")
-                    runs_manifest_data[run_lane]["R1AdapterMinimumTrimmedLength"] = lane_data.get(
-                        "R1AdapterMinimumTrimmedLength", "N/A"
-                    )
-                    runs_manifest_data[run_lane]["R2AdapterMinimumTrimmedLength"] = lane_data.get(
-                        "R2AdapterMinimumTrimmedLength", "N/A"
-                    )
+                runs_manifest_data.update(
+                    self._extract_manifest_lane_settings(run_manifest, run_analysis_name)
+                )
 
             self.add_data_source(f=f, s_name=run_analysis_name, module="bases2fastq")
 
@@ -707,7 +728,7 @@ def _parse_run_manifest_in_project(self, data_source: str) -> Dict[str, Any]:
             + ../../RunManifest.json (run-level manifest)
             -> Extract per-lane settings
         """
-        project_manifest_data = {}
+        project_manifest_data: Dict[str, Dict[str, Any]] = {}
 
         if data_source == "":
             return project_manifest_data
@@ -739,34 +760,9 @@ def _parse_run_manifest_in_project(self, data_source: str) -> Dict[str, Any]:
             if "Settings" not in run_manifest_data:
                 log.warning(f"<Settings> section not found in {run_manifest}.\nSkipping RunManifest metrics.")
             else:
-                for lane_data in run_manifest_data["Settings"]:
-                    lane_id = lane_data.get("Lane")
-                    if not lane_id:
-                        log.error("<Lane> not found in Settings section of RunManifest. Skipping lanes.")
-                        continue
-                    lane_name = f"L{lane_id}"
-                    run_lane = f"{run_analysis_name} | {lane_name}"
-                    project_manifest_data[run_lane] = {}
-
-                    indices = []
-                    indices_cycles = []
-                    mask_pattern = re.compile(r"^I\d+Mask$")
-                    matching_keys = [key for key in lane_data.keys() if mask_pattern.match(key)]
-                    for key in matching_keys:
-                        for mask_info in lane_data[key]:
-                            if mask_info["Read"] not in indices:
-                                indices.append(mask_info["Read"])
-                            indices_cycles.append(str(len(mask_info["Cycles"])))
-                    indexing = f"{' + '.join(indices_cycles)}<br>{' + '.join(indices)}"
-                    project_manifest_data[run_lane]["Indexing"] = indexing
-
-                    project_manifest_data[run_lane]["AdapterTrimType"] = lane_data.get("AdapterTrimType", "N/A")
-                    project_manifest_data[run_lane]["R1AdapterMinimumTrimmedLength"] = lane_data.get(
-                        "R1AdapterMinimumTrimmedLength", "N/A"
-                    )
-                    project_manifest_data[run_lane]["R2AdapterMinimumTrimmedLength"] = lane_data.get(
-                        "R2AdapterMinimumTrimmedLength", "N/A"
-                    )
+                project_manifest_data.update(
+                    self._extract_manifest_lane_settings(run_manifest_data, run_analysis_name)
+                )
             data_source_info: LoadedFileDict[Any] = {
                 "fn": str(run_manifest.name),
                 "root": str(run_manifest.parent),
@@ -778,7 +774,84 @@ def _parse_run_manifest_in_project(self, data_source: str) -> Dict[str, Any]:
 
         return project_manifest_data
 
-    def _parse_run_unassigned_sequences(self, data_source: str) -> Dict[str, Any]:
+    def _build_index_assignment_from_stats(
+        self,
+        stats_dict: Dict[str, Any],
+        run_analysis_name: str,
+        project: Optional[str] = None,
+    ) -> Tuple[Dict[str, Dict[str, Any]], int]:
+        """
+        Sum per-index polony counts found under SampleStats -> Occurrences of a RunStats dict.
+
+        Returns:
+            Tuple of (run_inner_dict, total_polonies). run_inner_dict maps each ExpectedSequence to
+            { SampleID, SamplePolonyCounts, PercentOfPolonies, Index1, Index2[, Project] }, keeping
+            the SampleID of the first sample seen; Project is set only when project is not None.
+        """
+        total_polonies = stats_dict.get("NumPoloniesBeforeTrimming", 0)
+        by_sequence: Dict[str, Dict[str, Any]] = {}
+        for sample_stats in stats_dict.get("SampleStats", []):
+            name = sample_stats.get("SampleName")
+            sample_id = None
+            if run_analysis_name and name:
+                sample_id = "__".join([run_analysis_name, name])
+            if "Occurrences" not in sample_stats:
+                log.error(f"Missing data needed to extract index assignment for sample {sample_id}. Skipping.")
+                continue
+            for occurrence in sample_stats["Occurrences"]:
+                expected_seq = occurrence.get("ExpectedSequence")
+                counts = occurrence.get("NumPoloniesBeforeTrimming")
+                if expected_seq is None or counts is None or sample_id is None:
+                    log.error(f"Missing data needed to extract index assignment for sample {sample_id}. Skipping.")
+                    continue
+                if expected_seq not in by_sequence:
+                    by_sequence[expected_seq] = {
+                        "SampleID": sample_id,
+                        "SamplePolonyCounts": 0,
+                        "PercentOfPolonies": float("nan"),
+                        "Index1": "",
+                        "Index2": "",
+                    }
+                    if project is not None:
+                        by_sequence[expected_seq]["Project"] = project
+                by_sequence[expected_seq]["SamplePolonyCounts"] += counts
+        for record in by_sequence.values():
+            if total_polonies > 0:
+                record["PercentOfPolonies"] = round(record["SamplePolonyCounts"] / total_polonies * 100, 2)
+        return (by_sequence, total_polonies)
+
+    def _merge_manifest_index_sequences(
+        self,
+        sample_to_index_assignment: Dict[str, Any],
+        run_manifest_data: Dict[str, Any],
+        run_analysis_name: str,
+    ) -> None:
+        """Fill Index1/Index2 from RunManifest Samples into sample_to_index_assignment (mutates)."""
+        if "Samples" not in run_manifest_data or run_analysis_name not in sample_to_index_assignment:
+            return
+        assignments = sample_to_index_assignment[run_analysis_name]
+        for manifest_sample in run_manifest_data["Samples"]:
+            name = manifest_sample.get("SampleName")
+            if run_analysis_name is None or name is None or "Indexes" not in manifest_sample:
+                continue
+            sample_id = "__".join([run_analysis_name, name])
+            for index_pair in manifest_sample["Indexes"]:
+                index_1, index_2 = index_pair.get("Index1", ""), index_pair.get("Index2", "")
+                merged = f"{index_1}{index_2}"  # lookup key: Index1 immediately followed by Index2
+                # NOTE(review): project-level parsing used to skip this case silently; confirm log.error is intended
+                if merged not in assignments:
+                    log.error(f"Index assignment information not found for sample {sample_id}. Skipping.")
+                    continue
+                target = assignments[merged]
+                if target["SampleID"] != sample_id:
+                    log.error(
+                        f"RunManifest SampleID <{sample_id}> does not match "
+                        f"RunStats SampleID {target['SampleID']}. Skipping."
+                    )
+                    continue
+                target["Index1"], target["Index2"] = index_1, index_2
+
+    def _parse_run_unassigned_sequences(self, data_source: str) -> Dict[int, Dict[str, Any]]:
         """
         Parse unassigned/unknown barcode sequences from run-level data.
 
@@ -789,7 +862,7 @@ def _parse_run_unassigned_sequences(self, data_source: str) -> Dict[str, Any]:
             RunStats.json -> Lanes -> UnassignedSequences
             -> Extract: sequence, count, percentage of total polonies
         """
-        run_unassigned_sequences = {}
+        run_unassigned_sequences: Dict[int, Dict[str, Any]] = {}
         if data_source == "":
             return run_unassigned_sequences
 
@@ -849,7 +922,7 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]:
             + RunManifest.json -> Samples -> index sequences (Index1, Index2)
             -> Combined index assignment table
         """
-        sample_to_index_assignment = {}
+        sample_to_index_assignment: Dict[str, Dict[str, Dict[str, Any]]] = {}
 
         if manifest_data_source == "":
             return sample_to_index_assignment
@@ -865,9 +938,6 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]:
             if run_stats is None:
                 continue
 
-            total_polonies = 0
-
-            # Get run name information
             run_analysis_name = self._extract_run_analysis_name(run_stats, source_info=str(run_stats_path))
             if run_analysis_name is None:
                 continue
@@ -877,7 +947,6 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]:
                 log.info(f"Skipping <{run_analysis_name}> because it is present in ignore list.")
                 continue
 
-            # Ensure sample stats are present
             if "SampleStats" not in run_stats:
                 log.error(
                     f"Error, missing SampleStats in RunStats.json. Skipping index assignment metrics.\n"
@@ -887,43 +956,8 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]:
                 )
                 continue
 
-            # Extract per sample polony counts and overall total counts
-            total_polonies = run_stats.get("NumPoloniesBeforeTrimming", 0)
-            for sample_data in run_stats["SampleStats"]:
-                sample_name = sample_data.get("SampleName")
-                sample_id = None
-                if run_analysis_name and sample_name:
-                    sample_id = "__".join([run_analysis_name, sample_name])
-
-                if "Occurrences" not in sample_data:
-                    log.error(f"Missing data needed to extract index assignment for sample {sample_id}. Skipping.")
-                    continue
-
-                for occurrence in sample_data["Occurrences"]:
-                    sample_expected_seq = occurrence.get("ExpectedSequence")
-                    sample_counts = occurrence.get("NumPoloniesBeforeTrimming")
-                    if any([element is None for element in [sample_expected_seq, sample_counts, sample_id]]):
-                        log.error(f"Missing data needed to extract index assignment for sample {sample_id}. Skipping.")
-                        continue
-                    if run_analysis_name not in sample_to_index_assignment:
-                        sample_to_index_assignment[run_analysis_name] = {}
-                    if sample_expected_seq not in sample_to_index_assignment[run_analysis_name]:
-                        sample_to_index_assignment[run_analysis_name][sample_expected_seq] = {
-                            "SampleID": sample_id,
-                            "SamplePolonyCounts": 0,
-                            "PercentOfPolonies": float("nan"),
-                            "Index1": "",
-                            "Index2": "",
-                        }
-                    sample_to_index_assignment[run_analysis_name][sample_expected_seq]["SamplePolonyCounts"] += (
-                        sample_counts
-                    )
-
-            for sample_data in sample_to_index_assignment[run_analysis_name].values():
-                if total_polonies > 0:
-                    sample_data["PercentOfPolonies"] = round(
-                        sample_data["SamplePolonyCounts"] / total_polonies * 100, 2
-                    )
+            run_inner, _ = self._build_index_assignment_from_stats(run_stats, run_analysis_name)
+            sample_to_index_assignment[run_analysis_name] = run_inner
 
             run_manifest = json.loads(f["f"])
             if "Samples" not in run_manifest:
@@ -934,28 +968,7 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]:
             elif len(sample_to_index_assignment) == 0:
                 log.warning("Index assignment data missing. Skipping creation of index assignment metrics.")
             else:
-                for sample_data in run_manifest["Samples"]:
-                    sample_name = sample_data.get("SampleName")
-                    sample_id = None
-                    if run_analysis_name is None or sample_name is None or "Indexes" not in sample_data:
-                        continue
-                    sample_id = "__".join([run_analysis_name, sample_name])
-                    for index_data in sample_data["Indexes"]:
-                        index_1 = index_data.get("Index1", "")
-                        index_2 = index_data.get("Index2", "")
-                        merged_indices = f"{index_1}{index_2}"
-                        if merged_indices not in sample_to_index_assignment[run_analysis_name]:
-                            log.error(f"Index assignment information not found for sample {sample_id}. Skipping.")
-                            continue
-                        if sample_id != sample_to_index_assignment[run_analysis_name][merged_indices]["SampleID"]:
-                            log.error(
-                                f"RunManifest SampleID <{sample_id}> does not match "
-                                f"RunStats SampleID {sample_to_index_assignment[merged_indices]['SampleID']}."
-                                "Skipping."
-                            )
-                            continue
-                        sample_to_index_assignment[run_analysis_name][merged_indices]["Index1"] = index_1
-                        sample_to_index_assignment[run_analysis_name][merged_indices]["Index2"] = index_2
+                self._merge_manifest_index_sequences(sample_to_index_assignment, run_manifest, run_analysis_name)
 
         return sample_to_index_assignment
 
@@ -971,7 +984,7 @@ def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any]
             + ../../RunManifest.json -> Samples -> index sequences
             -> Combined index assignment table
         """
-        sample_to_index_assignment = {}
+        sample_to_index_assignment: Dict[str, Dict[str, Dict[str, Any]]] = {}
 
         if data_source == "":
             return sample_to_index_assignment
@@ -999,7 +1012,6 @@ def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any]
                 log.info(f"Skipping <{run_analysis_name}> because it is present in ignore list.")
                 continue
 
-            # Ensure sample stats are present
             if "SampleStats" not in project_stats:
                 log.error(
                     f"Error, missing SampleStats in RunStats.json. Skipping index assignment metrics.\n"
@@ -1009,45 +1021,10 @@ def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any]
                 )
                 continue
 
-            # Extract per sample polony counts and overall total counts
-            total_polonies = project_stats.get("NumPoloniesBeforeTrimming", 0)
-            for sample_data in project_stats["SampleStats"]:
-                sample_name = sample_data.get("SampleName")
-                sample_id = None
-
-                if run_analysis_name and sample_name:
-                    sample_id = "__".join([run_analysis_name, sample_name])
-
-                if "Occurrences" not in sample_data:
-                    log.error(f"Missing data needed to extract index assignment for sample {sample_id}. Skipping.")
-                    continue
-
-                for occurrence in sample_data["Occurrences"]:
-                    sample_expected_seq = occurrence.get("ExpectedSequence")
-                    sample_counts = occurrence.get("NumPoloniesBeforeTrimming")
-                    if any([element is None for element in [sample_expected_seq, sample_counts, sample_id]]):
-                        log.error(f"Missing data needed to extract index assignment for sample {sample_id}. Skipping.")
-                        continue
-                    if run_analysis_name not in sample_to_index_assignment:
-                        sample_to_index_assignment[run_analysis_name] = {}
-                    if sample_expected_seq not in sample_to_index_assignment[run_analysis_name]:
-                        sample_to_index_assignment[run_analysis_name][sample_expected_seq] = {
-                            "SampleID": sample_id,
-                            "Project": project,
-                            "SamplePolonyCounts": 0,
-                            "PercentOfPolonies": float("nan"),
-                            "Index1": "",
-                            "Index2": "",
-                        }
-                    sample_to_index_assignment[run_analysis_name][sample_expected_seq]["SamplePolonyCounts"] += (
-                        sample_counts
-                    )
-
-            for sample_data in sample_to_index_assignment[run_analysis_name].values():
-                if total_polonies > 0:
-                    sample_data["PercentOfPolonies"] = round(
-                        sample_data["SamplePolonyCounts"] / total_polonies * 100, 2
-                    )
+            run_inner, _ = self._build_index_assignment_from_stats(
+                project_stats, run_analysis_name, project=project
+            )
+            sample_to_index_assignment[run_analysis_name] = run_inner
 
             run_manifest_data = self._read_json_file(run_manifest, base_directory=base_directory)
             if run_manifest_data is None:
@@ -1061,31 +1038,13 @@ def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any]
             elif len(sample_to_index_assignment) == 0:
                 log.warning("Index assignment data missing. Skipping creation of index assignment metrics.")
             else:
-                for sample_data in run_manifest_data["Samples"]:
-                    sample_name = sample_data.get("SampleName")
-                    sample_id = None
-                    if run_analysis_name is None or sample_name is None or "Indexes" not in sample_data:
-                        continue
-                    sample_id = "__".join([run_analysis_name, sample_name])
-                    for index_data in sample_data["Indexes"]:
-                        index_1 = index_data.get("Index1", "")
-                        index_2 = index_data.get("Index2", "")
-                        merged_indices = f"{index_1}{index_2}"
-                        if merged_indices not in sample_to_index_assignment[run_analysis_name]:
-                            continue
-                        if sample_id != sample_to_index_assignment[run_analysis_name][merged_indices]["SampleID"]:
-                            log.error(
-                                f"RunManifest SampleID <{sample_id}> does not match "
-                                f"RunStats SampleID {sample_to_index_assignment[merged_indices]['SampleID']}."
-                                "Skipping."
-                            )
-                            continue
-                        sample_to_index_assignment[run_analysis_name][merged_indices]["Index1"] = index_1
-                        sample_to_index_assignment[run_analysis_name][merged_indices]["Index2"] = index_2
+                self._merge_manifest_index_sequences(
+                    sample_to_index_assignment, run_manifest_data, run_analysis_name
+                )
 
         return sample_to_index_assignment
 
-    def add_run_plots(self, data: Dict[str, Any], plot_functions: List[Callable]) -> None:
+    def add_run_plots(self, data: Dict[Any, Any], plot_functions: List[Callable]) -> None:
         for func in plot_functions:
             plot_html, plot_name, anchor, description, helptext, plot_data = func(data, self.run_color)
             self.add_section(name=plot_name, plot=plot_html, anchor=anchor, description=description, helptext=helptext)
diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py
index 87151b3baa..22900ee625 100644
--- a/multiqc/modules/bases2fastq/plot_runs.py
+++ b/multiqc/modules/bases2fastq/plot_runs.py
@@ -1,26 +1,16 @@
 import math
+from typing import Any, Dict, cast
 
 from multiqc.plots import bargraph, linegraph, table
-from multiqc import config
+from multiqc.plots.table_object import ColumnDict, SectionT
 from natsort import natsorted
-import random
-import string
-
-"""
-Functions for plotting per run information of bases2fastq
-"""
-
-
-def generate_random_string(length: int):
-    return "".join(random.choices(string.ascii_letters + string.digits, k=length))
 
 
 def plot_run_stats(run_data, color_dict):
     """
     Plot a bar graph for polony numbers, Q30/Q40, index assignment rate and yields for each run
     """
-    run_names = list(run_data.keys())
-    run_names.sort()
+    run_names = natsorted(run_data.keys())
     num_polonies = dict()
     yields = dict()
     for run in run_names:
@@ -64,7 +54,7 @@ def plot_run_stats(run_data, color_dict):
         ],
         "cpswitch": True,
         "stacking": "normal",
-        "id": f"run_metrics_bar_{generate_random_string(10)}",
+        "id": "bases2fastq_run_metrics_bar",
         "title": "bases2fastq: General Sequencing Run QC metrics plot",
         "ylab": "QC",
     }
@@ -119,7 +109,7 @@ def tabulate_project_stats(run_data, color_dict):
     plot_content = dict()
     is_percent_q50_present = False
     reads_present = []
-    for s_name in run_data.keys():
+    for s_name in natsorted(run_data.keys()):
         project = run_data[s_name]["Project"]
         run_project_name = f"{s_name} | {project}"
         run_stats = dict()
@@ -210,7 +200,7 @@ def tabulate_project_stats(run_data, color_dict):
     pconfig = {
         "title": "bases2fastq: General Sequencing (Project) QC metrics",
         "col1_header": "Run Name",
-        "id": f"project_run_metrics_table_{generate_random_string(5)}",
+        "id": "bases2fastq_project_run_metrics_table",
         "ylab": "QC",
     }
 
@@ -222,7 +212,7 @@ def tabulate_project_stats(run_data, color_dict):
         first_key = run_keys[0]
         project_header = f"{run_data[first_key]['Project']} | "
     plot_name = f"{project_header}Sequencing QC Metrics Table"
-    plot_html = table.plot(plot_content, headers, pconfig=pconfig)
+    plot_html = table.plot(plot_content, cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "project_run_qc_metrics_table"
     description = "QC metrics per run, per project"
     helptext = """
@@ -246,7 +236,7 @@ def tabulate_run_stats(run_data, color_dict):
     plot_content = dict()
     is_percent_q50_present = False
     reads_present = []
-    for s_name in run_data.keys():
+    for s_name in natsorted(run_data.keys()):
         run_stats = dict()
         run_stats.update({"num_polonies_run": int(run_data[s_name]["NumPolonies"])})
         run_stats.update({"percent_assigned_run": run_data[s_name].get("PercentAssignedReads", 100.0)})
@@ -274,7 +264,7 @@ def tabulate_run_stats(run_data, color_dict):
     headers = {}
     headers["num_polonies_run"] = {
         "title": "# Polonies",
-        "description": "The total number of polonies that are calculated for the run.)",
+        "description": "The total number of polonies that are calculated for the run.",
         "min": 0,
         "scale": "RdYlGn",
     }
@@ -344,12 +334,12 @@ def tabulate_run_stats(run_data, color_dict):
     pconfig = {
         "title": "Bases2Fastq: General Sequencing Run QC metrics",
         "col1_header": "Run Name",
-        "id": f"run_metrics_table_{generate_random_string(5)}",
+        "id": "bases2fastq_run_metrics_table",
         "ylab": "QC",
     }
 
     plot_name = "Sequencing Run QC Metrics Table"
-    plot_html = table.plot(plot_content, headers, pconfig=pconfig)
+    plot_html = table.plot(plot_content, cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "run_qc_metrics_table"
     description = "QC metrics per run"
     helptext = """
@@ -372,7 +362,7 @@ def tabulate_manifest_stats(run_data, color_dict):
     Tabulate general information and statistics of each run
     """
     plot_content = dict()
-    for s_name in run_data.keys():
+    for s_name in natsorted(run_data.keys()):
         run_stats = dict()
         run_stats.update({"indexing": run_data[s_name]["Indexing"]})
         run_stats.update({"adapter_trim_type": run_data[s_name]["AdapterTrimType"]})
@@ -397,18 +387,18 @@ def tabulate_manifest_stats(run_data, color_dict):
     }
     headers["min_read_length_r2"] = {
         "title": "Minimum Read Length R2",
-        "description": "Minimum read length for read R1 (if applicable).",
+        "description": "Minimum read length for read R2 (if applicable).",
         "scale": "RdYlGn",
     }
 
     pconfig = {
         "title": "Bases2Fastq: Run Manifest Metrics",
         "col1_header": "Run Name | Lane",
-        "id": f"run_manifest_metrics_table_{generate_random_string(5)}",
+        "id": "bases2fastq_run_manifest_metrics_table",
     }
 
     plot_name = "Run Manifest Table"
-    plot_html = table.plot(plot_content, headers, pconfig=pconfig)
+    plot_html = table.plot(plot_content, cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "run_manifest_metrics_table"
     description = "Run parameters used."
     helptext = """
@@ -426,7 +416,7 @@ def tabulate_index_assignment_stats(run_data, color_dict):
     Tabulate general information and statistics of each run
     """
     plot_content = dict()
-    run_names = sorted(run_data.keys())
+    run_names = natsorted(run_data.keys())
     index = 1
     project_present = False
     for run in run_names:
@@ -447,7 +437,7 @@ def tabulate_index_assignment_stats(run_data, color_dict):
             plot_content.update({index: sample_index_stats})
             index += 1
 
-    headers = {}
+    headers: Dict[str, Any] = {}
     headers["run_name"] = {
         "title": "Run Name",
         "description": "Run Name.",
@@ -477,8 +467,8 @@ def tabulate_index_assignment_stats(run_data, color_dict):
     headers["polony_percentage"] = {
         "title": "Polony %",
         "description": "Percentage of total polonies assigned to this index combination.",
-        "max": 100,
-        "min": 0,
+        "max": 100.0,
+        "min": 0.0,
         "scale": "RdYlGn",
         "suffix": "%",
     }
@@ -486,11 +476,11 @@ def tabulate_index_assignment_stats(run_data, color_dict):
     pconfig = {
         "title": "Bases2Fastq: Index Assignment Metrics",
         "col1_header": "Sample #",
-        "id": f"index_assignment_metrics_{generate_random_string(5)}",
+        "id": "bases2fastq_index_assignment_metrics",
     }
 
     plot_name = "Index Assignment Metrics"
-    plot_html = table.plot(plot_content, headers, pconfig=pconfig)
+    plot_html = table.plot(cast(SectionT, plot_content), cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "index_assignment_metrics"
     description = "Index assignment metrics."
     helptext = """
@@ -516,7 +506,7 @@ def tabulate_unassigned_index_stats(run_data, color_dict):
         - Polonies
         - % Polonies
     """
-    headers = {}
+    headers: Dict[str, Any] = {}
     headers["Run Name"] = {
         "title": "Run Name",
         "description": "Run Name (Run ID + Analysis ID).",
@@ -541,8 +531,8 @@ def tabulate_unassigned_index_stats(run_data, color_dict):
     headers["% Polonies"] = {
         "title": "% Polonies",
         "description": "Percentage of total polonies assigned to this index combination.",
-        "max": 100,
-        "min": 0,
+        "max": 100.0,
+        "min": 0.0,
         "scale": "RdYlGn-rev",
         "suffix": "%",
     }
@@ -550,11 +540,11 @@ def tabulate_unassigned_index_stats(run_data, color_dict):
     pconfig = {
         "title": "Bases2Fastq: Unassigned Indices Metrics",
         "col1_header": "Index #",
-        "id": f"index_unassignment_metrics_{generate_random_string(5)}",
+        "id": "bases2fastq_index_unassignment_metrics",
     }
 
     plot_name = "Unassigned Indices Metrics"
-    plot_html = table.plot(run_data, headers, pconfig=pconfig)
+    plot_html = table.plot(cast(SectionT, run_data), cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "index_unassignment_metrics"
     description = "Index unassignment metrics."
     helptext = """
@@ -571,8 +561,8 @@ def tabulate_unassigned_index_stats(run_data, color_dict):
 
 def plot_base_quality_hist(run_data, color_dict):
     # Prepare plot data for per base BQ histogram
-    bq_hist_dict = dict()
-    for s_name in run_data.keys():
+    bq_hist_dict: Dict[str, Dict[int, float]] = {}
+    for s_name in natsorted(run_data.keys()):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         R1_base_quality_counts = run_data[s_name]["Reads"][0]["QualityScoreHistogram"]
         R2_base_quality_counts = [0] * len(R1_base_quality_counts)
@@ -585,8 +575,8 @@ def plot_base_quality_hist(run_data, color_dict):
             bq_hist_dict[s_name].update({quality: R1R2_base_quality_counts[quality] / total_bases * 100})
 
     # Prepare plot data for per read average BQ histogram
-    per_read_quality_hist_dict = dict()
-    for s_name in run_data.keys():
+    per_read_quality_hist_dict: Dict[str, Dict[int, float]] = {}
+    for s_name in natsorted(run_data.keys()):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         R1_quality_counts = run_data[s_name]["Reads"][0]["PerReadMeanQualityScoreHistogram"]
         R2_quality_counts = [0] * len(R1_quality_counts)
@@ -622,7 +612,7 @@ def plot_base_quality_hist(run_data, color_dict):
                 "colors": color_dict,
             },
         ],
-        "id": f"per_run_bq_hist_{generate_random_string(5)}",
+        "id": "bases2fastq_per_run_bq_hist",
         "title": "bases2fastq: Quality Histograms",
         "ylab": "Percentage",
     }
@@ -633,8 +623,8 @@ def plot_base_quality_hist(run_data, color_dict):
     helptext = """
     Run base qualities histogram, summarised by bases and reads. 
     Use tabs to switch between the views:\n
-       - Quality Per Base: distribution of base qualities.\n
-       - Quality Per Read: distribution of read qualities.\n
+        - Quality Per Base: distribution of base qualities.\n
+        - Quality Per Read: distribution of read qualities.\n
     \n
     _The y-axis on the graph shows the quality scores. The higher the score, the better
     the base call. The background of the graph divides the y-axis into very good quality
@@ -650,16 +640,14 @@ def plot_base_quality_by_cycle(run_data, color_dict):
     # Prepare plot data for median BQ of each cycle
 
     r1r2_split = 0
-    for s_name in run_data.keys():
-        paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
-        cycle_dict = dict()
+    for s_name in natsorted(run_data.keys()):
         R1CycleNum = len(run_data[s_name]["Reads"][0]["Cycles"])
         r1r2_split = max(r1r2_split, R1CycleNum)
 
-    median_dict = {}
-    for s_name in run_data.keys():
+    median_dict: Dict[str, Dict[int, float]] = {}
+    for s_name in natsorted(run_data.keys()):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
-        cycle_dict = dict()
+        cycle_dict: Dict[int, float] = {}
         R1CycleNum = len(run_data[s_name]["Reads"][0]["Cycles"])
         for cycle in run_data[s_name]["Reads"][0]["Cycles"]:
             cycle_no = int(cycle["Cycle"])
@@ -671,11 +659,11 @@ def plot_base_quality_by_cycle(run_data, color_dict):
         median_dict.update({s_name: cycle_dict})
 
     # Prepare plot data for mean BQ of each cycle
-    mean_dict = {}
-    for s_name in run_data.keys():
+    mean_dict: Dict[str, Dict[int, float]] = {}
+    for s_name in natsorted(run_data.keys()):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         # Update each sample cycle info
-        cycle_dict = dict()
+        cycle_dict = {}
         for cycle in run_data[s_name]["Reads"][0]["Cycles"]:
             cycle_no = int(cycle["Cycle"])
             cycle_dict.update({cycle_no: cycle["QualityScoreMean"]})
@@ -687,7 +675,7 @@ def plot_base_quality_by_cycle(run_data, color_dict):
 
     # Prepare plot data for %Q30 of each cycle
     Q30_dict = {}
-    for s_name in run_data.keys():
+    for s_name in natsorted(run_data.keys()):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         # Update each sample cycle info
         cycle_dict = dict()
@@ -702,7 +690,7 @@ def plot_base_quality_by_cycle(run_data, color_dict):
 
     # Prepare plot data for %Q40 of each cycle
     Q40_dict = {}
-    for s_name in run_data.keys():
+    for s_name in natsorted(run_data.keys()):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         cycle_dict = dict()
         for cycle in run_data[s_name]["Reads"][0]["Cycles"]:
@@ -717,7 +705,7 @@ def plot_base_quality_by_cycle(run_data, color_dict):
     # Prepare plot data for %Q50 of each cycle
     Q50_dict = {}
     percent_q50_values = set()
-    for s_name in run_data.keys():
+    for s_name in natsorted(run_data.keys()):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         cycle_dict = dict()
         for cycle in run_data[s_name]["Reads"][0]["Cycles"]:
@@ -741,7 +729,7 @@ def plot_base_quality_by_cycle(run_data, color_dict):
 
     # Prepare plot data for % base calls below PF threshold
     below_pf_dict = {}
-    for s_name in run_data.keys():
+    for s_name in natsorted(run_data.keys()):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         cycle_dict = dict()
         R1CycleNum = len(run_data[s_name]["Reads"][0]["Cycles"])
@@ -769,7 +757,7 @@ def plot_base_quality_by_cycle(run_data, color_dict):
         "x_lines": [{"color": "#FF0000", "width": 2, "value": r1r2_split, "dashStyle": "dash"}],
         "colors": color_dict,
         "ymin": 0,
-        "id": f"per_run_quality_by_cycle_{generate_random_string(5)}",
+        "id": "bases2fastq_per_run_quality_by_cycle",
         "title": "bases2fastq: Quality by cycles",
         "ylab": "QC",
     }
diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py
index ebaab9b166..c7e859edce 100644
--- a/multiqc/modules/bases2fastq/plot_samples.py
+++ b/multiqc/modules/bases2fastq/plot_samples.py
@@ -1,13 +1,10 @@
-from typing import Any, Dict
-from multiqc.plots import bargraph, linegraph, table
-from multiqc import config
-from .plot_runs import generate_random_string
+from typing import Any, Dict, cast
 
-import numpy as np
+from natsort import natsorted
 
-"""
-Functions for plotting per sample information of bases2fastq
-"""
+from multiqc.plots import bargraph, linegraph, table
+from multiqc.plots.table_object import ColumnDict
+from multiqc import config
 
 
 def _calculate_sample_reads_eliminated(run_data) -> int:
@@ -39,7 +36,7 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
     plot_content = dict()
     reads_present = set()
     is_percent_q50_present = False
-    for s_name in sample_data.keys():
+    for s_name in natsorted(sample_data.keys()):
         general_stats = dict()
         general_stats.update({"group": group_lookup_dict[s_name]})
         general_stats.update({"project": project_lookup_dict.get(s_name, "")})
@@ -141,13 +138,13 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
     }
 
     pconfig = {
-        "id": f"sample_qc_metric_table_{generate_random_string(5)}",
+        "id": "bases2fastq_sample_qc_metric_table",
         "title": "Sample QC Metrics Table",
         "no_violin": False,
     }
 
     plot_name = "Sample QC Metrics Table"
-    plot_html = table.plot(plot_content, headers, pconfig=pconfig)
+    plot_html = table.plot(plot_content, cast(Dict[Any, ColumnDict], headers), pconfig=pconfig)
     anchor = "sample_qc_metrics_table"
     description = "QC metrics per unique sample"
     helptext = """
@@ -171,10 +168,10 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
     """Create the epic HTML for the FastQC sequence content heatmap"""
 
     # Prep the data
-    data = dict()
+    data: Dict[str, Dict[int, Any]] = {}
 
     r1r2_split = 0
-    for s_name in sorted(sample_data.keys()):
+    for s_name in natsorted(sample_data.keys()):
         paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False
         for base in "ACTG":
             base_s_name = "__".join([s_name, base])
@@ -182,7 +179,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
             R1 = sample_data[s_name]["Reads"][0]["Cycles"]
             r1r2_split = max(r1r2_split, len(R1))
 
-    for s_name in sorted(sample_data.keys()):
+    for s_name in natsorted(sample_data.keys()):
         paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False
         R1 = sample_data[s_name]["Reads"][0]["Cycles"]
         for cycle in range(len(R1)):
@@ -216,7 +213,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
         "x_lines": [{"color": "#FF0000", "width": 2, "value": r1r2_split, "dashStyle": "dash"}],
         "colors": color_dict,
         "ymin": 0,
-        "id": f"per_cycle_base_content_{generate_random_string(5)}",
+        "id": "bases2fastq_per_cycle_base_content",
         "title": "bases2fastq: Per Cycle Base Content Percentage",
     }
     plot_html = linegraph.plot(plot_content, pconfig=pconfig)
@@ -239,15 +236,15 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
 
 
 def plot_per_cycle_N_content(sample_data, group_lookup_dict, project_lookup_dict, color_dict):
-    data = dict()
+    data: Dict[str, Dict[int, float]] = {}
     r1r2_split = 0
-    for s_name in sorted(sample_data.keys()):
+    for s_name in natsorted(sample_data.keys()):
         data[s_name] = {}
         R1 = sample_data[s_name]["Reads"][0]["Cycles"]
         R1_cycle_num = len(R1)
         r1r2_split = max(r1r2_split, R1_cycle_num)
 
-    for s_name in sorted(sample_data.keys()):
+    for s_name in natsorted(sample_data.keys()):
         paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False
         R1 = sample_data[s_name]["Reads"][0]["Cycles"]
         R1_cycle_num = len(R1)
@@ -283,11 +280,11 @@ def plot_per_cycle_N_content(sample_data, group_lookup_dict, project_lookup_dict
         "colors": color_dict,
         "ymin": 0,
         "ymax": 100,
-        "id": f"per_cycle_n_content_{generate_random_string(5)}",
+        "id": "bases2fastq_per_cycle_n_content",
         "title": "bases2fastq: Per Cycle N Content Percentage",
     }
     plot_html = linegraph.plot(plot_content, pconfig=pconfig)
-    plot_name = "Per Cycle N Content."
+    plot_name = "Per Cycle N Content"
     anchor = "n_content"
     description = """
     Percentage of unidentified bases ("N" bases) by each sequencing cycle.
@@ -310,8 +307,8 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s
     """
     Plot GC Histogram per Sample
     """
-    gc_hist_dict = dict()
-    for s_name in sample_data.keys():
+    gc_hist_dict: Dict[str, Dict[float, float]] = {}
+    for s_name in natsorted(sample_data.keys()):
         R1_gc_counts = sample_data[s_name]["Reads"][0]["PerReadGCCountHistogram"]
         R2_gc_counts = [0] * len(R1_gc_counts)
         if len(sample_data[s_name]["Reads"]) > 1:
@@ -340,7 +337,7 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s
         "xlab": "GC Content (%)",
         "ylab": "Percentage of reads that have GC (%)",
         "colors": sample_color,
-        "id": f"gc_hist_{generate_random_string(5)}",
+        "id": "bases2fastq_gc_hist",
         "title": "bases2fastq: Per Sample GC Content Histogram",
     }
     plot_name = "Per Sample GC Histogram"
@@ -370,17 +367,17 @@ def plot_adapter_content(sample_data, group_lookup_dict, project_lookup_dict, sa
     """
     Plot Adapter Content per Sample
     """
-    plot_content = dict()
+    plot_content: Dict[str, Dict[int, float]] = {}
 
     r1r2_split = 0
-    for s_name in sample_data.keys():
+    for s_name in natsorted(sample_data.keys()):
         plot_content.update({s_name: {}})
         # Read 1
         cycles = sample_data[s_name]["Reads"][0]["Cycles"]
         R1_cycle_num = len(cycles)
         r1r2_split = max(r1r2_split, R1_cycle_num)
 
-    for s_name in sample_data.keys():
+    for s_name in natsorted(sample_data.keys()):
         paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False
         plot_content.update({s_name: {}})
         # Read 1
@@ -397,7 +394,7 @@ def plot_adapter_content(sample_data, group_lookup_dict, project_lookup_dict, sa
                 adapter_percent = cycle["PercentReadsTrimmed"]
                 plot_content[s_name].update({cycle_no: adapter_percent})
     pconfig = {
-        "id": f"per_cycle_adapter_content_{generate_random_string(5)}",
+        "id": "bases2fastq_per_cycle_adapter_content",
         "title": "bases2fastq: Per Cycle Adapter Content",
         "xlab": "Cycle",
         "ylab": "% of Sequences",
diff --git a/multiqc/modules/cells2stats/cells2stats_bar_plots.py b/multiqc/modules/cells2stats/cells2stats_bar_plots.py
index 75eeda73e0..4e5c22aed2 100644
--- a/multiqc/modules/cells2stats/cells2stats_bar_plots.py
+++ b/multiqc/modules/cells2stats/cells2stats_bar_plots.py
@@ -99,6 +99,7 @@ def plot_cell_assignment(c2s_run_data):
     cats = [{"total_density": {"name": "Total Density"}}, cat, {"total_count": {"name": "Total Counts"}}, cat, cat]
 
     plot_name = "Barcoding Cell Assignment Metrics"
+    # Only build the plot when every dataset in plot_content is non-empty; otherwise leave plot_html as None
     plot_html = (
         bargraph.plot(plot_content, cats, pconfig=pconfig) if min([len(el) for el in plot_content]) > 0 else None
     )

From 0e160285b046c5dc53d25ad30c4a4b49c4fc7598 Mon Sep 17 00:00:00 2001
From: Carlos Ruiz <carlos.ruiz@elembio.com>
Date: Sat, 21 Feb 2026 12:09:23 -0800
Subject: [PATCH 2/6] Added additional fixture-based unit tests.

---
 multiqc/modules/bases2fastq/bases2fastq.py    |  10 +-
 multiqc/modules/bases2fastq/plot_runs.py      |  61 ++-
 multiqc/modules/bases2fastq/plot_samples.py   |  95 +++-
 multiqc/modules/bases2fastq/tests/conftest.py |  25 +
 .../PairedEndDefaultProject/RunManifest.json  |  22 +
 .../PairedEndDefaultProject/RunStats.json     |  18 +
 .../DefaultProject_RunStats.json              |  26 +
 .../fixtures/PairedEndNoProject/RunStats.json |  18 +
 .../RunStats.json                             |  23 +
 .../PairedEndNoProjectWithLanes/RunStats.json |  31 ++
 .../tests/fixtures/project_runstats.json      |  24 +
 .../tests/fixtures/run_manifest.json          |  10 +
 .../fixtures/run_manifest_with_samples.json   |  21 +
 .../tests/fixtures/run_runstats.json          |  18 +
 .../fixtures/run_runstats_low_polonies.json   |  21 +
 .../run_runstats_with_occurrences.json        |  23 +
 .../run_runstats_with_unassigned.json         |  29 +
 .../bases2fastq/tests/test_bases2fastq.py     | 507 ++++++++++++++++++
 18 files changed, 952 insertions(+), 30 deletions(-)
 create mode 100644 multiqc/modules/bases2fastq/tests/conftest.py
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunManifest.json
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunStats.json
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/Samples/DefaultProject/DefaultProject_RunStats.json
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProject/RunStats.json
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectLowPolonies/RunStats.json
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectWithLanes/RunStats.json
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/project_runstats.json
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/run_manifest.json
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/run_manifest_with_samples.json
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/run_runstats.json
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/run_runstats_low_polonies.json
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/run_runstats_with_occurrences.json
 create mode 100644 multiqc/modules/bases2fastq/tests/fixtures/run_runstats_with_unassigned.json
 create mode 100644 multiqc/modules/bases2fastq/tests/test_bases2fastq.py

diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py
index 3f1f212b20..68135b4fb5 100644
--- a/multiqc/modules/bases2fastq/bases2fastq.py
+++ b/multiqc/modules/bases2fastq/bases2fastq.py
@@ -36,7 +36,7 @@
 
 
 # Default minimum polony threshold - samples below this are skipped
-DEFAULT_MIN_POLONIES = 10000
+DEFAULT_MIN_POLONIES = 1000
 
 
 def _get_min_polonies() -> int:
@@ -738,8 +738,12 @@ def _parse_run_manifest_in_project(self, data_source: str) -> Dict[str, Any]:
             if not directory:
                 continue
 
-            # Get RunManifest.json from run output root (two levels up from project directory)
-            base_directory = Path(directory).parent.parent
+            # Locate the run output root that holds RunManifest.json: use this directory when the
+            # manifest sits next to the stats file, otherwise fall back to two levels up.
+            base_directory = Path(directory).resolve()
+            if not (base_directory / "RunManifest.json").exists():
+                # Project-level layout: <run root>/Samples/<project>/ -> climb back to <run root>.
+                base_directory = base_directory.parent.parent
             run_manifest = base_directory / "RunManifest.json"
             project_stats = json.loads(f["f"])
             run_analysis_name = self._extract_run_analysis_name(
diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py
index 22900ee625..d1c2e3e524 100644
--- a/multiqc/modules/bases2fastq/plot_runs.py
+++ b/multiqc/modules/bases2fastq/plot_runs.py
@@ -559,10 +559,23 @@ def tabulate_unassigned_index_stats(run_data, color_dict):
     return plot_html, plot_name, anchor, description, helptext, run_data
 
 
+def _run_has_reads(run_entry: dict) -> bool:
+    """Return True if the run's first read carries both quality histograms needed by the run plots."""
+    reads = run_entry.get("Reads")
+    # A missing, empty or non-list "Reads" value means there is nothing to plot.
+    if not isinstance(reads, list) or not reads:
+        return False
+    first_read = reads[0]
+    required_keys = ("QualityScoreHistogram", "PerReadMeanQualityScoreHistogram")
+    return isinstance(first_read, dict) and all(key in first_read for key in required_keys)
+
+
 def plot_base_quality_hist(run_data, color_dict):
-    # Prepare plot data for per base BQ histogram
+    # Prepare plot data for per base BQ histogram (skip runs without Reads)
     bq_hist_dict: Dict[str, Dict[int, float]] = {}
     for s_name in natsorted(run_data.keys()):
+        if not _run_has_reads(run_data[s_name]):
+            continue
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         R1_base_quality_counts = run_data[s_name]["Reads"][0]["QualityScoreHistogram"]
         R2_base_quality_counts = [0] * len(R1_base_quality_counts)
@@ -577,6 +590,8 @@ def plot_base_quality_hist(run_data, color_dict):
     # Prepare plot data for per read average BQ histogram
     per_read_quality_hist_dict: Dict[str, Dict[int, float]] = {}
     for s_name in natsorted(run_data.keys()):
+        if not _run_has_reads(run_data[s_name]):
+            continue
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         R1_quality_counts = run_data[s_name]["Reads"][0]["PerReadMeanQualityScoreHistogram"]
         R2_quality_counts = [0] * len(R1_quality_counts)
@@ -637,15 +652,37 @@ def plot_base_quality_hist(run_data, color_dict):
 
 
 def plot_base_quality_by_cycle(run_data, color_dict):
-    # Prepare plot data for median BQ of each cycle
+    # Prepare plot data for median BQ of each cycle (skip runs without Reads/Cycles)
+    runs_with_reads = [
+        s
+        for s in run_data
+        if _run_has_reads(run_data[s])
+        and run_data[s]["Reads"][0].get("Cycles")
+    ]
+    if not runs_with_reads:
+        plot_content: list[Any] = []
+        plot_html = linegraph.plot(
+            plot_content,
+            pconfig={"id": "bases2fastq_run_bq_by_cycle", "title": "bases2fastq: Run Base Quality by Cycle"},
+        )
+        return (
+            plot_html,
+            "Run Base Quality by Cycle",
+            "bq_by_cycle",
+            "Base quality by cycle",
+            "No run data with Reads available.",
+            plot_content,
+        )
 
     r1r2_split = 0
-    for s_name in natsorted(run_data.keys()):
-        R1CycleNum = len(run_data[s_name]["Reads"][0]["Cycles"])
-        r1r2_split = max(r1r2_split, R1CycleNum)
+    for s_name in natsorted(runs_with_reads):
+        read0 = run_data[s_name]["Reads"][0]
+        if read0.get("Cycles"):
+            R1CycleNum = len(read0["Cycles"])
+            r1r2_split = max(r1r2_split, R1CycleNum)
 
     median_dict: Dict[str, Dict[int, float]] = {}
-    for s_name in natsorted(run_data.keys()):
+    for s_name in natsorted(runs_with_reads):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         cycle_dict: Dict[int, float] = {}
         R1CycleNum = len(run_data[s_name]["Reads"][0]["Cycles"])
@@ -660,7 +697,7 @@ def plot_base_quality_by_cycle(run_data, color_dict):
 
     # Prepare plot data for mean BQ of each cycle
     mean_dict: Dict[str, Dict[int, float]] = {}
-    for s_name in natsorted(run_data.keys()):
+    for s_name in natsorted(runs_with_reads):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         # Update each sample cycle info
         cycle_dict = {}
@@ -675,7 +712,7 @@ def plot_base_quality_by_cycle(run_data, color_dict):
 
     # Prepare plot data for %Q30 of each cycle
     Q30_dict = {}
-    for s_name in natsorted(run_data.keys()):
+    for s_name in natsorted(runs_with_reads):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         # Update each sample cycle info
         cycle_dict = dict()
@@ -690,7 +727,7 @@ def plot_base_quality_by_cycle(run_data, color_dict):
 
     # Prepare plot data for %Q40 of each cycle
     Q40_dict = {}
-    for s_name in natsorted(run_data.keys()):
+    for s_name in natsorted(runs_with_reads):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         cycle_dict = dict()
         for cycle in run_data[s_name]["Reads"][0]["Cycles"]:
@@ -705,7 +742,7 @@ def plot_base_quality_by_cycle(run_data, color_dict):
     # Prepare plot data for %Q50 of each cycle
     Q50_dict = {}
     percent_q50_values = set()
-    for s_name in natsorted(run_data.keys()):
+    for s_name in natsorted(runs_with_reads):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         cycle_dict = dict()
         for cycle in run_data[s_name]["Reads"][0]["Cycles"]:
@@ -729,11 +766,11 @@ def plot_base_quality_by_cycle(run_data, color_dict):
 
     # Prepare plot data for % base calls below PF threshold
     below_pf_dict = {}
-    for s_name in natsorted(run_data.keys()):
+    for s_name in natsorted(runs_with_reads):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         cycle_dict = dict()
         R1CycleNum = len(run_data[s_name]["Reads"][0]["Cycles"])
-        if "PercentBelowFilterThreshold" not in run_data[s_name]["Reads"][0]["Cycles"][0]:
+        if not run_data[s_name]["Reads"][0]["Cycles"] or "PercentBelowFilterThreshold" not in run_data[s_name]["Reads"][0]["Cycles"][0]:
             continue
         for cycle in run_data[s_name]["Reads"][0]["Cycles"]:
             cycle_no = int(cycle["Cycle"])
diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py
index c7e859edce..e7987135bb 100644
--- a/multiqc/modules/bases2fastq/plot_samples.py
+++ b/multiqc/modules/bases2fastq/plot_samples.py
@@ -7,6 +7,15 @@
 from multiqc import config
 
 
+def _sample_has_reads(sample_entry: dict) -> bool:
+    """Return True when ``Reads`` is a non-empty list whose first read is a dict with non-empty ``Cycles``."""
+    read_list = sample_entry.get("Reads")
+    if not isinstance(read_list, list) or not read_list:
+        return False
+    first_read = read_list[0]
+    return isinstance(first_read, dict) and bool(first_read.get("Cycles"))
+
+
 def _calculate_sample_reads_eliminated(run_data) -> int:
     """
     Calculate the total number of reads eliminated during trimming.
@@ -41,16 +50,16 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
         general_stats.update({"group": group_lookup_dict[s_name]})
         general_stats.update({"project": project_lookup_dict.get(s_name, "")})
         general_stats.update({"num_polonies_sample": sample_data[s_name]["NumPolonies"]})
-        general_stats.update({"yield_sample": sample_data[s_name]["Yield"]})
-        general_stats.update({"mean_base_quality_sample": sample_data[s_name]["QualityScoreMean"]})
-        general_stats.update({"percent_q30_sample": sample_data[s_name]["PercentQ30"]})
-        general_stats.update({"percent_q40_sample": sample_data[s_name]["PercentQ40"]})
+        general_stats.update({"yield_sample": sample_data[s_name].get("Yield", 0.0)})
+        general_stats.update({"mean_base_quality_sample": sample_data[s_name].get("QualityScoreMean", 0)})
+        general_stats.update({"percent_q30_sample": sample_data[s_name].get("PercentQ30", 0)})
+        general_stats.update({"percent_q40_sample": sample_data[s_name].get("PercentQ40", 0)})
         percent_q50 = sample_data[s_name].get("PercentQ50")
         if percent_q50 is not None:
             is_percent_q50_present = True
             general_stats.update({"percent_q50_run": percent_q50})
         general_stats.update({"reads_eliminated": _calculate_sample_reads_eliminated(sample_data[s_name])})
-        general_stats.update({"percent_mismatch": sample_data[s_name]["PercentMismatch"]})
+        general_stats.update({"percent_mismatch": sample_data[s_name].get("PercentMismatch", 0)})
         if "Reads" in sample_data[s_name]:
             for read in sample_data[s_name]["Reads"]:
                 read_name = read["Read"]
@@ -166,20 +175,32 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
 
 def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, color_dict):
     """Create the epic HTML for the FastQC sequence content heatmap"""
+    samples_with_reads = [s for s in sample_data if _sample_has_reads(sample_data[s])]
+    if not samples_with_reads:
+        empty_data: Dict[str, Dict[int, Any]] = {}
+        plot_html = linegraph.plot(
+            empty_data,
+            pconfig={
+                "id": "bases2fastq_per_cycle_base_content",
+                "title": "bases2fastq: Per Cycle Base Content Percentage",
+                "xlab": "Cycle",
+                "ylab": "Percentage of Total Reads",
+            },
+        )
+        return plot_html, "Per Cycle Base Content", "base_content", "", "", empty_data
 
     # Prep the data
     data: Dict[str, Dict[int, Any]] = {}
 
     r1r2_split = 0
-    for s_name in natsorted(sample_data.keys()):
-        paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False
+    for s_name in natsorted(samples_with_reads):
         for base in "ACTG":
             base_s_name = "__".join([s_name, base])
             data[base_s_name] = {}
             R1 = sample_data[s_name]["Reads"][0]["Cycles"]
             r1r2_split = max(r1r2_split, len(R1))
 
-    for s_name in natsorted(sample_data.keys()):
+    for s_name in natsorted(samples_with_reads):
         paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False
         R1 = sample_data[s_name]["Reads"][0]["Cycles"]
         for cycle in range(len(R1)):
@@ -236,15 +257,29 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
 
 
 def plot_per_cycle_N_content(sample_data, group_lookup_dict, project_lookup_dict, color_dict):
+    samples_with_reads = [s for s in sample_data if _sample_has_reads(sample_data[s])]
+    if not samples_with_reads:
+        empty_data: Dict[str, Dict[int, float]] = {}
+        plot_html = linegraph.plot(
+            empty_data,
+            pconfig={
+                "id": "bases2fastq_per_cycle_n_content",
+                "title": "bases2fastq: Per Cycle N Content",
+                "xlab": "Cycle",
+                "ylab": "Percentage of N bases",
+            },
+        )
+        return plot_html, "Per Cycle N Content", "n_content", "", "", empty_data
+
     data: Dict[str, Dict[int, float]] = {}
     r1r2_split = 0
-    for s_name in natsorted(sample_data.keys()):
+    for s_name in natsorted(samples_with_reads):
         data[s_name] = {}
         R1 = sample_data[s_name]["Reads"][0]["Cycles"]
         R1_cycle_num = len(R1)
         r1r2_split = max(r1r2_split, R1_cycle_num)
 
-    for s_name in natsorted(sample_data.keys()):
+    for s_name in natsorted(samples_with_reads):
         paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False
         R1 = sample_data[s_name]["Reads"][0]["Cycles"]
         R1_cycle_num = len(R1)
@@ -307,9 +342,26 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s
     """
     Plot GC Histogram per Sample
     """
+    samples_with_reads = [s for s in sample_data if _sample_has_reads(sample_data[s])]
+    if not samples_with_reads:
+        empty_gc_hist: Dict[str, Dict[float, float]] = {}
+        plot_html = linegraph.plot(
+            empty_gc_hist,
+            pconfig={
+                "id": "bases2fastq_gc_hist",
+                "title": "bases2fastq: Per Sample GC Content Histogram",
+                "xlab": "GC Content (%)",
+                "ylab": "Percentage of reads that have GC (%)",
+            },
+        )
+        return plot_html, "Per Sample GC Histogram", "gc_histogram", "", "", empty_gc_hist
+
     gc_hist_dict: Dict[str, Dict[float, float]] = {}
-    for s_name in natsorted(sample_data.keys()):
-        R1_gc_counts = sample_data[s_name]["Reads"][0]["PerReadGCCountHistogram"]
+    for s_name in natsorted(samples_with_reads):
+        r0 = sample_data[s_name]["Reads"][0]
+        if "PerReadGCCountHistogram" not in r0:
+            continue
+        R1_gc_counts = r0["PerReadGCCountHistogram"]
         R2_gc_counts = [0] * len(R1_gc_counts)
         if len(sample_data[s_name]["Reads"]) > 1:
             R2_gc_counts_raw = sample_data[s_name]["Reads"][1]["PerReadGCCountHistogram"]
@@ -367,17 +419,30 @@ def plot_adapter_content(sample_data, group_lookup_dict, project_lookup_dict, sa
     """
     Plot Adapter Content per Sample
     """
+    samples_with_reads = [s for s in sample_data if _sample_has_reads(sample_data[s])]
+    if not samples_with_reads:
+        empty_content: Dict[str, Dict[int, float]] = {}
+        plot_html = linegraph.plot(
+            empty_content,
+            pconfig={
+                "id": "bases2fastq_per_cycle_adapter_content",
+                "title": "bases2fastq: Per Cycle Adapter Content",
+                "xlab": "Cycle",
+                "ylab": "% of Sequences",
+            },
+        )
+        return plot_html, "Per Sample Adapter Content", "adapter_content", "", "", empty_content
+
     plot_content: Dict[str, Dict[int, float]] = {}
 
     r1r2_split = 0
-    for s_name in natsorted(sample_data.keys()):
+    for s_name in natsorted(samples_with_reads):
         plot_content.update({s_name: {}})
-        # Read 1
         cycles = sample_data[s_name]["Reads"][0]["Cycles"]
         R1_cycle_num = len(cycles)
         r1r2_split = max(r1r2_split, R1_cycle_num)
 
-    for s_name in natsorted(sample_data.keys()):
+    for s_name in natsorted(samples_with_reads):
         paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False
         plot_content.update({s_name: {}})
         # Read 1
diff --git a/multiqc/modules/bases2fastq/tests/conftest.py b/multiqc/modules/bases2fastq/tests/conftest.py
new file mode 100644
index 0000000000..181417f978
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/conftest.py
@@ -0,0 +1,25 @@
+"""Pytest configuration and fixtures for bases2fastq module tests."""
+
+from pathlib import Path
+
+import pytest
+
+from multiqc.utils import testing
+
+
+@pytest.fixture
+def data_dir():
+    """Return the MultiQC test-data repo data directory (test-data/data), as given by ``testing.data_dir()``."""
+    return testing.data_dir()
+
+
+@pytest.fixture
+def fixtures_dir():
+    """Return the path to the in-repo JSON fixtures directory (no test-data clone required).
+
+    - PairedEndNoProject/RunStats.json: run-level stats only
+    - PairedEndDefaultProject/: RunStats.json, RunManifest.json, Samples/DefaultProject/DefaultProject_RunStats.json
+    - PairedEndNoProjectWithLanes/RunStats.json: run-level stats with Lanes/UnassignedSequences
+    - PairedEndNoProjectLowPolonies/RunStats.json: two samples, one below min_polonies (50 polonies)
+    """
+    return Path(__file__).parent / "fixtures"
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunManifest.json b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunManifest.json
new file mode 100644
index 0000000000..4365e3e6cd
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunManifest.json
@@ -0,0 +1,22 @@
+{
+    "Settings": [
+        {
+            "Lane": 1,
+            "AdapterTrimType": "Paired-End",
+            "R1AdapterMinimumTrimmedLength": 16,
+            "R2AdapterMinimumTrimmedLength": 16
+        }
+    ],
+    "Samples": [
+        {
+            "SampleName": "Sample1",
+            "Indexes": [
+                {
+                    "Lane": 1,
+                    "Index1": "AAA",
+                    "Index2": "TTT"
+                }
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunStats.json b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunStats.json
new file mode 100644
index 0000000000..e0f2afee0f
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunStats.json
@@ -0,0 +1,18 @@
+{
+    "RunName": "RUN01",
+    "AnalysisID": "a1b2c3d4e5f6",
+    "NumPolonies": 50000,
+    "AssignedYield": 1.5,
+    "QualityScoreMean": 35,
+    "PercentQ30": 95,
+    "PercentQ40": 90,
+    "PercentAssignedReads": 100.0,
+    "PercentMismatch": 0,
+    "SampleStats": [
+        {
+            "SampleID": "s1",
+            "SampleName": "Sample1",
+            "NumPolonies": 50000
+        }
+    ]
+}
\ No newline at end of file
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/Samples/DefaultProject/DefaultProject_RunStats.json b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/Samples/DefaultProject/DefaultProject_RunStats.json
new file mode 100644
index 0000000000..29f960f0ec
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/Samples/DefaultProject/DefaultProject_RunStats.json
@@ -0,0 +1,26 @@
+{
+    "RunName": "RUN01",
+    "AnalysisID": "a1b2c3d4e5f6",
+    "Project": "DefaultProject",
+    "NumPolonies": 50000,
+    "NumPoloniesBeforeTrimming": 100000,
+    "AssignedYield": 1.5,
+    "QualityScoreMean": 35,
+    "PercentQ30": 95,
+    "PercentQ40": 90,
+    "PercentAssignedReads": 100.0,
+    "PercentMismatch": 0,
+    "SampleStats": [
+        {
+            "SampleID": "s1",
+            "SampleName": "Sample1",
+            "NumPolonies": 50000,
+            "Occurrences": [
+                {
+                    "ExpectedSequence": "AAATTT",
+                    "NumPoloniesBeforeTrimming": 5000
+                }
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProject/RunStats.json b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProject/RunStats.json
new file mode 100644
index 0000000000..e0f2afee0f
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProject/RunStats.json
@@ -0,0 +1,18 @@
+{
+    "RunName": "RUN01",
+    "AnalysisID": "a1b2c3d4e5f6",
+    "NumPolonies": 50000,
+    "AssignedYield": 1.5,
+    "QualityScoreMean": 35,
+    "PercentQ30": 95,
+    "PercentQ40": 90,
+    "PercentAssignedReads": 100.0,
+    "PercentMismatch": 0,
+    "SampleStats": [
+        {
+            "SampleID": "s1",
+            "SampleName": "Sample1",
+            "NumPolonies": 50000
+        }
+    ]
+}
\ No newline at end of file
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectLowPolonies/RunStats.json b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectLowPolonies/RunStats.json
new file mode 100644
index 0000000000..feef607045
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectLowPolonies/RunStats.json
@@ -0,0 +1,23 @@
+{
+    "RunName": "RUN01",
+    "AnalysisID": "a1b2c3d4e5f6",
+    "NumPolonies": 50050,
+    "AssignedYield": 1.5,
+    "QualityScoreMean": 35,
+    "PercentQ30": 95,
+    "PercentQ40": 90,
+    "PercentAssignedReads": 100.0,
+    "PercentMismatch": 0,
+    "SampleStats": [
+        {
+            "SampleID": "s1",
+            "SampleName": "Sample1",
+            "NumPolonies": 50
+        },
+        {
+            "SampleID": "s2",
+            "SampleName": "Sample2",
+            "NumPolonies": 50000
+        }
+    ]
+}
\ No newline at end of file
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectWithLanes/RunStats.json b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectWithLanes/RunStats.json
new file mode 100644
index 0000000000..1ff24f6fdc
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectWithLanes/RunStats.json
@@ -0,0 +1,31 @@
+{
+    "RunName": "RUN01",
+    "AnalysisID": "a1b2c3d4e5f6",
+    "NumPolonies": 50000,
+    "NumPoloniesBeforeTrimming": 100000,
+    "AssignedYield": 1.5,
+    "QualityScoreMean": 35,
+    "PercentQ30": 95,
+    "PercentQ40": 90,
+    "PercentAssignedReads": 100.0,
+    "PercentMismatch": 0,
+    "SampleStats": [
+        {
+            "SampleID": "s1",
+            "SampleName": "Sample1",
+            "NumPolonies": 50000
+        }
+    ],
+    "Lanes": [
+        {
+            "Lane": 1,
+            "UnassignedSequences": [
+                {
+                    "I1": "AAA",
+                    "I2": "TTT",
+                    "Count": 100
+                }
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/project_runstats.json b/multiqc/modules/bases2fastq/tests/fixtures/project_runstats.json
new file mode 100644
index 0000000000..dc73144d25
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/project_runstats.json
@@ -0,0 +1,24 @@
+{
+  "RunName": "RUN01",
+  "AnalysisID": "a1b2c3d4e5f6",
+  "Project": "MyProject",
+  "NumPolonies": 50000,
+  "NumPoloniesBeforeTrimming": 60000,
+  "AssignedYield": 1.5,
+  "QualityScoreMean": 35.0,
+  "PercentQ30": 95.0,
+  "PercentQ40": 90.0,
+  "SampleStats": [
+    {
+      "SampleID": "s1",
+      "SampleName": "Sample1",
+      "NumPolonies": 50000,
+      "Occurrences": [
+        {
+          "ExpectedSequence": "AAATTT",
+          "NumPoloniesBeforeTrimming": 3000
+        }
+      ]
+    }
+  ]
+}
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/run_manifest.json b/multiqc/modules/bases2fastq/tests/fixtures/run_manifest.json
new file mode 100644
index 0000000000..202fbaf157
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/run_manifest.json
@@ -0,0 +1,10 @@
+{
+  "Settings": [
+    {
+      "Lane": 1,
+      "AdapterTrimType": "N/A",
+      "R1AdapterMinimumTrimmedLength": "N/A",
+      "R2AdapterMinimumTrimmedLength": "N/A"
+    }
+  ]
+}
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/run_manifest_with_samples.json b/multiqc/modules/bases2fastq/tests/fixtures/run_manifest_with_samples.json
new file mode 100644
index 0000000000..a6f8e33548
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/run_manifest_with_samples.json
@@ -0,0 +1,21 @@
+{
+  "Settings": [
+    {
+      "Lane": 1,
+      "AdapterTrimType": "N/A",
+      "R1AdapterMinimumTrimmedLength": "N/A",
+      "R2AdapterMinimumTrimmedLength": "N/A"
+    }
+  ],
+  "Samples": [
+    {
+      "SampleName": "Sample1",
+      "Indexes": [
+        {
+          "Index1": "AAA",
+          "Index2": "TTT"
+        }
+      ]
+    }
+  ]
+}
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/run_runstats.json b/multiqc/modules/bases2fastq/tests/fixtures/run_runstats.json
new file mode 100644
index 0000000000..7e1812b321
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/run_runstats.json
@@ -0,0 +1,18 @@
+{
+  "RunName": "RUN01",
+  "AnalysisID": "a1b2c3d4e5f6",
+  "NumPolonies": 50000,
+  "AssignedYield": 1.5,
+  "QualityScoreMean": 35,
+  "PercentQ30": 95,
+  "PercentQ40": 90,
+  "PercentAssignedReads": 100.0,
+  "PercentMismatch": 0,
+  "SampleStats": [
+    {
+      "SampleID": "s1",
+      "SampleName": "Sample1",
+      "NumPolonies": 50000
+    }
+  ]
+}
\ No newline at end of file
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/run_runstats_low_polonies.json b/multiqc/modules/bases2fastq/tests/fixtures/run_runstats_low_polonies.json
new file mode 100644
index 0000000000..66032d6a6d
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/run_runstats_low_polonies.json
@@ -0,0 +1,21 @@
+{
+  "RunName": "RUN01",
+  "AnalysisID": "a1b2c3d4e5f6",
+  "NumPolonies": 50050,
+  "AssignedYield": 1.5,
+  "QualityScoreMean": 35.0,
+  "PercentQ30": 95.0,
+  "PercentQ40": 90.0,
+  "SampleStats": [
+    {
+      "SampleID": "s1",
+      "SampleName": "Sample1",
+      "NumPolonies": 50
+    },
+    {
+      "SampleID": "s2",
+      "SampleName": "Sample2",
+      "NumPolonies": 50000
+    }
+  ]
+}
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/run_runstats_with_occurrences.json b/multiqc/modules/bases2fastq/tests/fixtures/run_runstats_with_occurrences.json
new file mode 100644
index 0000000000..a93faccf73
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/run_runstats_with_occurrences.json
@@ -0,0 +1,23 @@
+{
+  "RunName": "RUN01",
+  "AnalysisID": "a1b2c3d4e5f6",
+  "NumPolonies": 50000,
+  "NumPoloniesBeforeTrimming": 100000,
+  "AssignedYield": 1.5,
+  "QualityScoreMean": 35.0,
+  "PercentQ30": 95.0,
+  "PercentQ40": 90.0,
+  "SampleStats": [
+    {
+      "SampleID": "s1",
+      "SampleName": "Sample1",
+      "NumPolonies": 50000,
+      "Occurrences": [
+        {
+          "ExpectedSequence": "AAATTT",
+          "NumPoloniesBeforeTrimming": 5000
+        }
+      ]
+    }
+  ]
+}
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/run_runstats_with_unassigned.json b/multiqc/modules/bases2fastq/tests/fixtures/run_runstats_with_unassigned.json
new file mode 100644
index 0000000000..9f8479b1fb
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/fixtures/run_runstats_with_unassigned.json
@@ -0,0 +1,29 @@
+{
+  "RunName": "RUN01",
+  "AnalysisID": "a1b2c3d4e5f6",
+  "NumPolonies": 50000,
+  "NumPoloniesBeforeTrimming": 100000,
+  "AssignedYield": 1.5,
+  "QualityScoreMean": 35.0,
+  "PercentQ30": 95.0,
+  "PercentQ40": 90.0,
+  "SampleStats": [
+    {
+      "SampleID": "s1",
+      "SampleName": "Sample1",
+      "NumPolonies": 50000
+    }
+  ],
+  "Lanes": [
+    {
+      "Lane": 1,
+      "UnassignedSequences": [
+        {
+          "I1": "AAA",
+          "I2": "TTT",
+          "Count": 100
+        }
+      ]
+    }
+  ]
+}
diff --git a/multiqc/modules/bases2fastq/tests/test_bases2fastq.py b/multiqc/modules/bases2fastq/tests/test_bases2fastq.py
new file mode 100644
index 0000000000..72ab3b4d3f
--- /dev/null
+++ b/multiqc/modules/bases2fastq/tests/test_bases2fastq.py
@@ -0,0 +1,507 @@
+"""Tests for bases2fastq module: parsers and integration."""
+
+import json
+from pathlib import Path
+from typing import Any, List
+from unittest.mock import patch
+
+import pytest
+
+from multiqc import report, config
+from multiqc.base_module import ModuleNoSamplesFound
+from multiqc.types import LoadedFileDict
+
+from multiqc.modules.bases2fastq.bases2fastq import MultiqcModule, _get_min_polonies
+
+
+def _load_fixture(fixtures_dir: Path, *parts: str) -> dict:
+    """Load the JSON fixture at ``fixtures_dir / parts[0] / ... / parts[-1]`` and return the parsed object."""
+    path = fixtures_dir.joinpath(*parts)
+    with path.open(encoding="utf-8") as f:  # explicit: the default text encoding is locale-dependent
+        return json.load(f)
+
+
+class TestExtractManifestLaneSettings:
+    """Exercise ``MultiqcModule._extract_manifest_lane_settings``."""
+
+    def test_extract_manifest_lane_settings_minimal(self, fixtures_dir):
+        """A manifest with one lane yields a single "RUN01-a1b2 | L1" settings entry."""
+        analysis_dir = fixtures_dir / "PairedEndDefaultProject"
+        report.reset()
+        report.analysis_files = [str(analysis_dir)]
+        report.search_files(["bases2fastq"])
+        module = MultiqcModule()
+        manifest = _load_fixture(fixtures_dir, "PairedEndDefaultProject", "RunManifest.json")
+        lane_key = "RUN01-a1b2 | L1"
+        lane_settings = module._extract_manifest_lane_settings(manifest, "RUN01-a1b2")
+        assert len(lane_settings) == 1
+        assert lane_key in lane_settings
+        assert lane_settings[lane_key]["AdapterTrimType"] == "Paired-End"
+        assert lane_settings[lane_key]["R1AdapterMinimumTrimmedLength"] == 16
+        assert lane_settings[lane_key]["R2AdapterMinimumTrimmedLength"] == 16
+        assert "Indexing" in lane_settings[lane_key]
+
+    def test_extract_manifest_lane_settings_empty_settings(self, fixtures_dir, tmp_path):
+        """An empty manifest (no ``Settings`` key) yields an empty dict."""
+        report.reset()
+        stats = _load_fixture(fixtures_dir, "PairedEndNoProject", "RunStats.json")
+        (tmp_path / "RunStats.json").write_text(json.dumps(stats))
+        (tmp_path / "RunManifest.json").write_text(json.dumps({}))
+        report.analysis_files = [str(tmp_path)]
+        report.search_files(["bases2fastq"])
+        module = MultiqcModule()
+        lane_settings = module._extract_manifest_lane_settings({}, "RUN01-a1b2")
+        assert lane_settings == {}
+
+
+class TestBuildIndexAssignmentFromStats:
+    """Tests for the ``_build_index_assignment_from_stats`` helper."""
+
+    def test_build_index_assignment_from_stats_with_occurrences(self, fixtures_dir):
+        """Occurrences in project RunStats yield per-sequence polony counts and percentages."""
+        report.reset()
+        run_dir = fixtures_dir / "PairedEndDefaultProject"
+        report.analysis_files = [str(run_dir)]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        project_stats = _load_fixture(
+            fixtures_dir, "PairedEndDefaultProject", "Samples", "DefaultProject", "DefaultProject_RunStats.json"
+        )
+        run_inner, total = m._build_index_assignment_from_stats(
+            project_stats, "RUN01-a1b2", project="DefaultProject"
+        )
+        assert total == 100000
+        assert "AAATTT" in run_inner
+        assert run_inner["AAATTT"]["SamplePolonyCounts"] == 5000
+        assert run_inner["AAATTT"]["PercentOfPolonies"] == pytest.approx(5.0)  # computed float: tolerate rounding
+
+    def test_build_index_assignment_from_stats_project(self, fixtures_dir):
+        """Passing ``project="DefaultProject"`` tags every returned entry with that Project value."""
+        report.reset()
+        run_dir = fixtures_dir / "PairedEndDefaultProject"
+        report.analysis_files = [str(run_dir)]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        project_stats = _load_fixture(
+            fixtures_dir, "PairedEndDefaultProject", "Samples", "DefaultProject", "DefaultProject_RunStats.json"
+        )
+        run_inner, _ = m._build_index_assignment_from_stats(
+            project_stats, "RUN01-a1b2", project="DefaultProject"
+        )
+        assert run_inner
+        for entry in run_inner.values():
+            assert entry.get("Project") == "DefaultProject"
+
+
+class TestMergeManifestIndexSequences:
+    """Exercise ``MultiqcModule._merge_manifest_index_sequences``."""
+
+    def test_merge_manifest_index_sequences(self, fixtures_dir):
+        """Index1/Index2 of the RunManifest sample end up on the matching assignment entry."""
+        report.reset()
+        analysis_dir = fixtures_dir / "PairedEndDefaultProject"
+        report.analysis_files = [str(analysis_dir)]
+        report.search_files(["bases2fastq"])
+        module = MultiqcModule()
+        run_name = "RUN01-a1b2"
+        stats_parts = ("PairedEndDefaultProject", "Samples", "DefaultProject", "DefaultProject_RunStats.json")
+        stats = _load_fixture(fixtures_dir, *stats_parts)
+        manifest = _load_fixture(fixtures_dir, "PairedEndDefaultProject", "RunManifest.json")
+        assignment, _ = module._build_index_assignment_from_stats(stats, run_name, project="DefaultProject")
+        sample_to_index = {run_name: assignment}
+        # The merge is expected to mutate the nested assignment dict in place (asserted below).
+        module._merge_manifest_index_sequences(sample_to_index, manifest, run_name)
+        merged_entry = assignment["AAATTT"]
+        assert merged_entry["Index1"] == "AAA"
+        assert merged_entry["Index2"] == "TTT"
+
+
+class TestParseRunProjectData:
+    """Exercise the run-level parsing performed while constructing the module."""
+
+    def test_parse_run_project_data_run_level(self, fixtures_dir):
+        """A run-level-only directory (PairedEndNoProject) fills the run and sample dicts."""
+        analysis_dir = fixtures_dir / "PairedEndNoProject"
+        report.reset()
+        report.analysis_files = [str(analysis_dir)]
+        report.search_files(["bases2fastq"])
+        module = MultiqcModule()
+        assert len(module.run_level_data) >= 1
+        assert len(module.run_level_samples) >= 1
+        first_sample = next(iter(module.run_level_samples))
+        assert "__" in first_sample
+        assert first_sample in module.run_level_samples_to_project
+
+    def test_parse_run_project_data_min_polonies_filter(self, fixtures_dir):
+        """With the threshold patched to 100, Sample1 (50 polonies) is dropped and Sample2 is kept."""
+        analysis_dir = fixtures_dir / "PairedEndNoProjectLowPolonies"
+        report.reset()
+        report.analysis_files = [str(analysis_dir)]
+        report.search_files(["bases2fastq"])
+        # Replace the module-level helper so that construction runs with a threshold of 100.
+        with patch("multiqc.modules.bases2fastq.bases2fastq._get_min_polonies", return_value=100):
+            module = MultiqcModule()
+        assert len(module.run_level_samples) == 1
+        kept_sample = next(iter(module.run_level_samples))
+        assert kept_sample.endswith("__Sample2")
+        assert all(not name.endswith("__Sample1") for name in module.run_level_samples)
+
+
+class TestParseRunUnassignedSequences:
+    """Exercise the run-level unassigned-sequences parser."""
+
+    def test_parse_run_unassigned_sequences(self, fixtures_dir):
+        """Parsing PairedEndNoProjectWithLanes yields a dict of int-keyed rows with the expected columns."""
+        analysis_dir = fixtures_dir / "PairedEndNoProjectWithLanes"
+        report.reset()
+        report.analysis_files = [str(analysis_dir)]
+        report.search_files(["bases2fastq"])
+        module = MultiqcModule()
+        rows = module._parse_run_unassigned_sequences("bases2fastq/run")
+        assert isinstance(rows, dict)
+        # NOTE(review): the loop below passes vacuously when the parser returns an empty dict;
+        # consider also asserting that ``rows`` is non-empty once the expected row count is confirmed.
+        for row_id, row in rows.items():
+            assert isinstance(row_id, int)
+            for column in ("Run Name", "I1", "I2", "Number of Polonies"):
+                assert column in row
+
+
+class TestGetMinPolonies:
+    """Exercise the ``_get_min_polonies`` configuration helper."""
+
+    def test_get_min_polonies_default_when_config_not_dict(self):
+        """A non-dict ``bases2fastq_config`` falls back to the default of 1000."""
+        for non_dict_config in (None, "string"):
+            with patch.object(config, "bases2fastq_config", non_dict_config, create=True):
+                assert _get_min_polonies() == 1000
+
+    def test_get_min_polonies_invalid_int_uses_default(self):
+        """A ``min_polonies`` value that is not a valid integer falls back to 1000."""
+        for invalid_value in ("bad", None):
+            with patch.object(config, "bases2fastq_config", {"min_polonies": invalid_value}, create=True):
+                assert _get_min_polonies() == 1000
+
+    def test_get_min_polonies_custom_value(self):
+        with patch.object(config, "bases2fastq_config", {"min_polonies": 5000}, create=True):
+            assert _get_min_polonies() == 5000
+
+
+class TestValidatePath:
+    """Exercise the ``_validate_path`` directory-containment check."""
+
+    def test_validate_path_escaped_returns_false(self, fixtures_dir, tmp_path):
+        """A path located above the base directory is rejected."""
+        report.reset()
+        report.analysis_files = [str(fixtures_dir / "PairedEndNoProject")]
+        report.search_files(["bases2fastq"])
+        module = MultiqcModule()
+        base_dir = tmp_path / "sub"
+        base_dir.mkdir()
+        assert module._validate_path(tmp_path.parent.resolve() / "any", base_dir.resolve()) is False
+
+    def test_validate_path_inside_returns_true(self, fixtures_dir):
+        analysis_dir = fixtures_dir / "PairedEndNoProject"
+        report.reset()
+        report.analysis_files = [str(analysis_dir)]
+        report.search_files(["bases2fastq"])
+        module = MultiqcModule()
+        assert module._validate_path(analysis_dir / "RunStats.json", analysis_dir) is True
+
+
+class TestReadJsonFile:
+    """Tests for _read_json_file with validation and errors."""
+
+    def test_read_json_file_path_outside_base_returns_none(self, fixtures_dir, tmp_path):
+        report.reset()
+        run_dir = fixtures_dir / "PairedEndNoProject"
+        report.analysis_files = [str(run_dir)]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        outside = (tmp_path / "..").resolve()
+        assert m._read_json_file(outside / "any.json", base_directory=tmp_path) is None
+
+    def test_read_json_file_missing_file_returns_none(self, fixtures_dir):
+        report.reset()
+        run_dir = fixtures_dir / "PairedEndNoProject"
+        report.analysis_files = [str(run_dir)]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        assert m._read_json_file(run_dir / "DoesNotExist.json") is None
+
+    def test_read_json_file_invalid_json_returns_none(self, fixtures_dir, tmp_path):
+        report.reset()
+        run_dir = fixtures_dir / "PairedEndNoProject"
+        report.analysis_files = [str(run_dir)]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        bad = tmp_path / "bad.json"
+        bad.write_text("not json {")
+        assert m._read_json_file(bad) is None
+
+
+class TestExtractRunAnalysisName:
+    """Tests for _extract_run_analysis_name."""
+
+    def test_extract_run_analysis_name_missing_runname_returns_none(self, fixtures_dir):
+        report.reset()
+        report.analysis_files = [str(fixtures_dir / "PairedEndNoProject")]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        assert m._extract_run_analysis_name({"AnalysisID": "a1b2"}, "test") is None
+
+    def test_extract_run_analysis_name_missing_analysisid_returns_none(self, fixtures_dir):
+        report.reset()
+        report.analysis_files = [str(fixtures_dir / "PairedEndNoProject")]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        assert m._extract_run_analysis_name({"RunName": "RUN01"}, "test") is None
+
+    def test_extract_run_analysis_name_ok(self, fixtures_dir):
+        report.reset()
+        report.analysis_files = [str(fixtures_dir / "PairedEndNoProject")]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        assert m._extract_run_analysis_name({"RunName": "RUN01", "AnalysisID": "a1b2c3d4"}) == "RUN01-a1b2"
+
+
+class TestParseRunProjectDataEdgeCases:
+    """Edge cases for _parse_run_project_data."""
+
+    def test_parse_run_project_data_empty_data_source_returns_empty(self, fixtures_dir):
+        report.reset()
+        report.analysis_files = [str(fixtures_dir / "PairedEndNoProject")]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        run_data, sample_data, sample_to_project = m._parse_run_project_data("", log_files=[])
+        assert run_data == {}
+        assert sample_data == {}
+        assert sample_to_project == {}
+
+    def test_parse_run_project_data_ignore_sample_skips_run(self, fixtures_dir):
+        report.reset()
+        run_dir = fixtures_dir / "PairedEndNoProject"
+        report.analysis_files = [str(run_dir)]
+        report.search_files(["bases2fastq"])
+        run_stats = _load_fixture(fixtures_dir, "PairedEndNoProject", "RunStats.json")
+        log_files: List[LoadedFileDict[Any]] = [
+            {
+                "f": json.dumps(run_stats),
+                "root": str(run_dir),
+                "fn": "RunStats.json",
+                "sp_key": "bases2fastq/run",
+                "s_name": "RUN01-a1b2",
+            }
+        ]
+        m = MultiqcModule()
+        with patch.object(m, "is_ignore_sample", return_value=True):
+            run_data, sample_data, _ = m._parse_run_project_data("bases2fastq/run", log_files=log_files)
+        assert run_data == {}
+        assert sample_data == {}
+
+
+class TestBuildIndexAssignmentEdgeCases:
+    """Edge cases for _build_index_assignment_from_stats."""
+
+    def test_build_index_assignment_no_samplestats_returns_empty(self, fixtures_dir):
+        report.reset()
+        report.analysis_files = [str(fixtures_dir / "PairedEndNoProject")]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        run_inner, total = m._build_index_assignment_from_stats(
+            {"NumPoloniesBeforeTrimming": 1000}, "RUN01-a1b2"
+        )
+        assert run_inner == {}
+        assert total == 1000
+
+    def test_build_index_assignment_sample_without_occurrences_skipped(self, fixtures_dir):
+        report.reset()
+        run_dir = fixtures_dir / "PairedEndDefaultProject"
+        report.analysis_files = [str(run_dir)]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        # Stats with one sample that has no Occurrences
+        stats = {
+            "RunName": "RUN01",
+            "AnalysisID": "a1b2c3d4",
+            "NumPoloniesBeforeTrimming": 50000,
+            "SampleStats": [
+                {"SampleID": "s1", "SampleName": "S1", "NumPolonies": 100},
+            ],
+        }
+        run_inner, total = m._build_index_assignment_from_stats(stats, "RUN01-a1b2")
+        assert run_inner == {}
+        assert total == 50000
+
+
+class TestMergeManifestIndexSequencesEdgeCases:
+    """Edge cases for _merge_manifest_index_sequences."""
+
+    def test_merge_manifest_no_samples_returns_early(self, fixtures_dir):
+        report.reset()
+        report.analysis_files = [str(fixtures_dir / "PairedEndNoProject")]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        sample_to_index = {"RUN01-a1b2": {"AAATTT": {}}}
+        m._merge_manifest_index_sequences(sample_to_index, {}, "RUN01-a1b2")
+        assert sample_to_index["RUN01-a1b2"]["AAATTT"].get("Index1", "") == ""
+
+    def test_merge_manifest_run_not_in_assignment_returns_early(self, fixtures_dir):
+        report.reset()
+        report.analysis_files = [str(fixtures_dir / "PairedEndNoProject")]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        sample_to_index = {}
+        m._merge_manifest_index_sequences(
+            sample_to_index, {"Samples": [{"SampleName": "S1", "Indexes": [{"Index1": "A", "Index2": "T"}]}]}, "RUN01-a1b2"
+        )
+        assert sample_to_index == {}
+
+    def test_merge_manifest_merged_indices_not_in_run_data_skipped(self, fixtures_dir):
+        report.reset()
+        report.analysis_files = [str(fixtures_dir / "PairedEndNoProject")]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        run_inner = {"AAATTT": {"SampleID": "RUN01-a1b2__S1"}}
+        sample_to_index = {"RUN01-a1b2": run_inner}
+        m._merge_manifest_index_sequences(
+            sample_to_index,
+            {"Samples": [{"SampleName": "S1", "Indexes": [{"Index1": "XXX", "Index2": "YYY"}]}]},
+            "RUN01-a1b2",
+        )
+        assert run_inner["AAATTT"].get("Index1", "") == ""
+
+
+class TestParseRunUnassignedEdgeCases:
+    """Edge cases for _parse_run_unassigned_sequences."""
+
+    def test_parse_run_unassigned_empty_data_source_returns_empty(self, fixtures_dir):
+        report.reset()
+        report.analysis_files = [str(fixtures_dir / "PairedEndNoProject")]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        assert m._parse_run_unassigned_sequences("") == {}
+
+    def test_parse_run_unassigned_no_lanes_skipped(self, fixtures_dir):
+        report.reset()
+        run_dir = fixtures_dir / "PairedEndNoProject"
+        report.analysis_files = [str(run_dir)]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        unassigned = m._parse_run_unassigned_sequences("bases2fastq/run")
+        assert unassigned == {}
+
+
+class TestModuleNoSamplesFound:
+    """Tests that ModuleNoSamplesFound is raised when no data."""
+
+    def test_no_log_files_raises(self, tmp_path):
+        report.reset()
+        empty_dir = tmp_path / "empty"
+        empty_dir.mkdir()
+        report.analysis_files = [str(empty_dir)]
+        report.search_files(["bases2fastq"])
+        with pytest.raises(ModuleNoSamplesFound):
+            MultiqcModule()
+
+
+class TestProjectLevelPath:
+    """Tests for project_level summary path (tabulate_project_stats, manifest in project)."""
+
+    def test_project_level_only_produces_sections(self, fixtures_dir, tmp_path):
+        """Directory with only project-level RunStats (no run-level) uses project_level path."""
+        report.reset()
+        project_stats = _load_fixture(
+            fixtures_dir, "PairedEndDefaultProject", "Samples", "DefaultProject", "DefaultProject_RunStats.json"
+        )
+        manifest = _load_fixture(fixtures_dir, "PairedEndDefaultProject", "RunManifest.json")
+        (tmp_path / "Samples" / "DefaultProject").mkdir(parents=True)
+        (tmp_path / "Samples" / "DefaultProject" / "DefaultProject_RunStats.json").write_text(
+            json.dumps(project_stats)
+        )
+        (tmp_path / "RunManifest.json").write_text(json.dumps(manifest))
+        report.analysis_files = [str(tmp_path)]
+        report.search_files(["bases2fastq"])
+        config.strict = True
+        m = MultiqcModule()
+        assert len(m.project_level_data) >= 1
+        assert len(m.run_level_data) == 0
+        assert len(m.sections) > 0
+
+
+class TestSelectDataBySummaryPath:
+    """Tests for _select_data_by_summary_path branches."""
+
+    def test_select_data_project_level(self, fixtures_dir):
+        report.reset()
+        run_dir = fixtures_dir / "PairedEndDefaultProject"
+        report.analysis_files = [str(run_dir)]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        run_data, sample_data, samples_to_projects, manifest_data, index_data, unassigned = m._select_data_by_summary_path(
+            "project_level"
+        )
+        assert run_data is m.project_level_data
+        assert sample_data is m.project_level_samples
+        assert unassigned == {}
+
+    def test_select_data_combined_level(self, fixtures_dir):
+        report.reset()
+        run_dir = fixtures_dir / "PairedEndDefaultProject"
+        report.analysis_files = [str(run_dir)]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        run_data, sample_data, samples_to_projects, manifest_data, index_data, unassigned = m._select_data_by_summary_path(
+            "combined_level"
+        )
+        assert run_data is m.run_level_data
+        assert sample_data is m.project_level_samples
+        assert isinstance(unassigned, dict)
+
+
+class TestParseIndexAssignmentEdgeCases:
+    """Edge cases for _parse_index_assignment."""
+
+    def test_parse_index_assignment_runstats_missing_samplestats(self, fixtures_dir, tmp_path):
+        report.reset()
+        run_stats = {"RunName": "RUN01", "AnalysisID": "a1b2c3d4", "NumPolonies": 100}
+        (tmp_path / "RunStats.json").write_text(json.dumps(run_stats))
+        (tmp_path / "RunManifest.json").write_text(json.dumps({"Settings": [{"Lane": 1}]}))
+        report.analysis_files = [str(fixtures_dir / "PairedEndNoProject"), str(tmp_path)]
+        report.search_files(["bases2fastq"])
+        m = MultiqcModule()
+        result = m._parse_index_assignment("bases2fastq/manifest")
+        assert isinstance(result, dict)
+
+
+def _test_data_bases2fastq_dir():
+    """Path to test-data/data/modules/bases2fastq (used for skipif, no fixture)."""
+    repo_root = Path(__file__).resolve().parents[4]
+    return repo_root / "test-data" / "data" / "modules" / "bases2fastq"
+
+
+class TestIntegration:
+    """Integration test using test-data repo (skipped when absent)."""
+
+    @pytest.mark.skipif(
+        not _test_data_bases2fastq_dir().exists(),
+        reason="test-data/data/modules/bases2fastq not found (clone test-data repo)",
+    )
+    def test_module_run_with_test_data(self, data_dir):
+        """Full module run against test-data repo produces sections and general stats."""
+        report.reset()
+        mod_dir = data_dir / "modules" / "bases2fastq"
+        report.analysis_files = [str(mod_dir)]
+        report.search_files(["bases2fastq"])
+        config.strict = True
+        m = MultiqcModule()
+        # Test-data has multiple run roots (WGS, WES, PairedEndNoProject, PairedEndDefaultProject, etc.)
+        assert len(m.run_level_data) >= 2, "expected at least 2 runs from test-data"
+        # At least one project-level layout (PairedEndDefaultProject* or PairedEndProjects)
+        assert len(m.project_level_data) >= 1, "expected at least 1 project from test-data"
+        total_samples = len(m.run_level_samples) + len(m.project_level_samples)
+        assert total_samples >= 10, "expected at least 10 samples from test-data"
+        # Module must produce output (general stats and/or sections)
+        assert len(report.general_stats_data) > 0 or len(m.sections) > 0, (
+            "expected general stats or report sections to be populated"
+        )

From fb097385a505caf009d1bb091fab0573a7149ad6 Mon Sep 17 00:00:00 2001
From: Carlos Ruiz <carlos.ruiz@elembio.com>
Date: Sat, 21 Feb 2026 13:55:48 -0800
Subject: [PATCH 3/6] Linting

---
 multiqc/modules/bases2fastq/bases2fastq.py    | 30 ++++-------------
 multiqc/modules/bases2fastq/plot_runs.py      | 12 +++----
 .../bases2fastq/tests/test_bases2fastq.py     | 33 ++++++++-----------
 3 files changed, 25 insertions(+), 50 deletions(-)

diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py
index 68135b4fb5..d170241de1 100644
--- a/multiqc/modules/bases2fastq/bases2fastq.py
+++ b/multiqc/modules/bases2fastq/bases2fastq.py
@@ -415,8 +415,6 @@ def _setup_colors(
         # Create run and project groups
         run_groups: Dict[str, List] = defaultdict(list)
         project_groups: Dict[str, List] = defaultdict(list)
-        # Only populated when summary_path == "project_level"; empty for run_level/combined_level
-        in_project_sample_groups: Dict[str, List] = defaultdict(list)
         ind_sample_groups: Dict[str, List] = defaultdict(list)
 
         for sample in natsorted(sample_data.keys()):
@@ -425,10 +423,8 @@ def _setup_colors(
             sample_project = samples_to_projects.get(sample, "DefaultProject")
             project_groups[sample_project].append(sample)
             ind_sample_groups[sample] = [sample]
-            if summary_path == "project_level":
-                in_project_sample_groups[sample].append(sample)
 
-        merged_groups = {**run_groups, **project_groups, **in_project_sample_groups, **ind_sample_groups}
+        merged_groups = {**run_groups, **project_groups, **ind_sample_groups}
 
         # Build color palette
         self.color_getter = mqc_colour.mqc_colour_scale()
@@ -659,12 +655,8 @@ def _extract_manifest_lane_settings(
             indexing = f"{' + '.join(indices_cycles)}<br>{' + '.join(indices)}"
             result[run_lane]["Indexing"] = indexing
             result[run_lane]["AdapterTrimType"] = lane_data.get("AdapterTrimType", "N/A")
-            result[run_lane]["R1AdapterMinimumTrimmedLength"] = lane_data.get(
-                "R1AdapterMinimumTrimmedLength", "N/A"
-            )
-            result[run_lane]["R2AdapterMinimumTrimmedLength"] = lane_data.get(
-                "R2AdapterMinimumTrimmedLength", "N/A"
-            )
+            result[run_lane]["R1AdapterMinimumTrimmedLength"] = lane_data.get("R1AdapterMinimumTrimmedLength", "N/A")
+            result[run_lane]["R2AdapterMinimumTrimmedLength"] = lane_data.get("R2AdapterMinimumTrimmedLength", "N/A")
         return result
 
     def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]:
@@ -708,9 +700,7 @@ def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]:
                     f"<Settings> section not found in {directory}/RunManifest.json.\nSkipping RunManifest metrics."
                 )
             else:
-                runs_manifest_data.update(
-                    self._extract_manifest_lane_settings(run_manifest, run_analysis_name)
-                )
+                runs_manifest_data.update(self._extract_manifest_lane_settings(run_manifest, run_analysis_name))
 
             self.add_data_source(f=f, s_name=run_analysis_name, module="bases2fastq")
 
@@ -764,9 +754,7 @@ def _parse_run_manifest_in_project(self, data_source: str) -> Dict[str, Any]:
             if "Settings" not in run_manifest_data:
                 log.warning(f"<Settings> section not found in {run_manifest}.\nSkipping RunManifest metrics.")
             else:
-                project_manifest_data.update(
-                    self._extract_manifest_lane_settings(run_manifest_data, run_analysis_name)
-                )
+                project_manifest_data.update(self._extract_manifest_lane_settings(run_manifest_data, run_analysis_name))
             data_source_info: LoadedFileDict[Any] = {
                 "fn": str(run_manifest.name),
                 "root": str(run_manifest.parent),
@@ -1025,9 +1013,7 @@ def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any]
                 )
                 continue
 
-            run_inner, _ = self._build_index_assignment_from_stats(
-                project_stats, run_analysis_name, project=project
-            )
+            run_inner, _ = self._build_index_assignment_from_stats(project_stats, run_analysis_name, project=project)
             sample_to_index_assignment[run_analysis_name] = run_inner
 
             run_manifest_data = self._read_json_file(run_manifest, base_directory=base_directory)
@@ -1042,9 +1028,7 @@ def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any]
             elif len(sample_to_index_assignment) == 0:
                 log.warning("Index assignment data missing. Skipping creation of index assignment metrics.")
             else:
-                self._merge_manifest_index_sequences(
-                    sample_to_index_assignment, run_manifest_data, run_analysis_name
-                )
+                self._merge_manifest_index_sequences(sample_to_index_assignment, run_manifest_data, run_analysis_name)
 
         return sample_to_index_assignment
 
diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py
index d1c2e3e524..39499dc723 100644
--- a/multiqc/modules/bases2fastq/plot_runs.py
+++ b/multiqc/modules/bases2fastq/plot_runs.py
@@ -653,12 +653,7 @@ def plot_base_quality_hist(run_data, color_dict):
 
 def plot_base_quality_by_cycle(run_data, color_dict):
     # Prepare plot data for median BQ of each cycle (skip runs without Reads/Cycles)
-    runs_with_reads = [
-        s
-        for s in run_data
-        if _run_has_reads(run_data[s])
-        and run_data[s]["Reads"][0].get("Cycles")
-    ]
+    runs_with_reads = [s for s in run_data if _run_has_reads(run_data[s]) and run_data[s]["Reads"][0].get("Cycles")]
     if not runs_with_reads:
         plot_content: list[Any] = []
         plot_html = linegraph.plot(
@@ -770,7 +765,10 @@ def plot_base_quality_by_cycle(run_data, color_dict):
         paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False
         cycle_dict = dict()
         R1CycleNum = len(run_data[s_name]["Reads"][0]["Cycles"])
-        if not run_data[s_name]["Reads"][0]["Cycles"] or "PercentBelowFilterThreshold" not in run_data[s_name]["Reads"][0]["Cycles"][0]:
+        if (
+            not run_data[s_name]["Reads"][0]["Cycles"]
+            or "PercentBelowFilterThreshold" not in run_data[s_name]["Reads"][0]["Cycles"][0]
+        ):
             continue
         for cycle in run_data[s_name]["Reads"][0]["Cycles"]:
             cycle_no = int(cycle["Cycle"])
diff --git a/multiqc/modules/bases2fastq/tests/test_bases2fastq.py b/multiqc/modules/bases2fastq/tests/test_bases2fastq.py
index 72ab3b4d3f..cc498aa8ae 100644
--- a/multiqc/modules/bases2fastq/tests/test_bases2fastq.py
+++ b/multiqc/modules/bases2fastq/tests/test_bases2fastq.py
@@ -67,9 +67,7 @@ def test_build_index_assignment_from_stats_with_occurrences(self, fixtures_dir):
         project_stats = _load_fixture(
             fixtures_dir, "PairedEndDefaultProject", "Samples", "DefaultProject", "DefaultProject_RunStats.json"
         )
-        run_inner, total = m._build_index_assignment_from_stats(
-            project_stats, "RUN01-a1b2", project="DefaultProject"
-        )
+        run_inner, total = m._build_index_assignment_from_stats(project_stats, "RUN01-a1b2", project="DefaultProject")
         assert total == 100000
         assert "AAATTT" in run_inner
         assert run_inner["AAATTT"]["SamplePolonyCounts"] == 5000
@@ -85,9 +83,7 @@ def test_build_index_assignment_from_stats_project(self, fixtures_dir):
         project_stats = _load_fixture(
             fixtures_dir, "PairedEndDefaultProject", "Samples", "DefaultProject", "DefaultProject_RunStats.json"
         )
-        run_inner, _ = m._build_index_assignment_from_stats(
-            project_stats, "RUN01-a1b2", project="DefaultProject"
-        )
+        run_inner, _ = m._build_index_assignment_from_stats(project_stats, "RUN01-a1b2", project="DefaultProject")
         assert run_inner
         for entry in run_inner.values():
             assert entry.get("Project") == "DefaultProject"
@@ -107,9 +103,7 @@ def test_merge_manifest_index_sequences(self, fixtures_dir):
             fixtures_dir, "PairedEndDefaultProject", "Samples", "DefaultProject", "DefaultProject_RunStats.json"
         )
         run_manifest = _load_fixture(fixtures_dir, "PairedEndDefaultProject", "RunManifest.json")
-        run_inner, _ = m._build_index_assignment_from_stats(
-            project_stats, "RUN01-a1b2", project="DefaultProject"
-        )
+        run_inner, _ = m._build_index_assignment_from_stats(project_stats, "RUN01-a1b2", project="DefaultProject")
         sample_to_index = {"RUN01-a1b2": run_inner}
         m._merge_manifest_index_sequences(sample_to_index, run_manifest, "RUN01-a1b2")
         assert run_inner["AAATTT"]["Index1"] == "AAA"
@@ -139,6 +133,7 @@ def test_parse_run_project_data_min_polonies_filter(self, fixtures_dir):
         report.analysis_files = [str(run_dir)]
         report.search_files(["bases2fastq"])
         import multiqc.modules.bases2fastq.bases2fastq as b2f_mod
+
         with patch.object(b2f_mod, "_get_min_polonies", return_value=100):
             m = MultiqcModule()
         assert len(m.run_level_samples) == 1
@@ -308,9 +303,7 @@ def test_build_index_assignment_no_samplestats_returns_empty(self, fixtures_dir)
         report.analysis_files = [str(fixtures_dir / "PairedEndNoProject")]
         report.search_files(["bases2fastq"])
         m = MultiqcModule()
-        run_inner, total = m._build_index_assignment_from_stats(
-            {"NumPoloniesBeforeTrimming": 1000}, "RUN01-a1b2"
-        )
+        run_inner, total = m._build_index_assignment_from_stats({"NumPoloniesBeforeTrimming": 1000}, "RUN01-a1b2")
         assert run_inner == {}
         assert total == 1000
 
@@ -353,7 +346,9 @@ def test_merge_manifest_run_not_in_assignment_returns_early(self, fixtures_dir):
         m = MultiqcModule()
         sample_to_index = {}
         m._merge_manifest_index_sequences(
-            sample_to_index, {"Samples": [{"SampleName": "S1", "Indexes": [{"Index1": "A", "Index2": "T"}]}]}, "RUN01-a1b2"
+            sample_to_index,
+            {"Samples": [{"SampleName": "S1", "Indexes": [{"Index1": "A", "Index2": "T"}]}]},
+            "RUN01-a1b2",
         )
         assert sample_to_index == {}
 
@@ -416,9 +411,7 @@ def test_project_level_only_produces_sections(self, fixtures_dir, tmp_path):
         )
         manifest = _load_fixture(fixtures_dir, "PairedEndDefaultProject", "RunManifest.json")
         (tmp_path / "Samples" / "DefaultProject").mkdir(parents=True)
-        (tmp_path / "Samples" / "DefaultProject" / "DefaultProject_RunStats.json").write_text(
-            json.dumps(project_stats)
-        )
+        (tmp_path / "Samples" / "DefaultProject" / "DefaultProject_RunStats.json").write_text(json.dumps(project_stats))
         (tmp_path / "RunManifest.json").write_text(json.dumps(manifest))
         report.analysis_files = [str(tmp_path)]
         report.search_files(["bases2fastq"])
@@ -438,8 +431,8 @@ def test_select_data_project_level(self, fixtures_dir):
         report.analysis_files = [str(run_dir)]
         report.search_files(["bases2fastq"])
         m = MultiqcModule()
-        run_data, sample_data, samples_to_projects, manifest_data, index_data, unassigned = m._select_data_by_summary_path(
-            "project_level"
+        run_data, sample_data, samples_to_projects, manifest_data, index_data, unassigned = (
+            m._select_data_by_summary_path("project_level")
         )
         assert run_data is m.project_level_data
         assert sample_data is m.project_level_samples
@@ -451,8 +444,8 @@ def test_select_data_combined_level(self, fixtures_dir):
         report.analysis_files = [str(run_dir)]
         report.search_files(["bases2fastq"])
         m = MultiqcModule()
-        run_data, sample_data, samples_to_projects, manifest_data, index_data, unassigned = m._select_data_by_summary_path(
-            "combined_level"
+        run_data, sample_data, samples_to_projects, manifest_data, index_data, unassigned = (
+            m._select_data_by_summary_path("combined_level")
         )
         assert run_data is m.run_level_data
         assert sample_data is m.project_level_samples

From bd4f5de0c4a097c0d585a3c3fa129cce540ea1c0 Mon Sep 17 00:00:00 2001
From: Carlos Ruiz <carlos.ruiz@elembio.com>
Date: Sat, 21 Feb 2026 14:00:13 -0800
Subject: [PATCH 4/6] Prettier linting

---
 .../PairedEndDefaultProject/RunManifest.json  | 38 ++++++-------
 .../PairedEndDefaultProject/RunStats.json     | 34 +++++------
 .../DefaultProject_RunStats.json              | 46 +++++++--------
 .../fixtures/PairedEndNoProject/RunStats.json | 34 +++++------
 .../RunStats.json                             | 44 +++++++--------
 .../PairedEndNoProjectWithLanes/RunStats.json | 56 +++++++++----------
 .../tests/fixtures/run_runstats.json          |  2 +-
 multiqc/templates/default/package-lock.json   |  5 ++
 8 files changed, 132 insertions(+), 127 deletions(-)

diff --git a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunManifest.json b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunManifest.json
index 4365e3e6cd..0a334a49bf 100644
--- a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunManifest.json
+++ b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunManifest.json
@@ -1,22 +1,22 @@
 {
-    "Settings": [
+  "Settings": [
+    {
+      "Lane": 1,
+      "AdapterTrimType": "Paired-End",
+      "R1AdapterMinimumTrimmedLength": 16,
+      "R2AdapterMinimumTrimmedLength": 16
+    }
+  ],
+  "Samples": [
+    {
+      "SampleName": "Sample1",
+      "Indexes": [
         {
-            "Lane": 1,
-            "AdapterTrimType": "Paired-End",
-            "R1AdapterMinimumTrimmedLength": 16,
-            "R2AdapterMinimumTrimmedLength": 16
+          "Lane": 1,
+          "Index1": "AAA",
+          "Index2": "TTT"
         }
-    ],
-    "Samples": [
-        {
-            "SampleName": "Sample1",
-            "Indexes": [
-                {
-                    "Lane": 1,
-                    "Index1": "AAA",
-                    "Index2": "TTT"
-                }
-            ]
-        }
-    ]
-}
\ No newline at end of file
+      ]
+    }
+  ]
+}
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunStats.json b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunStats.json
index e0f2afee0f..50c4a71b92 100644
--- a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunStats.json
+++ b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/RunStats.json
@@ -1,18 +1,18 @@
 {
-    "RunName": "RUN01",
-    "AnalysisID": "a1b2c3d4e5f6",
-    "NumPolonies": 50000,
-    "AssignedYield": 1.5,
-    "QualityScoreMean": 35,
-    "PercentQ30": 95,
-    "PercentQ40": 90,
-    "PercentAssignedReads": 100.0,
-    "PercentMismatch": 0,
-    "SampleStats": [
-        {
-            "SampleID": "s1",
-            "SampleName": "Sample1",
-            "NumPolonies": 50000
-        }
-    ]
-}
\ No newline at end of file
+  "RunName": "RUN01",
+  "AnalysisID": "a1b2c3d4e5f6",
+  "NumPolonies": 50000,
+  "AssignedYield": 1.5,
+  "QualityScoreMean": 35,
+  "PercentQ30": 95,
+  "PercentQ40": 90,
+  "PercentAssignedReads": 100.0,
+  "PercentMismatch": 0,
+  "SampleStats": [
+    {
+      "SampleID": "s1",
+      "SampleName": "Sample1",
+      "NumPolonies": 50000
+    }
+  ]
+}
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/Samples/DefaultProject/DefaultProject_RunStats.json b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/Samples/DefaultProject/DefaultProject_RunStats.json
index 29f960f0ec..71a39be1fd 100644
--- a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/Samples/DefaultProject/DefaultProject_RunStats.json
+++ b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndDefaultProject/Samples/DefaultProject/DefaultProject_RunStats.json
@@ -1,26 +1,26 @@
 {
-    "RunName": "RUN01",
-    "AnalysisID": "a1b2c3d4e5f6",
-    "Project": "DefaultProject",
-    "NumPolonies": 50000,
-    "NumPoloniesBeforeTrimming": 100000,
-    "AssignedYield": 1.5,
-    "QualityScoreMean": 35,
-    "PercentQ30": 95,
-    "PercentQ40": 90,
-    "PercentAssignedReads": 100.0,
-    "PercentMismatch": 0,
-    "SampleStats": [
+  "RunName": "RUN01",
+  "AnalysisID": "a1b2c3d4e5f6",
+  "Project": "DefaultProject",
+  "NumPolonies": 50000,
+  "NumPoloniesBeforeTrimming": 100000,
+  "AssignedYield": 1.5,
+  "QualityScoreMean": 35,
+  "PercentQ30": 95,
+  "PercentQ40": 90,
+  "PercentAssignedReads": 100.0,
+  "PercentMismatch": 0,
+  "SampleStats": [
+    {
+      "SampleID": "s1",
+      "SampleName": "Sample1",
+      "NumPolonies": 50000,
+      "Occurrences": [
         {
-            "SampleID": "s1",
-            "SampleName": "Sample1",
-            "NumPolonies": 50000,
-            "Occurrences": [
-                {
-                    "ExpectedSequence": "AAATTT",
-                    "NumPoloniesBeforeTrimming": 5000
-                }
-            ]
+          "ExpectedSequence": "AAATTT",
+          "NumPoloniesBeforeTrimming": 5000
         }
-    ]
-}
\ No newline at end of file
+      ]
+    }
+  ]
+}
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProject/RunStats.json b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProject/RunStats.json
index e0f2afee0f..50c4a71b92 100644
--- a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProject/RunStats.json
+++ b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProject/RunStats.json
@@ -1,18 +1,18 @@
 {
-    "RunName": "RUN01",
-    "AnalysisID": "a1b2c3d4e5f6",
-    "NumPolonies": 50000,
-    "AssignedYield": 1.5,
-    "QualityScoreMean": 35,
-    "PercentQ30": 95,
-    "PercentQ40": 90,
-    "PercentAssignedReads": 100.0,
-    "PercentMismatch": 0,
-    "SampleStats": [
-        {
-            "SampleID": "s1",
-            "SampleName": "Sample1",
-            "NumPolonies": 50000
-        }
-    ]
-}
\ No newline at end of file
+  "RunName": "RUN01",
+  "AnalysisID": "a1b2c3d4e5f6",
+  "NumPolonies": 50000,
+  "AssignedYield": 1.5,
+  "QualityScoreMean": 35,
+  "PercentQ30": 95,
+  "PercentQ40": 90,
+  "PercentAssignedReads": 100.0,
+  "PercentMismatch": 0,
+  "SampleStats": [
+    {
+      "SampleID": "s1",
+      "SampleName": "Sample1",
+      "NumPolonies": 50000
+    }
+  ]
+}
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectLowPolonies/RunStats.json b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectLowPolonies/RunStats.json
index feef607045..c56f7c5ef6 100644
--- a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectLowPolonies/RunStats.json
+++ b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectLowPolonies/RunStats.json
@@ -1,23 +1,23 @@
 {
-    "RunName": "RUN01",
-    "AnalysisID": "a1b2c3d4e5f6",
-    "NumPolonies": 50050,
-    "AssignedYield": 1.5,
-    "QualityScoreMean": 35,
-    "PercentQ30": 95,
-    "PercentQ40": 90,
-    "PercentAssignedReads": 100.0,
-    "PercentMismatch": 0,
-    "SampleStats": [
-        {
-            "SampleID": "s1",
-            "SampleName": "Sample1",
-            "NumPolonies": 50
-        },
-        {
-            "SampleID": "s2",
-            "SampleName": "Sample2",
-            "NumPolonies": 50000
-        }
-    ]
-}
\ No newline at end of file
+  "RunName": "RUN01",
+  "AnalysisID": "a1b2c3d4e5f6",
+  "NumPolonies": 50050,
+  "AssignedYield": 1.5,
+  "QualityScoreMean": 35,
+  "PercentQ30": 95,
+  "PercentQ40": 90,
+  "PercentAssignedReads": 100.0,
+  "PercentMismatch": 0,
+  "SampleStats": [
+    {
+      "SampleID": "s1",
+      "SampleName": "Sample1",
+      "NumPolonies": 50
+    },
+    {
+      "SampleID": "s2",
+      "SampleName": "Sample2",
+      "NumPolonies": 50000
+    }
+  ]
+}
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectWithLanes/RunStats.json b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectWithLanes/RunStats.json
index 1ff24f6fdc..69269bfe48 100644
--- a/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectWithLanes/RunStats.json
+++ b/multiqc/modules/bases2fastq/tests/fixtures/PairedEndNoProjectWithLanes/RunStats.json
@@ -1,31 +1,31 @@
 {
-    "RunName": "RUN01",
-    "AnalysisID": "a1b2c3d4e5f6",
-    "NumPolonies": 50000,
-    "NumPoloniesBeforeTrimming": 100000,
-    "AssignedYield": 1.5,
-    "QualityScoreMean": 35,
-    "PercentQ30": 95,
-    "PercentQ40": 90,
-    "PercentAssignedReads": 100.0,
-    "PercentMismatch": 0,
-    "SampleStats": [
+  "RunName": "RUN01",
+  "AnalysisID": "a1b2c3d4e5f6",
+  "NumPolonies": 50000,
+  "NumPoloniesBeforeTrimming": 100000,
+  "AssignedYield": 1.5,
+  "QualityScoreMean": 35,
+  "PercentQ30": 95,
+  "PercentQ40": 90,
+  "PercentAssignedReads": 100.0,
+  "PercentMismatch": 0,
+  "SampleStats": [
+    {
+      "SampleID": "s1",
+      "SampleName": "Sample1",
+      "NumPolonies": 50000
+    }
+  ],
+  "Lanes": [
+    {
+      "Lane": 1,
+      "UnassignedSequences": [
         {
-            "SampleID": "s1",
-            "SampleName": "Sample1",
-            "NumPolonies": 50000
+          "I1": "AAA",
+          "I2": "TTT",
+          "Count": 100
         }
-    ],
-    "Lanes": [
-        {
-            "Lane": 1,
-            "UnassignedSequences": [
-                {
-                    "I1": "AAA",
-                    "I2": "TTT",
-                    "Count": 100
-                }
-            ]
-        }
-    ]
-}
\ No newline at end of file
+      ]
+    }
+  ]
+}
diff --git a/multiqc/modules/bases2fastq/tests/fixtures/run_runstats.json b/multiqc/modules/bases2fastq/tests/fixtures/run_runstats.json
index 7e1812b321..50c4a71b92 100644
--- a/multiqc/modules/bases2fastq/tests/fixtures/run_runstats.json
+++ b/multiqc/modules/bases2fastq/tests/fixtures/run_runstats.json
@@ -15,4 +15,4 @@
       "NumPolonies": 50000
     }
   ]
-}
\ No newline at end of file
+}
diff --git a/multiqc/templates/default/package-lock.json b/multiqc/templates/default/package-lock.json
index 4041ea6cf3..38246e4ddf 100644
--- a/multiqc/templates/default/package-lock.json
+++ b/multiqc/templates/default/package-lock.json
@@ -806,6 +806,7 @@
       "integrity": "sha512-P1st0aksCrn9sGZhp8GMYwBnQsbvAWsZAX44oXNNvLHGqAOcoVxmjZiohstwQ7SqKnbR47akdNi+uleWD8+g6A==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "funding": {
         "type": "opencollective",
         "url": "https://opencollective.com/popperjs"
@@ -1454,6 +1455,7 @@
       "integrity": "sha512-xCmtksBKd/jdJ9Bt9p7nPKiuqrlBMBuuGkQlkhZjjQk3Ty48lv93k5Dq6OPkKt4XwxDJ7tvlfrTa1MPA9bf+QA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "chokidar": "^4.0.0",
         "immutable": "^5.0.2",
@@ -1506,6 +1508,7 @@
       "integrity": "sha512-+6erLbBm0+LROX2sPXlUYx/ux5PyE9K/a92Wrt6oA+WDAoFTdpHE5tCYCI5PNzq2y8df4rA+QgHLJuR4jNymsg==",
       "dev": true,
       "license": "BSD-2-Clause",
+      "peer": true,
       "dependencies": {
         "@jridgewell/source-map": "^0.3.3",
         "acorn": "^8.14.0",
@@ -1557,6 +1560,7 @@
       "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -1674,6 +1678,7 @@
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=12"
       },

From 9477572b7bd26908b2ba57a2928e66f6fa66990a Mon Sep 17 00:00:00 2001
From: Carlos Ruiz <carlos.ruiz@elembio.com>
Date: Sat, 21 Feb 2026 14:15:47 -0800
Subject: [PATCH 5/6] Revert package-lock.json peer flags; relax run/project count assertion in test

---
 multiqc/modules/bases2fastq/tests/test_bases2fastq.py | 6 +++---
 multiqc/templates/default/package-lock.json           | 5 -----
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/multiqc/modules/bases2fastq/tests/test_bases2fastq.py b/multiqc/modules/bases2fastq/tests/test_bases2fastq.py
index cc498aa8ae..08ad6fe42e 100644
--- a/multiqc/modules/bases2fastq/tests/test_bases2fastq.py
+++ b/multiqc/modules/bases2fastq/tests/test_bases2fastq.py
@@ -489,10 +489,10 @@ def test_module_run_with_test_data(self, data_dir):
         config.strict = True
         m = MultiqcModule()
         # Test-data has multiple run roots (WGS, WES, PairedEndNoProject, PairedEndDefaultProject, etc.)
-        assert len(m.run_level_data) >= 2, "expected at least 2 runs from test-data"
-        # At least one project-level layout (PairedEndDefaultProject* or PairedEndProjects)
-        assert len(m.project_level_data) >= 1, "expected at least 1 project from test-data"
         total_samples = len(m.run_level_samples) + len(m.project_level_samples)
+        assert len(m.run_level_data) >= 2 or len(m.project_level_data) >= 1, (
+            "expected at least 2 runs or at least 1 project from test-data"
+        )
         assert total_samples >= 10, "expected at least 10 samples from test-data"
         # Module must produce output (general stats and/or sections)
         assert len(report.general_stats_data) > 0 or len(m.sections) > 0, (
diff --git a/multiqc/templates/default/package-lock.json b/multiqc/templates/default/package-lock.json
index 38246e4ddf..4041ea6cf3 100644
--- a/multiqc/templates/default/package-lock.json
+++ b/multiqc/templates/default/package-lock.json
@@ -806,7 +806,6 @@
       "integrity": "sha512-P1st0aksCrn9sGZhp8GMYwBnQsbvAWsZAX44oXNNvLHGqAOcoVxmjZiohstwQ7SqKnbR47akdNi+uleWD8+g6A==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "funding": {
         "type": "opencollective",
         "url": "https://opencollective.com/popperjs"
@@ -1455,7 +1454,6 @@
       "integrity": "sha512-xCmtksBKd/jdJ9Bt9p7nPKiuqrlBMBuuGkQlkhZjjQk3Ty48lv93k5Dq6OPkKt4XwxDJ7tvlfrTa1MPA9bf+QA==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "chokidar": "^4.0.0",
         "immutable": "^5.0.2",
@@ -1508,7 +1506,6 @@
       "integrity": "sha512-+6erLbBm0+LROX2sPXlUYx/ux5PyE9K/a92Wrt6oA+WDAoFTdpHE5tCYCI5PNzq2y8df4rA+QgHLJuR4jNymsg==",
       "dev": true,
       "license": "BSD-2-Clause",
-      "peer": true,
       "dependencies": {
         "@jridgewell/source-map": "^0.3.3",
         "acorn": "^8.14.0",
@@ -1560,7 +1557,6 @@
       "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -1678,7 +1674,6 @@
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=12"
       },

From 549c84360ba56d8f1c3d9b96e7b476d465030692 Mon Sep 17 00:00:00 2001
From: Carlos Ruiz <carlos.ruiz@elembio.com>
Date: Sat, 21 Feb 2026 15:04:18 -0800
Subject: [PATCH 6/6] Exclude node_modules from shutil copy

---
 multiqc/core/write_results.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/multiqc/core/write_results.py b/multiqc/core/write_results.py
index 6605966a06..d086bbe02c 100644
--- a/multiqc/core/write_results.py
+++ b/multiqc/core/write_results.py
@@ -516,13 +516,16 @@ def _write_html_report(to_stdout: bool, report_path: Optional[Path], return_html
             parent_template.template_dir,
             tmp_dir.get_tmp_dir(),
             dirs_exist_ok=True,
-            ignore=shutil.ignore_patterns("*.pyc"),
+            ignore=shutil.ignore_patterns("*.pyc", "node_modules"),
         )
 
     # Copy the template files to the tmp directory (`dirs_exist_ok` makes sure
     # parent template files are overwritten)
     shutil.copytree(
-        template_mod.template_dir, tmp_dir.get_tmp_dir(), dirs_exist_ok=True, ignore=shutil.ignore_patterns("*.pyc")
+        template_mod.template_dir,
+        tmp_dir.get_tmp_dir(),
+        dirs_exist_ok=True,
+        ignore=shutil.ignore_patterns("*.pyc", "node_modules"),
     )
 
     # Function to include file contents in Jinja template