From 299123952b6b97301ace22059779f6ec2b03de40 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Sat, 20 Sep 2025 23:50:39 -0700 Subject: [PATCH 01/29] updates to b2f report --- docs/markdown/modules/bases2fastq.md | 4 + multiqc/modules/bases2fastq/bases2fastq.py | 403 +++++++++--------- .../modules/bases2fastq/plot_project_runs.py | 88 ---- multiqc/modules/bases2fastq/plot_runs.py | 208 ++++++++- multiqc/modules/bases2fastq/plot_samples.py | 116 ++++- multiqc/search_patterns.yaml | 4 + 6 files changed, 494 insertions(+), 329 deletions(-) delete mode 100644 multiqc/modules/bases2fastq/plot_project_runs.py diff --git a/docs/markdown/modules/bases2fastq.md b/docs/markdown/modules/bases2fastq.md index 2ce4857320..e32db7b5a8 100644 --- a/docs/markdown/modules/bases2fastq.md +++ b/docs/markdown/modules/bases2fastq.md @@ -31,4 +31,8 @@ bases2fastq/run: contents: SampleStats fn: RunStats.json num_lines: 100 +bases2fastq/manifest: + contents: Settings + fn: RunManifest.json + num_lines: 100 ``` diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index 132387a226..261a972565 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -1,8 +1,10 @@ +from collections import defaultdict import copy import csv import json import logging import random +from typing import Any, Dict, List import uuid from multiqc.base_module import BaseMultiqcModule, ModuleNoSamplesFound @@ -11,16 +13,18 @@ from multiqc.modules.bases2fastq.plot_runs import ( plot_run_stats, tabulate_run_stats, + tabulate_project_stats, plot_base_quality_hist, plot_base_quality_by_cycle, + plot_lane_cycle_stats, ) -from multiqc.modules.bases2fastq.plot_project_runs import tabulate_project_run_stats from multiqc.modules.bases2fastq.plot_samples import ( tabulate_sample_stats, sequence_content_plot, plot_per_cycle_N_content, plot_adapter_content, plot_per_read_gc_hist, + plot_sample_read_length, ) log = 
logging.getLogger(__name__) @@ -39,252 +43,245 @@ def __init__(self): doi="10.1038/s41587-023-01750-7", ) + # Initialize run, project and sample level structures + self.run_level_data = {} + self.run_level_samples = {} + self.run_level_samples_to_project = {} + self.project_level_data = {} + self.project_level_samples = {} + self.project_level_samples_to_project = {} + num_run_level_samples = 0 + num_project_level_samples = 0 + + # Initialize run and project groups + self.group_dict = dict() + self.group_lookup_dict = dict() + self.project_lookup_dict = dict() + + self.b2f_sample_data = dict() self.b2f_run_data = dict() self.b2f_run_project_data = dict() + self.b2f_run_project_sample_data = dict() self.missing_runs = set() self.sample_id_to_run = dict() - # Group by run name - self.group_dict = dict() - self.group_lookup_dict = dict() - self.project_lookup_dict = dict() + # Define if call is project- or run-level + run_level_log_files = len(list(self.find_log_files("bases2fastq/run"))) + project_level_log_files = len(list(self.find_log_files("bases2fastq/project"))) + + if run_level_log_files == 0 and project_level_log_files == 0: + error_msg = "No run- or project-level log files found within the Bases2Fastq results." 
+ log.error(error_msg) + raise ModuleNoSamplesFound(error_msg) + + # Parse data + if run_level_log_files > 0: + ( + self.run_level_data, self.run_level_samples, self.run_level_samples_to_project + ) = self._parse_run_project_data("bases2fastq/run") + if project_level_log_files > 0: + ( + self.project_level_data, self.project_level_samples, self.project_level_samples_to_project + ) = self._parse_run_project_data("bases2fastq/project") + + # Get run- and project-level samples + for data in self.run_level_samples.values(): + num_run_level_samples += len(data.keys()) + for data in self.project_level_samples.values(): + num_project_level_samples += len(data.keys()) + + # Ensure run/sample data found + if all([ + len(self.run_level_data) == 0, + num_run_level_samples == 0, + len(self.project_level_data) == 0, + num_project_level_samples == 0, + ]): + error_msg = "No run-, project- or sample-level data found" + log.error(error_msg) + raise ModuleNoSamplesFound(error_msg) + + # Log runs, projects and samples found + log.info(f"Found {len(self.run_level_data)} run(s) within the Bases2Fastq results.") + log.info(f"Found {len(self.project_level_data)} project(s) within the Bases2Fastq results.") + log.info(f"Found {num_project_level_samples} sample(s) within the Bases2Fastq results.") + + # Superfluous function call to confirm that it is used in this module + self.add_software_version(None) + + # Warn user if run-level/project-level or sample-level metrics were not found + if len(self.run_level_data) == 0 and len(self.project_level_data) == 0: + log.warning("No run/project stats found!") + if num_project_level_samples == 0: + log.warning("No sample stats found!") + + # Choose path to take, if project use only project-level data, otherwise use run- and project-level + summary_path = "" + if len(self.run_level_data) > 0 and len(self.project_level_data) == 0: + summary_path = "run_level" + elif len(self.run_level_data) == 0 and len(self.project_level_data) > 0: + summary_path = 
"project_level" + elif len(self.run_level_data) > 0 and len(self.project_level_data) > 0: + summary_path = "combined_level" + + # Define data to use + run_data = {} + sample_data = {} + samples_to_projects = {} + if summary_path == "run_level": + run_data = self.run_level_data + sample_data = self.project_level_samples + samples_to_projects = self.run_level_samples_to_project + elif summary_path == "project_level": + run_data = self.project_level_data + sample_data = self.project_level_samples + samples_to_projects = self.project_level_samples_to_project + elif summary_path == "combined_level": + run_data = self.run_level_data + sample_data = self.project_level_samples + samples_to_projects = self.project_level_samples_to_project + else: + error_msg = "No run- or project-level data was retained. No report will be generated." + log.error(error_msg) + return + + # Create run and project groups + run_groups = defaultdict(list) + project_groups = defaultdict(list) + sample_to_run_group = {} + for sample in sample_data.keys(): + (_run_name, _) = sample.split("__") + run_groups[_run_name].append(sample) + sample_to_run_group[sample] = _run_name + sample_project = samples_to_projects[sample] + project_groups[sample_project].append(sample) + merged_groups = dict(run_groups) | dict(project_groups) + + # Assign color for each group + self.color_getter = mqc_colour.mqc_colour_scale() + self.palette = sum( + [ + self.color_getter.get_colours(hue) + for hue in ["Set2", "Pastel1", "Accent", "Set1", "Set3", "Dark2", "Paired", "Pastel2"] + ], + [], + ) + if len(merged_groups) > len(self.palette): + hex_range = 2**24 + extra_colors = [hex(random.randrange(0, hex_range)) for _ in range(len(merged_groups), len(self.palette))] + self.palette = self.palette + extra_colors + self.group_color = {g: c for g, c in zip(merged_groups.keys(), self.palette[: len(merged_groups)])} + self.sample_color = dict() + for s_name in samples_to_projects.keys(): + self.sample_color.update({s_name: 
self.group_color[samples_to_projects[s_name]]}) + self.run_color = copy.deepcopy(self.group_color) # Make sure that run colors and group colors match + self.palette = self.palette[len(merged_groups) :] - # bases2fastq/run - num_runs = 0 - num_samples = 0 - for f in self.find_log_files("bases2fastq/run"): + + # Plot metrics + qc_metrics_function = ( + tabulate_run_stats if summary_path in ["run_level", "combined_level"] else tabulate_project_stats + ) + self.add_run_plots( + data=run_data, + plot_functions=[ + qc_metrics_function, + plot_lane_cycle_stats, + plot_run_stats, + plot_base_quality_hist, + plot_base_quality_by_cycle + ] + ) + self.add_sample_plots( + data=sample_data, group_lookup=samples_to_projects, project_lookup=samples_to_projects + ) + + def get_uuid(self): + return str(uuid.uuid4()).replace("-", "").lower() + + def _parse_run_project_data(self, data_source: str) -> List[Dict[str, Any]]: + runs_global_data = {} + runs_sample_data = {} + sample_to_project = {} + if data_source == "": + return [runs_global_data, runs_sample_data, sample_to_project] + + for f in self.find_log_files(data_source): data = json.loads(f["f"]) + # Copy incoming data and reset samples to include only desired + data_to_return = copy.deepcopy(data) + data_to_return["SampleStats"] = [] + # get run + analysis run_name = data.get("RunName", None) analysis_id = data.get("AnalysisID", None)[0:4] if not run_name or not analysis_id: - log.error("Error with RunStats.json. Either RunName or AnalysisID is absent.") log.error( - "Please visit Elembio online documentation for more information - https://docs.elembio.io/docs/bases2fastq/introduction/" + "Error with RunStats.json. 
Either RunName or AnalysisID is absent.\n" + "Please visit Elembio online documentation for more information - " + "https://docs.elembio.io/docs/bases2fastq/introduction/" ) continue - + run_analysis_name = "-".join([run_name, analysis_id]) run_analysis_name = self.clean_s_name(run_analysis_name, f) + # skip run if in user provider ignore list + if self.is_ignore_sample(run_analysis_name): + log.info( + f"Skipping <{run_analysis_name}> because it is present in ignore list." + ) + continue + + # Check run is present in the final dictionaries + if run_analysis_name not in runs_global_data: + runs_global_data[run_analysis_name] = data_to_return + + project = self.clean_s_name(data.get("Project", "DefaultProject"), f) + # map sample UUIDs to run_analysis_name for sample_data in data["SampleStats"]: sample_id = sample_data["SampleID"] sample_name = sample_data["SampleName"] sample_data["RunName"] = run_name - run_analysis_sample_name = "__".join([run_analysis_name, sample_name]) num_polonies = sample_data["NumPolonies"] - if num_polonies < MIN_POLONIES: + if num_polonies < MIN_POLONIES: log.warning( - f"Skipping {run_analysis_sample_name} because it has <{MIN_POLONIES} assigned reads [n={num_polonies}]." + f"Skipping {run_analysis_sample_name} because it has" + f" <{MIN_POLONIES} assigned reads [n={num_polonies}]." ) continue # skip run if in user provider ignore list - if self.is_ignore_sample(sample_id): - continue - if self.is_ignore_sample(run_analysis_sample_name): + if self.is_ignore_sample(sample_id) or self.is_ignore_sample(run_analysis_sample_name): + log.info( + f"Skipping <{sample_id}> ({run_analysis_sample_name}) because it is present in ignore list." 
+ ) continue - self.sample_id_to_run[sample_id] = run_analysis_name - self.b2f_sample_data[run_analysis_sample_name] = sample_data - num_samples += 1 - - # skip run if in user provider ignore list - if self.is_ignore_sample(run_analysis_name): - continue + # If sample passes all checks add it back + runs_sample_data[run_analysis_sample_name] = sample_data + sample_to_project[run_analysis_sample_name] = project - num_runs += 1 - self.b2f_run_data[run_analysis_name] = data self.add_data_source(f=f, s_name=run_analysis_name, module="bases2fastq") - # Checking if run lengths configurations are the same for all samples. - self.run_r1r2_lens = [] - for s in self.b2f_run_data.keys(): - read_lens = str(len(self.b2f_run_data[s]["Reads"][0]["Cycles"])) - if len(self.b2f_run_data[s]["Reads"]) > 1: - read_lens += "+" + str(len(self.b2f_run_data[s]["Reads"][1]["Cycles"])) - self.run_r1r2_lens.append(read_lens) - - run_r1r2_lens_dict = {} - for nn, rl in enumerate(self.run_r1r2_lens): - if not run_r1r2_lens_dict.get(rl): - run_r1r2_lens_dict[rl] = [] - run_r1r2_lens_dict[rl].append(list(self.b2f_run_data.keys())[nn]) - - # - # bases2fastq/project - # - num_projects = 0 - for f in self.find_log_files("bases2fastq/project"): - data = json.loads(f["f"]) - samples = data["Samples"] - - # get run + analysis - run_name = data.get("RunName", None) - analysis_id = data.get("AnalysisID", None)[0:4] - - run_analysis_name = "-".join([run_name, analysis_id]) - run_analysis_name = self.clean_s_name(run_analysis_name, f) - - if not run_name or not analysis_id: - log.error(f"Error with {f['root']}. 
Either RunName or AnalysisID is absent.") - log.error("Please visit Elembio online documentation for more information -") - continue - - project = self.clean_s_name(data.get("Project", "DefaultProject"), f) - - run_analysis_project_name = "__".join([run_name, project, analysis_id]) - run_analysis_project_name = self.clean_s_name(run_analysis_project_name, f) - - # skip project if in user provider ignore list - if self.is_ignore_sample(run_analysis_project_name): - continue - - for sample_name in samples: - run_analysis_sample_name = self.clean_s_name("__".join([run_analysis_name, sample_name]), f) - self.project_lookup_dict[run_analysis_sample_name] = project - num_projects += 1 - - # remove samples - del data["Samples"] - - self.b2f_run_project_data[run_analysis_project_name] = data - self.add_data_source(f=f, s_name=project, module="bases2fastq") - - # if all RunStats.json too large, none will be found. Guide customer and Exit at this point. - if len(self.sample_id_to_run) != 0: - log.info(f"Found {num_runs} total RunStats.json") - - # ensure run/sample data found - if num_projects == 0 and num_samples == 0: - raise ModuleNoSamplesFound - log.info(f"Found {num_samples} samples and {num_projects} projects within the bases2fastq results") - - # Superfluous function call to confirm that it is used in this module - self.add_software_version(None) - - # process groups / projects - for s_name in self.b2f_sample_data.keys(): - s_group = self.b2f_sample_data[s_name]["RunName"] - - if not self.group_dict.get(s_group): - self.group_dict.update({s_group: []}) + return [runs_global_data, runs_sample_data, sample_to_project] - self.group_dict[s_group].append(s_name) - self.group_lookup_dict.update({s_name: s_group}) - - # Assign project - for s_name in self.b2f_sample_data.keys(): - if self.project_lookup_dict.get(s_name): - s_group = self.project_lookup_dict[s_name] - if not self.group_dict.get(s_group): - self.group_dict.update({s_group: []}) - 
self.group_dict[s_group].append(s_name) - self.group_lookup_dict.update({s_name: s_group}) - - # Assign color for each group - self.color_getter = mqc_colour.mqc_colour_scale() - self.palette = sum( - [ - self.color_getter.get_colours(hue) - for hue in ["Set2", "Pastel1", "Accent", "Set1", "Set3", "Dark2", "Paired", "Pastel2"] - ], - [], - ) - if len(self.group_dict) > len(self.palette): - hex_range = 2**24 - extra_colors = [hex(random.randrange(0, hex_range)) for _ in range(len(self.group_dict), len(self.palette))] - self.palette = self.palette + extra_colors - self.group_color = {g: c for g, c in zip(self.group_dict.keys(), self.palette[: len(self.group_dict)])} - self.sample_color = dict() - for s_name in self.b2f_sample_data.keys(): - self.sample_color.update({s_name: self.group_color[self.group_lookup_dict[s_name]]}) - self.run_color = copy.deepcopy(self.group_color) # Make sure that run colors and group colors match - self.palette = self.palette[len(self.group_dict) :] - - # Read custom group info - self.group_info_exist = False - for f in self.find_log_files("bases2fastq/group"): - if self.group_info_exist: - log.warning( - "More than one group assignment files are found. Please only keep " - "one assignment file in the analysis folder. 
Bases2Fastq stats will " - "not be plotted" - ) - for row in csv.DictReader(f["f"]): - s_group = row["Group"] - s_name = row["Sample Name"] - if self.group_dict.get(s_group) is None: - self.group_dict[s_group] = [] - self.group_dict[s_group].append(s_name) - self.group_lookup_dict[s_name] = s_group - for group in self.group_dict.keys(): - if group not in self.run_color: - if len(self.palette) > 0: - self.group_color[group] = self.palette.pop(0) - else: - hex_range = 2**24 - extra_color = hex(random.randrange(0, hex_range)) - self.group_color[group] = extra_color - self.sample_color = dict() - for s_name in self.b2f_sample_data.keys(): - self.sample_color.update({s_name: self.group_color[self.group_lookup_dict[s_name]]}) - - # sort run - data_keys = list(self.b2f_run_data.keys()) - data_keys.sort() - sorted_data = {s_name: self.b2f_run_data[s_name] for s_name in data_keys} - self.b2f_run_data = sorted_data - # sort projects - data_keys = list(self.b2f_run_project_data.keys()) - data_keys.sort() - sorted_data = {s_name: self.b2f_run_project_data[s_name] for s_name in data_keys} - self.b2f_run_project_data = sorted_data - # sort samples - data_keys = list(self.b2f_sample_data.keys()) - sorted_keys = sorted(data_keys, key=lambda x: (self.group_lookup_dict[x], x)) - sorted_data = {s_name: self.b2f_sample_data[s_name] for s_name in sorted_keys} - self.b2f_sample_data = sorted_data - - if len(self.b2f_run_data) == 0: - log.warning("No run stats file found!") - if len(self.b2f_sample_data) == 0: - log.warning("No sample stats file found!") - - # Add sections - self.add_run_plots() - if num_projects > 0: - self.add_project_run_plots() - self.add_sample_plots() - - def get_uuid(self): - return str(uuid.uuid4()).replace("-", "").lower() - - def add_run_plots(self): - plot_functions = [tabulate_run_stats, plot_run_stats, plot_base_quality_hist, plot_base_quality_by_cycle] + def add_run_plots(self, data, plot_functions): for func in plot_functions: - plot_html, plot_name, 
anchor, description, helptext, plot_data = func(self.b2f_run_data, self.run_color) + plot_html, plot_name, anchor, description, helptext, plot_data = func(data, self.run_color) self.add_section(name=plot_name, plot=plot_html, anchor=anchor, description=description, helptext=helptext) self.write_data_file(plot_data, f"base2fastq:{plot_name}") - def add_project_run_plots(self): - plot_functions = [tabulate_project_run_stats] - for func in plot_functions: - plot_html, plot_name, anchor, description, helptext, plot_data = func( - self.b2f_run_project_data, self.run_color - ) - self.add_section(name=plot_name, plot=plot_html, anchor=anchor, description=description, helptext=helptext) - self.write_data_file(plot_data, f"base2fastq_projects:{plot_name}") - - def add_sample_plots(self): + def add_sample_plots(self, data, group_lookup, project_lookup): plot_functions = [ tabulate_sample_stats, + plot_sample_read_length, sequence_content_plot, plot_per_cycle_N_content, plot_adapter_content, @@ -292,7 +289,7 @@ def add_sample_plots(self): ] for func in plot_functions: plot_html, plot_name, anchor, description, helptext, plot_data = func( - self.b2f_sample_data, self.group_lookup_dict, self.project_lookup_dict, self.sample_color + data, group_lookup, project_lookup, self.sample_color ) self.add_section(name=plot_name, plot=plot_html, anchor=anchor, description=description, helptext=helptext) self.write_data_file(plot_data, f"base2fastq:{plot_name}") diff --git a/multiqc/modules/bases2fastq/plot_project_runs.py b/multiqc/modules/bases2fastq/plot_project_runs.py deleted file mode 100644 index 6a3663535d..0000000000 --- a/multiqc/modules/bases2fastq/plot_project_runs.py +++ /dev/null @@ -1,88 +0,0 @@ -from multiqc.plots import table -from multiqc import config - -""" -Functions for plotting per run information of bases2fastq -""" - - -def tabulate_project_run_stats(run_data, color_dict): - """ - Tabulate general information and statistics of each run - """ - plot_content = dict() 
- for s_name in run_data.keys(): - run_stats = dict() - run_stats.update({"num_polonies_run": int(run_data[s_name]["NumPolonies"])}) - run_stats.update({"yield_run": run_data[s_name]["AssignedYield"]}) - run_stats.update({"mean_base_quality_run": run_data[s_name]["QualityScoreMean"]}) - run_stats.update({"percent_q30_run": run_data[s_name]["PercentQ30"]}) - run_stats.update({"percent_q40_run": run_data[s_name]["PercentQ40"]}) - plot_content.update({s_name: run_stats}) - - headers = {} - headers["num_polonies_run"] = { - "title": f"# Polonies ({config.base_count_prefix})", - "description": f"The total number of polonies that are calculated for the run ({config.base_count_desc})", - "min": 0, - "scale": "RdYlGn", - "shared_key": "base_count", - } - headers["percent_assigned_run"] = { - "title": "% Assigned Reads", - "description": "The percentage of reads assigned to sample(s)", - "max": 100, - "min": 0, - "scale": "BuPu", - "suffix": "%", - } - headers["yield_run"] = { - "title": "Assigned Yield (Gb)", - "description": "The run yield based on assigned reads in gigabases", - "scale": "Blues", - } - headers["mean_base_quality_run"] = { - "title": "Quality Score Mean", - "description": "Average base quality across Read 1 and Read 2", - "min": 0, - "scale": "Spectral", - } - headers["percent_q30_run"] = { - "title": "Percent Q30", - "description": "The percentage of ≥ Q30 Q scores for the project. This includes assigned and unassigned reads and excludes filtered reads and no calls.", - "max": 100, - "min": 0, - "scale": "RdYlGn", - "suffix": "%", - } - headers["percent_q40_run"] = { - "title": "Percent Q40", - "description": "The percentage of ≥ Q40 Q scores for the project. 
This includes assigned and unassigned reads and excludes filtered reads and no calls.", - "max": 100, - "min": 0, - "scale": "RdYlGn", - "suffix": "%", - } - - pconfig = { - "title": "bases2fastq: General Sequencing (Project) QC metrics", - "col1_header": "Run Name", - "id": "project_run_metrics_table", - "ylab": "QC", - } - - plot_name = "(Project) Sequencing QC metrics table" - plot_html = table.plot(plot_content, headers, pconfig=pconfig) - anchor = "project_run_qc_metrics_table" - description = "QC metrics per run, per project" - helptext = """ - This section displays metrics that indicate the quality of each sequencing run: \n - - Run Name: Unique identifier composed of (RunName)__(UUID), where (RunName) maps to the AVITI run name and (UUID) maps to the unique Bases2Fastq analysis result.\n - - Number of Polonies: The total number of polonies that are calculated for the run.\n - - Percentage Assigned Reads: The percentage of reads that are assigned to a sample.\n - - Assigned Yield (Gb): The run yield that is based on assigned reads in gigabases.\n - - Quality Score Mean: The mean Q score of base calls for the samples. This excludes filtered reads and no calls.\n - - Percent Q30: The percentage of ≥ Q30 Q scores for the run. This includes assigned and unassigned reads and excludes filtered reads and no calls.\n - - Percent Q40: The percentage of ≥ Q40 Q scores for the run. 
This includes assigned and unassigned reads and excludes filtered reads and no calls.\n - """ - return plot_html, plot_name, anchor, description, helptext, plot_content diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index accd532678..a7c8ad8d4a 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -19,8 +19,8 @@ def plot_run_stats(run_data, color_dict): yields = dict() for run in run_names: # Index Assignment Polonies and Yields ### - # percent_assigned = run_data[run].get("PercentAssignedReads",100.0) - percent_assigned = run_data[run]["PercentAssignedReads"] + percent_assigned = run_data[run].get("PercentAssignedReads", 100.0) + # percent_assigned = run_data[run]["PercentAssignedReads"] percent_perfect_assigned = ( 100.00 - run_data[run]["PercentMismatch"] @@ -42,7 +42,7 @@ def plot_run_stats(run_data, color_dict): num_polonies[run] = num_polonies_run total_yield_run = {} - total_yield = run_data[run].get("TotalYield", 300.0) + total_yield = run_data[run].get("TotalYield", run_data[run].get("AssignedYield", 300.0)) total_yield_run["Perfect Index"] = total_yield * percent_perfect_total * 0.01 total_yield_run["Mismatched Index"] = total_yield * percent_imperfect_total * 0.01 total_yield_run["Unassigned"] = ( @@ -54,7 +54,7 @@ def plot_run_stats(run_data, color_dict): pconfig = { "data_labels": [ {"name": "Number of Polonies", "ylab": "Number of Polonies", "format": "{d}"}, - {"name": "Yield (Gb)", "ylab": "Gb"}, + {"name": "Yield (Gb)", "ylab": "Yield"}, ], "cpswitch": True, "stacking": "normal", @@ -69,15 +69,135 @@ def plot_run_stats(run_data, color_dict): "Unassigned": {"name": "Unassigned Index", "color": "#434348"}, } ] * 2 - plot_name = "Sequencing Run Yield" plot_html = bargraph.plot(plot_content, cats, pconfig=pconfig) anchor = "run_yield_plot" description = "Bar plots of sequencing run yields. 
Please see individual run reports for details" helptext = """ This section shows and compare the yield and index assignment rate of each sequencing run.\n\n - - Number of Polonies: The total number of polonies that are calculated for the run.\n - - Yield: The total yield of all assigned reads in gigabases. + - Number of Polonies: The total number of polonies that are calculated for the run.\n + - Yield: The total yield of all assigned reads in gigabases. + """ + return plot_html, plot_name, anchor, description, helptext, plot_content + + +def _calculate_reads_eliminated(run_data) -> int: + """ + Calculate the total number of reads eliminated during trimming. + + This function iterates over the lanes in the given run data and sums the + difference between the number of polonies before trimming and after trimming. + If required fields are missing, they are skipped. + + Args: + run_data (dict): Dictionary containing sequencing run data with lane information. + + Returns: + int: The total number of reads eliminated across all lanes. 
+ """ + reads_eliminated = 0 + if "Lanes" not in run_data: + return reads_eliminated + for lane in run_data["Lanes"]: + if "NumPolonies" not in lane or "NumPoloniesBeforeTrimming" not in lane: + continue + reads_eliminated += lane["NumPoloniesBeforeTrimming"] - lane["NumPolonies"] + + return reads_eliminated + + +def tabulate_project_stats(run_data, color_dict): + """ + Tabulate general information and statistics of each run + """ + plot_content = dict() + for s_name in run_data.keys(): + project = run_data[s_name]["Project"] + run_project_name = f"{s_name} | {project}" + run_stats = dict() + run_stats.update({"num_polonies_run": int(run_data[s_name]["NumPolonies"])}) + run_stats.update({"yield_run": run_data[s_name]["AssignedYield"]}) + run_stats.update({"mean_base_quality_run": run_data[s_name]["QualityScoreMean"]}) + run_stats.update({"percent_q30_run": run_data[s_name]["PercentQ30"]}) + run_stats.update({"percent_q40_run": run_data[s_name]["PercentQ40"]}) + run_stats.update({"reads_eliminated": _calculate_reads_eliminated(run_data[s_name])}) + plot_content.update({run_project_name: run_stats}) + + headers = {} + headers["num_polonies_run"] = { + "title": "# Polonies", + "description": "The total number of polonies that are calculated for the run.", + "min": 0, + "scale": "RdYlGn", + } + headers["percent_assigned_run"] = { + "title": "% Assigned Reads", + "description": "The percentage of reads assigned to sample(s)", + "max": 100, + "min": 0, + "scale": "BuPu", + "suffix": "%", + } + headers["yield_run"] = { + "title": "Assigned Yield (Gb)", + "description": "The run yield based on assigned reads in gigabases", + "scale": "Blues", + } + headers["mean_base_quality_run"] = { + "title": "Quality Score Mean", + "description": "Average base quality across Read 1 and Read 2", + "min": 0, + "scale": "Spectral", + } + headers["percent_q30_run"] = { + "title": "Percent Q30", + "description": "The percentage of ≥ Q30 Q scores for the project. 
This includes assigned and unassigned reads and excludes filtered reads and no calls.", + "max": 100, + "min": 0, + "scale": "RdYlGn", + "suffix": "%", + } + headers["percent_q40_run"] = { + "title": "Percent Q40", + "description": "The percentage of ≥ Q40 Q scores for the project. This includes assigned and unassigned reads and excludes filtered reads and no calls.", + "max": 100, + "min": 0, + "scale": "RdYlGn", + "suffix": "%", + } + headers["reads_eliminated"] = { + "title": "Reads Eliminated", + "description": "Number of reads eliminated.", + } + + pconfig = { + "title": "bases2fastq: General Sequencing (Project) QC metrics", + "col1_header": "Run Name", + "id": "project_run_metrics_table", + "ylab": "QC", + } + + project_header = "" + run_keys = list(run_data.keys()) + if len(run_keys) > 1: + project_header = "(Project) " + elif len(run_keys) == 1: + first_key = run_keys[0] + project_header = f'{run_data[first_key]["Project"]} | ' + plot_name = f"{project_header}Sequencing QC Metrics Table" + plot_html = table.plot(plot_content, headers, pconfig=pconfig) + anchor = "project_run_qc_metrics_table" + description = "QC metrics per run, per project" + helptext = """ + This section displays metrics that indicate the quality of each sequencing run: \n + - Run Name: Unique identifier composed of (RunName)__(UUID), where (RunName) maps to the AVITI run name and (UUID) maps to the unique Bases2Fastq analysis result.\n + - Number of Polonies: The total number of polonies that are calculated for the run.\n + - Percentage Assigned Reads: The percentage of reads that are assigned to a sample.\n + - Assigned Yield (Gb): The run yield that is based on assigned reads in gigabases.\n + - Quality Score Mean: The mean Q score of base calls for the samples. This excludes filtered reads and no calls.\n + - Percent Q30: The percentage of ≥ Q30 Q scores for the run. 
This includes assigned and unassigned reads and excludes filtered reads and no calls.\n + - Percent Q40: The percentage of ≥ Q40 Q scores for the run. This includes assigned and unassigned reads and excludes filtered reads and no calls.\n + - Reads Eliminated: Number of reads eliminated across lanes.\n """ return plot_html, plot_name, anchor, description, helptext, plot_content @@ -91,19 +211,20 @@ def tabulate_run_stats(run_data, color_dict): run_stats = dict() run_stats.update({"num_polonies_run": int(run_data[s_name]["NumPolonies"])}) run_stats.update({"percent_assigned_run": run_data[s_name].get("PercentAssignedReads", 100.0)}) + run_stats.update({"percent_unexpected_index_pairs": run_data[s_name].get("PercentUnexpectedIndexPairs", 0.0)}) run_stats.update({"yield_run": run_data[s_name]["AssignedYield"]}) run_stats.update({"mean_base_quality_run": run_data[s_name]["QualityScoreMean"]}) run_stats.update({"percent_q30_run": run_data[s_name]["PercentQ30"]}) run_stats.update({"percent_q40_run": run_data[s_name]["PercentQ40"]}) + run_stats.update({"reads_eliminated": _calculate_reads_eliminated(run_data[s_name])}) plot_content.update({s_name: run_stats}) headers = {} headers["num_polonies_run"] = { - "title": f"# Polonies ({config.base_count_prefix})", - "description": f"The total number of polonies that are calculated for the run. 
({config.base_count_desc})", + "title": "# Polonies", + "description": "The total number of polonies that are calculated for the run.)", "min": 0, "scale": "RdYlGn", - "shared_key": "base_count", } headers["percent_assigned_run"] = { "title": "% Assigned Reads", @@ -113,6 +234,14 @@ def tabulate_run_stats(run_data, color_dict): "scale": "BuPu", "suffix": "%", } + headers["percent_unexpected_index_pairs"] = { + "title": "% Unexpected Index Pairs", + "description": "The percentage of unexpected index pairs", + "max": 100, + "min": 0, + "scale": "BuPu", + "suffix": "%", + } headers["yield_run"] = { "title": "Yield (Gb)", "description": "The run yield based on assigned reads in gigabases", @@ -140,9 +269,13 @@ def tabulate_run_stats(run_data, color_dict): "scale": "RdYlGn", "suffix": "%", } + headers["reads_eliminated"] = { + "title": "Reads Eliminated", + "description": "Number of reads eliminated.", + } pconfig = { - "title": "bases2fastq: General Sequencing Run QC metrics", + "title": "Bases2Fastq: General Sequencing Run QC metrics", "col1_header": "Run Name", "id": "run_metrics_table", "ylab": "QC", @@ -161,6 +294,49 @@ def tabulate_run_stats(run_data, color_dict): - Quality Score Mean: The mean Q score of base calls for the samples. This excludes filtered reads and no calls.\n - Percent Q30: The percentage of ≥ Q30 Q scores for the run. This includes assigned and unassigned reads and excludes filtered reads and no calls.\n - Percent Q40: The percentage of ≥ Q40 Q scores for the run. 
This includes assigned and unassigned reads and excludes filtered reads and no calls.\n + - Reads Eliminated: Number of reads eliminated across lanes.\n + """ + return plot_html, plot_name, anchor, description, helptext, plot_content + + +def plot_lane_cycle_stats(run_data, color_dict): + """ + Plot number of cycles per read and lane + """ + plot_content = dict() + for s_name in run_data.keys(): + if "Lanes" not in run_data[s_name]: + continue + for lane in run_data[s_name]["Lanes"]: + if "Lane" not in lane or "Reads" not in lane: + continue + lane_stats = dict() + lane_name = f'L{lane["Lane"]}' + run_name = f"{s_name} | {lane_name}" + lane_stats[run_name] = {} + for read in lane["Reads"]: + if "Cycles" not in read or "Read" not in read: + continue + read_name = read["Read"] + num_cycles = len(read["Cycles"]) + lane_stats[run_name][read_name] = num_cycles + plot_content.update(lane_stats) + + pconfig = { + "title": "Bases2Fastq: Cycles Per Read Per Lane", + "id": "project_cycles_per_read_per_lane", + "ylab": "Read Cycles", + "cpswitch": False, + "subtitle": None, + } + + plot_name = "Cycles Per Read Per Lane" + plot_html = bargraph.plot(plot_content, pconfig=pconfig) + anchor = "cycles_per_read_per_lane" + description = "Number of sequencing cycles per read in each lane." + helptext = """ + Shows the number of cycles used for each read in every flowcell lane. + Useful for confirming that read lengths match the expected sequencing setup across all lanes. 
""" return plot_html, plot_name, anchor, description, helptext, plot_content @@ -206,15 +382,15 @@ def plot_base_quality_hist(run_data, color_dict): "description": "Histogram of bases quality", "ymin": 0, "ylabel": "Percentage of base quality", - "xlabel": "base quality", + "xlab": "Q Score", "colors": color_dict, }, { - "name": "Qualiter Per Read", + "name": "Quality Per Read", "description": "Histogram of average read base quality", "ymin": 0, "ylabel": "Percentage of read quality", - "xlabel": "base quality", + "xlab": "Q Score", "colors": color_dict, }, ], @@ -332,7 +508,7 @@ def plot_base_quality_by_cycle(run_data, color_dict): pconfig = { "data_labels": [ {"name": "Median Quality", "xlab": "cycle", "ylab": "Quality"}, - {"name": "Mean Quality", "ylab": "Quality"}, + {"name": "Mean Quality", "xlab": "cycle", "ylab": "Quality"}, {"name": "%Q30", "xlab": "cycle", "ylab": "Percentage", "ymax": 100}, {"name": "%Q40", "xlab": "cycle", "ylab": "Percentage", "ymax": 100}, {"name": "%Base Calls Below PF", "xlab": "cycle", "ylab": "Percentage", "ymax": 100}, @@ -347,7 +523,7 @@ def plot_base_quality_by_cycle(run_data, color_dict): plot_html = linegraph.plot(plot_content, pconfig=pconfig) plot_name = "Quality Metrics By Cycle" anchor = "per_cycle_quality" - description = "Per run base qualities by cycle" + description = "Per run base qualities by cycle. Read 1 and Read 2 are separated by a red dashed line." 
helptext = """ This section plots the base qualities by each instrument cycle.\n Choose between Median Quality, Mean Quality, Percent Q30 or Percentage Q40 per cycle.\n diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py index 4cbfd71cf8..71c1ce8624 100644 --- a/multiqc/modules/bases2fastq/plot_samples.py +++ b/multiqc/modules/bases2fastq/plot_samples.py @@ -1,4 +1,4 @@ -from multiqc.plots import linegraph, table +from multiqc.plots import bargraph, linegraph, table from multiqc import config """ @@ -6,6 +6,28 @@ """ +def _calculate_sample_reads_eliminated(run_data) -> int: + """ + Calculate the total number of reads eliminated during trimming. + + This function iterates over the lanes in the given run data and sums the + difference between the number of polonies before trimming and after trimming. + If required fields are missing, they are skipped. + + Args: + run_data (dict): Dictionary containing sequencing run data with lane information. + + Returns: + int: The total number of reads eliminated across all lanes. 
+ """ + reads_eliminated = 0 + if "NumPolonies" not in run_data or "NumPoloniesBeforeTrimming" not in run_data: + return reads_eliminated + reads_eliminated += run_data["NumPoloniesBeforeTrimming"] - run_data["NumPolonies"] + + return reads_eliminated + + def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, sample_color): """ Tabulate general information and statistics per sample @@ -20,6 +42,8 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s general_stats.update({"mean_base_quality_sample": sample_data[s_name]["QualityScoreMean"]}) general_stats.update({"percent_q30_sample": sample_data[s_name]["PercentQ30"]}) general_stats.update({"percent_q40_sample": sample_data[s_name]["PercentQ40"]}) + general_stats.update({"reads_eliminated": _calculate_sample_reads_eliminated(sample_data[s_name])}) + general_stats.update({"percent_mismatch": sample_data[s_name]["PercentMismatch"]}) plot_content.update({s_name: general_stats}) headers = {} @@ -37,11 +61,10 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s "scale": False, } headers["num_polonies_sample"] = { - "title": f"# Polonies ({config.base_count_prefix})", - "description": f"The total number of polonies that are calculated for the run. 
({config.base_count_desc})", + "title": "# Polonies", + "description": "The total number of polonies that are calculated for the run", "min": 0, "scale": "Blues", - "shared_key": "base_count", } headers["yield_sample"] = { "title": "Yield (Gb)", @@ -70,24 +93,73 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s "scale": "RdYlGn", "suffix": "%", } + headers["reads_eliminated"] = { + "title": "Reads Eliminated", + "description": "Number of reads eliminated.", + } + headers["percent_mismatch"] = { + "title": "Percent Mismatch", + "description": "Percent mismatch", + "max": 100, + "min": 0, + "scale": "RdYlGn", + "suffix": "%", + } - pconfig = {"id": "sample_qc_metric_table", "title": "Sample QC Metrics Table", "no_violin": True} + pconfig = {"id": "sample_qc_metric_table", "title": "Sample QC Metrics Table", "no_violin": False} plot_name = "Sample QC Metrics Table" plot_html = table.plot(plot_content, headers, pconfig=pconfig) anchor = "sample_qc_metrics_table" description = "QC metrics per unique sample" helptext = """ - This section displays metrics that indicate the quality of each sample: \n - - Sample Name: Unique identifier composed of (RunName)__(UUID)__(SampleName), where (RunName) maps to the AVITI run name, (UUID) maps to the unique Bases2Fastq analysis result, and (SampleName) maps to the sample name as specified in the RunManifest.csv. - - Group: Run/Sample group label that assigns colors in the plot. To customize group tags:\n - - 1) Set the project name when running Bases2Fastq. 
In this case the group tags will be project name.\n - - 2) Generate a csv file with the suffix "_b2fgroup.csv", containing the columns "Sample Name" and "Group".\n - - Number of Polonies: The total number of polonies that are assigned to the sample.\n - - Assigned Yield (Gb): The sample yield that is based on assigned reads in gigabases.\n - - Quality Score Mean: The average Q score of base calls for the sample.\n - - Percent Q30: The percentage of ≥ Q30 Q scores for the sample. This includes assigned reads and excludes filtered reads and no calls.\n - - Percent Q40: The percentage of ≥ Q40 Q scores for the sample. This includes assigned reads and excludes filtered reads and no calls\n + This section displays metrics that indicate the quality of each sample: \n + - Sample Name: Unique identifier composed of (RunName)__(UUID)__(SampleName), where (RunName) maps to the AVITI run name, (UUID) maps to the unique Bases2Fastq analysis result, and (SampleName) maps to the sample name as specified in the RunManifest.csv. + - Group: Run/Sample group label that assigns colors in the plot. To customize group tags:\n + - 1) Set the project name when running Bases2Fastq. In this case the group tags will be project name.\n + - 2) Generate a csv file with the suffix "_b2fgroup.csv", containing the columns "Sample Name" and "Group".\n + - Number of Polonies: The total number of polonies that are assigned to the sample.\n + - Assigned Yield (Gb): The sample yield that is based on assigned reads in gigabases.\n + - Quality Score Mean: The average Q score of base calls for the sample.\n + - Percent Q30: The percentage of ≥ Q30 Q scores for the sample. This includes assigned reads and excludes filtered reads and no calls.\n + - Percent Q40: The percentage of ≥ Q40 Q scores for the sample. 
This includes assigned reads and excludes filtered reads and no calls.\n + - Reads Eliminated: Number of reads eliminated across lanes.\n + - Percent Mismatch: Percent Mismatch.\n + """ + return plot_html, plot_name, anchor, description, helptext, plot_content + + +def plot_sample_read_length(sample_data, group_lookup_dict, project_lookup_dict, color_dict): + """ + Plot number of cycles per read and lane + """ + plot_content = dict() + for s_name, data in sample_data.items(): + read_lengths = {s_name: {}} + if "Reads" not in data: + continue + for read in data["Reads"]: + read_name = read["Read"] + mean_length = read["MeanReadLength"] + read_lengths[s_name][read_name] = mean_length + plot_content.update(read_lengths) + + pconfig = { + "title": "Bases2Fastq: Mean Read Length per Sample", + "id": "mean_read_length_per_sample", + "ylab": "Bases", + "cpswitch": False, + "subtitle": None, + "stacking": "group", + } + + plot_name = "Mean Read Length per Sample" + plot_html = bargraph.plot(plot_content, pconfig=pconfig) + anchor = "mean_read_length_per_sample" + description = "Average read length per read for all samples." + helptext = """ + Shows the number of cycles used for each read in every flowcell lane. + Useful for confirming that read lengths match the expected sequencing setup across all lanes. 
""" return plot_html, plot_name, anchor, description, helptext, plot_content @@ -108,6 +180,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c r1r2_split = max(r1r2_split, len(R1)) for s_name in sorted(sample_data.keys()): + paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False R1 = sample_data[s_name]["Reads"][0]["Cycles"] for cycle in range(len(R1)): base_no = cycle + 1 @@ -135,8 +208,8 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c plot_content = data pconfig = { - "xlab": "cycle", - "ylab": "Percentage", + "xlab": "Cycle", + "ylab": "Percentage of Total Reads", "x_lines": [{"color": "#FF0000", "width": 2, "value": r1r2_split, "dashStyle": "dash"}], "colors": color_dict, "ymin": 0, @@ -147,8 +220,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c plot_name = "Per Cycle Base Content" anchor = "base_content" description = """ - Percentage of unidentified bases ("N" bases) by each sequencing cycle. - Read 1 and Read 2 are separated by a red dashed line + Base composition per sample per cycle. Read 1 and Read 2 are separated by a red dashed line. """ helptext = """ If a sequencer is unable to make a base call with sufficient confidence then it will @@ -212,7 +284,7 @@ def plot_per_cycle_N_content(sample_data, group_lookup_dict, project_lookup_dict "title": "bases2fastq: Per Cycle N Content Percentage", } plot_html = linegraph.plot(plot_content, pconfig=pconfig) - plot_name = "Per Cycle N Content" + plot_name = "Per Cycle N Content." anchor = "n_content" description = """ Percentage of unidentified bases ("N" bases) by each sequencing cycle. 
@@ -253,7 +325,7 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s pconfig = { "xlab": "% GC", - "ylab": "Percentage", + "ylab": "Percentage of reads that are GC", "colors": sample_color, "id": "gc_hist", "title": "bases2fastq: Per Sample GC Content Histogram", @@ -323,7 +395,7 @@ def plot_adapter_content(sample_data, group_lookup_dict, project_lookup_dict, sa pconfig.update({"colors": sample_color}) plot_html = linegraph.plot(plot_content, pconfig=pconfig) anchor = "adapter_content" - description = "Adapter content per cycle" + description = "Adapter content per cycle. Read 1 and Read 2 are separated by a red dashed line." helptext = """ The plot shows a cumulative percentage count of the proportion of your library which has seen each of the adapter sequences at each cycle. diff --git a/multiqc/search_patterns.yaml b/multiqc/search_patterns.yaml index a6513a31ea..22e1625ef7 100644 --- a/multiqc/search_patterns.yaml +++ b/multiqc/search_patterns.yaml @@ -43,6 +43,10 @@ bases2fastq/project: fn: "*_RunStats.json" contents: "SampleStats" num_lines: 100 +bases2fastq/manifest: + fn: "RunManifest.json" + contents: "Settings" + num_lines: 100 bbduk: contents: "Executing jgi.BBDuk" num_lines: 2 From 520772ca80d1f1109f65c4375b2fa7f036dd298f Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Sun, 28 Sep 2025 21:01:21 -0700 Subject: [PATCH 02/29] Added additional assignment metrics --- multiqc/modules/bases2fastq/bases2fastq.py | 319 +++++++++++++++++++- multiqc/modules/bases2fastq/plot_runs.py | 186 +++++++++++- multiqc/modules/bases2fastq/plot_samples.py | 49 ++- 3 files changed, 546 insertions(+), 8 deletions(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index 261a972565..3b3deba312 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -1,17 +1,21 @@ from collections import defaultdict import copy -import csv +import re import json 
import logging import random from typing import Any, Dict, List import uuid +from pathlib import Path from multiqc.base_module import BaseMultiqcModule, ModuleNoSamplesFound from multiqc.utils import mqc_colour from multiqc.modules.bases2fastq.plot_runs import ( plot_run_stats, + tabulate_manifest_stats, + tabulate_index_assignment_stats, + tabulate_unassigned_index_stats, tabulate_run_stats, tabulate_project_stats, plot_base_quality_hist, @@ -20,6 +24,7 @@ ) from multiqc.modules.bases2fastq.plot_samples import ( tabulate_sample_stats, + plot_sample_assignment_histogram, sequence_content_plot, plot_per_cycle_N_content, plot_adapter_content, @@ -124,15 +129,21 @@ def __init__(self): summary_path = "project_level" elif len(self.run_level_data) > 0 and len(self.project_level_data) > 0: summary_path = "combined_level" - + # Define data to use run_data = {} sample_data = {} samples_to_projects = {} + manifest_data = {} + index_assigment_data = {} + unassigned_sequences = {} if summary_path == "run_level": run_data = self.run_level_data sample_data = self.project_level_samples samples_to_projects = self.run_level_samples_to_project + manifest_data = self._parse_run_manifest("bases2fastq/manifest") + index_assigment_data = self._parse_index_assignment("bases2fastq/manifest") + unassigned_sequences = self._parse_run_unassigned_sequences("bases2fastq/run") elif summary_path == "project_level": run_data = self.project_level_data sample_data = self.project_level_samples @@ -141,6 +152,9 @@ def __init__(self): run_data = self.run_level_data sample_data = self.project_level_samples samples_to_projects = self.project_level_samples_to_project + manifest_data = self._parse_run_manifest("bases2fastq/manifest") + index_assigment_data = self._parse_index_assignment("bases2fastq/manifest") + unassigned_sequences = self._parse_run_unassigned_sequences("bases2fastq/run") else: error_msg = "No run- or project-level data was retained. No report will be generated." 
log.error(error_msg) @@ -149,6 +163,7 @@ def __init__(self): # Create run and project groups run_groups = defaultdict(list) project_groups = defaultdict(list) + in_project_sample_groups = defaultdict(list) sample_to_run_group = {} for sample in sample_data.keys(): (_run_name, _) = sample.split("__") @@ -156,7 +171,9 @@ def __init__(self): sample_to_run_group[sample] = _run_name sample_project = samples_to_projects[sample] project_groups[sample_project].append(sample) - merged_groups = dict(run_groups) | dict(project_groups) + if summary_path == "project_level": + in_project_sample_groups[sample].append(sample) + merged_groups = dict(run_groups) | dict(project_groups) | dict(in_project_sample_groups) # Assign color for each group self.color_getter = mqc_colour.mqc_colour_scale() @@ -174,7 +191,11 @@ def __init__(self): self.group_color = {g: c for g, c in zip(merged_groups.keys(), self.palette[: len(merged_groups)])} self.sample_color = dict() for s_name in samples_to_projects.keys(): - self.sample_color.update({s_name: self.group_color[samples_to_projects[s_name]]}) + s_color = ( + self.group_color[s_name] if summary_path == "project_level" else + self.group_color[samples_to_projects[s_name]] + ) + self.sample_color.update({s_name: s_color}) self.run_color = copy.deepcopy(self.group_color) # Make sure that run colors and group colors match self.palette = self.palette[len(merged_groups) :] @@ -183,16 +204,38 @@ def __init__(self): qc_metrics_function = ( tabulate_run_stats if summary_path in ["run_level", "combined_level"] else tabulate_project_stats ) + self.add_run_plots(data=run_data, plot_functions=[qc_metrics_function]) + + if summary_path in ["run_level", "combined_level"]: + self.add_run_plots( + data=manifest_data, + plot_functions=[ + tabulate_manifest_stats, + ] + ) + self.add_run_plots( + data=index_assigment_data, + plot_functions=[ + tabulate_index_assignment_stats, + ] + ) + self.add_run_plots( + data=unassigned_sequences, + plot_functions=[ + 
tabulate_unassigned_index_stats, + ] + ) + self.add_run_plots( data=run_data, plot_functions=[ - qc_metrics_function, plot_lane_cycle_stats, plot_run_stats, plot_base_quality_hist, plot_base_quality_by_cycle ] ) + self.add_sample_plots( data=sample_data, group_lookup=samples_to_projects, project_lookup=samples_to_projects ) @@ -271,6 +314,271 @@ def _parse_run_project_data(self, data_source: str) -> List[Dict[str, Any]]: self.add_data_source(f=f, s_name=run_analysis_name, module="bases2fastq") return [runs_global_data, runs_sample_data, sample_to_project] + + + def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]: + runs_manifest_data = {} + + if data_source == "": + return runs_manifest_data + + for f in self.find_log_files(data_source): + directory = f.get("root") + if not directory: + continue + + # Get RunName and RunID from RunStats.json + run_stats_path = Path(directory) / "RunStats.json" + if not run_stats_path.exists(): + log.error( + f"RunStats.json does not exist in the Bases2Fastq output directory {directory}.\n" + "Please visit Elembio online documentation for more information - " + "https://docs.elembio.io/docs/bases2fastq/introduction/" + ) + continue + + run_analysis_name = None + with open(run_stats_path) as _infile: + run_stats = json.load(_infile) + run_name = run_stats.get("RunName", None) + analysis_id = run_stats.get("AnalysisID", None) + if run_name and analysis_id: + run_analysis_name = "-".join([run_name, analysis_id[0:4]]) + else: + log.error( + "Error with RunStats.json. Either RunName or AnalysisID is absent.\n" + "Please visit Elembio online documentation for more information - " + "https://docs.elembio.io/docs/bases2fastq/introduction/" + ) + continue + + run_manifest = json.loads(f["f"]) + if "Settings" not in run_manifest: + log.warning( + f" section not found in {directory}/RunManifest.json.\n" + f"Skipping RunManifest metrics." 
+ ) + else: + for lane_data in run_manifest["Settings"]: + lane_id = lane_data.get("Lane") + if not lane_id: + log.error(" not found in Settings section of RunManifest. Skipping lanes.") + continue + lane_name = f"L{lane_id}" + run_lane = f"{run_analysis_name} | {lane_name}" + runs_manifest_data[run_lane] = {} + + indices = [] + indices_cycles = [] + mask_pattern = re.compile(r"^I\d+Mask$") + matching_keys = [key for key in lane_data.keys() if mask_pattern.match(key)] + for key in matching_keys: + for mask_info in lane_data[key]: + if mask_info["Read"] not in indices: + indices.append(mask_info["Read"]) + indices_cycles.append(str(len(mask_info["Cycles"]))) + indexing = f'{" + ".join(indices_cycles)}
{" + ".join(indices)}' + runs_manifest_data[run_lane]["Indexing"] = indexing + + runs_manifest_data[run_lane]["AdapterTrimType"] = lane_data.get("AdapterTrimType", "N/A") + runs_manifest_data[run_lane]["R1AdapterMinimumTrimmedLength"] = lane_data.get( + "R1AdapterMinimumTrimmedLength", "N/A" + ) + runs_manifest_data[run_lane]["R2AdapterMinimumTrimmedLength"] = lane_data.get( + "R2AdapterMinimumTrimmedLength", "N/A" + ) + + self.add_data_source(f=f, s_name=run_analysis_name, module="bases2fastq") + + return runs_manifest_data + + def _parse_run_unassigned_sequences(self, data_source: str) -> Dict[str, Any]: + run_unassigned_sequences = {} + if data_source == "": + return run_unassigned_sequences + + for f in self.find_log_files(data_source): + data = json.loads(f["f"]) + + # Get RunName and AnalysisID + run_name = data.get("RunName", None) + analysis_id = data.get("AnalysisID", None)[0:4] + if not run_name or not analysis_id: + log.error( + "Error with RunStats.json. Either RunName or AnalysisID is absent.\n" + "Please visit Elembio online documentation for more information - " + "https://docs.elembio.io/docs/bases2fastq/introduction/" + ) + continue + run_analysis_name = "-".join([run_name, analysis_id]) + run_analysis_name = self.clean_s_name(run_analysis_name, f) + + # skip run if in user provider ignore list + if self.is_ignore_sample(run_analysis_name): + log.info( + f"Skipping <{run_analysis_name}> because it is present in ignore list." + ) + continue + + # Get total polonies and build unassigned indices dictionary + total_polonies = data.get("NumPoloniesBeforeTrimming", 0) + if "Lanes" not in data: + log.error( + f"Missing lane information in RunStats.json for run {run_analysis_name}." + f"Skipping building unassigned indices table." 
+ ) + continue + index_number = 1 + for lane in data["Lanes"]: + lane_id = lane.get("Lane") + if lane_id: + lane_id = f"L{lane_id}" + for sequence in lane.get("UnassignedSequences", []): + run_unassigned_sequences[index_number] = { + "Run Name": run_analysis_name, + "Lane": lane_id, + "I1": sequence["I1"], + "I2": sequence["I2"], + "Polonies": sequence["Count"], + "% Polonies": float("nan"), + } + if total_polonies > 0: + run_unassigned_sequences[index_number]["% Polonies"] = round( + sequence["Count"] / total_polonies, 2 + ) + index_number += 1 + + return run_unassigned_sequences + + def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]: + sample_to_index_assignment = {} + + if manifest_data_source == "": + return sample_to_index_assignment + + for f in self.find_log_files(manifest_data_source): + directory = f.get("root") + if not directory: + continue + + # Get RunName and RunID from RunParameters.json + run_stats_path = Path(directory) / "RunStats.json" + if not run_stats_path.exists(): + log.error( + f"RunStats.json does not exist in the Bases2Fastq output directory {directory}.\n" + "Please visit Elembio online documentation for more information - " + "https://docs.elembio.io/docs/bases2fastq/introduction/" + ) + continue + + run_analysis_name = None + total_polonies = 0 + with open(run_stats_path) as _infile: + run_stats = json.load(_infile) + + # Get run name information + run_name = run_stats.get("RunName", None) + analysis_id = run_stats.get("AnalysisID", None) + if run_name and analysis_id: + run_analysis_name = "-".join([run_name, analysis_id[0:4]]) + else: + log.error( + "Error with RunStats.json. 
Either RunName or AnalysisID is absent.\n" + "Please visit Elembio online documentation for more information - " + "https://docs.elembio.io/docs/bases2fastq/introduction/" + ) + log.debug(f"Error in RunStats.json: {run_stats_path}") + log.debug(f"Missing: RunName: {run_name} or AnalysisID: {analysis_id}") + continue + + # skip run if in user provider ignore list + if self.is_ignore_sample(run_analysis_name): + log.info( + f"Skipping <{run_analysis_name}> because it is present in ignore list." + ) + continue + + # Ensure sample stats are present + if "SampleStats" not in run_stats: + log.error( + "Error, missing SampleStats in RunStats.json. Skipping index assignment metrics.\n" + "Please visit Elembio online documentation for more information - " + "https://docs.elembio.io/docs/bases2fastq/introduction/" + ) + log.debug(f"Missing SampleStats in RunStats.json. Available keys: {list(run_stats.keys())}.") + continue + + # Extract per sample polony counts and overall total counts + total_polonies = run_stats.get("NumPoloniesBeforeTrimming", 0) + for sample_data in run_stats["SampleStats"]: + sample_name = sample_data.get("SampleName") + sample_id = None + if run_analysis_name and sample_name: + sample_id = "__".join([run_analysis_name, sample_name]) + + if "Occurrences" not in sample_data: + log.error(f"Missing data needed to extract index assignment for sample {sample_id}. Skipping.") + continue + + for occurrence in sample_data["Occurrences"]: + sample_expected_seq = occurrence.get("ExpectedSequence") + sample_counts = occurrence.get("NumPoloniesBeforeTrimming") + if any([element is None for element in [sample_expected_seq, sample_counts, sample_id]]): + log.error( + f"Missing data needed to extract index assignment for sample {sample_id}. Skipping." 
+ ) + continue + if sample_expected_seq not in sample_to_index_assignment: + sample_to_index_assignment[sample_expected_seq] = { + "SampleID": sample_id, + "SamplePolonyCounts": 0, + "PercentOfPolonies": float("nan"), + "Index1": "", + "Index2": "", + } + sample_to_index_assignment[sample_expected_seq]["SamplePolonyCounts"] += sample_counts + + for index_assigment in sample_to_index_assignment.values(): + if total_polonies > 0: + index_assigment["PercentOfPolonies"] = round( + index_assigment["SamplePolonyCounts"] / total_polonies * 100, 2 + ) + + run_manifest = json.loads(f["f"]) + if "Samples" not in run_manifest: + log.warning( + f" section not found in {directory}/RunManifest.json.\n" + f"Skipping RunManifest sample index assignment metrics." + ) + elif len(sample_to_index_assignment) == 0: + log.warning( + "Index assignment data missing. Skipping creation of index assignment metrics." + ) + else: + for sample_data in run_manifest["Samples"]: + sample_name = sample_data.get("SampleName") + sample_id = None + if run_analysis_name is None or sample_name is None or "Indexes" not in sample_data: + continue + sample_id = "__".join([run_analysis_name, sample_name]) + for index_data in sample_data["Indexes"]: + index_1 = index_data.get("Index1", "") + index_2 = index_data.get("Index2", "") + merged_indices = f"{index_1}{index_2}" + if merged_indices not in sample_to_index_assignment: + log.error(f"Index assignment information not found for sample {sample_id}. Skipping.") + continue + if sample_id != sample_to_index_assignment[merged_indices]["SampleID"]: + log.error( + f"RunManifest SampleID <{sample_id}> does not match " + f"RunStats SampleID {sample_to_index_assignment[merged_indices]["SampleID"]}." + "Skipping." 
+ ) + continue + sample_to_index_assignment[merged_indices]["Index1"] = index_1 + sample_to_index_assignment[merged_indices]["Index2"] = index_2 + + return sample_to_index_assignment def add_run_plots(self, data, plot_functions): for func in plot_functions: @@ -281,6 +589,7 @@ def add_run_plots(self, data, plot_functions): def add_sample_plots(self, data, group_lookup, project_lookup): plot_functions = [ tabulate_sample_stats, + plot_sample_assignment_histogram, plot_sample_read_length, sequence_content_plot, plot_per_cycle_N_content, diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index a7c8ad8d4a..18d58dee30 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -2,7 +2,7 @@ from multiqc.plots import bargraph, linegraph, table from multiqc import config - +from natsort import natsorted """ Functions for plotting per run information of bases2fastq @@ -299,6 +299,190 @@ def tabulate_run_stats(run_data, color_dict): return plot_html, plot_name, anchor, description, helptext, plot_content +def tabulate_manifest_stats(run_data, color_dict): + """ + Tabulate general information and statistics of each run + """ + plot_content = dict() + for s_name in run_data.keys(): + run_stats = dict() + run_stats.update({"indexing": run_data[s_name]["Indexing"]}) + run_stats.update({"adapter_trim_type": run_data[s_name]["AdapterTrimType"]}) + run_stats.update({"min_read_length_r1": run_data[s_name]["R1AdapterMinimumTrimmedLength"]}) + run_stats.update({"min_read_length_r2": run_data[s_name]["R2AdapterMinimumTrimmedLength"]}) + plot_content.update({s_name: run_stats}) + + headers = {} + headers["indexing"] = { + "title": "Indexing", + "description": "Indexing scheme.", + "scale": "RdYlGn", + } + headers["adapter_trim_type"] = { + "title": "Adapter Trim Type", + "description": "Adapter trimming method.", + } + headers["min_read_length_r1"] = { + "title": "Minimum Read Length R1", + 
"description": "Minimum read length for read R1.", + "scale": "RdYlGn", + } + headers["min_read_length_r2"] = { + "title": "Minimum Read Length R2", + "description": "Minimum read length for read R1 (if applicable).", + "scale": "RdYlGn", + } + + pconfig = { + "title": "Bases2Fastq: Run Manifest Metrics", + "col1_header": "Run Name | Lane", + "id": "run_manifest_metrics", + } + + plot_name = "Run Manifest Table" + plot_html = table.plot(plot_content, headers, pconfig=pconfig) + anchor = "run_manifest_metrics_table" + description = "Run parameters used." + helptext = """ + This section displays metrics that indicate the parameters used in the run: \n + - Run Name | Lane: Unique identifier composed of (RunName)__(UUID) | (Lane), where (RunName) maps to the AVITI run name and (UUID) maps to the unique Bases2Fastq analysis result.\n + - Indexing: Describes the indexing scheme.\n + - Adapter Trim Type: Adapter trimming method.\n + - Minimum Read Length R1/R2: Minumum read length after adapter trimming.\n + """ + return plot_html, plot_name, anchor, description, helptext, plot_content + + +def tabulate_index_assignment_stats(run_data, color_dict): + """ + Tabulate general information and statistics of each run + """ + plot_content = dict() + sorted_run_data = natsorted(run_data.items(), key=lambda x: x[1]["SampleID"]) + for index, sample_data in enumerate(sorted_run_data, start=1): + sample_data = sample_data[1] + sample_index_stats = dict() + sample_index_stats.update({"sample_name": sample_data["SampleID"]}) + sample_index_stats.update({"index_1": sample_data["Index1"]}) + sample_index_stats.update({"index_2": sample_data["Index2"]}) + sample_index_stats.update({"polonies": sample_data["SamplePolonyCounts"]}) + sample_index_stats.update({"polony_percentage": sample_data["PercentOfPolonies"]}) + plot_content.update({index: sample_index_stats}) + + headers = {} + headers["sample_name"] = { + "title": "Sample Name", + "description": "Sample Name (RunID + Sample ID).", + } 
+ headers["index_1"] = { + "title": "Index 1", + "description": "Sample Index 1 (I1).", + } + headers["index_2"] = { + "title": "Index 2", + "description": "Sample Index 2 (I2).", + } + headers["polonies"] = { + "title": "Polonies", + "description": "Number of polonies assigned to sample.", + "scale": "RdYlGn", + } + headers["polony_percentage"] = { + "title": "Polony %", + "description": "Percentage of total polonies assigned to this index combination.", + "max": 100, + "min": 0, + "scale": "RdYlGn", + "suffix": "%", + } + + pconfig = { + "title": "Bases2Fastq: Index Assignment Metrics", + "col1_header": "Sample #", + "id": "index_assignment_metrics", + } + + plot_name = "Index Assignment Metrics" + plot_html = table.plot(plot_content, headers, pconfig=pconfig) + anchor = "index_assignment_metrics" + description = "Index assignment metrics." + helptext = """ + This section displays index assignment metrics including: \n + - Sample Name: Sample identifier combining RunID and SampleID.\n + - Index 1: Sample I1.\n + - Index 2: Sample I2.\n + - Polonies: Number of polonies assigned each sample.\n + - Polony %: Percentage of total run's polonies assigned to each sample.\n + """ + return plot_html, plot_name, anchor, description, helptext, plot_content + + +def tabulate_unassigned_index_stats(run_data, color_dict): + """ + Tabulate unassigned index metrics. 
+ + run_data: Dictionary with unassigned index data including: + - RunName + - Lane + - I1 + - I2 + - Polonies + - % Polonies + """ + + headers = {} + headers["Run Name"] = { + "title": "Run Name", + "description": "Run Name (Run ID + Analysis ID).", + } + headers["Lane"] = { + "title": "Lane", + "description": "Index Lane.", + } + headers["I1"] = { + "title": "I1", + "description": "Index 1.", + } + headers["I2"] = { + "title": "I2", + "description": "Index 2.", + } + headers["Polonies"] = { + "title": "Polonies", + "description": "Number of polonies assigned to indices.", + "scale": "GnYlRd", + } + headers["% Polonies"] = { + "title": "% Polonies", + "description": "Percentage of total polonies assigned to this index combination.", + "max": 100, + "min": 0, + "scale": "GnYlRd", + "suffix": "%", + } + + pconfig = { + "title": "Bases2Fastq: Unassiged Indices Metrics", + "col1_header": "Index #", + "id": "index_unassignment_metrics", + } + + plot_name = "Unassiged Indices Metrics" + plot_html = table.plot(run_data, headers, pconfig=pconfig) + anchor = "index_unassignment_metrics" + description = "Index unassignment metrics." + helptext = """ + This section displays index assignment metrics including: \n + - Run Name: Run identifier. 
Built from Run ID and Analysis ID.\n
+    - Lane: Lane number.\n
+    - Index 1: Sample I1.\n
+    - Index 2: Sample I2.\n
+    - Polonies: Number of polonies assigned to each index combination.\n
+    - Polony %: Percentage of total run's polonies assigned to each index combination.\n
+    """
+    return plot_html, plot_name, anchor, description, helptext, run_data
+
+
+def plot_lane_cycle_stats(run_data, color_dict):
+    """
+    Plot number of cycles per read and lane
diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py
index 71c1ce8624..b9f02a538e 100644
--- a/multiqc/modules/bases2fastq/plot_samples.py
+++ b/multiqc/modules/bases2fastq/plot_samples.py
@@ -1,6 +1,8 @@
 from multiqc.plots import bargraph, linegraph, table
 from multiqc import config
 
+import numpy as np
+
 """
 Functions for plotting per sample information of bases2fastq
 """
@@ -129,6 +131,49 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
     return plot_html, plot_name, anchor, description, helptext, plot_content
 
 
+def plot_sample_assignment_histogram(sample_data, group_lookup_dict, project_lookup_dict, color_dict):
+    """
+    Plot number of cycles per read and lane
+    """
+    plot_content = dict()
+    polony_assignments = []
+    for s_name in sample_data.keys():
+        polonies = sample_data[s_name].get("NumPolonies")
+        if polonies:
+            polony_assignments.append(polonies)
+
+    bins = 100
+    for bins in [100, 50, 20, 10]:
+        if len(polony_assignments) > bins:
+            break
+
+    hist, bin_edges = np.histogram(polony_assignments, bins=bins)
+    bin_ranges = [f"({bin_edges[i]}, {bin_edges[i+1]})" for i in range(len(bin_edges)-1)]
+
+    for range_data, frequency in zip(bin_ranges, hist):
+        plot_content[range_data] = {}
+        plot_content[range_data]["Assigned Polonies"] = float(frequency)
+
+    pconfig = {
+        "title": "Bases2Fastq: Sample Polony Assignment Histogram",
+        "id": "sample_assignment_hist",
+        "ylab": "Number of Samples",
+        "xlab": "Range Assigned Polonies",
+        "cpswitch": False,
+
"subtitle": None, + } + + plot_name = "Sample Polony Assignment Histogram" + plot_html = bargraph.plot(plot_content, pconfig=pconfig) + anchor = "sample_assignment_hist" + description = "Average read length per read for all samples." + helptext = """ + Shows the number of cycles used for each read in every flowcell lane. + Useful for confirming that read lengths match the expected sequencing setup across all lanes. + """ + return plot_html, plot_name, anchor, description, helptext, plot_content + + def plot_sample_read_length(sample_data, group_lookup_dict, project_lookup_dict, color_dict): """ Plot number of cycles per read and lane @@ -324,8 +369,8 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s plot_content = gc_hist_dict pconfig = { - "xlab": "% GC", - "ylab": "Percentage of reads that are GC", + "xlab": "GC Content (%)", + "ylab": "Percentage of reads that have GC (%)", "colors": sample_color, "id": "gc_hist", "title": "bases2fastq: Per Sample GC Content Histogram", From 8ea72b39b19c38660efbaa594d3d9051c7a3eb83 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Mon, 29 Sep 2025 09:14:18 -0700 Subject: [PATCH 03/29] Fixed typo --- multiqc/modules/bases2fastq/plot_runs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index 18d58dee30..e6db7f69a3 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -462,12 +462,12 @@ def tabulate_unassigned_index_stats(run_data, color_dict): } pconfig = { - "title": "Bases2Fastq: Unassiged Indices Metrics", + "title": "Bases2Fastq: Unassigned Indices Metrics", "col1_header": "Index #", "id": "index_unassignment_metrics", } - plot_name = "Unassiged Indices Metrics" + plot_name = "Unassigned Indices Metrics" plot_html = table.plot(run_data, headers, pconfig=pconfig) anchor = "index_unassignment_metrics" description = "Index unassignment 
metrics." From 2a3e3db4e23bce831f0f8a439c9fcb91b8936539 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 00:13:05 -0700 Subject: [PATCH 04/29] Fixed --no-project B2F failure. --- multiqc/modules/bases2fastq/bases2fastq.py | 35 ++-- multiqc/modules/bases2fastq/plot_samples.py | 184 ++++++++++++++------ 2 files changed, 145 insertions(+), 74 deletions(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index 3b3deba312..0ba862f60c 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -91,10 +91,8 @@ def __init__(self): ) = self._parse_run_project_data("bases2fastq/project") # Get run- and project-level samples - for data in self.run_level_samples.values(): - num_run_level_samples += len(data.keys()) - for data in self.project_level_samples.values(): - num_project_level_samples += len(data.keys()) + num_run_level_samples = len(self.run_level_samples) + num_project_level_samples = len(self.project_level_samples) # Ensure run/sample data found if all([ @@ -107,10 +105,22 @@ def __init__(self): log.error(error_msg) raise ModuleNoSamplesFound(error_msg) + # Choose path to take, if project use only project-level data, otherwise use run- and project-level + summary_path = "" + if len(self.run_level_data) > 0 and len(self.project_level_data) == 0: + summary_path = "run_level" + if len(self.run_level_data) == 0 and len(self.project_level_data) > 0: + summary_path = "project_level" + elif len(self.run_level_data) > 0 and len(self.project_level_data) > 0: + summary_path = "combined_level" + # Log runs, projects and samples found log.info(f"Found {len(self.run_level_data)} run(s) within the Bases2Fastq results.") log.info(f"Found {len(self.project_level_data)} project(s) within the Bases2Fastq results.") - log.info(f"Found {num_project_level_samples} sample(s) within the Bases2Fastq results.") + if summary_path == "run_level": + log.info(f"Found 
{num_run_level_samples} sample(s) within the Bases2Fastq results.") + else: + log.info(f"Found {num_project_level_samples} sample(s) within the Bases2Fastq results.") # Superfluous function call to confirm that it is used in this module self.add_software_version(None) @@ -118,17 +128,8 @@ def __init__(self): # Warn user if run-level/project-level or sample-level metrics were not found if len(self.run_level_data) == 0 and len(self.project_level_data) == 0: log.warning("No run/project stats found!") - if num_project_level_samples == 0: + if num_run_level_samples == 0 and num_project_level_samples == 0: log.warning("No sample stats found!") - - # Choose path to take, if project use only project-level data, otherwise use run- and project-level - summary_path = "" - if len(self.run_level_data) > 0 and len(self.project_level_data) == 0: - summary_path = "run_level" - if len(self.run_level_data) == 0 and len(self.project_level_data) > 0: - summary_path = "project_level" - elif len(self.run_level_data) > 0 and len(self.project_level_data) > 0: - summary_path = "combined_level" # Define data to use run_data = {} @@ -139,7 +140,7 @@ def __init__(self): unassigned_sequences = {} if summary_path == "run_level": run_data = self.run_level_data - sample_data = self.project_level_samples + sample_data = self.run_level_samples samples_to_projects = self.run_level_samples_to_project manifest_data = self._parse_run_manifest("bases2fastq/manifest") index_assigment_data = self._parse_index_assignment("bases2fastq/manifest") @@ -293,7 +294,7 @@ def _parse_run_project_data(self, data_source: str) -> List[Dict[str, Any]]: run_analysis_sample_name = "__".join([run_analysis_name, sample_name]) num_polonies = sample_data["NumPolonies"] - if num_polonies < 1000: + if num_polonies < MIN_POLONIES: log.warning( f"Skipping {run_analysis_sample_name} because it has" f" <{MIN_POLONIES} assigned reads [n={num_polonies}]." 
diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py index b9f02a538e..a555a88630 100644 --- a/multiqc/modules/bases2fastq/plot_samples.py +++ b/multiqc/modules/bases2fastq/plot_samples.py @@ -133,79 +133,149 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s def plot_sample_assignment_histogram(sample_data, group_lookup_dict, project_lookup_dict, color_dict): """ - Plot number of cycles per read and lane + Plots a histogram of number of assigned polonies in all samples for each run. """ - plot_content = dict() - polony_assignments = [] - for s_name in sample_data.keys(): - polonies = sample_data[s_name].get("NumPolonies") - if polonies: - polony_assignments.append(polonies) + plot_content = [] + polony_assignments = {} + for s_name, data in sample_data.items(): + if "NumPolonies" not in data: + continue + run_name, _ = s_name.split("__") + if run_name not in polony_assignments: + polony_assignments[run_name] = [] + polonies = data["NumPolonies"] + polony_assignments[run_name].append(polonies) + + pconfig = {"data_labels": []} + for run_name, assignment_data in polony_assignments.items(): + run_data = {} + bins = 50 + for bins in [50, 20, 10]: + if len(assignment_data) > bins: + break + hist, bin_edges = np.histogram(assignment_data, bins=bins) + bin_ranges = [f"({round(bin_edges[i], 2)}-{round(bin_edges[i+1], 2)})" for i in range(len(bin_edges)-1)] + points = [float(point) for point in hist] + run_data["Polonies Assigned"] = {bin_range: point for bin_range, point in zip(bin_ranges, points)} + plot_content.append(run_data) + + pconfig["data_labels"].append({ + "name": run_name, + "xlab": "Assigned Polonies (Range)", + "ylab": "Number of Samples with N Polonies Assigned", + }) - bins = 100 - for bins in [100, 50, 20, 10]: - if len(polony_assignments) > bins: - break - - hist, bin_edges = np.histogram(polony_assignments, bins=bins) - bin_ranges = [f"({bin_edges[i]}, {bin_edges[i+1]})" 
for i in range(len(bin_edges)-1)] - - for range_data, frequency in zip(bin_ranges, hist): - plot_content[range_data] = {} - plot_content[range_data]["Assigned Polonies"] = float(frequency) - - pconfig = { - "title": "Bases2Fastq: Sample Polony Assignment Histogram", - "id": "sample_assignment_hist", - "ylab": "Number of Samples", - "xlab": "Range Assigned Polonies", - "cpswitch": False, - "subtitle": None, - } + pconfig = pconfig | { + "id": "sample_assignment_hist", + "title": "bases2fastq: Sample Polony Assignment Histogram", + "style": 'lines+markers', + "xlab": "Assigned Polonies (Range)", + "ylab": "Number of Samples with N Polonies Assigned", + "categories": True, + } plot_name = "Sample Polony Assignment Histogram" - plot_html = bargraph.plot(plot_content, pconfig=pconfig) + plot_html = linegraph.plot(plot_content, pconfig=pconfig) anchor = "sample_assignment_hist" - description = "Average read length per read for all samples." + description = "Histogram showing the distribution of samples according to the number of polonies assigned to them." helptext = """ - Shows the number of cycles used for each read in every flowcell lane. - Useful for confirming that read lengths match the expected sequencing setup across all lanes. + Shows bins of assigned polony counts on the X-axis and the number of samples whose number of polonies fall + within each bin on the Y-axis. """ + return plot_html, plot_name, anchor, description, helptext, plot_content def plot_sample_read_length(sample_data, group_lookup_dict, project_lookup_dict, color_dict): """ - Plot number of cycles per read and lane + Plots the average read length for each sample if less than 50 samples in total, or the distribution per run + as a lineplot based on histogram bins. 
""" + total_samples = len(sample_data.keys()) plot_content = dict() - for s_name, data in sample_data.items(): - read_lengths = {s_name: {}} - if "Reads" not in data: - continue - for read in data["Reads"]: - read_name = read["Read"] - mean_length = read["MeanReadLength"] - read_lengths[s_name][read_name] = mean_length - plot_content.update(read_lengths) - - pconfig = { - "title": "Bases2Fastq: Mean Read Length per Sample", - "id": "mean_read_length_per_sample", - "ylab": "Bases", - "cpswitch": False, - "subtitle": None, - "stacking": "group", - } - + pconfig = {} + plot_html = None plot_name = "Mean Read Length per Sample" - plot_html = bargraph.plot(plot_content, pconfig=pconfig) anchor = "mean_read_length_per_sample" - description = "Average read length per read for all samples." - helptext = """ - Shows the number of cycles used for each read in every flowcell lane. - Useful for confirming that read lengths match the expected sequencing setup across all lanes. - """ + description = "" + helptext = "" + + if total_samples <= 50: + for s_name, data in sample_data.items(): + read_lengths = {s_name: {}} + if "Reads" not in data: + continue + for read in data["Reads"]: + read_name = read["Read"] + mean_length = read["MeanReadLength"] + read_lengths[s_name][read_name] = mean_length + plot_content.update(read_lengths) + + pconfig = { + "title": "Bases2Fastq: Mean Read Length per Sample", + "id": "mean_read_length_per_sample", + "ylab": "Bases", + "cpswitch": False, + "subtitle": None, + "stacking": "group", + } + plot_html = bargraph.plot(plot_content, pconfig=pconfig) + description = "Average read length per read for all samples." + helptext = """ + Shows the average read length for each read in each sample. 
+ """ + + elif total_samples > 50: + plot_content = [] + read_lengths = {} + for s_name, data in sample_data.items(): + if "Reads" not in data: + continue + run_name, _ = s_name.split("__") + if run_name not in read_lengths: + read_lengths[run_name] = {} + for read in data["Reads"]: + read_id = read["Read"] + if read_id not in read_lengths[run_name]: + read_lengths[run_name][read_id] = [] + read_lengths[run_name][read_id].append(read["MeanReadLength"]) + + pconfig = {"data_labels": []} + for run_name, read_data in read_lengths.items(): + run_data = {} + for read_name, read_lengths in read_data.items(): + bins = 50 + for bins in [50, 20, 10]: + if len(read_lengths) > bins: + break + hist, bin_edges = np.histogram(read_lengths, bins=bins) + bin_ranges = [f"({round(bin_edges[i], 2)}-{round(bin_edges[i+1], 2)})" for i in range(len(bin_edges)-1)] + points = [float(point) for point in hist] + run_data[read_name] = {bin_range: point for bin_range, point in zip(bin_ranges, points)} + plot_content.append(run_data) + + pconfig["data_labels"].append({ + "name": run_name, + "xlab": "Average Read Length (Range)", + "ylab": "Samples with Average Read Length", + }) + + pconfig = pconfig | { + "id": "mean_read_length_per_sample", + "title": "bases2fastq: Mean Read Length Per Sample", + "style": 'lines+markers', + "xlab": "Average Read Length (Range)", + "ylab": "Samples with Average Read Length", + "categories": True, + } + + plot_html = linegraph.plot(plot_content, pconfig=pconfig) + description = "Distribution of average read lengths for all samples." + helptext = """ + Shows the distribution of samples whose average read lengths fall in a given range. + Reads are shown as different lines. 
+ """ + return plot_html, plot_name, anchor, description, helptext, plot_content From 783cbd2dece66cfc6ff502c2ff7895625cbd5f06 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 00:27:41 -0700 Subject: [PATCH 05/29] Fixed f-string bug --- multiqc/modules/bases2fastq/bases2fastq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index 0ba862f60c..e823589a92 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -572,7 +572,7 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]: if sample_id != sample_to_index_assignment[merged_indices]["SampleID"]: log.error( f"RunManifest SampleID <{sample_id}> does not match " - f"RunStats SampleID {sample_to_index_assignment[merged_indices]["SampleID"]}." + f"RunStats SampleID {sample_to_index_assignment[merged_indices]['SampleID']}." "Skipping." ) continue From 9204e3157241da3bb4b6d26ee0aa89ca735217e2 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 00:44:41 -0700 Subject: [PATCH 06/29] Fixed formatting --- multiqc/modules/bases2fastq/bases2fastq.py | 92 +++++++++------------ multiqc/modules/bases2fastq/plot_runs.py | 6 +- multiqc/modules/bases2fastq/plot_samples.py | 48 ++++++----- 3 files changed, 68 insertions(+), 78 deletions(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index e823589a92..6538f2633e 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -63,7 +63,6 @@ def __init__(self): self.group_lookup_dict = dict() self.project_lookup_dict = dict() - self.b2f_sample_data = dict() self.b2f_run_data = dict() self.b2f_run_project_data = dict() @@ -74,37 +73,39 @@ def __init__(self): # Define if call is project- or run-level run_level_log_files = len(list(self.find_log_files("bases2fastq/run"))) 
project_level_log_files = len(list(self.find_log_files("bases2fastq/project"))) - + if run_level_log_files == 0 and project_level_log_files == 0: error_msg = "No run- or project-level log files found within the Bases2Fastq results." log.error(error_msg) raise ModuleNoSamplesFound(error_msg) - + # Parse data if run_level_log_files > 0: - ( - self.run_level_data, self.run_level_samples, self.run_level_samples_to_project - ) = self._parse_run_project_data("bases2fastq/run") + (self.run_level_data, self.run_level_samples, self.run_level_samples_to_project) = ( + self._parse_run_project_data("bases2fastq/run") + ) if project_level_log_files > 0: - ( - self.project_level_data, self.project_level_samples, self.project_level_samples_to_project - ) = self._parse_run_project_data("bases2fastq/project") + (self.project_level_data, self.project_level_samples, self.project_level_samples_to_project) = ( + self._parse_run_project_data("bases2fastq/project") + ) # Get run- and project-level samples num_run_level_samples = len(self.run_level_samples) num_project_level_samples = len(self.project_level_samples) # Ensure run/sample data found - if all([ - len(self.run_level_data) == 0, - num_run_level_samples == 0, - len(self.project_level_data), - num_project_level_samples == 0, - ]): + if all( + [ + len(self.run_level_data) == 0, + num_run_level_samples == 0, + len(self.project_level_data), + num_project_level_samples == 0, + ] + ): error_msg = "No run-, project- or sample-level data found" log.error(error_msg) raise ModuleNoSamplesFound(error_msg) - + # Choose path to take, if project use only project-level data, otherwise use run- and project-level summary_path = "" if len(self.run_level_data) > 0 and len(self.project_level_data) == 0: @@ -113,7 +114,7 @@ def __init__(self): summary_path = "project_level" elif len(self.run_level_data) > 0 and len(self.project_level_data) > 0: summary_path = "combined_level" - + # Log runs, projects and samples found log.info(f"Found 
{len(self.run_level_data)} run(s) within the Bases2Fastq results.") log.info(f"Found {len(self.project_level_data)} project(s) within the Bases2Fastq results.") @@ -193,14 +194,14 @@ def __init__(self): self.sample_color = dict() for s_name in samples_to_projects.keys(): s_color = ( - self.group_color[s_name] if summary_path == "project_level" else - self.group_color[samples_to_projects[s_name]] + self.group_color[s_name] + if summary_path == "project_level" + else self.group_color[samples_to_projects[s_name]] ) self.sample_color.update({s_name: s_color}) self.run_color = copy.deepcopy(self.group_color) # Make sure that run colors and group colors match self.palette = self.palette[len(merged_groups) :] - # Plot metrics qc_metrics_function = ( tabulate_run_stats if summary_path in ["run_level", "combined_level"] else tabulate_project_stats @@ -212,34 +213,27 @@ def __init__(self): data=manifest_data, plot_functions=[ tabulate_manifest_stats, - ] + ], ) self.add_run_plots( data=index_assigment_data, plot_functions=[ tabulate_index_assignment_stats, - ] + ], ) self.add_run_plots( data=unassigned_sequences, plot_functions=[ tabulate_unassigned_index_stats, - ] + ], ) - + self.add_run_plots( data=run_data, - plot_functions=[ - plot_lane_cycle_stats, - plot_run_stats, - plot_base_quality_hist, - plot_base_quality_by_cycle - ] + plot_functions=[plot_lane_cycle_stats, plot_run_stats, plot_base_quality_hist, plot_base_quality_by_cycle], ) - self.add_sample_plots( - data=sample_data, group_lookup=samples_to_projects, project_lookup=samples_to_projects - ) + self.add_sample_plots(data=sample_data, group_lookup=samples_to_projects, project_lookup=samples_to_projects) def get_uuid(self): return str(uuid.uuid4()).replace("-", "").lower() @@ -269,15 +263,13 @@ def _parse_run_project_data(self, data_source: str) -> List[Dict[str, Any]]: "https://docs.elembio.io/docs/bases2fastq/introduction/" ) continue - + run_analysis_name = "-".join([run_name, analysis_id]) run_analysis_name = 
self.clean_s_name(run_analysis_name, f) # skip run if in user provider ignore list if self.is_ignore_sample(run_analysis_name): - log.info( - f"Skipping <{run_analysis_name}> because it is present in ignore list." - ) + log.info(f"Skipping <{run_analysis_name}> because it is present in ignore list.") continue # Check run is present in the final dictionaries @@ -315,7 +307,6 @@ def _parse_run_project_data(self, data_source: str) -> List[Dict[str, Any]]: self.add_data_source(f=f, s_name=run_analysis_name, module="bases2fastq") return [runs_global_data, runs_sample_data, sample_to_project] - def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]: runs_manifest_data = {} @@ -356,8 +347,7 @@ def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]: run_manifest = json.loads(f["f"]) if "Settings" not in run_manifest: log.warning( - f" section not found in {directory}/RunManifest.json.\n" - f"Skipping RunManifest metrics." + f" section not found in {directory}/RunManifest.json.\nSkipping RunManifest metrics." ) else: for lane_data in run_manifest["Settings"]: @@ -378,7 +368,7 @@ def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]: if mask_info["Read"] not in indices: indices.append(mask_info["Read"]) indices_cycles.append(str(len(mask_info["Cycles"]))) - indexing = f'{" + ".join(indices_cycles)}
{" + ".join(indices)}' + indexing = f"{' + '.join(indices_cycles)}
{' + '.join(indices)}" runs_manifest_data[run_lane]["Indexing"] = indexing runs_manifest_data[run_lane]["AdapterTrimType"] = lane_data.get("AdapterTrimType", "N/A") @@ -388,7 +378,7 @@ def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]: runs_manifest_data[run_lane]["R2AdapterMinimumTrimmedLength"] = lane_data.get( "R2AdapterMinimumTrimmedLength", "N/A" ) - + self.add_data_source(f=f, s_name=run_analysis_name, module="bases2fastq") return runs_manifest_data @@ -416,11 +406,9 @@ def _parse_run_unassigned_sequences(self, data_source: str) -> Dict[str, Any]: # skip run if in user provider ignore list if self.is_ignore_sample(run_analysis_name): - log.info( - f"Skipping <{run_analysis_name}> because it is present in ignore list." - ) + log.info(f"Skipping <{run_analysis_name}> because it is present in ignore list.") continue - + # Get total polonies and build unassigned indices dictionary total_polonies = data.get("NumPoloniesBeforeTrimming", 0) if "Lanes" not in data: @@ -491,12 +479,10 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]: log.debug(f"Error in RunStats.json: {run_stats_path}") log.debug(f"Missing: RunName: {run_name} or AnalysisID: {analysis_id}") continue - + # skip run if in user provider ignore list if self.is_ignore_sample(run_analysis_name): - log.info( - f"Skipping <{run_analysis_name}> because it is present in ignore list." - ) + log.info(f"Skipping <{run_analysis_name}> because it is present in ignore list.") continue # Ensure sample stats are present @@ -508,7 +494,7 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]: ) log.debug(f"Missing SampleStats in RunStats.json. 
Available keys: {list(run_stats.keys())}.") continue - + # Extract per sample polony counts and overall total counts total_polonies = run_stats.get("NumPoloniesBeforeTrimming", 0) for sample_data in run_stats["SampleStats"]: @@ -552,9 +538,7 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]: f"Skipping RunManifest sample index assignment metrics." ) elif len(sample_to_index_assignment) == 0: - log.warning( - "Index assignment data missing. Skipping creation of index assignment metrics." - ) + log.warning("Index assignment data missing. Skipping creation of index assignment metrics.") else: for sample_data in run_manifest["Samples"]: sample_name = sample_data.get("SampleName") diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index e6db7f69a3..706b265353 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -102,7 +102,7 @@ def _calculate_reads_eliminated(run_data) -> int: if "NumPolonies" not in lane or "NumPoloniesBeforeTrimming" not in lane: continue reads_eliminated += lane["NumPoloniesBeforeTrimming"] - lane["NumPolonies"] - + return reads_eliminated @@ -183,7 +183,7 @@ def tabulate_project_stats(run_data, color_dict): project_header = "(Project) " elif len(run_keys) == 1: first_key = run_keys[0] - project_header = f'{run_data[first_key]["Project"]} | ' + project_header = f"{run_data[first_key]['Project']} | " plot_name = f"{project_header}Sequencing QC Metrics Table" plot_html = table.plot(plot_content, headers, pconfig=pconfig) anchor = "project_run_qc_metrics_table" @@ -495,7 +495,7 @@ def plot_lane_cycle_stats(run_data, color_dict): if "Lane" not in lane or "Reads" not in lane: continue lane_stats = dict() - lane_name = f'L{lane["Lane"]}' + lane_name = f"L{lane['Lane']}" run_name = f"{s_name} | {lane_name}" lane_stats[run_name] = {} for read in lane["Reads"]: diff --git a/multiqc/modules/bases2fastq/plot_samples.py 
b/multiqc/modules/bases2fastq/plot_samples.py index a5c91449e2..9351b8d099 100644 --- a/multiqc/modules/bases2fastq/plot_samples.py +++ b/multiqc/modules/bases2fastq/plot_samples.py @@ -154,25 +154,27 @@ def plot_sample_assignment_histogram(sample_data, group_lookup_dict, project_loo if len(assignment_data) > bins: break hist, bin_edges = np.histogram(assignment_data, bins=bins) - bin_ranges = [f"({round(bin_edges[i], 2)}-{round(bin_edges[i+1], 2)})" for i in range(len(bin_edges)-1)] + bin_ranges = [f"({round(bin_edges[i], 2)}-{round(bin_edges[i + 1], 2)})" for i in range(len(bin_edges) - 1)] points = [float(point) for point in hist] run_data["Polonies Assigned"] = {bin_range: point for bin_range, point in zip(bin_ranges, points)} plot_content.append(run_data) - pconfig["data_labels"].append({ - "name": run_name, - "xlab": "Assigned Polonies (Range)", - "ylab": "Number of Samples with N Polonies Assigned", - }) - + pconfig["data_labels"].append( + { + "name": run_name, + "xlab": "Assigned Polonies (Range)", + "ylab": "Number of Samples with N Polonies Assigned", + } + ) + pconfig = pconfig | { - "id": "sample_assignment_hist", - "title": "bases2fastq: Sample Polony Assignment Histogram", - "style": 'lines+markers', - "xlab": "Assigned Polonies (Range)", - "ylab": "Number of Samples with N Polonies Assigned", - "categories": True, - } + "id": "sample_assignment_hist", + "title": "bases2fastq: Sample Polony Assignment Histogram", + "style": "lines+markers", + "xlab": "Assigned Polonies (Range)", + "ylab": "Number of Samples with N Polonies Assigned", + "categories": True, + } plot_name = "Sample Polony Assignment Histogram" plot_html = linegraph.plot(plot_content, pconfig=pconfig) @@ -249,21 +251,25 @@ def plot_sample_read_length(sample_data, group_lookup_dict, project_lookup_dict, if len(read_lengths) > bins: break hist, bin_edges = np.histogram(read_lengths, bins=bins) - bin_ranges = [f"({round(bin_edges[i], 2)}-{round(bin_edges[i+1], 2)})" for i in 
range(len(bin_edges)-1)] + bin_ranges = [ + f"({round(bin_edges[i], 2)}-{round(bin_edges[i + 1], 2)})" for i in range(len(bin_edges) - 1) + ] points = [float(point) for point in hist] run_data[read_name] = {bin_range: point for bin_range, point in zip(bin_ranges, points)} plot_content.append(run_data) - pconfig["data_labels"].append({ - "name": run_name, - "xlab": "Average Read Length (Range)", - "ylab": "Samples with Average Read Length", - }) + pconfig["data_labels"].append( + { + "name": run_name, + "xlab": "Average Read Length (Range)", + "ylab": "Samples with Average Read Length", + } + ) pconfig = pconfig | { "id": "mean_read_length_per_sample", "title": "bases2fastq: Mean Read Length Per Sample", - "style": 'lines+markers', + "style": "lines+markers", "xlab": "Average Read Length (Range)", "ylab": "Samples with Average Read Length", "categories": True, From e39c5bc99be9bde8350d35135f0d2a36c686509c Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 00:47:48 -0700 Subject: [PATCH 07/29] Fixed dictionary merging for older python version --- multiqc/modules/bases2fastq/bases2fastq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index 6538f2633e..3a356f477d 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -175,7 +175,7 @@ def __init__(self): project_groups[sample_project].append(sample) if summary_path == "project_level": in_project_sample_groups[sample].append(sample) - merged_groups = dict(run_groups) | dict(project_groups) | dict(in_project_sample_groups) + merged_groups = {**run_groups, **project_groups, **in_project_sample_groups} # Assign color for each group self.color_getter = mqc_colour.mqc_colour_scale() From 5ff5bd784c907de35084983ae494c726c0cd2cc9 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 08:06:28 -0700 Subject: [PATCH 08/29] Fixed dict constructor 
--- multiqc/modules/bases2fastq/plot_samples.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py index 9351b8d099..b6088ba186 100644 --- a/multiqc/modules/bases2fastq/plot_samples.py +++ b/multiqc/modules/bases2fastq/plot_samples.py @@ -1,3 +1,4 @@ +from typing import Any, Dict from multiqc.plots import bargraph, linegraph, table from multiqc import config @@ -146,7 +147,7 @@ def plot_sample_assignment_histogram(sample_data, group_lookup_dict, project_loo polonies = data["NumPolonies"] polony_assignments[run_name].append(polonies) - pconfig = {"data_labels": []} + pconfig: Dict[str, Any] = {"data_labels": []} for run_name, assignment_data in polony_assignments.items(): run_data = {} bins = 50 @@ -167,14 +168,14 @@ def plot_sample_assignment_histogram(sample_data, group_lookup_dict, project_loo } ) - pconfig = pconfig | { + pconfig.update({ "id": "sample_assignment_hist", "title": "bases2fastq: Sample Polony Assignment Histogram", "style": "lines+markers", "xlab": "Assigned Polonies (Range)", "ylab": "Number of Samples with N Polonies Assigned", "categories": True, - } + }) plot_name = "Sample Polony Assignment Histogram" plot_html = linegraph.plot(plot_content, pconfig=pconfig) @@ -242,7 +243,7 @@ def plot_sample_read_length(sample_data, group_lookup_dict, project_lookup_dict, read_lengths[run_name][read_id] = [] read_lengths[run_name][read_id].append(read["MeanReadLength"]) - pconfig = {"data_labels": []} + pconfig: Dict[str, Any] = {"data_labels": []} for run_name, read_data in read_lengths.items(): run_data = {} for read_name, read_lengths in read_data.items(): @@ -266,14 +267,14 @@ def plot_sample_read_length(sample_data, group_lookup_dict, project_lookup_dict, } ) - pconfig = pconfig | { + pconfig.update({ "id": "mean_read_length_per_sample", "title": "bases2fastq: Mean Read Length Per Sample", "style": "lines+markers", "xlab": "Average Read 
Length (Range)", "ylab": "Samples with Average Read Length", "categories": True, - } + }) plot_html = linegraph.plot(plot_content, pconfig=pconfig) description = "Distribution of average read lengths for all samples." From fa734f8f85a6d6745acecc8304edca99be3a1ae5 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 08:11:01 -0700 Subject: [PATCH 09/29] Linting --- multiqc/modules/bases2fastq/plot_samples.py | 36 ++++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py index b6088ba186..af745b97fa 100644 --- a/multiqc/modules/bases2fastq/plot_samples.py +++ b/multiqc/modules/bases2fastq/plot_samples.py @@ -168,14 +168,16 @@ def plot_sample_assignment_histogram(sample_data, group_lookup_dict, project_loo } ) - pconfig.update({ - "id": "sample_assignment_hist", - "title": "bases2fastq: Sample Polony Assignment Histogram", - "style": "lines+markers", - "xlab": "Assigned Polonies (Range)", - "ylab": "Number of Samples with N Polonies Assigned", - "categories": True, - }) + pconfig.update( + { + "id": "sample_assignment_hist", + "title": "bases2fastq: Sample Polony Assignment Histogram", + "style": "lines+markers", + "xlab": "Assigned Polonies (Range)", + "ylab": "Number of Samples with N Polonies Assigned", + "categories": True, + } + ) plot_name = "Sample Polony Assignment Histogram" plot_html = linegraph.plot(plot_content, pconfig=pconfig) @@ -267,14 +269,16 @@ def plot_sample_read_length(sample_data, group_lookup_dict, project_lookup_dict, } ) - pconfig.update({ - "id": "mean_read_length_per_sample", - "title": "bases2fastq: Mean Read Length Per Sample", - "style": "lines+markers", - "xlab": "Average Read Length (Range)", - "ylab": "Samples with Average Read Length", - "categories": True, - }) + pconfig.update( + { + "id": "mean_read_length_per_sample", + "title": "bases2fastq: Mean Read Length Per Sample", + "style": "lines+markers", + 
"xlab": "Average Read Length (Range)", + "ylab": "Samples with Average Read Length", + "categories": True, + } + ) plot_html = linegraph.plot(plot_content, pconfig=pconfig) description = "Distribution of average read lengths for all samples." From 585e241d358612ae7f6f6d3f9db41f6e5290a180 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 08:33:50 -0700 Subject: [PATCH 10/29] Fix raise error --- multiqc/modules/bases2fastq/bases2fastq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index 3a356f477d..f3809ae50c 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -160,7 +160,7 @@ def __init__(self): else: error_msg = "No run- or project-level data was retained. No report will be generated." log.error(error_msg) - return + raise ModuleNoSamplesFound(error_msg) # Create run and project groups run_groups = defaultdict(list) From b33b7ca939b546592ab4926b528533130a6d0338 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 09:02:00 -0700 Subject: [PATCH 11/29] test plot with random id --- multiqc/modules/bases2fastq/plot_runs.py | 32 +++++++++++++++--------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index 706b265353..ab4110b8f0 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -3,16 +3,23 @@ from multiqc.plots import bargraph, linegraph, table from multiqc import config from natsort import natsorted +import random +import string """ Functions for plotting per run information of bases2fastq """ +def generate_random_string(): + return ''.join(random.choices(string.ascii_letters + string.digits, k=4)) + + def plot_run_stats(run_data, color_dict): """ Plot a bar graph for polony numbers, Q30/Q40, index assignment rate and yields for 
each run """ + random_id = generate_random_string() run_names = list(run_data.keys()) run_names.sort() num_polonies = dict() @@ -58,7 +65,7 @@ def plot_run_stats(run_data, color_dict): ], "cpswitch": True, "stacking": "normal", - "id": "run_metrics_bar", + "id": f"run_metrics_bar_{random_id}", "title": "bases2fastq: General Sequencing Run QC metrics plot", "ylab": "QC", } @@ -71,7 +78,7 @@ def plot_run_stats(run_data, color_dict): ] * 2 plot_name = "Sequencing Run Yield" plot_html = bargraph.plot(plot_content, cats, pconfig=pconfig) - anchor = "run_yield_plot" + anchor = f"run_metrics_bar_{random_id}" description = "Bar plots of sequencing run yields. Please see individual run reports for details" helptext = """ This section shows and compare the yield and index assignment rate of each sequencing run.\n\n @@ -287,14 +294,14 @@ def tabulate_run_stats(run_data, color_dict): description = "QC metrics per run" helptext = """ This section displays metrics that indicate the quality of each sequencing run: \n - - Run Name: Unique identifier composed of (RunName)__(UUID), where (RunName) maps to the AVITI run name and (UUID) maps to the unique Bases2Fastq analysis result.\n - - Number of Polonies: The total number of polonies that are calculated for the run.\n - - Percentage Assigned Reads: The percentage of reads that are assigned to a sample.\n - - Assigned Yield (Gb): The run yield that is based on assigned reads in gigabases.\n - - Quality Score Mean: The mean Q score of base calls for the samples. This excludes filtered reads and no calls.\n - - Percent Q30: The percentage of ≥ Q30 Q scores for the run. This includes assigned and unassigned reads and excludes filtered reads and no calls.\n - - Percent Q40: The percentage of ≥ Q40 Q scores for the run. 
This includes assigned and unassigned reads and excludes filtered reads and no calls.\n - - Reads Eliminated: Number of reads eliminated across lanes.\n + - Run Name: Unique identifier composed of (RunName)__(UUID), where (RunName) maps to the AVITI run name and (UUID) maps to the unique Bases2Fastq analysis result.\n + - Number of Polonies: The total number of polonies that are calculated for the run.\n + - Percentage Assigned Reads: The percentage of reads that are assigned to a sample.\n + - Assigned Yield (Gb): The run yield that is based on assigned reads in gigabases.\n + - Quality Score Mean: The mean Q score of base calls for the samples. This excludes filtered reads and no calls.\n + - Percent Q30: The percentage of ≥ Q30 Q scores for the run. This includes assigned and unassigned reads and excludes filtered reads and no calls.\n + - Percent Q40: The percentage of ≥ Q40 Q scores for the run. This includes assigned and unassigned reads and excludes filtered reads and no calls.\n + - Reads Eliminated: Number of reads eliminated across lanes.\n """ return plot_html, plot_name, anchor, description, helptext, plot_content @@ -303,6 +310,7 @@ def tabulate_manifest_stats(run_data, color_dict): """ Tabulate general information and statistics of each run """ + random_id = generate_random_string() plot_content = dict() for s_name in run_data.keys(): run_stats = dict() @@ -336,12 +344,12 @@ def tabulate_manifest_stats(run_data, color_dict): pconfig = { "title": "Bases2Fastq: Run Manifest Metrics", "col1_header": "Run Name | Lane", - "id": "run_manifest_metrics", + "id": f"run_manifest_metrics_table_{random_id}", } plot_name = "Run Manifest Table" plot_html = table.plot(plot_content, headers, pconfig=pconfig) - anchor = "run_manifest_metrics_table" + anchor = f"run_manifest_metrics_table_{random_id}" description = "Run parameters used." 
helptext = """ This section displays metrics that indicate the parameters used in the run: \n From 0580139822db7a1bd918d453bcf9c51e29dc8f81 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 09:03:48 -0700 Subject: [PATCH 12/29] pre xcommit --- multiqc/modules/bases2fastq/plot_runs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index ab4110b8f0..a9e428cd6b 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -12,7 +12,7 @@ def generate_random_string(): - return ''.join(random.choices(string.ascii_letters + string.digits, k=4)) + return "".join(random.choices(string.ascii_letters + string.digits, k=4)) def plot_run_stats(run_data, color_dict): From ed1d894ecee56408d4b86b6f2a77e292fd4d5772 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 09:45:32 -0700 Subject: [PATCH 13/29] Test no id --- multiqc/modules/bases2fastq/plot_runs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index a9e428cd6b..defc6d1149 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -344,12 +344,12 @@ def tabulate_manifest_stats(run_data, color_dict): pconfig = { "title": "Bases2Fastq: Run Manifest Metrics", "col1_header": "Run Name | Lane", - "id": f"run_manifest_metrics_table_{random_id}", + # "id": f"run_manifest_metrics_table_{random_id}", } plot_name = "Run Manifest Table" plot_html = table.plot(plot_content, headers, pconfig=pconfig) - anchor = f"run_manifest_metrics_table_{random_id}" + anchor = f"run_manifest_metrics_table" description = "Run parameters used." 
helptext = """ This section displays metrics that indicate the parameters used in the run: \n From 16f5655e988baa81e27c45d78e76f91415a667f1 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 09:51:59 -0700 Subject: [PATCH 14/29] Random IDs --- multiqc/modules/bases2fastq/plot_runs.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index defc6d1149..e905237ee7 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -11,15 +11,14 @@ """ -def generate_random_string(): - return "".join(random.choices(string.ascii_letters + string.digits, k=4)) +def generate_random_string(length: int): + return "".join(random.choices(string.ascii_letters + string.digits, k=length)) def plot_run_stats(run_data, color_dict): """ Plot a bar graph for polony numbers, Q30/Q40, index assignment rate and yields for each run """ - random_id = generate_random_string() run_names = list(run_data.keys()) run_names.sort() num_polonies = dict() @@ -65,7 +64,7 @@ def plot_run_stats(run_data, color_dict): ], "cpswitch": True, "stacking": "normal", - "id": f"run_metrics_bar_{random_id}", + "id": generate_random_string(10), "title": "bases2fastq: General Sequencing Run QC metrics plot", "ylab": "QC", } @@ -78,7 +77,7 @@ def plot_run_stats(run_data, color_dict): ] * 2 plot_name = "Sequencing Run Yield" plot_html = bargraph.plot(plot_content, cats, pconfig=pconfig) - anchor = f"run_metrics_bar_{random_id}" + anchor = "run_metrics_bar" description = "Bar plots of sequencing run yields. 
Please see individual run reports for details" helptext = """ This section shows and compare the yield and index assignment rate of each sequencing run.\n\n @@ -310,7 +309,6 @@ def tabulate_manifest_stats(run_data, color_dict): """ Tabulate general information and statistics of each run """ - random_id = generate_random_string() plot_content = dict() for s_name in run_data.keys(): run_stats = dict() @@ -344,12 +342,12 @@ def tabulate_manifest_stats(run_data, color_dict): pconfig = { "title": "Bases2Fastq: Run Manifest Metrics", "col1_header": "Run Name | Lane", - # "id": f"run_manifest_metrics_table_{random_id}", + "id": generate_random_string(10), } plot_name = "Run Manifest Table" plot_html = table.plot(plot_content, headers, pconfig=pconfig) - anchor = f"run_manifest_metrics_table" + anchor = "run_manifest_metrics_table" description = "Run parameters used." helptext = """ This section displays metrics that indicate the parameters used in the run: \n From 1e0df111431c34dbccae2935fad1a377054379a6 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 09:57:28 -0700 Subject: [PATCH 15/29] Test mix static plus random --- multiqc/modules/bases2fastq/plot_runs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index e905237ee7..66274762ba 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -405,7 +405,7 @@ def tabulate_index_assignment_stats(run_data, color_dict): pconfig = { "title": "Bases2Fastq: Index Assignment Metrics", "col1_header": "Sample #", - "id": "index_assignment_metrics", + "id": f"index_assignment_metrics_{generate_random_string(5)}", } plot_name = "Index Assignment Metrics" From 20b59b7c67db83f3de4b1108cb1160494ded039f Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 10:12:51 -0700 Subject: [PATCH 16/29] standardize plot id --- multiqc/modules/bases2fastq/plot_runs.py | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index 66274762ba..9b2bde785d 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -342,7 +342,7 @@ def tabulate_manifest_stats(run_data, color_dict): pconfig = { "title": "Bases2Fastq: Run Manifest Metrics", "col1_header": "Run Name | Lane", - "id": generate_random_string(10), + "id": f"run_manifest_metrics_table_{generate_random_string(5)}", } plot_name = "Run Manifest Table" From 9bd588cdf18ea8f6ab341cbaec7317d0e4be3f77 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 11:45:00 -0700 Subject: [PATCH 17/29] Added random ids to plots --- multiqc/modules/bases2fastq/plot_runs.py | 8 ++++---- multiqc/modules/bases2fastq/plot_samples.py | 17 +++++++++-------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index 9b2bde785d..4cff385247 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -470,7 +470,7 @@ def tabulate_unassigned_index_stats(run_data, color_dict): pconfig = { "title": "Bases2Fastq: Unassigned Indices Metrics", "col1_header": "Index #", - "id": "index_unassignment_metrics", + "id": f"index_unassignment_metrics_{generate_random_string(5)}", } plot_name = "Unassigned Indices Metrics" @@ -514,7 +514,7 @@ def plot_lane_cycle_stats(run_data, color_dict): pconfig = { "title": "Bases2Fastq: Cycles Per Read Per Lane", - "id": "project_cycles_per_read_per_lane", + "id": f"project_cycles_per_read_per_lane_{generate_random_string(5)}", "ylab": "Read Cycles", "cpswitch": False, "subtitle": None, @@ -584,7 +584,7 @@ def plot_base_quality_hist(run_data, color_dict): "colors": color_dict, }, ], - "id": "per_run_bq_hist", + "id": f"per_run_bq_hist_{generate_random_string(5)}", "title": "bases2fastq: Quality 
Histograms", "ylab": "Percentage", } @@ -706,7 +706,7 @@ def plot_base_quality_by_cycle(run_data, color_dict): "x_lines": [{"color": "#FF0000", "width": 2, "value": r1r2_split, "dashStyle": "dash"}], "colors": color_dict, "ymin": 0, - "id": "per_run_quality_by_cycle", + "id": f"per_run_quality_by_cycle_{generate_random_string(5)}", "title": "bases2fastq: Quality by cycles", "ylab": "QC", } diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py index af745b97fa..5ef104ddb4 100644 --- a/multiqc/modules/bases2fastq/plot_samples.py +++ b/multiqc/modules/bases2fastq/plot_samples.py @@ -1,6 +1,7 @@ from typing import Any, Dict from multiqc.plots import bargraph, linegraph, table from multiqc import config +from .plot_runs import generate_random_string import numpy as np @@ -109,7 +110,7 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s "suffix": "%", } - pconfig = {"id": "sample_qc_metric_table", "title": "Sample QC Metrics Table", "no_violin": False} + pconfig = {"id": f"sample_qc_metric_table_{generate_random_string(5)}", "title": "Sample QC Metrics Table", "no_violin": False} plot_name = "Sample QC Metrics Table" plot_html = table.plot(plot_content, headers, pconfig=pconfig) @@ -170,7 +171,7 @@ def plot_sample_assignment_histogram(sample_data, group_lookup_dict, project_loo pconfig.update( { - "id": "sample_assignment_hist", + "id": f"sample_assignment_hist_{generate_random_string(5)}", "title": "bases2fastq: Sample Polony Assignment Histogram", "style": "lines+markers", "xlab": "Assigned Polonies (Range)", @@ -218,7 +219,7 @@ def plot_sample_read_length(sample_data, group_lookup_dict, project_lookup_dict, pconfig = { "title": "Bases2Fastq: Mean Read Length per Sample", - "id": "mean_read_length_per_sample", + "id": f"mean_read_length_per_sample_{generate_random_string(5)}", "ylab": "Bases", "cpswitch": False, "subtitle": None, @@ -271,7 +272,7 @@ def plot_sample_read_length(sample_data, 
group_lookup_dict, project_lookup_dict, pconfig.update( { - "id": "mean_read_length_per_sample", + "id": f"mean_read_length_per_sample_{generate_random_string(5)}", "title": "bases2fastq: Mean Read Length Per Sample", "style": "lines+markers", "xlab": "Average Read Length (Range)", @@ -339,7 +340,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c "x_lines": [{"color": "#FF0000", "width": 2, "value": r1r2_split, "dashStyle": "dash"}], "colors": color_dict, "ymin": 0, - "id": "per_cycle_base_content", + "id": f"per_cycle_base_content_{generate_random_string(5)}", "title": "bases2fastq: Per Cycle Base Content Percentage", } plot_html = linegraph.plot(plot_content, pconfig=pconfig) @@ -406,7 +407,7 @@ def plot_per_cycle_N_content(sample_data, group_lookup_dict, project_lookup_dict "colors": color_dict, "ymin": 0, "ymax": 100, - "id": "per_cycle_n_content", + "id": f"per_cycle_n_content_{generate_random_string(5)}", "title": "bases2fastq: Per Cycle N Content Percentage", } plot_html = linegraph.plot(plot_content, pconfig=pconfig) @@ -463,7 +464,7 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s "xlab": "GC Content (%)", "ylab": "Percentage of reads that have GC (%)", "colors": sample_color, - "id": "gc_hist", + "id": f"gc_hist_{generate_random_string(5)}", "title": "bases2fastq: Per Sample GC Content Histogram", } plot_name = "Per Sample GC Histogram" @@ -520,7 +521,7 @@ def plot_adapter_content(sample_data, group_lookup_dict, project_lookup_dict, sa adapter_percent = cycle["PercentReadsTrimmed"] plot_content[s_name].update({cycle_no: adapter_percent}) pconfig = { - "id": "per_cycle_adapter_content", + "id": f"per_cycle_adapter_content_{generate_random_string(5)}", "title": "bases2fastq: Per Cycle Adapter Content", "xlab": "Cycle", "ylab": "% of Sequences", From f299394064f09264d906f748bfb0c71efeb692f9 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 12:07:40 -0700 Subject: [PATCH 
18/29] Fix skipping --- multiqc/modules/bases2fastq/bases2fastq.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index f3809ae50c..ad1655e71d 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -74,6 +74,7 @@ def __init__(self): run_level_log_files = len(list(self.find_log_files("bases2fastq/run"))) project_level_log_files = len(list(self.find_log_files("bases2fastq/project"))) + if run_level_log_files == 0 and project_level_log_files == 0: error_msg = "No run- or project-level log files found within the Bases2Fastq results." log.error(error_msg) @@ -98,7 +99,7 @@ def __init__(self): [ len(self.run_level_data) == 0, num_run_level_samples == 0, - len(self.project_level_data), + len(self.project_level_data) == 0, num_project_level_samples == 0, ] ): From 06eafeda008d09ebbb3dfd3da9ea47bd216b626a Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Wed, 1 Oct 2025 14:06:10 -0700 Subject: [PATCH 19/29] Fixed color palette --- multiqc/modules/bases2fastq/plot_runs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index 4cff385247..efc074f103 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -64,7 +64,7 @@ def plot_run_stats(run_data, color_dict): ], "cpswitch": True, "stacking": "normal", - "id": generate_random_string(10), + "id": f"run_metrics_bar_{generate_random_string(10)}", "title": "bases2fastq: General Sequencing Run QC metrics plot", "ylab": "QC", } @@ -456,14 +456,14 @@ def tabulate_unassigned_index_stats(run_data, color_dict): headers["Polonies"] = { "title": "Polonies", "description": "Number of polonies assigned to indices.", - "scale": "GnYlRd", + "scale": "RdYlGn_r", } headers["% Polonies"] = { "title": "% Polonies", "description": 
"Percentage of total polonies assigned to this index combination.", "max": 100, "min": 0, - "scale": "GnYlRd", + "scale": "RdYlGn_r", "suffix": "%", } From 8addc829394a2bffc1b3c843f7d0b72e63e8edb3 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Thu, 2 Oct 2025 17:16:31 -0700 Subject: [PATCH 20/29] DefaultProject bug fix, added mean len to table --- multiqc/modules/bases2fastq/bases2fastq.py | 7 ++++--- multiqc/modules/bases2fastq/plot_samples.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index ad1655e71d..29b1eb7e5a 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -167,6 +167,7 @@ def __init__(self): run_groups = defaultdict(list) project_groups = defaultdict(list) in_project_sample_groups = defaultdict(list) + ind_sample_groups = defaultdict(list) sample_to_run_group = {} for sample in sample_data.keys(): (_run_name, _) = sample.split("__") @@ -174,9 +175,10 @@ def __init__(self): sample_to_run_group[sample] = _run_name sample_project = samples_to_projects[sample] project_groups[sample_project].append(sample) + ind_sample_groups[sample] = [sample] if summary_path == "project_level": in_project_sample_groups[sample].append(sample) - merged_groups = {**run_groups, **project_groups, **in_project_sample_groups} + merged_groups = {**run_groups, **project_groups, **in_project_sample_groups, **ind_sample_groups} # Assign color for each group self.color_getter = mqc_colour.mqc_colour_scale() @@ -188,8 +190,7 @@ def __init__(self): [], ) if len(merged_groups) > len(self.palette): - hex_range = 2**24 - extra_colors = [hex(random.randrange(0, hex_range)) for _ in range(len(merged_groups), len(self.palette))] + extra_colors = ["#{:06x}".format(random.randrange(0, 0xFFFFFF)) for _ in range(len(self.palette), len(merged_groups))] self.palette = self.palette + extra_colors self.group_color = 
{g: c for g, c in zip(merged_groups.keys(), self.palette[: len(merged_groups)])} self.sample_color = dict() diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py index 5ef104ddb4..72caebb7fb 100644 --- a/multiqc/modules/bases2fastq/plot_samples.py +++ b/multiqc/modules/bases2fastq/plot_samples.py @@ -37,6 +37,7 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s Tabulate general information and statistics per sample """ plot_content = dict() + reads_present = set() for s_name in sample_data.keys(): general_stats = dict() general_stats.update({"group": group_lookup_dict[s_name]}) @@ -48,6 +49,13 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s general_stats.update({"percent_q40_sample": sample_data[s_name]["PercentQ40"]}) general_stats.update({"reads_eliminated": _calculate_sample_reads_eliminated(sample_data[s_name])}) general_stats.update({"percent_mismatch": sample_data[s_name]["PercentMismatch"]}) + if "Reads" in sample_data[s_name]: + for read in sample_data[s_name]["Reads"]: + read_name = read["Read"] + reads_present.add(read_name) + mean_length = read["MeanReadLength"] + general_stats.update({f"{read_name}_mean_len": mean_length}) + plot_content.update({s_name: general_stats}) headers = {} @@ -97,6 +105,14 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s "scale": "RdYlGn", "suffix": "%", } + + for read in sorted(reads_present): + headers[f"{read}_mean_len"] = { + "title": f"{read} Mean Lenght", + "description": f"Average read length for read {read}", + "scale": "RdYlGn", + } + headers["reads_eliminated"] = { "title": "Reads Eliminated", "description": "Number of reads eliminated.", From aa87263a3cf061ca6cafaa7d09b95e07e60de5b7 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Thu, 2 Oct 2025 17:25:58 -0700 Subject: [PATCH 21/29] Removed histograms --- multiqc/modules/bases2fastq/bases2fastq.py | 4 - 
multiqc/modules/bases2fastq/plot_samples.py | 160 +------------------- 2 files changed, 1 insertion(+), 163 deletions(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index 29b1eb7e5a..19851b6b4a 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -24,12 +24,10 @@ ) from multiqc.modules.bases2fastq.plot_samples import ( tabulate_sample_stats, - plot_sample_assignment_histogram, sequence_content_plot, plot_per_cycle_N_content, plot_adapter_content, plot_per_read_gc_hist, - plot_sample_read_length, ) log = logging.getLogger(__name__) @@ -576,8 +574,6 @@ def add_run_plots(self, data, plot_functions): def add_sample_plots(self, data, group_lookup, project_lookup): plot_functions = [ tabulate_sample_stats, - plot_sample_assignment_histogram, - plot_sample_read_length, sequence_content_plot, plot_per_cycle_N_content, plot_adapter_content, diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py index 72caebb7fb..919a996bb3 100644 --- a/multiqc/modules/bases2fastq/plot_samples.py +++ b/multiqc/modules/bases2fastq/plot_samples.py @@ -108,7 +108,7 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s for read in sorted(reads_present): headers[f"{read}_mean_len"] = { - "title": f"{read} Mean Lenght", + "title": f"{read} Mean Length", "description": f"Average read length for read {read}", "scale": "RdYlGn", } @@ -149,164 +149,6 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s return plot_html, plot_name, anchor, description, helptext, plot_content -def plot_sample_assignment_histogram(sample_data, group_lookup_dict, project_lookup_dict, color_dict): - """ - Plots a histogram of number of assigned polonies in all samples for each run. 
- """ - plot_content = [] - polony_assignments = {} - for s_name, data in sample_data.items(): - if "NumPolonies" not in data: - continue - run_name, _ = s_name.split("__") - if run_name not in polony_assignments: - polony_assignments[run_name] = [] - polonies = data["NumPolonies"] - polony_assignments[run_name].append(polonies) - - pconfig: Dict[str, Any] = {"data_labels": []} - for run_name, assignment_data in polony_assignments.items(): - run_data = {} - bins = 50 - for bins in [50, 20, 10]: - if len(assignment_data) > bins: - break - hist, bin_edges = np.histogram(assignment_data, bins=bins) - bin_ranges = [f"({round(bin_edges[i], 2)}-{round(bin_edges[i + 1], 2)})" for i in range(len(bin_edges) - 1)] - points = [float(point) for point in hist] - run_data["Polonies Assigned"] = {bin_range: point for bin_range, point in zip(bin_ranges, points)} - plot_content.append(run_data) - - pconfig["data_labels"].append( - { - "name": run_name, - "xlab": "Assigned Polonies (Range)", - "ylab": "Number of Samples with N Polonies Assigned", - } - ) - - pconfig.update( - { - "id": f"sample_assignment_hist_{generate_random_string(5)}", - "title": "bases2fastq: Sample Polony Assignment Histogram", - "style": "lines+markers", - "xlab": "Assigned Polonies (Range)", - "ylab": "Number of Samples with N Polonies Assigned", - "categories": True, - } - ) - - plot_name = "Sample Polony Assignment Histogram" - plot_html = linegraph.plot(plot_content, pconfig=pconfig) - anchor = "sample_assignment_hist" - description = "Histogram showing the distribution of samples according to the number of polonies assigned to them." - helptext = """ - Shows bins of assigned polony counts on the X-axis and the number of samples whose number of polonies fall - within each bin on the Y-axis. 
- """ - - return plot_html, plot_name, anchor, description, helptext, plot_content - - -def plot_sample_read_length(sample_data, group_lookup_dict, project_lookup_dict, color_dict): - """ - Plots the average read length for each sample if less than 50 samples in total, or the distribution per run - as a lineplot based on histogram bins. - """ - total_samples = len(sample_data.keys()) - plot_content = dict() - pconfig = {} - plot_html = None - plot_name = "Mean Read Length per Sample" - anchor = "mean_read_length_per_sample" - description = "" - helptext = "" - - if total_samples <= 50: - for s_name, data in sample_data.items(): - read_lengths = {s_name: {}} - if "Reads" not in data: - continue - for read in data["Reads"]: - read_name = read["Read"] - mean_length = read["MeanReadLength"] - read_lengths[s_name][read_name] = mean_length - plot_content.update(read_lengths) - - pconfig = { - "title": "Bases2Fastq: Mean Read Length per Sample", - "id": f"mean_read_length_per_sample_{generate_random_string(5)}", - "ylab": "Bases", - "cpswitch": False, - "subtitle": None, - "stacking": "group", - } - plot_html = bargraph.plot(plot_content, pconfig=pconfig) - description = "Average read length per read for all samples." - helptext = """ - Shows the average read length for each read in each sample. 
- """ - - elif total_samples > 50: - plot_content = [] - read_lengths = {} - for s_name, data in sample_data.items(): - if "Reads" not in data: - continue - run_name, _ = s_name.split("__") - if run_name not in read_lengths: - read_lengths[run_name] = {} - for read in data["Reads"]: - read_id = read["Read"] - if read_id not in read_lengths[run_name]: - read_lengths[run_name][read_id] = [] - read_lengths[run_name][read_id].append(read["MeanReadLength"]) - - pconfig: Dict[str, Any] = {"data_labels": []} - for run_name, read_data in read_lengths.items(): - run_data = {} - for read_name, read_lengths in read_data.items(): - bins = 50 - for bins in [50, 20, 10]: - if len(read_lengths) > bins: - break - hist, bin_edges = np.histogram(read_lengths, bins=bins) - bin_ranges = [ - f"({round(bin_edges[i], 2)}-{round(bin_edges[i + 1], 2)})" for i in range(len(bin_edges) - 1) - ] - points = [float(point) for point in hist] - run_data[read_name] = {bin_range: point for bin_range, point in zip(bin_ranges, points)} - plot_content.append(run_data) - - pconfig["data_labels"].append( - { - "name": run_name, - "xlab": "Average Read Length (Range)", - "ylab": "Samples with Average Read Length", - } - ) - - pconfig.update( - { - "id": f"mean_read_length_per_sample_{generate_random_string(5)}", - "title": "bases2fastq: Mean Read Length Per Sample", - "style": "lines+markers", - "xlab": "Average Read Length (Range)", - "ylab": "Samples with Average Read Length", - "categories": True, - } - ) - - plot_html = linegraph.plot(plot_content, pconfig=pconfig) - description = "Distribution of average read lengths for all samples." - helptext = """ - Shows the distribution of samples whose average read lengths fall in a given range. - Reads are shown as different lines. 
- """ - - return plot_html, plot_name, anchor, description, helptext, plot_content - - def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, color_dict): """Create the epic HTML for the FastQC sequence content heatmap""" From 626544a0c461ab5065c4e1e978bfec054a079764 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Fri, 3 Oct 2025 09:52:21 -0700 Subject: [PATCH 22/29] Linting --- multiqc/modules/bases2fastq/plot_runs.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index efc074f103..ae06f9af12 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -371,7 +371,7 @@ def tabulate_index_assignment_stats(run_data, color_dict): sample_index_stats.update({"sample_name": sample_data["SampleID"]}) sample_index_stats.update({"index_1": sample_data["Index1"]}) sample_index_stats.update({"index_2": sample_data["Index2"]}) - sample_index_stats.update({"polonies": sample_data["SamplePolonyCounts"]}) + sample_index_stats.update({"assigned_polonies": sample_data["SamplePolonyCounts"]}) sample_index_stats.update({"polony_percentage": sample_data["PercentOfPolonies"]}) plot_content.update({index: sample_index_stats}) @@ -388,8 +388,8 @@ def tabulate_index_assignment_stats(run_data, color_dict): "title": "Index 2", "description": "Sample Index 2 (I2).", } - headers["polonies"] = { - "title": "Polonies", + headers["assigned_polonies"] = { + "title": "Assigned Polonies", "description": "Number of polonies assigned to sample.", "scale": "RdYlGn", } @@ -456,14 +456,14 @@ def tabulate_unassigned_index_stats(run_data, color_dict): headers["Polonies"] = { "title": "Polonies", "description": "Number of polonies assigned to indices.", - "scale": "RdYlGn_r", + "scale": "RdYlGn-rev", } headers["% Polonies"] = { "title": "% Polonies", "description": "Percentage of total polonies assigned to this index 
combination.", "max": 100, "min": 0, - "scale": "RdYlGn_r", + "scale": "RdYlGn-rev", "suffix": "%", } From e34652bf37a258f151e6705f14f4c95d6877b3e0 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Fri, 3 Oct 2025 09:55:02 -0700 Subject: [PATCH 23/29] fix pre-commit --- multiqc/modules/bases2fastq/bases2fastq.py | 5 +++-- multiqc/modules/bases2fastq/plot_samples.py | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index 19851b6b4a..598082e295 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -72,7 +72,6 @@ def __init__(self): run_level_log_files = len(list(self.find_log_files("bases2fastq/run"))) project_level_log_files = len(list(self.find_log_files("bases2fastq/project"))) - if run_level_log_files == 0 and project_level_log_files == 0: error_msg = "No run- or project-level log files found within the Bases2Fastq results." 
log.error(error_msg) @@ -188,7 +187,9 @@ def __init__(self): [], ) if len(merged_groups) > len(self.palette): - extra_colors = ["#{:06x}".format(random.randrange(0, 0xFFFFFF)) for _ in range(len(self.palette), len(merged_groups))] + extra_colors = [ + "#{:06x}".format(random.randrange(0, 0xFFFFFF)) for _ in range(len(self.palette), len(merged_groups)) + ] self.palette = self.palette + extra_colors self.group_color = {g: c for g, c in zip(merged_groups.keys(), self.palette[: len(merged_groups)])} self.sample_color = dict() diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py index 919a996bb3..b629f0fc0e 100644 --- a/multiqc/modules/bases2fastq/plot_samples.py +++ b/multiqc/modules/bases2fastq/plot_samples.py @@ -126,7 +126,11 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s "suffix": "%", } - pconfig = {"id": f"sample_qc_metric_table_{generate_random_string(5)}", "title": "Sample QC Metrics Table", "no_violin": False} + pconfig = { + "id": f"sample_qc_metric_table_{generate_random_string(5)}", + "title": "Sample QC Metrics Table", + "no_violin": False, + } plot_name = "Sample QC Metrics Table" plot_html = table.plot(plot_content, headers, pconfig=pconfig) From b1dbaf6ef76517c8a416977a5f110f818d0386b7 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Fri, 3 Oct 2025 11:07:49 -0700 Subject: [PATCH 24/29] Derep plot ids --- multiqc/modules/bases2fastq/bases2fastq.py | 2 +- multiqc/modules/bases2fastq/plot_runs.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index 598082e295..4f13006f2e 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -429,7 +429,7 @@ def _parse_run_unassigned_sequences(self, data_source: str) -> Dict[str, Any]: "Lane": lane_id, "I1": sequence["I1"], "I2": sequence["I2"], - "Polonies": 
sequence["Count"], + "Number of Polonies": sequence["Count"], "% Polonies": float("nan"), } if total_polonies > 0: diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index ae06f9af12..a06061ec5f 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -179,7 +179,7 @@ def tabulate_project_stats(run_data, color_dict): pconfig = { "title": "bases2fastq: General Sequencing (Project) QC metrics", "col1_header": "Run Name", - "id": "project_run_metrics_table", + "id": f"project_run_metrics_table_{generate_random_string(5)}", "ylab": "QC", } @@ -283,7 +283,7 @@ def tabulate_run_stats(run_data, color_dict): pconfig = { "title": "Bases2Fastq: General Sequencing Run QC metrics", "col1_header": "Run Name", - "id": "run_metrics_table", + "id": f"run_metrics_table_{generate_random_string(5)}", "ylab": "QC", } @@ -435,7 +435,6 @@ def tabulate_unassigned_index_stats(run_data, color_dict): - Polonies - % Polonies """ - headers = {} headers["Run Name"] = { "title": "Run Name", @@ -453,7 +452,7 @@ def tabulate_unassigned_index_stats(run_data, color_dict): "title": "I2", "description": "Index 2.", } - headers["Polonies"] = { + headers["Number of Polonies"] = { "title": "Polonies", "description": "Number of polonies assigned to indices.", "scale": "RdYlGn-rev", From 7d6f18ef2043d083c3c528bd67bd150cc88435a7 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Sun, 5 Oct 2025 18:19:14 -0700 Subject: [PATCH 25/29] Added Q50 metrics --- multiqc/modules/bases2fastq/bases2fastq.py | 13 +- multiqc/modules/bases2fastq/plot_runs.py | 134 +++++++++++++------- multiqc/modules/bases2fastq/plot_samples.py | 24 +++- 3 files changed, 117 insertions(+), 54 deletions(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index 4f13006f2e..a642611718 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -20,7 
+20,6 @@ tabulate_project_stats, plot_base_quality_hist, plot_base_quality_by_cycle, - plot_lane_cycle_stats, ) from multiqc.modules.bases2fastq.plot_samples import ( tabulate_sample_stats, @@ -196,7 +195,7 @@ def __init__(self): for s_name in samples_to_projects.keys(): s_color = ( self.group_color[s_name] - if summary_path == "project_level" + if (summary_path == "project_level" or len(project_groups) == 1) else self.group_color[samples_to_projects[s_name]] ) self.sample_color.update({s_name: s_color}) @@ -231,7 +230,7 @@ def __init__(self): self.add_run_plots( data=run_data, - plot_functions=[plot_lane_cycle_stats, plot_run_stats, plot_base_quality_hist, plot_base_quality_by_cycle], + plot_functions=[plot_run_stats, plot_base_quality_hist, plot_base_quality_by_cycle], ) self.add_sample_plots(data=sample_data, group_lookup=samples_to_projects, project_lookup=samples_to_projects) @@ -516,15 +515,17 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]: f"Missing data needed to extract index assignment for sample {sample_id}. Skipping." 
) continue - if sample_expected_seq not in sample_to_index_assignment: - sample_to_index_assignment[sample_expected_seq] = { + if run_analysis_name not in sample_to_index_assignment: + sample_to_index_assignment[run_analysis_name] = {} + if sample_expected_seq not in sample_to_index_assignment[run_analysis_name]: + sample_to_index_assignment[run_analysis_name][sample_expected_seq] = { "SampleID": sample_id, "SamplePolonyCounts": 0, "PercentOfPolonies": float("nan"), "Index1": "", "Index2": "", } - sample_to_index_assignment[sample_expected_seq]["SamplePolonyCounts"] += sample_counts + sample_to_index_assignment[run_analysis_name][sample_expected_seq]["SamplePolonyCounts"] += sample_counts for index_assigment in sample_to_index_assignment.values(): if total_polonies > 0: diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index a06061ec5f..ba51644b9f 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -117,6 +117,8 @@ def tabulate_project_stats(run_data, color_dict): Tabulate general information and statistics of each run """ plot_content = dict() + is_percent_q50_present = False + reads_present = [] for s_name in run_data.keys(): project = run_data[s_name]["Project"] run_project_name = f"{s_name} | {project}" @@ -126,7 +128,20 @@ def tabulate_project_stats(run_data, color_dict): run_stats.update({"mean_base_quality_run": run_data[s_name]["QualityScoreMean"]}) run_stats.update({"percent_q30_run": run_data[s_name]["PercentQ30"]}) run_stats.update({"percent_q40_run": run_data[s_name]["PercentQ40"]}) + percent_q50 = run_data[s_name].get("PercentQ50") + if percent_q50 is not None: + is_percent_q50_present = True + run_stats.update({"percent_q50_run": percent_q50}) run_stats.update({"reads_eliminated": _calculate_reads_eliminated(run_data[s_name])}) + if "Reads" in run_data[s_name]: + for read in run_data[s_name]["Reads"]: + if "Cycles" not in read or "Read" not in read: + 
continue + read_name = read["Read"] + num_cycles = len(read["Cycles"]) + reads_present.append(read_name) + run_stats.update({f"{read_name}_cycles": num_cycles}) + plot_content.update({run_project_name: run_stats}) headers = {} @@ -171,6 +186,22 @@ def tabulate_project_stats(run_data, color_dict): "scale": "RdYlGn", "suffix": "%", } + if is_percent_q50_present: + headers["percent_q50_run"] = { + "title": "Percent Q50", + "description": "The percentage of ≥ Q50 Q scores for the run. This includes assigned and unassigned reads and excludes filtered reads and no calls.", + "max": 100, + "min": 0, + "scale": "RdYlGn", + "suffix": "%", + } + for read in reads_present: + headers[f"{read}_cycles"] = { + "title": f"Cycles {read}", + "description": f"Number of cycles for read {read}.", + "scale": "RdPu", + } + headers["reads_eliminated"] = { "title": "Reads Eliminated", "description": "Number of reads eliminated.", @@ -213,6 +244,8 @@ def tabulate_run_stats(run_data, color_dict): Tabulate general information and statistics of each run """ plot_content = dict() + is_percent_q50_present = False + reads_present = [] for s_name in run_data.keys(): run_stats = dict() run_stats.update({"num_polonies_run": int(run_data[s_name]["NumPolonies"])}) @@ -222,7 +255,20 @@ def tabulate_run_stats(run_data, color_dict): run_stats.update({"mean_base_quality_run": run_data[s_name]["QualityScoreMean"]}) run_stats.update({"percent_q30_run": run_data[s_name]["PercentQ30"]}) run_stats.update({"percent_q40_run": run_data[s_name]["PercentQ40"]}) + percent_q50 = run_data[s_name].get("PercentQ50") + if percent_q50 is not None: + is_percent_q50_present = True + run_stats.update({"percent_q50_run": percent_q50}) run_stats.update({"reads_eliminated": _calculate_reads_eliminated(run_data[s_name])}) + if "Reads" in run_data[s_name]: + for read in run_data[s_name]["Reads"]: + if "Cycles" not in read or "Read" not in read: + continue + read_name = read["Read"] + num_cycles = len(read["Cycles"]) + 
reads_present.append(read_name) + run_stats.update({f"{read_name}_cycles": num_cycles}) + plot_content.update({s_name: run_stats}) headers = {} @@ -275,6 +321,21 @@ def tabulate_run_stats(run_data, color_dict): "scale": "RdYlGn", "suffix": "%", } + if is_percent_q50_present: + headers["percent_q50_run"] = { + "title": "Percent Q50", + "description": "The percentage of ≥ Q50 Q scores for the run. This includes assigned and unassigned reads and excludes filtered reads and no calls.", + "max": 100, + "min": 0, + "scale": "RdYlGn", + "suffix": "%", + } + for read in reads_present: + headers[f"{read}_cycles"] = { + "title": f"Cycles {read}", + "description": f"Number of cycles for read {read}.", + "scale": "RdPu", + } headers["reads_eliminated"] = { "title": "Reads Eliminated", "description": "Number of reads eliminated.", @@ -300,6 +361,7 @@ def tabulate_run_stats(run_data, color_dict): - Quality Score Mean: The mean Q score of base calls for the samples. This excludes filtered reads and no calls.\n - Percent Q30: The percentage of ≥ Q30 Q scores for the run. This includes assigned and unassigned reads and excludes filtered reads and no calls.\n - Percent Q40: The percentage of ≥ Q40 Q scores for the run. This includes assigned and unassigned reads and excludes filtered reads and no calls.\n + - Percent Q50: The percentage of ≥ Q50 Q scores for the run (when applicable). 
This includes assigned and unassigned reads and excludes filtered reads and no calls.\n - Reads Eliminated: Number of reads eliminated across lanes.\n """ return plot_html, plot_name, anchor, description, helptext, plot_content @@ -488,48 +550,6 @@ def tabulate_unassigned_index_stats(run_data, color_dict): return plot_html, plot_name, anchor, description, helptext, run_data -def plot_lane_cycle_stats(run_data, color_dict): - """ - Plot number of cycles per read and lane - """ - plot_content = dict() - for s_name in run_data.keys(): - if "Lanes" not in run_data[s_name]: - continue - for lane in run_data[s_name]["Lanes"]: - if "Lane" not in lane or "Reads" not in lane: - continue - lane_stats = dict() - lane_name = f"L{lane['Lane']}" - run_name = f"{s_name} | {lane_name}" - lane_stats[run_name] = {} - for read in lane["Reads"]: - if "Cycles" not in read or "Read" not in read: - continue - read_name = read["Read"] - num_cycles = len(read["Cycles"]) - lane_stats[run_name][read_name] = num_cycles - plot_content.update(lane_stats) - - pconfig = { - "title": "Bases2Fastq: Cycles Per Read Per Lane", - "id": f"project_cycles_per_read_per_lane_{generate_random_string(5)}", - "ylab": "Read Cycles", - "cpswitch": False, - "subtitle": None, - } - - plot_name = "Cycles Per Read Per Lane" - plot_html = bargraph.plot(plot_content, pconfig=pconfig) - anchor = "cycles_per_read_per_lane" - description = "Number of sequencing cycles per read in each lane." - helptext = """ - Shows the number of cycles used for each read in every flowcell lane. - Useful for confirming that read lengths match the expected sequencing setup across all lanes. 
- """ - return plot_html, plot_name, anchor, description, helptext, plot_content - - def plot_base_quality_hist(run_data, color_dict): # Prepare plot data for per base BQ histogram bq_hist_dict = dict() @@ -675,6 +695,31 @@ def plot_base_quality_by_cycle(run_data, color_dict): cycle_dict.update({cycle_no: cycle["PercentQ40"]}) Q40_dict.update({s_name: cycle_dict}) + # Prepare plot data for %Q50 of each cycle + Q50_dict = {} + percent_q50_values = set() + for s_name in run_data.keys(): + paired_end = True if len(run_data[s_name]["Reads"]) > 1 else False + cycle_dict = dict() + for cycle in run_data[s_name]["Reads"][0]["Cycles"]: + cycle_no = int(cycle["Cycle"]) + if "PercentQ50" not in cycle: + continue + cycle_perc_q50 = cycle["PercentQ50"] + cycle_dict.update({cycle_no: cycle_perc_q50}) + if cycle_perc_q50 is not None: + percent_q50_values.add(cycle_perc_q50) + if paired_end: + for cycle in run_data[s_name]["Reads"][1]["Cycles"]: + cycle_no = int(cycle["Cycle"]) + r1r2_split + if "PercentQ50" not in cycle: + continue + cycle_perc_q50 = cycle["PercentQ50"] + cycle_dict.update({cycle_no: cycle_perc_q50}) + if cycle_perc_q50 is not None: + percent_q50_values.add(cycle_perc_q50) + Q50_dict.update({s_name: cycle_dict}) + # Prepare plot data for % base calls below PF threshold below_pf_dict = {} for s_name in run_data.keys(): @@ -709,13 +754,16 @@ def plot_base_quality_by_cycle(run_data, color_dict): "title": "bases2fastq: Quality by cycles", "ylab": "QC", } + if len(percent_q50_values) > 0 and any(v is not None for v in percent_q50_values): + plot_content.insert(4, Q50_dict) + pconfig["data_labels"].insert(4, {"name": "%Q50", "xlab": "cycle", "ylab": "Percentage", "ymax": 100}) plot_html = linegraph.plot(plot_content, pconfig=pconfig) plot_name = "Quality Metrics By Cycle" anchor = "per_cycle_quality" description = "Per run base qualities by cycle. Read 1 and Read 2 are separated by a red dashed line." 
helptext = """ This section plots the base qualities by each instrument cycle.\n - Choose between Median Quality, Mean Quality, Percent Q30 or Percentage Q40 per cycle.\n + Choose between Median Quality, Mean Quality, Percent Q30, Percent Q40 or Percent Q50 (when applicable) per cycle.\n Read 1 and Read 2 are separated by a red dashed line. """ return plot_html, plot_name, anchor, description, helptext, plot_content diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py index b629f0fc0e..ebaab9b166 100644 --- a/multiqc/modules/bases2fastq/plot_samples.py +++ b/multiqc/modules/bases2fastq/plot_samples.py @@ -38,6 +38,7 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s """ plot_content = dict() reads_present = set() + is_percent_q50_present = False for s_name in sample_data.keys(): general_stats = dict() general_stats.update({"group": group_lookup_dict[s_name]}) @@ -47,6 +48,10 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s general_stats.update({"mean_base_quality_sample": sample_data[s_name]["QualityScoreMean"]}) general_stats.update({"percent_q30_sample": sample_data[s_name]["PercentQ30"]}) general_stats.update({"percent_q40_sample": sample_data[s_name]["PercentQ40"]}) + percent_q50 = sample_data[s_name].get("PercentQ50") + if percent_q50 is not None: + is_percent_q50_present = True + general_stats.update({"percent_q50_run": percent_q50}) general_stats.update({"reads_eliminated": _calculate_sample_reads_eliminated(sample_data[s_name])}) general_stats.update({"percent_mismatch": sample_data[s_name]["PercentMismatch"]}) if "Reads" in sample_data[s_name]: @@ -74,24 +79,24 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s } headers["num_polonies_sample"] = { "title": "# Polonies", - "description": "The total number of polonies that are calculated for the run", + "description": "The total number of polonies that are 
calculated for the run.", "min": 0, "scale": "Blues", } headers["yield_sample"] = { "title": "Yield (Gb)", - "description": "The sample yield based on assigned reads in gigabases", + "description": "The sample yield based on assigned reads in gigabases.", "scale": "Greens", } headers["mean_base_quality_sample"] = { "title": "Mean Base Quality", - "description": "Average base quality across R1/R2", + "description": "Average base quality across R1/R2.", "min": 0, "scale": "Spectral", } headers["percent_q30_sample"] = { "title": "Percent Q30", - "description": "The percentage of ≥ Q30 Q scores for the sample. This includes assigned reads and excludes filtered reads and no calls", + "description": "The percentage of ≥ Q30 Q scores for the sample. This includes assigned reads and excludes filtered reads and no calls.", "max": 100, "min": 0, "scale": "RdYlGn", @@ -99,12 +104,21 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s } headers["percent_q40_sample"] = { "title": "Percent Q40", - "description": "The percentage of ≥ Q40 Q scores for the sample. This includes assigned reads and excludes filtered reads and no calls", + "description": "The percentage of ≥ Q40 Q scores for the sample. This includes assigned reads and excludes filtered reads and no calls.", "max": 100, "min": 0, "scale": "RdYlGn", "suffix": "%", } + if is_percent_q50_present: + headers["percent_q50_run"] = { + "title": "Percent Q50", + "description": "The percentage of ≥ Q50 Q scores for the sample. 
This includes assigned reads and excludes filtered reads and no calls.", + "max": 100, + "min": 0, + "scale": "RdYlGn", + "suffix": "%", + } for read in sorted(reads_present): headers[f"{read}_mean_len"] = { From 7a1a59febba1a6efb1a82dbea9084b0bc537d202 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Sun, 5 Oct 2025 18:19:59 -0700 Subject: [PATCH 26/29] Linting --- multiqc/modules/bases2fastq/bases2fastq.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index a642611718..179de71855 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -525,7 +525,9 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]: "Index1": "", "Index2": "", } - sample_to_index_assignment[run_analysis_name][sample_expected_seq]["SamplePolonyCounts"] += sample_counts + sample_to_index_assignment[run_analysis_name][sample_expected_seq]["SamplePolonyCounts"] += ( + sample_counts + ) for index_assigment in sample_to_index_assignment.values(): if total_polonies > 0: From f97aa411e5277ae37fe21b44d0a28f604ccc9fe6 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Thu, 9 Oct 2025 15:22:52 -0700 Subject: [PATCH 27/29] Added in project index assignment --- multiqc/modules/bases2fastq/bases2fastq.py | 158 +++++++++++++++++++-- multiqc/modules/bases2fastq/plot_runs.py | 41 ++++-- 2 files changed, 179 insertions(+), 20 deletions(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index 179de71855..9e7d7b900a 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -147,6 +147,7 @@ def __init__(self): run_data = self.project_level_data sample_data = self.project_level_samples samples_to_projects = self.project_level_samples_to_project + index_assigment_data = 
self._parse_index_assignment_in_project("bases2fastq/project") elif summary_path == "combined_level": run_data = self.run_level_data sample_data = self.project_level_samples @@ -227,6 +228,13 @@ def __init__(self): tabulate_unassigned_index_stats, ], ) + else: + self.add_run_plots( + data=index_assigment_data, + plot_functions=[ + tabulate_index_assignment_stats, + ], + ) self.add_run_plots( data=run_data, @@ -529,11 +537,11 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]: sample_counts ) - for index_assigment in sample_to_index_assignment.values(): - if total_polonies > 0: - index_assigment["PercentOfPolonies"] = round( - index_assigment["SamplePolonyCounts"] / total_polonies * 100, 2 - ) + for sample_data in sample_to_index_assignment[run_analysis_name].values(): + if total_polonies > 0: + sample_data["PercentOfPolonies"] = round( + sample_data["SamplePolonyCounts"] / total_polonies * 100, 2 + ) run_manifest = json.loads(f["f"]) if "Samples" not in run_manifest: @@ -554,18 +562,150 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]: index_1 = index_data.get("Index1", "") index_2 = index_data.get("Index2", "") merged_indices = f"{index_1}{index_2}" - if merged_indices not in sample_to_index_assignment: + if merged_indices not in sample_to_index_assignment[run_analysis_name]: log.error(f"Index assignment information not found for sample {sample_id}. Skipping.") continue - if sample_id != sample_to_index_assignment[merged_indices]["SampleID"]: + if sample_id != sample_to_index_assignment[run_analysis_name][merged_indices]["SampleID"]: + log.error( + f"RunManifest SampleID <{sample_id}> does not match " + f"RunStats SampleID {sample_to_index_assignment[run_analysis_name][merged_indices]['SampleID']}." + " Skipping."
+ ) + continue + sample_to_index_assignment[run_analysis_name][merged_indices]["Index1"] = index_1 + sample_to_index_assignment[run_analysis_name][merged_indices]["Index2"] = index_2 + + return sample_to_index_assignment + + def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any]: + sample_to_index_assignment = {} + + if data_source == "": + return sample_to_index_assignment + + for f in self.find_log_files(data_source): + directory = f.get("root") + if not directory: + continue + + # Get RunName and RunID from RunParameters.json + run_manifest = Path(directory) / "../../RunManifest.json" + if not run_manifest.exists(): + log.error( + f"RunManifest.json could not be found in {run_manifest}. Skipping index assignment.\n" + "Please visit Elembio online documentation for more information - " + "https://docs.elembio.io/docs/bases2fastq/introduction/" + ) + continue + + project_stats = json.loads(f["f"]) + run_analysis_name = None + run_name = project_stats.get("RunName", None) + analysis_id = project_stats.get("AnalysisID", None) + project = self.clean_s_name(project_stats.get("Project", "DefaultProject"), f) + + if run_name and analysis_id: + run_analysis_name = "-".join([run_name, analysis_id[0:4]]) + else: + log.error( + "Error with project's RunStats.json. Either RunName or AnalysisID is absent.\n" + "Please visit Elembio online documentation for more information - " + "https://docs.elembio.io/docs/bases2fastq/introduction/" + ) + log.debug(f"Error in RunStats.json: {f['fn']}") + log.debug(f"Missing: RunName: {run_name} or AnalysisID: {analysis_id}") + continue + + # skip run if in user provider ignore list + if self.is_ignore_sample(run_analysis_name): + log.info(f"Skipping <{run_analysis_name}> because it is present in ignore list.") + continue + + # Ensure sample stats are present + if "SampleStats" not in project_stats: + log.error( + "Error, missing SampleStats in RunStats.json. 
Skipping index assignment metrics.\n" + "Please visit Elembio online documentation for more information - " + "https://docs.elembio.io/docs/bases2fastq/introduction/" + ) + log.debug(f"Missing SampleStats in RunStats.json. Available keys: {list(project_stats.keys())}.") + continue + + # Extract per sample polony counts and overall total counts + total_polonies = project_stats.get("NumPoloniesBeforeTrimming", 0) + for sample_data in project_stats["SampleStats"]: + sample_name = sample_data.get("SampleName") + sample_id = None + + if run_analysis_name and sample_name: + sample_id = "__".join([run_analysis_name, sample_name]) + + if "Occurrences" not in sample_data: + log.error(f"Missing data needed to extract index assignment for sample {sample_id}. Skipping.") + continue + + for occurrence in sample_data["Occurrences"]: + sample_expected_seq = occurrence.get("ExpectedSequence") + sample_counts = occurrence.get("NumPoloniesBeforeTrimming") + if any([element is None for element in [sample_expected_seq, sample_counts, sample_id]]): + log.error( + f"Missing data needed to extract index assignment for sample {sample_id}. Skipping." 
+ ) + continue + if run_analysis_name not in sample_to_index_assignment: + sample_to_index_assignment[run_analysis_name] = {} + if sample_expected_seq not in sample_to_index_assignment[run_analysis_name]: + sample_to_index_assignment[run_analysis_name][sample_expected_seq] = { + "SampleID": sample_id, + "Project": project, + "SamplePolonyCounts": 0, + "PercentOfPolonies": float("nan"), + "Index1": "", + "Index2": "", + } + sample_to_index_assignment[run_analysis_name][sample_expected_seq]["SamplePolonyCounts"] += ( + sample_counts + ) + + for sample_data in sample_to_index_assignment[run_analysis_name].values(): + if total_polonies > 0: + sample_data["PercentOfPolonies"] = round( + sample_data["SamplePolonyCounts"] / total_polonies * 100, 2 + ) + + run_manifest_data = None + with open(run_manifest) as _infile: + run_manifest_data = json.load(_infile) + + if "Samples" not in run_manifest_data: + log.warning( + f" section not found in {directory}/RunManifest.json.\n" + f"Skipping RunManifest sample index assignment metrics." + ) + elif len(sample_to_index_assignment) == 0: + log.warning("Index assignment data missing. Skipping creation of index assignment metrics.") + else: + for sample_data in run_manifest_data["Samples"]: + sample_name = sample_data.get("SampleName") + sample_id = None + if run_analysis_name is None or sample_name is None or "Indexes" not in sample_data: + continue + sample_id = "__".join([run_analysis_name, sample_name]) + for index_data in sample_data["Indexes"]: + index_1 = index_data.get("Index1", "") + index_2 = index_data.get("Index2", "") + merged_indices = f"{index_1}{index_2}" + if merged_indices not in sample_to_index_assignment[run_analysis_name]: + continue + if sample_id != sample_to_index_assignment[run_analysis_name][merged_indices]["SampleID"]: log.error( f"RunManifest SampleID <{sample_id}> does not match " f"RunStats SampleID {sample_to_index_assignment[run_analysis_name][merged_indices]['SampleID']}." " Skipping."
) continue - sample_to_index_assignment[merged_indices]["Index1"] = index_1 - sample_to_index_assignment[merged_indices]["Index2"] = index_2 + sample_to_index_assignment[run_analysis_name][merged_indices]["Index1"] = index_1 + sample_to_index_assignment[run_analysis_name][merged_indices]["Index2"] = index_2 return sample_to_index_assignment diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index ba51644b9f..cd63978dba 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -426,21 +426,40 @@ def tabulate_index_assignment_stats(run_data, color_dict): Tabulate general information and statistics of each run """ plot_content = dict() - sorted_run_data = natsorted(run_data.items(), key=lambda x: x[1]["SampleID"]) - for index, sample_data in enumerate(sorted_run_data, start=1): - sample_data = sample_data[1] - sample_index_stats = dict() - sample_index_stats.update({"sample_name": sample_data["SampleID"]}) - sample_index_stats.update({"index_1": sample_data["Index1"]}) - sample_index_stats.update({"index_2": sample_data["Index2"]}) - sample_index_stats.update({"assigned_polonies": sample_data["SamplePolonyCounts"]}) - sample_index_stats.update({"polony_percentage": sample_data["PercentOfPolonies"]}) - plot_content.update({index: sample_index_stats}) + run_names = sorted(run_data.keys()) + index = 1 + project_present = False + for run in run_names: + run_sample_data = run_data[run] + sorted_run_sample_data = natsorted(run_sample_data.items(), key=lambda x: x[1]["SampleID"]) + for sample_data in sorted_run_sample_data: + sample_data = sample_data[1] + sample_index_stats = dict() + sample_index_stats.update({"run_name": run}) + if "Project" in sample_data: + sample_index_stats.update({"project": sample_data["Project"]}) + project_present = True + sample_index_stats.update({"sample_name": sample_data["SampleID"].split("__")[1]}) + sample_index_stats.update({"index_1": 
sample_data["Index1"]}) + sample_index_stats.update({"index_2": sample_data["Index2"]}) + sample_index_stats.update({"assigned_polonies": sample_data["SamplePolonyCounts"]}) + sample_index_stats.update({"polony_percentage": sample_data["PercentOfPolonies"]}) + plot_content.update({index: sample_index_stats}) + index += 1 headers = {} + headers["run_name"] = { + "title": "Run Name", + "description": "Run Name.", + } + if project_present: + headers["project"] = { + "title": "Project", + "description": "Run Project.", + } headers["sample_name"] = { "title": "Sample Name", - "description": "Sample Name (RunID + Sample ID).", + "description": "Sample Name.", } headers["index_1"] = { "title": "Index 1", From 8709cff9ea4ea9b372e1fd0cdee6254180a79477 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Thu, 9 Oct 2025 15:40:48 -0700 Subject: [PATCH 28/29] Linting --- multiqc/modules/bases2fastq/bases2fastq.py | 8 +++----- multiqc/modules/bases2fastq/plot_runs.py | 6 +++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index 9e7d7b900a..c84c77da2c 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -576,7 +576,7 @@ def _parse_index_assignment(self, manifest_data_source: str) -> Dict[str, Any]: sample_to_index_assignment[run_analysis_name][merged_indices]["Index2"] = index_2 return sample_to_index_assignment - + def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any]: sample_to_index_assignment = {} @@ -648,9 +648,7 @@ def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any] sample_expected_seq = occurrence.get("ExpectedSequence") sample_counts = occurrence.get("NumPoloniesBeforeTrimming") if any([element is None for element in [sample_expected_seq, sample_counts, sample_id]]): - log.error( - f"Missing data needed to extract index assignment for sample {sample_id}. 
Skipping." - ) + log.error(f"Missing data needed to extract index assignment for sample {sample_id}. Skipping.") continue if run_analysis_name not in sample_to_index_assignment: sample_to_index_assignment[run_analysis_name] = {} @@ -672,7 +670,7 @@ def _parse_index_assignment_in_project(self, data_source: str) -> Dict[str, Any] sample_data["PercentOfPolonies"] = round( sample_data["SamplePolonyCounts"] / total_polonies * 100, 2 ) - + run_manifest_data = None with open(run_manifest) as _infile: run_manifest_data = json.load(_infile) diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index cd63978dba..87151b3baa 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -454,9 +454,9 @@ def tabulate_index_assignment_stats(run_data, color_dict): } if project_present: headers["project"] = { - "title": "Project", - "description": "Run Project.", - } + "title": "Project", + "description": "Run Project.", + } headers["sample_name"] = { "title": "Sample Name", "description": "Sample Name.", From 77e72ab8eec3ff4384692a4e5049930a26dd2938 Mon Sep 17 00:00:00 2001 From: Carlos Ruiz Date: Thu, 9 Oct 2025 20:40:30 -0700 Subject: [PATCH 29/29] Added run manifest for project --- multiqc/modules/bases2fastq/bases2fastq.py | 104 +++++++++++++++++++-- 1 file changed, 97 insertions(+), 7 deletions(-) diff --git a/multiqc/modules/bases2fastq/bases2fastq.py b/multiqc/modules/bases2fastq/bases2fastq.py index c84c77da2c..df4e29af77 100644 --- a/multiqc/modules/bases2fastq/bases2fastq.py +++ b/multiqc/modules/bases2fastq/bases2fastq.py @@ -9,6 +9,7 @@ from pathlib import Path from multiqc.base_module import BaseMultiqcModule, ModuleNoSamplesFound +from multiqc.types import LoadedFileDict from multiqc.utils import mqc_colour from multiqc.modules.bases2fastq.plot_runs import ( @@ -147,6 +148,7 @@ def __init__(self): run_data = self.project_level_data sample_data = self.project_level_samples 
samples_to_projects = self.project_level_samples_to_project + manifest_data = self._parse_run_manifest_in_project("bases2fastq/project") index_assigment_data = self._parse_index_assignment_in_project("bases2fastq/project") elif summary_path == "combined_level": run_data = self.run_level_data @@ -208,14 +210,13 @@ def __init__(self): tabulate_run_stats if summary_path in ["run_level", "combined_level"] else tabulate_project_stats ) self.add_run_plots(data=run_data, plot_functions=[qc_metrics_function]) - + self.add_run_plots( + data=manifest_data, + plot_functions=[ + tabulate_manifest_stats, + ], + ) if summary_path in ["run_level", "combined_level"]: - self.add_run_plots( - data=manifest_data, - plot_functions=[ - tabulate_manifest_stats, - ], - ) self.add_run_plots( data=index_assigment_data, plot_functions=[ @@ -391,6 +392,95 @@ def _parse_run_manifest(self, data_source: str) -> Dict[str, Any]: return runs_manifest_data + def _parse_run_manifest_in_project(self, data_source: str) -> Dict[str, Any]: + project_manifest_data = {} + + if data_source == "": + return project_manifest_data + + for f in self.find_log_files(data_source): + directory = f.get("root") + if not directory: + continue + + # Get RunName and RunID from RunParameters.json + run_manifest = Path(directory) / "../../RunManifest.json" + if not run_manifest.exists(): + log.error( + f"RunManifest.json could not be found in {run_manifest}. Skipping index assignment.\n" + "Please visit Elembio online documentation for more information - " + "https://docs.elembio.io/docs/bases2fastq/introduction/" + ) + continue + + project_stats = json.loads(f["f"]) + run_analysis_name = None + run_name = project_stats.get("RunName", None) + analysis_id = project_stats.get("AnalysisID", None) + + if run_name and analysis_id: + run_analysis_name = "-".join([run_name, analysis_id[0:4]]) + else: + log.error( + "Error with project's RunStats.json. 
Either RunName or AnalysisID is absent.\n"
+                    "Please visit Elembio online documentation for more information - "
+                    "https://docs.elembio.io/docs/bases2fastq/introduction/"
+                )
+                log.debug(f"Error in RunStats.json: {f['fn']}")
+                log.debug(f"Missing: RunName: {run_name} or AnalysisID: {analysis_id}")
+                continue
+
+            # skip run if in user-provided ignore list
+            if self.is_ignore_sample(run_analysis_name):
+                log.info(f"Skipping <{run_analysis_name}> because it is present in ignore list.")
+                continue
+
+            run_manifest_data = None
+            with open(run_manifest) as _infile:
+                run_manifest_data = json.load(_infile)
+
+            if "Settings" not in run_manifest_data:
+                log.warning(f"<Settings> section not found in {run_manifest}.\nSkipping RunManifest metrics.")
+            else:
+                for lane_data in run_manifest_data["Settings"]:
+                    lane_id = lane_data.get("Lane")
+                    if not lane_id:
+                        log.error("<Lane> not found in Settings section of RunManifest. Skipping lanes.")
+                        continue
+                    lane_name = f"L{lane_id}"
+                    run_lane = f"{run_analysis_name} | {lane_name}"
+                    project_manifest_data[run_lane] = {}
+
+                    indices = []
+                    indices_cycles = []
+                    mask_pattern = re.compile(r"^I\d+Mask$")
+                    matching_keys = [key for key in lane_data.keys() if mask_pattern.match(key)]
+                    for key in matching_keys:
+                        for mask_info in lane_data[key]:
+                            if mask_info["Read"] not in indices:
+                                indices.append(mask_info["Read"])
+                            indices_cycles.append(str(len(mask_info["Cycles"])))
+                    indexing = f"{' + '.join(indices_cycles)}
{' + '.join(indices)}" + project_manifest_data[run_lane]["Indexing"] = indexing + + project_manifest_data[run_lane]["AdapterTrimType"] = lane_data.get("AdapterTrimType", "N/A") + project_manifest_data[run_lane]["R1AdapterMinimumTrimmedLength"] = lane_data.get( + "R1AdapterMinimumTrimmedLength", "N/A" + ) + project_manifest_data[run_lane]["R2AdapterMinimumTrimmedLength"] = lane_data.get( + "R2AdapterMinimumTrimmedLength", "N/A" + ) + data_source_info: LoadedFileDict[Any] = { + "fn": str(run_manifest.name), + "root": str(run_manifest.parent), + "sp_key": data_source, + "s_name": str(run_manifest.with_suffix("").name), + "f": run_manifest_data, + } + self.add_data_source(f=data_source_info, s_name=run_analysis_name, module="bases2fastq") + + return project_manifest_data + def _parse_run_unassigned_sequences(self, data_source: str) -> Dict[str, Any]: run_unassigned_sequences = {} if data_source == "":