diff --git a/multiqc/modules/bases2fastq/plot_project_runs.py b/multiqc/modules/bases2fastq/plot_project_runs.py index 6a3663535d..e40c1fb141 100644 --- a/multiqc/modules/bases2fastq/plot_project_runs.py +++ b/multiqc/modules/bases2fastq/plot_project_runs.py @@ -22,11 +22,10 @@ def tabulate_project_run_stats(run_data, color_dict): headers = {} headers["num_polonies_run"] = { - "title": f"# Polonies ({config.base_count_prefix})", - "description": f"The total number of polonies that are calculated for the run ({config.base_count_desc})", + "title": "# Polonies", + "description": "The total number of polonies that are calculated for the run", "min": 0, "scale": "RdYlGn", - "shared_key": "base_count", } headers["percent_assigned_run"] = { "title": "% Assigned Reads", diff --git a/multiqc/modules/bases2fastq/plot_runs.py b/multiqc/modules/bases2fastq/plot_runs.py index accd532678..c2f88603eb 100644 --- a/multiqc/modules/bases2fastq/plot_runs.py +++ b/multiqc/modules/bases2fastq/plot_runs.py @@ -54,7 +54,7 @@ def plot_run_stats(run_data, color_dict): pconfig = { "data_labels": [ {"name": "Number of Polonies", "ylab": "Number of Polonies", "format": "{d}"}, - {"name": "Yield (Gb)", "ylab": "Gb"}, + {"name": "Yield (Gb)", "ylab": "Yield"}, ], "cpswitch": True, "stacking": "normal", @@ -99,11 +99,10 @@ def tabulate_run_stats(run_data, color_dict): headers = {} headers["num_polonies_run"] = { - "title": f"# Polonies ({config.base_count_prefix})", - "description": f"The total number of polonies that are calculated for the run. ({config.base_count_desc})", + "title": "# Polonies", + "description": "The total number of polonies that are calculated for the run.", "min": 0, "scale": "RdYlGn", - "shared_key": "base_count", } headers["percent_assigned_run"] = { "title": "% Assigned Reads", @@ -221,6 +220,7 @@ def plot_base_quality_hist(run_data, color_dict): "id": "per_run_bq_hist", "title": "bases2fastq: Quality Histograms", "ylab": "Percentage", + "xlab": "Q score", } plot_html = linegraph.plot(plot_content, pconfig=pconfig) plot_name = "Run Base Quality Histogram" @@ -347,10 +347,9 @@ def plot_base_quality_by_cycle(run_data, color_dict): plot_html = linegraph.plot(plot_content, pconfig=pconfig) plot_name = "Quality Metrics By Cycle" anchor = "per_cycle_quality" - description = "Per run base qualities by cycle" + description = "Per run base qualities by cycle. Read 1 and Read 2 are separated by a red dashed line." helptext = """ This section plots the base qualities by each instrument cycle.\n - Choose between Median Quality, Mean Quality, Percent Q30 or Percentage Q40 per cycle.\n - Read 1 and Read 2 are separated by a red dashed line. + Choose between Median Quality, Mean Quality, Percent Q30 or Percentage Q40 per cycle. """ return plot_html, plot_name, anchor, description, helptext, plot_content diff --git a/multiqc/modules/bases2fastq/plot_samples.py b/multiqc/modules/bases2fastq/plot_samples.py index 4cbfd71cf8..d8f977c24f 100644 --- a/multiqc/modules/bases2fastq/plot_samples.py +++ b/multiqc/modules/bases2fastq/plot_samples.py @@ -20,6 +20,7 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s general_stats.update({"mean_base_quality_sample": sample_data[s_name]["QualityScoreMean"]}) general_stats.update({"percent_q30_sample": sample_data[s_name]["PercentQ30"]}) general_stats.update({"percent_q40_sample": sample_data[s_name]["PercentQ40"]}) + general_stats.update({"percent_mismatch": sample_data[s_name]["PercentMismatch"]}) plot_content.update({s_name: general_stats}) headers = {} @@ -37,11 +38,10 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s "scale": False, } headers["num_polonies_sample"] = { - "title": f"# Polonies ({config.base_count_prefix})", - "description": f"The total number of polonies that are calculated for the run. ({config.base_count_desc})", + "title": "# Polonies", + "description": "The total number of polonies that are calculated for the run.", "min": 0, "scale": "Blues", - "shared_key": "base_count", } headers["yield_sample"] = { "title": "Yield (Gb)", @@ -70,6 +70,14 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s "scale": "RdYlGn", "suffix": "%", } + headers["percent_mismatch"] = { + "title": "Percent Mismatch", + "description": "The percentage of mismatching reads for the sample.", + "max": 100, + "min": 0, + "scale": "RdYlGn", + "suffix": "%", + } pconfig = {"id": "sample_qc_metric_table", "title": "Sample QC Metrics Table", "no_violin": True} @@ -100,7 +108,6 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c r1r2_split = 0 for s_name in sorted(sample_data.keys()): - paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False for base in "ACTG": base_s_name = "__".join([s_name, base]) data[base_s_name] = {} @@ -108,6 +115,8 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c r1r2_split = max(r1r2_split, len(R1)) for s_name in sorted(sample_data.keys()): + paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False + R1 = sample_data[s_name]["Reads"][0]["Cycles"] for cycle in range(len(R1)): base_no = cycle + 1 @@ -135,8 +144,8 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c plot_content = data pconfig = { - "xlab": "cycle", - "ylab": "Percentage", + "xlab": "Cycle", + "ylab": "Percentage of total reads", "x_lines": [{"color": "#FF0000", "width": 2, "value": r1r2_split, "dashStyle": "dash"}], "colors": color_dict, "ymin": 0, @@ -147,8 +156,8 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c plot_name = "Per Cycle Base Content" anchor = "base_content" description = """ - Percentage of unidentified bases ("N" bases) by each sequencing cycle. - Read 1 and Read 2 are separated by a red dashed line + Base composition per sample per cycle. + Read 1 and Read 2 are separated by a red dashed line. """ helptext = """ If a sequencer is unable to make a base call with sufficient confidence then it will @@ -252,8 +261,8 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s plot_content = gc_hist_dict pconfig = { - "xlab": "% GC", - "ylab": "Percentage", + "xlab": "Percentage of total reads", + "ylab": "Percentage of reads that are GC", "colors": sample_color, "id": "gc_hist", "title": "bases2fastq: Per Sample GC Content Histogram", @@ -323,7 +332,10 @@ def plot_adapter_content(sample_data, group_lookup_dict, project_lookup_dict, sa pconfig.update({"colors": sample_color}) plot_html = linegraph.plot(plot_content, pconfig=pconfig) anchor = "adapter_content" - description = "Adapter content per cycle" + description = """ + Adapter content per cycle. + Read 1 and Read 2 are separated by a red dashed line. + """ helptext = """ The plot shows a cumulative percentage count of the proportion of your library which has seen each of the adapter sequences at each cycle.