From 8df721ec2880b7de2192ddf0ae18ce6e1c163461 Mon Sep 17 00:00:00 2001
From: Guillaume Poirier-Morency <poirigui@msl.ubc.ca>
Date: Tue, 24 Feb 2026 10:39:03 -0800
Subject: [PATCH 1/6] Allow DEA to be performed on a subset of the samples

---
 .../main/config/bash_completion.d/gemma-cli   |   4 +-
 .../bash_completion.d/gemma-cli-staging       |   4 +-
 .../fish/completions/gemma-cli-staging.fish   |   6 +-
 .../config/fish/completions/gemma-cli.fish    |   6 +-
 .../DifferentialExpressionAnalysisCli.java    |  91 +-
 .../gemma/cli/completion/CompletionType.java  |   1 +
 .../ubic/gemma/cli/util/EntityLocator.java    |  30 +-
 .../gemma/cli/util/EntityLocatorImpl.java     |  84 +-
 .../DifferentialExpressionAnalysisConfig.java |  15 +
 .../expression/diff/LinearModelAnalyzer.java  | 808 +++++++++---------
 .../experiment/ExpressionExperimentDao.java   |   4 +
 .../ExpressionExperimentDaoImpl.java          |  34 +
 .../ExpressionExperimentService.java          |  17 +
 .../ExpressionExperimentServiceImpl.java      |  12 +
 14 files changed, 662 insertions(+), 454 deletions(-)

diff --git a/gemma-cli/src/main/config/bash_completion.d/gemma-cli b/gemma-cli/src/main/config/bash_completion.d/gemma-cli
index 8f477c9d26..99aa9ccd32 100644
--- a/gemma-cli/src/main/config/bash_completion.d/gemma-cli
+++ b/gemma-cli/src/main/config/bash_completion.d/gemma-cli
@@ -251,7 +251,7 @@ function __gemma_cli_complete() {
     fi
     if [[ " $words " =~ ' ubic.gemma.apps.DifferentialExpressionAnalysisCli ' ]]; then
         if ! [[ "$current_option" =~ (--batch-format|--batch-output-file|--batch-report-frequency|--delete-analysis|--delete-subset|--eeListfile|--excludeEEFile|--experiment|--experiment-set|--expressionQuery|--factors|--filter-minimum-number-of-cells-per-gene|--filter-minimum-number-of-cells-per-sample|--filter-minimum-variance|--filter-repetitive-values-minimum-number-of-samples|--filter-repetitive-values-minimum-unique-values|--filter-repetitive-values-mode|--output-dir|--redo-analysis|--redo-subset|--subset|--taxon|--type|-batchFormat|-batchOutputFile|-batchReportFrequency|-d|-deleteAnalysis|-deleteSubset|-e|-eeset|-f|-factors|-filterMinNumberOfCellsPerGene|-filterMinNumberOfCellsPerSample|-filterMinVariance|-filterRepetitiveValuesMinSamples|-filterRepetitiveValuesMinUniqueValues|-filterRepetitiveValuesMode|-mdate|-q|-redoAnalysis|-redoSubset|-subset|-t|-type|-x) ]]; then
-            mapfile -t -O "${#COMPREPLY[@]}" COMPREPLY < <(compgen -W "--batch-format --batch-output-file --batch-report-frequency --complete-analyses --complete-factors --complete-subset-factors --complete-subsets --delete --delete-analysis --delete-subset --eeListfile --excludeEEFile --experiment --experiment-set --expressionQuery --factors --filter-minimum-number-of-cells-per-gene --filter-minimum-number-of-cells-per-sample --filter-minimum-variance --filter-repetitive-values-minimum-number-of-samples --filter-repetitive-values-minimum-unique-values --filter-repetitive-values-mode --force --help --ignore-failing-subsets --no-bayes --no-db --no-files --output-dir --redo --redo-analysis --redo-subset --subset --taxon --type --use-batch-factor -all -auto -batchFormat -batchOutputFile -batchReportFrequency -completeAnalyses -completeFactors -completeSubsetFactors -completeSubsets -d -delete -deleteAnalysis -deleteSubset -e -eeset -f -factors -filterMinNumberOfCellsPerGene -filterMinNumberOfCellsPerSample -filterMinVariance -filterRepetitiveValuesMinSamples -filterRepetitiveValuesMinUniqueValues -filterRepetitiveValuesMode -force -h -ignoreFailingSubsets -mdate -nobayes -nodb -nofiles -q -redo -redoAnalysis -redoSubset -subset -t -type -usebatch -x" -- "$2")
+            mapfile -t -O "${#COMPREPLY[@]}" COMPREPLY < <(compgen -W "--batch-format --batch-output-file --batch-report-frequency --complete-analyses --complete-factors --complete-subset-factors --complete-subsets --delete --delete-analysis --delete-subset --eeListfile --excludeEEFile --experiment --experiment-set --expressionQuery --factors --filter-minimum-number-of-cells-per-gene --filter-minimum-number-of-cells-per-sample --filter-minimum-variance --filter-repetitive-values-minimum-number-of-samples --filter-repetitive-values-minimum-unique-values --filter-repetitive-values-mode --force --help --ignore-failing-subsets --no-bayes --no-db --no-files --output-dir --redo --redo-analysis --redo-subset --samples --subset --taxon --type --use-batch-factor -all -auto -batchFormat -batchOutputFile -batchReportFrequency -completeAnalyses -completeFactors -completeSubsetFactors -completeSubsets -d -delete -deleteAnalysis -deleteSubset -e -eeset -f -factors -filterMinNumberOfCellsPerGene -filterMinNumberOfCellsPerSample -filterMinVariance -filterRepetitiveValuesMinSamples -filterRepetitiveValuesMinUniqueValues -filterRepetitiveValuesMode -force -h -ignoreFailingSubsets -mdate -nobayes -nodb -nofiles -q -redo -redoAnalysis -redoSubset -samples -subset -t -type -usebatch -x" -- "$2")
         fi
         if [[ "$current_option" =~ (--batch-output-file|--eeListfile|--excludeEEFile|--output-dir|-batchOutputFile|-d|-f|-x) ]]; then
             mapfile -t -O "${#COMPREPLY[@]}" COMPREPLY < <(compgen -f -- "$2")
@@ -259,7 +259,7 @@ function __gemma_cli_complete() {
     fi
     if [[ " $words " =~ ' diffExAnalyze ' ]]; then
         if ! [[ "$current_option" =~ (--batch-format|--batch-output-file|--batch-report-frequency|--delete-analysis|--delete-subset|--eeListfile|--excludeEEFile|--experiment|--experiment-set|--expressionQuery|--factors|--filter-minimum-number-of-cells-per-gene|--filter-minimum-number-of-cells-per-sample|--filter-minimum-variance|--filter-repetitive-values-minimum-number-of-samples|--filter-repetitive-values-minimum-unique-values|--filter-repetitive-values-mode|--output-dir|--redo-analysis|--redo-subset|--subset|--taxon|--type|-batchFormat|-batchOutputFile|-batchReportFrequency|-d|-deleteAnalysis|-deleteSubset|-e|-eeset|-f|-factors|-filterMinNumberOfCellsPerGene|-filterMinNumberOfCellsPerSample|-filterMinVariance|-filterRepetitiveValuesMinSamples|-filterRepetitiveValuesMinUniqueValues|-filterRepetitiveValuesMode|-mdate|-q|-redoAnalysis|-redoSubset|-subset|-t|-type|-x) ]]; then
-            mapfile -t -O "${#COMPREPLY[@]}" COMPREPLY < <(compgen -W "--batch-format --batch-output-file --batch-report-frequency --complete-analyses --complete-factors --complete-subset-factors --complete-subsets --delete --delete-analysis --delete-subset --eeListfile --excludeEEFile --experiment --experiment-set --expressionQuery --factors --filter-minimum-number-of-cells-per-gene --filter-minimum-number-of-cells-per-sample --filter-minimum-variance --filter-repetitive-values-minimum-number-of-samples --filter-repetitive-values-minimum-unique-values --filter-repetitive-values-mode --force --help --ignore-failing-subsets --no-bayes --no-db --no-files --output-dir --redo --redo-analysis --redo-subset --subset --taxon --type --use-batch-factor -all -auto -batchFormat -batchOutputFile -batchReportFrequency -completeAnalyses -completeFactors -completeSubsetFactors -completeSubsets -d -delete -deleteAnalysis -deleteSubset -e -eeset -f -factors -filterMinNumberOfCellsPerGene -filterMinNumberOfCellsPerSample -filterMinVariance -filterRepetitiveValuesMinSamples -filterRepetitiveValuesMinUniqueValues -filterRepetitiveValuesMode -force -h -ignoreFailingSubsets -mdate -nobayes -nodb -nofiles -q -redo -redoAnalysis -redoSubset -subset -t -type -usebatch -x" -- "$2")
+            mapfile -t -O "${#COMPREPLY[@]}" COMPREPLY < <(compgen -W "--batch-format --batch-output-file --batch-report-frequency --complete-analyses --complete-factors --complete-subset-factors --complete-subsets --delete --delete-analysis --delete-subset --eeListfile --excludeEEFile --experiment --experiment-set --expressionQuery --factors --filter-minimum-number-of-cells-per-gene --filter-minimum-number-of-cells-per-sample --filter-minimum-variance --filter-repetitive-values-minimum-number-of-samples --filter-repetitive-values-minimum-unique-values --filter-repetitive-values-mode --force --help --ignore-failing-subsets --no-bayes --no-db --no-files --output-dir --redo --redo-analysis --redo-subset --samples --subset --taxon --type --use-batch-factor -all -auto -batchFormat -batchOutputFile -batchReportFrequency -completeAnalyses -completeFactors -completeSubsetFactors -completeSubsets -d -delete -deleteAnalysis -deleteSubset -e -eeset -f -factors -filterMinNumberOfCellsPerGene -filterMinNumberOfCellsPerSample -filterMinVariance -filterRepetitiveValuesMinSamples -filterRepetitiveValuesMinUniqueValues -filterRepetitiveValuesMode -force -h -ignoreFailingSubsets -mdate -nobayes -nodb -nofiles -q -redo -redoAnalysis -redoSubset -samples -subset -t -type -usebatch -x" -- "$2")
         fi
         if [[ "$current_option" =~ (--batch-output-file|--eeListfile|--excludeEEFile|--output-dir|-batchOutputFile|-d|-f|-x) ]]; then
             mapfile -t -O "${#COMPREPLY[@]}" COMPREPLY < <(compgen -f -- "$2")
diff --git a/gemma-cli/src/main/config/bash_completion.d/gemma-cli-staging b/gemma-cli/src/main/config/bash_completion.d/gemma-cli-staging
index 31c56d042e..200f0ff436 100644
--- a/gemma-cli/src/main/config/bash_completion.d/gemma-cli-staging
+++ b/gemma-cli/src/main/config/bash_completion.d/gemma-cli-staging
@@ -251,7 +251,7 @@ function __gemma_cli_staging_complete() {
     fi
     if [[ " $words " =~ ' ubic.gemma.apps.DifferentialExpressionAnalysisCli ' ]]; then
         if ! [[ "$current_option" =~ (--batch-format|--batch-output-file|--batch-report-frequency|--delete-analysis|--delete-subset|--eeListfile|--excludeEEFile|--experiment|--experiment-set|--expressionQuery|--factors|--filter-minimum-number-of-cells-per-gene|--filter-minimum-number-of-cells-per-sample|--filter-minimum-variance|--filter-repetitive-values-minimum-number-of-samples|--filter-repetitive-values-minimum-unique-values|--filter-repetitive-values-mode|--output-dir|--redo-analysis|--redo-subset|--subset|--taxon|--type|-batchFormat|-batchOutputFile|-batchReportFrequency|-d|-deleteAnalysis|-deleteSubset|-e|-eeset|-f|-factors|-filterMinNumberOfCellsPerGene|-filterMinNumberOfCellsPerSample|-filterMinVariance|-filterRepetitiveValuesMinSamples|-filterRepetitiveValuesMinUniqueValues|-filterRepetitiveValuesMode|-mdate|-q|-redoAnalysis|-redoSubset|-subset|-t|-type|-x) ]]; then
-            mapfile -t -O "${#COMPREPLY[@]}" COMPREPLY < <(compgen -W "--batch-format --batch-output-file --batch-report-frequency --complete-analyses --complete-factors --complete-subset-factors --complete-subsets --delete --delete-analysis --delete-subset --eeListfile --excludeEEFile --experiment --experiment-set --expressionQuery --factors --filter-minimum-number-of-cells-per-gene --filter-minimum-number-of-cells-per-sample --filter-minimum-variance --filter-repetitive-values-minimum-number-of-samples --filter-repetitive-values-minimum-unique-values --filter-repetitive-values-mode --force --help --ignore-failing-subsets --no-bayes --no-db --no-files --output-dir --redo --redo-analysis --redo-subset --subset --taxon --type --use-batch-factor -all -auto -batchFormat -batchOutputFile -batchReportFrequency -completeAnalyses -completeFactors -completeSubsetFactors -completeSubsets -d -delete -deleteAnalysis -deleteSubset -e -eeset -f -factors -filterMinNumberOfCellsPerGene -filterMinNumberOfCellsPerSample -filterMinVariance -filterRepetitiveValuesMinSamples -filterRepetitiveValuesMinUniqueValues -filterRepetitiveValuesMode -force -h -ignoreFailingSubsets -mdate -nobayes -nodb -nofiles -q -redo -redoAnalysis -redoSubset -subset -t -type -usebatch -x" -- "$2")
+            mapfile -t -O "${#COMPREPLY[@]}" COMPREPLY < <(compgen -W "--batch-format --batch-output-file --batch-report-frequency --complete-analyses --complete-factors --complete-subset-factors --complete-subsets --delete --delete-analysis --delete-subset --eeListfile --excludeEEFile --experiment --experiment-set --expressionQuery --factors --filter-minimum-number-of-cells-per-gene --filter-minimum-number-of-cells-per-sample --filter-minimum-variance --filter-repetitive-values-minimum-number-of-samples --filter-repetitive-values-minimum-unique-values --filter-repetitive-values-mode --force --help --ignore-failing-subsets --no-bayes --no-db --no-files --output-dir --redo --redo-analysis --redo-subset --samples --subset --taxon --type --use-batch-factor -all -auto -batchFormat -batchOutputFile -batchReportFrequency -completeAnalyses -completeFactors -completeSubsetFactors -completeSubsets -d -delete -deleteAnalysis -deleteSubset -e -eeset -f -factors -filterMinNumberOfCellsPerGene -filterMinNumberOfCellsPerSample -filterMinVariance -filterRepetitiveValuesMinSamples -filterRepetitiveValuesMinUniqueValues -filterRepetitiveValuesMode -force -h -ignoreFailingSubsets -mdate -nobayes -nodb -nofiles -q -redo -redoAnalysis -redoSubset -samples -subset -t -type -usebatch -x" -- "$2")
         fi
         if [[ "$current_option" =~ (--batch-output-file|--eeListfile|--excludeEEFile|--output-dir|-batchOutputFile|-d|-f|-x) ]]; then
             mapfile -t -O "${#COMPREPLY[@]}" COMPREPLY < <(compgen -f -- "$2")
@@ -259,7 +259,7 @@ function __gemma_cli_staging_complete() {
     fi
     if [[ " $words " =~ ' diffExAnalyze ' ]]; then
         if ! [[ "$current_option" =~ (--batch-format|--batch-output-file|--batch-report-frequency|--delete-analysis|--delete-subset|--eeListfile|--excludeEEFile|--experiment|--experiment-set|--expressionQuery|--factors|--filter-minimum-number-of-cells-per-gene|--filter-minimum-number-of-cells-per-sample|--filter-minimum-variance|--filter-repetitive-values-minimum-number-of-samples|--filter-repetitive-values-minimum-unique-values|--filter-repetitive-values-mode|--output-dir|--redo-analysis|--redo-subset|--subset|--taxon|--type|-batchFormat|-batchOutputFile|-batchReportFrequency|-d|-deleteAnalysis|-deleteSubset|-e|-eeset|-f|-factors|-filterMinNumberOfCellsPerGene|-filterMinNumberOfCellsPerSample|-filterMinVariance|-filterRepetitiveValuesMinSamples|-filterRepetitiveValuesMinUniqueValues|-filterRepetitiveValuesMode|-mdate|-q|-redoAnalysis|-redoSubset|-subset|-t|-type|-x) ]]; then
-            mapfile -t -O "${#COMPREPLY[@]}" COMPREPLY < <(compgen -W "--batch-format --batch-output-file --batch-report-frequency --complete-analyses --complete-factors --complete-subset-factors --complete-subsets --delete --delete-analysis --delete-subset --eeListfile --excludeEEFile --experiment --experiment-set --expressionQuery --factors --filter-minimum-number-of-cells-per-gene --filter-minimum-number-of-cells-per-sample --filter-minimum-variance --filter-repetitive-values-minimum-number-of-samples --filter-repetitive-values-minimum-unique-values --filter-repetitive-values-mode --force --help --ignore-failing-subsets --no-bayes --no-db --no-files --output-dir --redo --redo-analysis --redo-subset --subset --taxon --type --use-batch-factor -all -auto -batchFormat -batchOutputFile -batchReportFrequency -completeAnalyses -completeFactors -completeSubsetFactors -completeSubsets -d -delete -deleteAnalysis -deleteSubset -e -eeset -f -factors -filterMinNumberOfCellsPerGene -filterMinNumberOfCellsPerSample -filterMinVariance -filterRepetitiveValuesMinSamples -filterRepetitiveValuesMinUniqueValues -filterRepetitiveValuesMode -force -h -ignoreFailingSubsets -mdate -nobayes -nodb -nofiles -q -redo -redoAnalysis -redoSubset -subset -t -type -usebatch -x" -- "$2")
+            mapfile -t -O "${#COMPREPLY[@]}" COMPREPLY < <(compgen -W "--batch-format --batch-output-file --batch-report-frequency --complete-analyses --complete-factors --complete-subset-factors --complete-subsets --delete --delete-analysis --delete-subset --eeListfile --excludeEEFile --experiment --experiment-set --expressionQuery --factors --filter-minimum-number-of-cells-per-gene --filter-minimum-number-of-cells-per-sample --filter-minimum-variance --filter-repetitive-values-minimum-number-of-samples --filter-repetitive-values-minimum-unique-values --filter-repetitive-values-mode --force --help --ignore-failing-subsets --no-bayes --no-db --no-files --output-dir --redo --redo-analysis --redo-subset --samples --subset --taxon --type --use-batch-factor -all -auto -batchFormat -batchOutputFile -batchReportFrequency -completeAnalyses -completeFactors -completeSubsetFactors -completeSubsets -d -delete -deleteAnalysis -deleteSubset -e -eeset -f -factors -filterMinNumberOfCellsPerGene -filterMinNumberOfCellsPerSample -filterMinVariance -filterRepetitiveValuesMinSamples -filterRepetitiveValuesMinUniqueValues -filterRepetitiveValuesMode -force -h -ignoreFailingSubsets -mdate -nobayes -nodb -nofiles -q -redo -redoAnalysis -redoSubset -samples -subset -t -type -usebatch -x" -- "$2")
         fi
         if [[ "$current_option" =~ (--batch-output-file|--eeListfile|--excludeEEFile|--output-dir|-batchOutputFile|-d|-f|-x) ]]; then
             mapfile -t -O "${#COMPREPLY[@]}" COMPREPLY < <(compgen -f -- "$2")
diff --git a/gemma-cli/src/main/config/fish/completions/gemma-cli-staging.fish b/gemma-cli/src/main/config/fish/completions/gemma-cli-staging.fish
index 61297964dd..cae2d14408 100644
--- a/gemma-cli/src/main/config/fish/completions/gemma-cli-staging.fish
+++ b/gemma-cli/src/main/config/fish/completions/gemma-cli-staging.fish
@@ -536,12 +536,13 @@ complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.Di
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o type -l type -r -a '(echo -e "GENERICLM\tgeneric linear model without interaction\nOSTTEST\tone-sample t-test\nOWA\tone-way ANOVA\nTTEST\ttwo-sample t-test\nTWO_WAY_ANOVA_WITH_INTERACTION\ttwo-way ANOVA with interaction\nTWO_WAY_ANOVA_NO_INTERACTION\ttwo-way ANOVA without interaction" 2>/dev/null)' -f --description 'Type of analysis to perform. If omitted, the system will try to guess based on the experimental design.\nThis option is not available when processing more than one experiment. Possible values are: GENERICLM, OSTTEST, OWA, TTEST, TWO_WAY_ANOVA_WITH_INTERACTION, TWO_WAY_ANOVA_NO_INTERACTION.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o factors -l factors -r -f --description 'ID numbers, categories or names of the factor(s) to use, comma-delimited, with spaces '"'"' '"'"' and colons '"'"':'"'"' replaced by underscores '"'"'_'"'"'. Interaction can be specified by using '"'"':'"'"' as a delimiter (e.g. '"'"'factor1:factor2'"'"'). Only categorical factors can be used in interactions. Multiple factors can be provided using comma-delimited IDs or by passing the option multiple times. If omitted, factors will be selected automatically. This is incompatible with -redo,--redo, -redoAnalysis,--redo-analysis or -redoSubset,--redo-subset.\nThis option is not available when processing more than one experiment.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o subset -l subset -r -f --description 'ID number, category or name of the factor to use for subsetting the analysis. The factor must be categorical. If used without specifying  -factors,--factors, factors will be selected automatically among the remaining one in the design. If the experiment already has subsets for the factor, those will be reused. This is incompatible with -redo,--redo, -redoAnalysis,--redo-analysis or -redoSubset,--redo-subset.\nThis option is not available when processing more than one experiment.'
+complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o samples -l samples -f --description 'ID, name or accession of samples to be included in the analysis. Defaults to all samples in the dataset being analyzed. Requires the -nodb,--no-db option to be set.\nThis option is not available when processing more than one experiment.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o usebatch -l use-batch-factor -f --description 'If a batch factor is available, use it. Otherwise, batch information can/will be ignored in the analysis. This is incompatible with -factors,--factors, -redo,--redo, -redoAnalysis,--redo-analysis and -redoSubset,--redo-subset.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o nobayes -l no-bayes -f --description 'Do not apply empirical-Bayes moderated statistics. Default is to use eBayes.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o ignoreFailingSubsets -l ignore-failing-subsets -f --description 'Ignore failing subsets and continue processing other subsets. Requires the -subset,--subset option to be set or -redo,--redo option with existing subset analyses.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -s d -l output-dir -r -F --description 'Write diff. ex. archive files inside the given directory. Defaults to the current directory of no other destination is selected.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o nodb -l no-db -f --description 'Do not persist diff. ex. results to the database and instead save them to the current directory (or the location defined by -d,--output-dir).'
-complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o nofiles -l no-files -f --description 'Don'"'"'t create archive files after analysis. Default is to make them. This is incompatible with -nodb,--no-db.'
+complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o nofiles -l no-files -f --description 'Don'"'"'t create archive files after analysis. Default is to make them. This is incompatible with -nodb,--no-db option to be set.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o redo -l redo -f --description 'Re-run all analyses for the experiment. Try to base analysis on previous analysis'"'"'s choice of statistical model.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o redoAnalysis -l redo-analysis -r -f --description 'Re-run a specific analysis for the experiment. Try to base analysis on previous analysis'"'"'s choice of statistical model. Multiple analysis can be provided using comma-delimited IDs or by passing the option multiple times.\nThis option is not available when processing more than one experiment.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o redoSubset -l redo-subset -r -f --description 'Re-run all analyses for a subset of the experiment. Try to base analysis on previous analysis'"'"'s choice of statistical model. Multiple subsets can be provided using comma-delimited IDs or by passing the option multiple times.\nThis option is not available when processing more than one experiment.'
@@ -577,12 +578,13 @@ complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o type -l type -r -a '(echo -e "GENERICLM\tgeneric linear model without interaction\nOSTTEST\tone-sample t-test\nOWA\tone-way ANOVA\nTTEST\ttwo-sample t-test\nTWO_WAY_ANOVA_WITH_INTERACTION\ttwo-way ANOVA with interaction\nTWO_WAY_ANOVA_NO_INTERACTION\ttwo-way ANOVA without interaction" 2>/dev/null)' -f --description 'Type of analysis to perform. If omitted, the system will try to guess based on the experimental design.\nThis option is not available when processing more than one experiment. Possible values are: GENERICLM, OSTTEST, OWA, TTEST, TWO_WAY_ANOVA_WITH_INTERACTION, TWO_WAY_ANOVA_NO_INTERACTION.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o factors -l factors -r -f --description 'ID numbers, categories or names of the factor(s) to use, comma-delimited, with spaces '"'"' '"'"' and colons '"'"':'"'"' replaced by underscores '"'"'_'"'"'. Interaction can be specified by using '"'"':'"'"' as a delimiter (e.g. '"'"'factor1:factor2'"'"'). Only categorical factors can be used in interactions. Multiple factors can be provided using comma-delimited IDs or by passing the option multiple times. If omitted, factors will be selected automatically. This is incompatible with -redo,--redo, -redoAnalysis,--redo-analysis or -redoSubset,--redo-subset.\nThis option is not available when processing more than one experiment.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o subset -l subset -r -f --description 'ID number, category or name of the factor to use for subsetting the analysis. The factor must be categorical. If used without specifying  -factors,--factors, factors will be selected automatically among the remaining one in the design. If the experiment already has subsets for the factor, those will be reused. This is incompatible with -redo,--redo, -redoAnalysis,--redo-analysis or -redoSubset,--redo-subset.\nThis option is not available when processing more than one experiment.'
+complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o samples -l samples -f --description 'ID, name or accession of samples to be included in the analysis. Defaults to all samples in the dataset being analyzed. Requires the -nodb,--no-db option to be set.\nThis option is not available when processing more than one experiment.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o usebatch -l use-batch-factor -f --description 'If a batch factor is available, use it. Otherwise, batch information can/will be ignored in the analysis. This is incompatible with -factors,--factors, -redo,--redo, -redoAnalysis,--redo-analysis and -redoSubset,--redo-subset.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o nobayes -l no-bayes -f --description 'Do not apply empirical-Bayes moderated statistics. Default is to use eBayes.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o ignoreFailingSubsets -l ignore-failing-subsets -f --description 'Ignore failing subsets and continue processing other subsets. Requires the -subset,--subset option to be set or -redo,--redo option with existing subset analyses.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -s d -l output-dir -r -F --description 'Write diff. ex. archive files inside the given directory. Defaults to the current directory of no other destination is selected.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o nodb -l no-db -f --description 'Do not persist diff. ex. results to the database and instead save them to the current directory (or the location defined by -d,--output-dir).'
-complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o nofiles -l no-files -f --description 'Don'"'"'t create archive files after analysis. Default is to make them. This is incompatible with -nodb,--no-db.'
+complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o nofiles -l no-files -f --description 'Don'"'"'t create archive files after analysis. Default is to make them. This is incompatible with -nodb,--no-db option to be set.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o redo -l redo -f --description 'Re-run all analyses for the experiment. Try to base analysis on previous analysis'"'"'s choice of statistical model.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o redoAnalysis -l redo-analysis -r -f --description 'Re-run a specific analysis for the experiment. Try to base analysis on previous analysis'"'"'s choice of statistical model. Multiple analysis can be provided using comma-delimited IDs or by passing the option multiple times.\nThis option is not available when processing more than one experiment.'
 complete -c gemma-cli-staging -n '__fish_seen_subcommand_from diffExAnalyze' -o redoSubset -l redo-subset -r -f --description 'Re-run all analyses for a subset of the experiment. Try to base analysis on previous analysis'"'"'s choice of statistical model. Multiple subsets can be provided using comma-delimited IDs or by passing the option multiple times.\nThis option is not available when processing more than one experiment.'
diff --git a/gemma-cli/src/main/config/fish/completions/gemma-cli.fish b/gemma-cli/src/main/config/fish/completions/gemma-cli.fish
index 446ec8365d..ad06251c76 100644
--- a/gemma-cli/src/main/config/fish/completions/gemma-cli.fish
+++ b/gemma-cli/src/main/config/fish/completions/gemma-cli.fish
@@ -536,12 +536,13 @@ complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.Differenti
 complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o type -l type -r -a '(echo -e "GENERICLM\tgeneric linear model without interaction\nOSTTEST\tone-sample t-test\nOWA\tone-way ANOVA\nTTEST\ttwo-sample t-test\nTWO_WAY_ANOVA_WITH_INTERACTION\ttwo-way ANOVA with interaction\nTWO_WAY_ANOVA_NO_INTERACTION\ttwo-way ANOVA without interaction" 2>/dev/null)' -f --description 'Type of analysis to perform. If omitted, the system will try to guess based on the experimental design.\nThis option is not available when processing more than one experiment. Possible values are: GENERICLM, OSTTEST, OWA, TTEST, TWO_WAY_ANOVA_WITH_INTERACTION, TWO_WAY_ANOVA_NO_INTERACTION.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o factors -l factors -r -f --description 'ID numbers, categories or names of the factor(s) to use, comma-delimited, with spaces '"'"' '"'"' and colons '"'"':'"'"' replaced by underscores '"'"'_'"'"'. Interaction can be specified by using '"'"':'"'"' as a delimiter (e.g. '"'"'factor1:factor2'"'"'). Only categorical factors can be used in interactions. Multiple factors can be provided using comma-delimited IDs or by passing the option multiple times. If omitted, factors will be selected automatically. This is incompatible with -redo,--redo, -redoAnalysis,--redo-analysis or -redoSubset,--redo-subset.\nThis option is not available when processing more than one experiment.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o subset -l subset -r -f --description 'ID number, category or name of the factor to use for subsetting the analysis. The factor must be categorical. If used without specifying  -factors,--factors, factors will be selected automatically among the remaining one in the design. If the experiment already has subsets for the factor, those will be reused. This is incompatible with -redo,--redo, -redoAnalysis,--redo-analysis or -redoSubset,--redo-subset.\nThis option is not available when processing more than one experiment.'
+complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o samples -l samples -f --description 'ID, name or accession of samples to be included in the analysis. Defaults to all samples in the dataset being analyzed. Requires the -nodb,--no-db option to be set.\nThis option is not available when processing more than one experiment.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o usebatch -l use-batch-factor -f --description 'If a batch factor is available, use it. Otherwise, batch information can/will be ignored in the analysis. This is incompatible with -factors,--factors, -redo,--redo, -redoAnalysis,--redo-analysis and -redoSubset,--redo-subset.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o nobayes -l no-bayes -f --description 'Do not apply empirical-Bayes moderated statistics. Default is to use eBayes.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o ignoreFailingSubsets -l ignore-failing-subsets -f --description 'Ignore failing subsets and continue processing other subsets. Requires the -subset,--subset option to be set or -redo,--redo option with existing subset analyses.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -s d -l output-dir -r -F --description 'Write diff. ex. archive files inside the given directory. Defaults to the current directory of no other destination is selected.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o nodb -l no-db -f --description 'Do not persist diff. ex. results to the database and instead save them to the current directory (or the location defined by -d,--output-dir).'
-complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o nofiles -l no-files -f --description 'Don'"'"'t create archive files after analysis. Default is to make them. This is incompatible with -nodb,--no-db.'
+complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o nofiles -l no-files -f --description 'Don'"'"'t create archive files after analysis. Default is to make them. This is incompatible with -nodb,--no-db option to be set.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o redo -l redo -f --description 'Re-run all analyses for the experiment. Try to base analysis on previous analysis'"'"'s choice of statistical model.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o redoAnalysis -l redo-analysis -r -f --description 'Re-run a specific analysis for the experiment. Try to base analysis on previous analysis'"'"'s choice of statistical model. Multiple analysis can be provided using comma-delimited IDs or by passing the option multiple times.\nThis option is not available when processing more than one experiment.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from ubic.gemma.apps.DifferentialExpressionAnalysisCli' -o redoSubset -l redo-subset -r -f --description 'Re-run all analyses for a subset of the experiment. Try to base analysis on previous analysis'"'"'s choice of statistical model. Multiple subsets can be provided using comma-delimited IDs or by passing the option multiple times.\nThis option is not available when processing more than one experiment.'
@@ -577,12 +578,13 @@ complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o force -l
 complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o type -l type -r -a '(echo -e "GENERICLM\tgeneric linear model without interaction\nOSTTEST\tone-sample t-test\nOWA\tone-way ANOVA\nTTEST\ttwo-sample t-test\nTWO_WAY_ANOVA_WITH_INTERACTION\ttwo-way ANOVA with interaction\nTWO_WAY_ANOVA_NO_INTERACTION\ttwo-way ANOVA without interaction" 2>/dev/null)' -f --description 'Type of analysis to perform. If omitted, the system will try to guess based on the experimental design.\nThis option is not available when processing more than one experiment. Possible values are: GENERICLM, OSTTEST, OWA, TTEST, TWO_WAY_ANOVA_WITH_INTERACTION, TWO_WAY_ANOVA_NO_INTERACTION.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o factors -l factors -r -f --description 'ID numbers, categories or names of the factor(s) to use, comma-delimited, with spaces '"'"' '"'"' and colons '"'"':'"'"' replaced by underscores '"'"'_'"'"'. Interaction can be specified by using '"'"':'"'"' as a delimiter (e.g. '"'"'factor1:factor2'"'"'). Only categorical factors can be used in interactions. Multiple factors can be provided using comma-delimited IDs or by passing the option multiple times. If omitted, factors will be selected automatically. This is incompatible with -redo,--redo, -redoAnalysis,--redo-analysis or -redoSubset,--redo-subset.\nThis option is not available when processing more than one experiment.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o subset -l subset -r -f --description 'ID number, category or name of the factor to use for subsetting the analysis. The factor must be categorical. If used without specifying  -factors,--factors, factors will be selected automatically among the remaining one in the design. If the experiment already has subsets for the factor, those will be reused. This is incompatible with -redo,--redo, -redoAnalysis,--redo-analysis or -redoSubset,--redo-subset.\nThis option is not available when processing more than one experiment.'
+complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o samples -l samples -f --description 'ID, name or accession of samples to be included in the analysis. Defaults to all samples in the dataset being analyzed. Requires the -nodb,--no-db option to be set.\nThis option is not available when processing more than one experiment.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o usebatch -l use-batch-factor -f --description 'If a batch factor is available, use it. Otherwise, batch information can/will be ignored in the analysis. This is incompatible with -factors,--factors, -redo,--redo, -redoAnalysis,--redo-analysis and -redoSubset,--redo-subset.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o nobayes -l no-bayes -f --description 'Do not apply empirical-Bayes moderated statistics. Default is to use eBayes.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o ignoreFailingSubsets -l ignore-failing-subsets -f --description 'Ignore failing subsets and continue processing other subsets. Requires the -subset,--subset option to be set or -redo,--redo option with existing subset analyses.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -s d -l output-dir -r -F --description 'Write diff. ex. archive files inside the given directory. Defaults to the current directory of no other destination is selected.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o nodb -l no-db -f --description 'Do not persist diff. ex. results to the database and instead save them to the current directory (or the location defined by -d,--output-dir).'
-complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o nofiles -l no-files -f --description 'Don'"'"'t create archive files after analysis. Default is to make them. This is incompatible with -nodb,--no-db.'
+complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o nofiles -l no-files -f --description 'Don'"'"'t create archive files after analysis. Default is to make them. This is incompatible with -nodb,--no-db option to be set.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o redo -l redo -f --description 'Re-run all analyses for the experiment. Try to base analysis on previous analysis'"'"'s choice of statistical model.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o redoAnalysis -l redo-analysis -r -f --description 'Re-run a specific analysis for the experiment. Try to base analysis on previous analysis'"'"'s choice of statistical model. Multiple analysis can be provided using comma-delimited IDs or by passing the option multiple times.\nThis option is not available when processing more than one experiment.'
 complete -c gemma-cli -n '__fish_seen_subcommand_from diffExAnalyze' -o redoSubset -l redo-subset -r -f --description 'Re-run all analyses for a subset of the experiment. Try to base analysis on previous analysis'"'"'s choice of statistical model. Multiple subsets can be provided using comma-delimited IDs or by passing the option multiple times.\nThis option is not available when processing more than one experiment.'
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
index 7395f47839..9572094d72 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
@@ -39,6 +39,7 @@
 import ubic.gemma.core.analysis.service.ExpressionDataFileService;
 import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis;
 import ubic.gemma.model.common.auditAndSecurity.eventType.DifferentialExpressionAnalysisEvent;
+import ubic.gemma.model.expression.biomaterial.BioMaterial;
 import ubic.gemma.model.expression.experiment.*;
 import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService;
 
@@ -80,21 +81,40 @@ private enum Mode {
         COMPLETE_FACTORS
     }
 
+    /**
+     * Mode for selecting which factors to include in the analysis.
+     */
+    private enum FactorSelectionMode {
+        /**
+         * Select factors based on a previous analysis.
+         */
+        REDO,
+        /**
+         * Select factors automatically.
+         */
+        AUTOMATIC,
+        /**
+         * Select factors manually. The values that are in {@link #factorIdentifiers} will be considered.
+         */
+        MANUAL
+    }
+
     /**
      * Specific analyses to redo.
      */
     @Nullable
-    private Collection<Long> analysisIds = null;
+    private Collection<Long> analysisIds;
 
     @Nullable
-    private Collection<Long> subsetIds = null;
+    private Collection<Long> subsetIds;
 
     /**
      * Indicate the type of analysis to perform.
      * <p>
      * The default is to detect it based on the dataset and the requested factors.
      */
-    private AnalysisType type = null;
+    @Nullable
+    private AnalysisType type;
 
     /**
      * Mode for selecting factors.
@@ -114,24 +134,33 @@ private enum Mode {
     private String subsetFactorIdentifier;
 
     /**
-     * Whether batch factors should be included (if they exist)
+     * Whether batch factors should be included (if they exist).
+     * <p>
+     * Defaults to true.
      */
-    private boolean ignoreBatch = true;
+    private boolean ignoreBatch;
 
     /**
      * Use moderated statistics.
      */
-    private boolean moderateStatistics = DifferentialExpressionAnalysisConfig.DEFAULT_MODERATE_STATISTICS;
+    private boolean moderateStatistics;
 
     /**
-     * Persist results to the database.
+     * Ignore failure of subset analyses.
+     * <p>
+     * The analysis will still fail if all subset analyses fail.
      */
-    private boolean persist = true;
+    private boolean ignoreFailingSubsets;
 
-    private boolean makeArchiveFiles = true;
-
-    private boolean ignoreFailingSubsets = false;
+    /**
+     * List of sample identifiers to include in the analysis.
+     * <p>
+     * Defaults to all samples.
+     */
+    @Nullable
+    private String[] sampleIdentifiers;
 
+    // data filtering options
     @Nullable
     private Integer filterMinNumberOfCellsPerSample;
     @Nullable
@@ -144,13 +173,22 @@ private enum Mode {
     @Nullable
     private Double filterMinVariance;
 
-    private DataFileOptionValue destination;
+    /**
+     * Persist results to the database.
+     */
+    private boolean persist ;
 
-    enum FactorSelectionMode {
-        REDO,
-        AUTOMATIC,
-        MANUAL
-    }
+    /**
+     * Create archive files.
+     */
+    private boolean makeArchiveFiles ;
+
+    /**
+     * Destination to use for archive files.
+     * <p>
+     * This is only applicable if {@link #makeArchiveFiles} is true.
+     */
+    private DataFileOptionValue destination;
 
     @Override
     public String getCommandName() {
@@ -197,6 +235,9 @@ protected void buildExperimentOptions( Options options ) {
                         + "If the experiment already has subsets for the factor, those will be reused. "
                         + "This is incompatible with -redo,--redo, -redoAnalysis,--redo-analysis or -redoSubset,--redo-subset." ).get() );
 
+        addSingleExperimentOption( options, Option.builder( "samples" ).longOpt( "samples" ).argName( "ID, name, accession" )
+                .desc( "ID, name or accession of samples to be included in the analysis. Defaults to all samples in the dataset being analyzed. Requires the " + formatOption( "nodb", "no-db" ) + " option to be set." ).get() );
+
         options.addOption( "usebatch", "use-batch-factor", false, "If a batch factor is available, use it. Otherwise, batch information can/will be ignored in the analysis. This is incompatible with " + formatOption( options, "factors" ) + ", -redo,--redo, -redoAnalysis,--redo-analysis and -redoSubset,--redo-subset." );
         options.addOption( "nobayes", "no-bayes", false, "Do not apply empirical-Bayes moderated statistics. Default is to use eBayes." );
         options.addOption( "ignoreFailingSubsets", "ignore-failing-subsets", false, "Ignore failing subsets and continue processing other subsets. Requires the " + formatOption( options, "subset" ) + " option to be set or -redo,--redo option with existing subset analyses." );
@@ -205,7 +246,7 @@ protected void buildExperimentOptions( Options options ) {
 
         // destination (db, standard location or custom directory)
         options.addOption( "nodb", "no-db", false, "Do not persist diff. ex. results to the database and instead save them to the current directory (or the location defined by " + formatOption( options, DataFileOptionsUtils.OUTPUT_DIR_OPTION ) + ")." );
-        options.addOption( "nofiles", "no-files", false, "Don't create archive files after analysis. Default is to make them. This is incompatible with " + formatOption( options, "nodb" ) + "." );
+        options.addOption( "nofiles", "no-files", false, "Don't create archive files after analysis. Default is to make them. This is incompatible with " + formatOption( options, "nodb" ) + " option to be set." );
 
         // redo mode
         options.addOption( "redo", "redo", false,
@@ -346,6 +387,7 @@ protected void processExperimentOptions( CommandLine commandLine ) throws ParseE
         // subset analysis can only be done in manual mode
         // note we add the given factor to the list of factors overall to make sure it is considered
         this.subsetFactorIdentifier = getOptionValue( commandLine, "subset", requires( allOf( toBeUnset( "redo" ), toBeUnset( "redoAnalysis" ), toBeUnset( "redoSubset" ) ) ) );
+        this.sampleIdentifiers = getOptionValues( commandLine, "samples", requires( toBeSet( "nodb" ) ) );
         // we can only force the use of a batch factor during automatic selection
         this.ignoreBatch = !hasOption( commandLine, "usebatch", requires( allOf( toBeUnset( "factors" ), toBeUnset( "redo" ), toBeUnset( "redoAnalysis" ), toBeUnset( "redoSubset" ) ) ) );
         this.moderateStatistics = !commandLine.hasOption( "nobayes" );
@@ -433,7 +475,18 @@ protected void processExpressionExperiment( ExpressionExperiment ee ) {
         config.setPersist( this.persist );
         config.setMakeArchiveFile( this.persist && this.makeArchiveFiles );
         config.setIgnoreFailingSubsets( this.ignoreFailingSubsets );
-        config.setUseWeights( super.eeService.isRNASeq( ee ) );
+        config.setUseWeights( eeService.isRNASeq( ee ) );
+
+        // sample selection
+        if ( this.sampleIdentifiers != null ) {
+            Set<BioMaterial> samplesToUse = new HashSet<>();
+            for ( String sampleId : sampleIdentifiers ) {
+                // allow searching in subsets, those are used in single-cell diff ex. analysis that use sub-samples for
+                // representing pseudo-bulks
+                samplesToUse.add( entityLocator.locateSample( ee, sampleId, true ) );
+            }
+            config.setSamplesToInclude( samplesToUse );
+        }
 
         // filtering
         config.setRepetitiveValuesFilterMode( this.filterMode );
diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/completion/CompletionType.java b/gemma-cli/src/main/java/ubic/gemma/cli/completion/CompletionType.java
index 196fddcc5a..b60179429f 100644
--- a/gemma-cli/src/main/java/ubic/gemma/cli/completion/CompletionType.java
+++ b/gemma-cli/src/main/java/ubic/gemma/cli/completion/CompletionType.java
@@ -7,6 +7,7 @@ public enum CompletionType {
     DATASET_GROUP,
     EXTERNAL_DATABASE,
     DATASET,
+    SAMPLE,
     EXPERIMENTAL_FACTOR,
     /**
      * Complete experimental factors and interactions suitable for DEA.
diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocator.java b/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocator.java
index 8e15eb3b80..16d6b4babc 100644
--- a/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocator.java
+++ b/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocator.java
@@ -8,8 +8,10 @@
 import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics;
 import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment;
 import ubic.gemma.model.expression.bioAssayData.DataVector;
+import ubic.gemma.model.expression.biomaterial.BioMaterial;
 import ubic.gemma.model.expression.experiment.ExperimentalFactor;
 import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet;
 import ubic.gemma.model.genome.Taxon;
 
 import java.util.Collection;
@@ -39,9 +41,35 @@ public interface EntityLocator {
 
     ExperimentalFactor locateExperimentalFactor( ExpressionExperiment expressionExperiment, String ctfName );
 
-    BioAssay locateBioAssay( ExpressionExperiment ee, String sampleId );
+    /**
+     * Locate an assay by its identifier.
+     *
+     * @param ee             dataset to lookup
+     * @param assayId        assay identifier to lookup
+     * @param includeSubSets whether to include assays that belong to subsets of the experiment. This is only relevant
+     *                       for {@link ExpressionExperimentSubSet} that "own" their assays instead of sharing them with
+     *                       the source experiment.
+     */
+    BioAssay locateBioAssay( ExpressionExperiment ee, String assayId, boolean includeSubSets );
 
+    /**
+     * Locate an assay by its identifier in a particular set of vectors.
+     *
+     * @param ee               dataset to lookup
+     * @param quantitationType quantitation type for the vectors
+     * @param sampleId         sample identifier to lookup
+     */
     BioAssay locateBioAssay( ExpressionExperiment ee, QuantitationType quantitationType, String sampleId );
 
+    /**
+     *
+     * @param ee             dataset to lookup
+     * @param sampleId       sample identifier to lookup
+     * @param includeSubSets whether to include samples associated to assays that belong to subsets of the experiment.
+     *                       This is only relevant for {@link ExpressionExperimentSubSet}
+     *                       that "own" their assays instead of sharing them with the source experiment.
+     */
+    BioMaterial locateSample( ExpressionExperiment ee, String sampleId, boolean includeSubSets );
+
     DifferentialExpressionAnalysis locateDiffExAnalysis( ExpressionExperiment ee, String analysisIdentifier );
 }
diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocatorImpl.java b/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocatorImpl.java
index 8a1a3fd793..66f80a02ec 100644
--- a/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocatorImpl.java
+++ b/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocatorImpl.java
@@ -2,6 +2,7 @@
 
 import lombok.extern.apachecommons.CommonsLog;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.Strings;
 import org.apache.commons.lang3.tuple.Pair;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Component;
@@ -15,6 +16,7 @@
 import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
 import ubic.gemma.model.expression.bioAssay.BioAssay;
 import ubic.gemma.model.expression.bioAssayData.*;
+import ubic.gemma.model.expression.biomaterial.BioMaterial;
 import ubic.gemma.model.expression.experiment.ExperimentalFactor;
 import ubic.gemma.model.expression.experiment.ExpressionExperiment;
 import ubic.gemma.model.genome.Taxon;
@@ -268,7 +270,7 @@ public ExperimentalFactor locateExperimentalFactor( ExpressionExperiment express
         ExperimentalFactor factor;
         try {
             Long efId = Long.parseLong( identifier );
-            if ( ( factor = matchOneFactor( expressionExperiment, ef -> ef.getId().equals( efId ) ) ) != null ) {
+            if ( ( factor = matchOneFactor( expressionExperiment, ef -> Objects.equals( ef.getId(), efId ) ) ) != null ) {
                 return factor;
             }
         } catch ( NumberFormatException e ) {
@@ -287,16 +289,16 @@ public ExperimentalFactor locateExperimentalFactor( ExpressionExperiment express
         }
 
         // match by category
-        if ( ( factor = matchOneFactor( expressionExperiment, ef -> ef.getCategory() != null && StringUtils.equalsIgnoreCase( ef.getCategory().getCategory(), finalIdentifier ) ) ) != null ) {
+        if ( ( factor = matchOneFactor( expressionExperiment, ef -> ef.getCategory() != null && Strings.CI.equals( ef.getCategory().getCategory(), finalIdentifier ) ) ) != null ) {
             return factor;
         }
-        if ( ( factor = matchOneFactor( expressionExperiment, ef -> ef.getCategory() != null && StringUtils.equalsIgnoreCase( ef.getCategory().getCategoryUri(), finalIdentifier ) ) ) != null ) {
+        if ( ( factor = matchOneFactor( expressionExperiment, ef -> ef.getCategory() != null && Strings.CI.equals( ef.getCategory().getCategoryUri(), finalIdentifier ) ) ) != null ) {
             return factor;
         }
-        if ( ( factor = matchOneFactor( expressionExperiment, ef -> ef.getCategory() != null && StringUtils.equalsIgnoreCase( ef.getCategory().getValue(), finalIdentifier ) ) ) != null ) {
+        if ( ( factor = matchOneFactor( expressionExperiment, ef -> ef.getCategory() != null && Strings.CI.equals( ef.getCategory().getValue(), finalIdentifier ) ) ) != null ) {
             return factor;
         }
-        if ( ( factor = matchOneFactor( expressionExperiment, ef -> ef.getCategory() != null && StringUtils.equalsIgnoreCase( ef.getCategory().getValueUri(), finalIdentifier ) ) ) != null ) {
+        if ( ( factor = matchOneFactor( expressionExperiment, ef -> ef.getCategory() != null && Strings.CI.equals( ef.getCategory().getValueUri(), finalIdentifier ) ) ) != null ) {
             return factor;
         }
 
@@ -321,10 +323,9 @@ private ExperimentalFactor matchOneFactor( ExpressionExperiment ee, Predicate<Ex
     }
 
     @Override
-    public BioAssay locateBioAssay( ExpressionExperiment ee, String sampleId ) {
-        ee = eeService.thawLite( ee );
-        return requireNonNull( locateBioAssay( ee.getBioAssays(), sampleId ),
-                "Could not locate any assay matching '" + sampleId + "' in " + ee.getShortName() + "." + formatPossibleValues( ee.getBioAssays(), true ) );
+    public BioAssay locateBioAssay( ExpressionExperiment ee, String assayId, boolean includeSubSets ) {
+        return requireNonNull( locateBioAssay( eeService.getBioAssays( ee, includeSubSets ), assayId ),
+                "Could not locate any assay matching '" + assayId + "' in " + ee.getShortName() + "." + formatPossibleValues( ee.getBioAssays(), true ) );
     }
 
     @Override
@@ -342,40 +343,64 @@ public BioAssay locateBioAssay( ExpressionExperiment ee, QuantitationType qt, St
         throw new NullPointerException();
     }
 
-    @Override
-    public DifferentialExpressionAnalysis locateDiffExAnalysis( ExpressionExperiment ee, String analysisIdentifier ) {
-        return requireNonNull( differentialExpressionAnalysisService.findByExperimentAndAnalysisId( ee, true, Long.parseLong( analysisIdentifier ) ),
-                () -> String.format( "Could not locate an analysis matching '%s' in %s.%s",
-                        analysisIdentifier,
-                        ee.getShortName(),
-                        formatPossibleValues( differentialExpressionAnalysisService.findByExperiment( ee, true ), false ) ) );
-    }
-
     @Nullable
-    private BioAssay locateBioAssay( Collection<BioAssay> ee, String sampleId ) {
+    private BioAssay locateBioAssay( Collection<BioAssay> candidates, String sampleId ) {
         BioAssay ba;
         try {
             Long id = Long.parseLong( sampleId );
-            if ( ( ba = matchOneAssay( ee, ba2 -> ba2.getId().equals( id ) ) ) != null ) {
+            if ( ( ba = matchOneAssay( candidates, ba2 -> Objects.equals( ba2.getId(), id ) ) ) != null ) {
                 return ba;
             }
         } catch ( NumberFormatException e ) {
             // ignore
         }
-        if ( ( ba = matchOneAssay( ee, ba2 -> ba2.getShortName() != null && ba2.getShortName().equalsIgnoreCase( sampleId ) ) ) != null ) {
+        if ( ( ba = matchOneAssay( candidates, ba2 -> ba2.getShortName() != null && ba2.getShortName().equalsIgnoreCase( sampleId ) ) ) != null ) {
+            return ba;
+        }
+        if ( ( ba = matchOneAssay( candidates, ba2 -> ba2.getName().equalsIgnoreCase( sampleId ) ) ) != null ) {
             return ba;
         }
-        if ( ( ba = matchOneAssay( ee, ba2 -> ba2.getName().equalsIgnoreCase( sampleId ) ) ) != null ) {
+        if ( ( ba = matchOneAssay( candidates, ba2 -> ba2.getAccession() != null && ba2.getAccession().getAccession().equalsIgnoreCase( sampleId ) ) ) != null ) {
+            return ba;
+        }
+        return null;
+    }
+
+    private BioAssay matchOneAssay( Collection<BioAssay> candidates, Predicate<BioAssay> ba ) {
+        Set<BioAssay> bas = candidates.stream().filter( ba ).collect( Collectors.toSet() );
+        if ( bas.size() == 1 ) {
+            return bas.iterator().next();
+        } else {
+            return null;
+        }
+    }
+
+    @Override
+    public BioMaterial locateSample( ExpressionExperiment ee, String sampleId, boolean includeSubSets ) {
+        return locateSample( eeService.getSamplesUsed( ee, includeSubSets ), sampleId );
+    }
+
+    private BioMaterial locateSample( Collection<BioMaterial> candidates, String sampleId ) {
+        BioMaterial ba;
+        try {
+            Long id = Long.parseLong( sampleId );
+            if ( ( ba = matchOneSample( candidates, ba2 -> Objects.equals( ba2.getId(), id ) ) ) != null ) {
+                return ba;
+            }
+        } catch ( NumberFormatException e ) {
+            // ignore
+        }
+        if ( ( ba = matchOneSample( candidates, ba2 -> ba2.getName().equalsIgnoreCase( sampleId ) ) ) != null ) {
             return ba;
         }
-        if ( ( ba = matchOneAssay( ee, ba2 -> ba2.getAccession() != null && ba2.getAccession().getAccession().equalsIgnoreCase( sampleId ) ) ) != null ) {
+        if ( ( ba = matchOneSample( candidates, ba2 -> ba2.getExternalAccession() != null && ba2.getExternalAccession().getAccession().equalsIgnoreCase( sampleId ) ) ) != null ) {
             return ba;
         }
         return null;
     }
 
-    private BioAssay matchOneAssay( Collection<BioAssay> bioAssays, Predicate<BioAssay> ba ) {
-        Set<BioAssay> bas = bioAssays.stream().filter( ba ).collect( Collectors.toSet() );
+    private BioMaterial matchOneSample( Collection<BioMaterial> candidates, Predicate<BioMaterial> ba ) {
+        Set<BioMaterial> bas = candidates.stream().filter( ba ).collect( Collectors.toSet() );
         if ( bas.size() == 1 ) {
             return bas.iterator().next();
         } else {
@@ -383,6 +408,15 @@ private BioAssay matchOneAssay( Collection<BioAssay> bioAssays, Predicate<BioAss
         }
     }
 
+    @Override
+    public DifferentialExpressionAnalysis locateDiffExAnalysis( ExpressionExperiment ee, String analysisIdentifier ) {
+        return requireNonNull( differentialExpressionAnalysisService.findByExperimentAndAnalysisId( ee, true, Long.parseLong( analysisIdentifier ) ),
+                () -> String.format( "Could not locate an analysis matching '%s' in %s.%s",
+                        analysisIdentifier,
+                        ee.getShortName(),
+                        formatPossibleValues( differentialExpressionAnalysisService.findByExperiment( ee, true ), false ) ) );
+    }
+
     private String formatPossibleValues( Collection<? extends Identifiable> possibleValues, boolean allowAmbiguousIds ) {
         if ( possibleValues.isEmpty() ) {
             return "";
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalysisConfig.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalysisConfig.java
index fb1dd25244..6eb001afec 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalysisConfig.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalysisConfig.java
@@ -23,6 +23,7 @@
 import ubic.gemma.core.analysis.preprocess.filter.RepetitiveValuesFilter;
 import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
 import ubic.gemma.model.expression.bioAssay.BioAssay;
+import ubic.gemma.model.expression.biomaterial.BioMaterial;
 import ubic.gemma.model.expression.experiment.ExperimentalFactor;
 import ubic.gemma.model.expression.experiment.FactorValue;
 
@@ -46,7 +47,10 @@ public class DifferentialExpressionAnalysisConfig {
 
     /**
      * Type of analysis to perform.
+     * <p>
+     * The type is automatically detected if left null.
      */
+    @Nullable
     private AnalysisType analysisType;
 
     /**
@@ -79,6 +83,14 @@ public class DifferentialExpressionAnalysisConfig {
     @Nullable
     private FactorValue subsetFactorValue;
 
+    /**
+     * Set of samples to include in the analysis.
+     * <p>
+     * All samples are included if this is null.
+     */
+    @Nullable
+    private Set<BioMaterial> samplesToInclude;
+
     /**
      * Keep processing other subsets when encountering an {@link AnalysisException} on a subset.
      * <p>
@@ -165,6 +177,9 @@ public DifferentialExpressionAnalysisConfig() {
     public DifferentialExpressionAnalysisConfig( DifferentialExpressionAnalysisConfig baseConfig ) {
         this.analysisType = baseConfig.getAnalysisType();
         this.moderateStatistics = baseConfig.isModerateStatistics();
+        if ( baseConfig.samplesToInclude != null ) {
+            this.samplesToInclude = new HashSet<>( baseConfig.samplesToInclude );
+        }
         this.factorsToInclude.addAll( baseConfig.getFactorsToInclude() );
         this.interactionsToInclude.addAll( baseConfig.getInteractionsToInclude() );
         this.persist = baseConfig.isPersist();
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java
index 1a5f6a7c73..cb13235520 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java
@@ -111,9 +111,6 @@ public class LinearModelAnalyzer implements DiffExAnalyzer {
     private static final Comparator<ExperimentalFactor> FACTOR_COMPARATOR =
             Comparator.comparing( DiffExAnalyzerUtils::nameForR, Comparator.naturalOrder() );
 
-    private static final Comparator<BioMaterial> SAMPLE_COMPARATOR =
-            Comparator.comparing( DiffExAnalyzerUtils::nameForR, Comparator.naturalOrder() );
-
     @Autowired
     private CompositeSequenceService compositeSequenceService;
     @Autowired
@@ -123,160 +120,6 @@ public class LinearModelAnalyzer implements DiffExAnalyzer {
     @Autowired
     private BuildInfo buildInfo;
 
-    /**
-     * Generate HitListSize entities that will be stored to count the number of diff. ex probes at various preset
-     * thresholds, to avoid wasting time generating these counts on the fly later. This is done automatically during
-     * analysis, so is just here to allow 'backfilling'.
-     *
-     * @param probeToGeneMap map
-     * @param results        results
-     * @return hit list sizes
-     */
-    private Set<HitListSize> computeHitListSizes( Collection<DifferentialExpressionAnalysisResult> results,
-            Map<CompositeSequence, Collection<Gene>> probeToGeneMap ) {
-        Set<HitListSize> hitListSizes = new HashSet<>();
-        StopWatch timer = new StopWatch();
-        timer.start();
-        double maxThreshold = MathUtil.max( LinearModelAnalyzer.qValueThresholdsForHitLists );
-
-        Collection<Gene> allGenes = new HashSet<>();
-        for ( Collection<Gene> genes : probeToGeneMap.values() ) {
-            allGenes.addAll( genes );
-        }
-
-        // maps from Doubles are a bit dodgy...
-        Map<Double, Integer> upCounts = new HashMap<>();
-        Map<Double, Integer> downCounts = new HashMap<>();
-        Map<Double, Integer> eitherCounts = new HashMap<>();
-
-        Map<Double, Integer> upCountGenes = new HashMap<>();
-        Map<Double, Integer> downCountGenes = new HashMap<>();
-        Map<Double, Integer> eitherCountGenes = new HashMap<>();
-
-        Collection<Gene> seenGenes = new HashSet<>();
-        for ( DifferentialExpressionAnalysisResult r : results ) {
-
-            Double corrP = r.getCorrectedPvalue();
-            if ( corrP == null || corrP > maxThreshold ) {
-                continue;
-            }
-
-            CompositeSequence probe = r.getProbe();
-            Collection<Gene> genesForProbe = probeToGeneMap.get( probe );
-            int numGenes = 0;
-            if ( genesForProbe != null ) {
-                numGenes = this.countNumberOfGenesNotSeenAlready( genesForProbe, seenGenes );
-            }
-
-            // if ( numGenes == 0 ) // This is okay; it might mean we already counted it as differentially expressed.
-
-            Collection<ContrastResult> crs = r.getContrasts();
-            boolean up = false;
-            boolean down = false;
-
-            /*
-             * We set up and down to be true (either or both) if at least on contrast is shown.
-             */
-            for ( ContrastResult cr : crs ) {
-                Double lf = cr.getLogFoldChange();
-                //noinspection StatementWithEmptyBody // Better readability
-                if ( lf == null ) {
-                    /*
-                     * A contrast which is actually not valid, so it won't be counted in the hit list.
-                     */
-                } else if ( lf < 0 ) {
-                    down = true;
-                } else if ( lf > 0 ) {
-                    up = true;
-                }
-            }
-
-            for ( double thresh : LinearModelAnalyzer.qValueThresholdsForHitLists ) {
-
-                if ( !upCounts.containsKey( thresh ) ) {
-                    upCounts.put( thresh, 0 );
-                    upCountGenes.put( thresh, 0 );
-                }
-                if ( !downCounts.containsKey( thresh ) ) {
-                    downCounts.put( thresh, 0 );
-                    downCountGenes.put( thresh, 0 );
-                }
-                if ( !eitherCounts.containsKey( thresh ) ) {
-                    eitherCounts.put( thresh, 0 );
-                    eitherCountGenes.put( thresh, 0 );
-                }
-
-                if ( corrP < thresh ) {
-                    if ( up ) {
-                        upCounts.put( thresh, upCounts.get( thresh ) + 1 );
-                        upCountGenes.put( thresh, upCountGenes.get( thresh ) + numGenes );
-                    }
-                    if ( down ) {
-                        downCounts.put( thresh, downCounts.get( thresh ) + 1 );
-                        downCountGenes.put( thresh, downCountGenes.get( thresh ) + numGenes );
-                    }
-
-                    eitherCounts.put( thresh, eitherCounts.get( thresh ) + 1 );
-                    eitherCountGenes.put( thresh, eitherCountGenes.get( thresh ) + numGenes );
-
-                }
-            }
-
-        }
-
-        for ( double thresh : LinearModelAnalyzer.qValueThresholdsForHitLists ) {
-            // Ensure we don't set values to null.
-            int up = upCounts.getOrDefault( thresh, 0 );
-            int down = downCounts.getOrDefault( thresh, 0 );
-            int either = eitherCounts.getOrDefault( thresh, 0 );
-
-            int upGenes = upCountGenes.getOrDefault( thresh, 0 );
-            int downGenes = downCountGenes.getOrDefault( thresh, 0 );
-            int eitherGenes = eitherCountGenes.getOrDefault( thresh, 0 );
-
-            assert !( allGenes.size() < upGenes || allGenes.size() < downGenes
-                    || allGenes.size() < eitherGenes ) : "There were more genes differentially expressed than exist in the experiment";
-
-            HitListSize upS = HitListSize.Factory.newInstance( thresh, up, Direction.UP, upGenes );
-            HitListSize downS = HitListSize.Factory.newInstance( thresh, down, Direction.DOWN, downGenes );
-            HitListSize eitherS = HitListSize.Factory.newInstance( thresh, either, Direction.EITHER, eitherGenes );
-
-            hitListSizes.add( upS );
-            hitListSizes.add( downS );
-            hitListSizes.add( eitherS );
-
-            assert upGenes <= eitherGenes : "More genes upregulated than upregulated+downregulated";
-            assert downGenes <= eitherGenes : "More genes downregulated than upregulated+downregulated";
-
-        }
-
-        if ( timer.getTime() > 1000 ) {
-            LinearModelAnalyzer.log.info( "Hitlist computation: " + timer.getTime() + " ms" );
-        }
-        return hitListSizes;
-    }
-
-    /**
-     * Utility method
-     *
-     * @param probeToGeneMap map
-     * @param resultList     result list
-     * @return number of genes tested
-     */
-    private int getNumberOfGenesTested( Collection<DifferentialExpressionAnalysisResult> resultList,
-            Map<CompositeSequence, Collection<Gene>> probeToGeneMap ) {
-
-        Collection<Gene> gs = new HashSet<>();
-        for ( DifferentialExpressionAnalysisResult d : resultList ) {
-            CompositeSequence probe = d.getProbe();
-            if ( probeToGeneMap.containsKey( probe ) ) {
-                gs.addAll( probeToGeneMap.get( probe ) );
-            }
-        }
-        return gs.size();
-    }
-
-
     /**
      * I apologize for this being so complicated. Basically there are four phases:
      * <ol>
@@ -290,23 +133,19 @@ private int getNumberOfGenesTested( Collection<DifferentialExpressionAnalysisRes
     @Override
     public Collection<DifferentialExpressionAnalysis> run( ExpressionExperiment expressionExperiment,
             ExpressionDataDoubleMatrix dmatrix, DifferentialExpressionAnalysisConfig config ) throws AnalysisException {
-
         /*
          * Initialize our matrix and factor lists...
          */
-        List<ExperimentalFactor> factors = config.getFactorsToInclude().stream()
-                .sorted( FACTOR_COMPARATOR )
-                .collect( Collectors.toList() );
-
-        /*
-         * FIXME this is the place to strip put the outliers.
-         */
-        List<BioMaterial> samplesUsed = orderByExperimentalDesign( dmatrix, factors, null );
+        List<ExperimentalFactor> factors = selectFactors( config );
+        List<BioMaterial> samplesUsed = selectSamples( dmatrix, factors, config );
+        dropIncompleteFactors( samplesUsed, factors );
+        if ( factors.isEmpty() ) {
+            throw new NoFactorLeftForAnalysisException( "All factors were removed due to incomplete values.", config );
+        }
 
         dmatrix = dmatrix.sliceColumns( samplesUsed, createBADMap( samplesUsed ) ); // enforce ordering
 
         Map<ExperimentalFactor, FactorValue> baselineConditions = BaselineSelection.getBaselineConditions( samplesUsed, factors );
-        dropIncompleteFactors( samplesUsed, factors );
 
         /*
          * Do the analysis, by subsets if requested
@@ -327,23 +166,58 @@ public Collection<DifferentialExpressionAnalysis> run( ExpressionExperiment ee,
         Assert.isTrue( !subsets.isEmpty(), "No subsets provided" );
         Assert.isTrue( config.getSubsetFactor().getFactorValues().containsAll( subsets.keySet() ), "Subsets must use factor values from " + config.getSubsetFactor() + "." );
         Assert.isTrue( subsets.values().stream().allMatch( ss -> ss.getSourceExperiment().equals( ee ) ), "Subsets must use " + ee + " as source experiment." );
-        List<ExperimentalFactor> factors = config.getFactorsToInclude().stream()
-                .sorted( FACTOR_COMPARATOR )
-                .collect( Collectors.toList() );
-        List<BioMaterial> samplesUsed = orderByExperimentalDesign( dmatrix, factors, null );
+        List<ExperimentalFactor> factors = selectFactors( config );
+        List<BioMaterial> samplesUsed = selectSamples( dmatrix, factors, config );
+        dropIncompleteFactors( samplesUsed, factors );
+        if ( factors.isEmpty() ) {
+            throw new NoFactorLeftForAnalysisException( "All factors were removed due to incomplete values.", config );
+        }
         Map<FactorValue, ExpressionDataDoubleMatrix> dmatrixBySubSet = makeSubSetMatrices( dmatrix, samplesUsed, factors, config.getSubsetFactor() );
         Map<ExperimentalFactor, FactorValue> baselineConditions = BaselineSelection.getBaselineConditions( samplesUsed, factors );
-        dropIncompleteFactors( samplesUsed, factors );
-        return doSubSetAnalysis( subsets, dmatrixBySubSet, factors, baselineConditions, config );
+        return doSubSetAnalysis( subsets, dmatrixBySubSet, samplesUsed, factors, baselineConditions, config );
+    }
+
+    @Override
+    public DifferentialExpressionAnalysis run( ExpressionExperimentSubSet subset, ExpressionDataDoubleMatrix dmatrix, DifferentialExpressionAnalysisConfig config ) throws AnalysisException {
+        Assert.notNull( config.getSubsetFactor(), "A subset factor must be set to analyze a subset." );
+        /*
+         * Start by setting it up like the full experiment.
+         */
+
+        ExperimentalFactor ef = config.getSubsetFactor();
+
+        FactorValue subsetFactorValue = config.getSubsetFactorValue();
+        if ( subsetFactorValue == null ) {
+            log.info( "No factor value set in the configuration, will determine it from the samples..." );
+            subsetFactorValue = determineSubsetFactorValue( ef, subset );
+            log.info( "The subset factor value appears to be " + subsetFactorValue + "." );
+        }
+
+        List<ExperimentalFactor> factors = selectFactors( config );
+        List<ExperimentalFactor> subsetFactors = selectFactorsForSubSetAnalysis( subset, dmatrix, factors );
+        if ( subsetFactors.isEmpty() ) {
+            throw new NoFactorLeftForAnalysisException( "Experimental design is not valid for subset: " + subsetFactorValue + ".", config );
+        }
+        List<BioMaterial> samplesInSubset = selectSamples( dmatrix, factors, config );
+        dropIncompleteFactors( samplesInSubset, factors );
+        if ( factors.isEmpty() ) {
+            throw new NoFactorLeftForAnalysisException( "All factors were removed due to incomplete values.", config );
+        }
+
+        // slice.
+        ExpressionDataDoubleMatrix subsetMatrix = dmatrix.sliceColumns( samplesInSubset, createBADMap( samplesInSubset ) );
+
+
+        Map<ExperimentalFactor, FactorValue> baselineConditions = BaselineSelection.getBaselineConditions( samplesInSubset, factors );
+
+        return doAnalysis( subset, subsetMatrix, samplesInSubset, subsetFactors,
+                baselineConditions, subsetFactorValue, createConfigForSubsetAnalysis( samplesInSubset, subsetFactors, subsetFactorValue, config ) );
     }
 
     /**
      * Perform an analysis by subset.
      */
     private Collection<DifferentialExpressionAnalysis> doSubSetAnalysis( ExpressionExperiment expressionExperiment, List<BioMaterial> samplesUsed, List<ExperimentalFactor> factors, Map<ExperimentalFactor, FactorValue> baselineConditions, ExperimentalFactor subsetFactor, ExpressionDataDoubleMatrix dmatrix, DifferentialExpressionAnalysisConfig config ) throws AnalysisException {
-        Assert.isTrue( !factors.contains( subsetFactor ),
-                "Subset factor cannot also be included in the analysis [ Factor was: " + subsetFactor + "]" );
-
         Map<FactorValue, ExpressionDataDoubleMatrix> dmatrixBySubset = makeSubSetMatrices( dmatrix, samplesUsed, factors, subsetFactor );
 
         /*
@@ -362,15 +236,13 @@ private Collection<DifferentialExpressionAnalysis> doSubSetAnalysis( ExpressionE
 
             LinearModelAnalyzer.log.info( "Analyzing subset: " + subsetFactorValue );
 
-            List<BioMaterial> bioMaterials = orderByExperimentalDesign( dmatrixBySubset.get( subsetFactorValue ), factors, null );
-
             /*
              * make a EESubSet
              */
             String subsetName = FactorValueUtils.getSummaryString( subsetFactorValue );
             ExpressionExperimentSubSet eeSubSet = ExpressionExperimentSubSet.Factory.newInstance( subsetName, expressionExperiment );
             Collection<BioAssay> bioAssays = new HashSet<>();
-            for ( BioMaterial bm : bioMaterials ) {
+            for ( BioMaterial bm : samplesUsed ) {
                 bioAssays.addAll( bm.getBioAssaysUsedIn() );
             }
             eeSubSet.getBioAssays().addAll( bioAssays );
@@ -380,10 +252,10 @@ private Collection<DifferentialExpressionAnalysis> doSubSetAnalysis( ExpressionE
 
         LinearModelAnalyzer.log.info( "Total number of subsets: " + subsets.size() );
 
-        return doSubSetAnalysis( subsets, dmatrixBySubset, factors, baselineConditions, config );
+        return doSubSetAnalysis( subsets, dmatrixBySubset, samplesUsed, factors, baselineConditions, config );
     }
 
-    private Collection<DifferentialExpressionAnalysis> doSubSetAnalysis( Map<FactorValue, ExpressionExperimentSubSet> subsets, Map<FactorValue, ExpressionDataDoubleMatrix> dmatrix, List<ExperimentalFactor> factors, Map<ExperimentalFactor, FactorValue> baselineConditions, DifferentialExpressionAnalysisConfig config ) throws AnalysisException {
+    private Collection<DifferentialExpressionAnalysis> doSubSetAnalysis( Map<FactorValue, ExpressionExperimentSubSet> subsets, Map<FactorValue, ExpressionDataDoubleMatrix> dmatrix, List<BioMaterial> samplesInSubSet, List<ExperimentalFactor> factors, Map<ExperimentalFactor, FactorValue> baselineConditions, DifferentialExpressionAnalysisConfig config ) throws AnalysisException {
         /*
          * Now analyze each subset
          */
@@ -400,10 +272,10 @@ private Collection<DifferentialExpressionAnalysis> doSubSetAnalysis( Map<FactorV
             List<BioMaterial> bioMaterials = orderByExperimentalDesign( dmatrix.get( subsetFactorValue ), factors, null );
 
             List<ExperimentalFactor> subsetFactors = this
-                    .fixFactorsForSubset( subsets.get( subsetFactorValue ), dmatrix.get( subsetFactorValue ), factors );
+                    .selectFactorsForSubSetAnalysis( subsets.get( subsetFactorValue ), dmatrix.get( subsetFactorValue ), factors );
 
             DifferentialExpressionAnalysisConfig subsetConfig = this
-                    .fixConfigForSubset( factors, subsetFactorValue, config );
+                    .createConfigForSubsetAnalysis( samplesInSubSet, factors, subsetFactorValue, config );
 
             if ( subsetFactors.isEmpty() ) {
                 LinearModelAnalyzer.log
@@ -436,7 +308,7 @@ private Collection<DifferentialExpressionAnalysis> doSubSetAnalysis( Map<FactorV
     }
 
     /**
-     * Check if a factor value should be excluded from the analysis.
+     * Check if a factor value should be excluded from a subset analysis.
      */
     private boolean isExcluded( FactorValue subsetFactorValue ) {
         for ( Characteristic c : subsetFactorValue.getCharacteristics() ) {
@@ -447,54 +319,31 @@ private boolean isExcluded( FactorValue subsetFactorValue ) {
         return false;
     }
 
-
-    @Override
-    public DifferentialExpressionAnalysis run( ExpressionExperimentSubSet subset, ExpressionDataDoubleMatrix dmatrix, DifferentialExpressionAnalysisConfig config ) throws AnalysisException {
-        Assert.notNull( config.getSubsetFactor(), "A subset factor must be set to analyze a subset." );
-
-        /*
-         * Start by setting it up like the full experiment.
-         */
-
-        ExperimentalFactor ef = config.getSubsetFactor();
-
-        List<BioMaterial> samplesInSubset = subset.getBioAssays().stream()
-                .map( BioAssay::getSampleUsed )
-                .sorted( SAMPLE_COMPARATOR )
-                .collect( Collectors.toList() );
-
-        FactorValue subsetFactorValue = config.getSubsetFactorValue();
-        if ( subsetFactorValue == null ) {
-            log.info( "No factor value set in the configuration, will determine it from the samples..." );
-            subsetFactorValue = determineSubsetFactorValue( ef, subset );
-            log.info( "The subset factor value appears to be " + subsetFactorValue + "." );
-        }
-
-        samplesInSubset = orderByExperimentalDesign( samplesInSubset, config.getFactorsToInclude(), null );
-
-        // slice.
-        ExpressionDataDoubleMatrix subsetMatrix = dmatrix.sliceColumns( samplesInSubset, createBADMap( samplesInSubset ) );
-
-        List<ExperimentalFactor> factors = config.getFactorsToInclude().stream()
+    /**
+     * Select and order the factors to
+     */
+    private List<ExperimentalFactor> selectFactors( DifferentialExpressionAnalysisConfig config ) {
+        return config.getFactorsToInclude().stream()
                 .sorted( FACTOR_COMPARATOR )
                 .collect( Collectors.toList() );
-        List<ExperimentalFactor> subsetFactors = fixFactorsForSubset( subset, dmatrix, factors );
-
-        Map<ExperimentalFactor, FactorValue> baselineConditions = BaselineSelection.getBaselineConditions( samplesInSubset, factors );
-        dropIncompleteFactors( samplesInSubset, factors );
-
-        if ( factors.isEmpty() ) {
-            throw new NoFactorLeftForAnalysisException( "All factors were removed due to incomplete values", config );
-        }
+    }
 
-        if ( subsetFactors.isEmpty() ) {
-            LinearModelAnalyzer.log
-                    .warn( "Experimental design is not valid for subset: " + subsetFactorValue + "; skipping" );
-            return null;
+    /**
+     * Select and order the samples to use.
+     * <p>
+     * Samples are ordered by the experimental design.
+     */
+    private List<BioMaterial> selectSamples( ExpressionDataDoubleMatrix dmatrix, Collection<ExperimentalFactor> factors, DifferentialExpressionAnalysisConfig config ) {
+        List<BioMaterial> samplesUsed = orderByExperimentalDesign( dmatrix, factors, null );
+        if ( config.getSamplesToInclude() != null ) {
+            log.info( "Filtering samples to include based on the configuration..." );
+            Assert.isTrue( new HashSet<>( dmatrix.getBioMaterials() ).containsAll( config.getSamplesToInclude() ),
+                    "All the samples to include must be present in the data matrix." );
+            samplesUsed = samplesUsed.stream()
+                    .filter( config.getSamplesToInclude()::contains )
+                    .collect( Collectors.toList() );
         }
-
-        return doAnalysis( subset, subsetMatrix, samplesInSubset, subsetFactors,
-                baselineConditions, subsetFactorValue, fixConfigForSubset( subsetFactors, subsetFactorValue, config ) );
+        return samplesUsed;
     }
 
     /**
@@ -528,117 +377,72 @@ private FactorValue determineSubsetFactorValue( ExperimentalFactor subsetFactor,
         return subsetFactorValue;
     }
 
-    private DoubleMatrix1D getLibrarySizes( DifferentialExpressionAnalysisConfig config,
-            ExpressionDataDoubleMatrix dmatrix ) {
-
-        DoubleMatrix1D librarySize = new DenseDoubleMatrix1D( dmatrix.columns() );
-        if ( config.isUseWeights() ) {
-            for ( int i = 0; i < dmatrix.columns(); i++ ) {
-                BioAssay ba = dmatrix.getBioAssayForColumn( i );
-
-                Long sequenceReadCount = ba.getSequenceReadCount();
-                if ( !ba.getIsOutlier() && ( sequenceReadCount == null || sequenceReadCount == 0 ) ) {
-                    // double check.
-                    double[] col = dmatrix.getColumnAsDoubles( i );
-                    double maxExpression = DescriptiveWithMissing.max( new DoubleArrayList( col ) );
-                    if ( maxExpression <= 0 ) {
-                        throw new IllegalStateException(
-                                "Sample has a null or zero read-count, isn't marked as an outlier, and max expression level is " + maxExpression
-                                        + ": " + ba );
-                    }
-                }
-
-                if ( sequenceReadCount == null ) sequenceReadCount = 0L;
-
-                librarySize.set( i, sequenceReadCount );
-            }
-        }
-        return librarySize;
-    }
-
     /**
-     * @return boolean true if we need the intercept.
+     * Create a configuration tailored to a particular subset.
+     *
+     * @param factors           the factors that will be included
+     * @param subsetFactorValue factor value to use for the subset analysis
+     * @param config            original configuration to base the subset config on
+     * @return an updated config; the baselines are cleared; subset is cleared; interactions are only kept if
+     * they only involve the given factors.
      */
-    private boolean checkIfNeedToTreatAsIntercept( ExperimentalFactor experimentalFactor,
-            QuantitationType quantitationType ) {
-        if ( experimentalFactor.getFactorValues().size() == 1 ) {
-            if ( quantitationType.getIsRatio() ) {
-                return true;
-            }
-            throw new IllegalArgumentException(
-                    "Cannot deal with constant factors unless the data are ratiometric non-reference design" );
+    private DifferentialExpressionAnalysisConfig createConfigForSubsetAnalysis( List<BioMaterial> samplesUsed, List<ExperimentalFactor> factors,
+            FactorValue subsetFactorValue, DifferentialExpressionAnalysisConfig config ) {
+        DifferentialExpressionAnalysisConfig newConfig = new DifferentialExpressionAnalysisConfig( config );
+        newConfig.setSubsetFactor( null );
+        newConfig.setSubsetFactorValue( subsetFactorValue );
+        // remove any samples not in the given subset
+        if ( newConfig.getSamplesToInclude() != null ) {
+            Set<BioMaterial> samplesUsedSet = new HashSet<>( samplesUsed );
+            newConfig.getSamplesToInclude().removeIf( s -> !samplesUsedSet.contains( s ) );
         }
-        return false;
+        // remove any factors not in the given subset
+        Set<ExperimentalFactor> factorsSet = new HashSet<>( factors );
+        newConfig.getFactorsToInclude().removeIf( f -> !factorsSet.contains( f ) );
+        newConfig.getInteractionsToInclude().removeIf( fv -> !factorsSet.containsAll( fv ) );
+        return newConfig;
     }
 
     /**
-     * Create files that can be used in R to check the results.
+     * Remove factors which are no longer usable, based on the subset.
      */
-    private void outputForDebugging( ExpressionDataDoubleMatrix dmatrix,
-            ObjectMatrix<String, String, Object> designMatrix ) {
-        MatrixWriter mw = new MatrixWriter( entityUrlBuilder, buildInfo );
-        try ( FileWriter writer = new FileWriter( File.createTempFile( "data.", ".txt" ) );
-                FileWriter out = new FileWriter( File.createTempFile( "design.", ".txt" ) ) ) {
-
-            mw.write( dmatrix, ProcessedExpressionDataVector.class, writer );
+    private List<ExperimentalFactor> selectFactorsForSubSetAnalysis( ExpressionExperimentSubSet subset, ExpressionDataDoubleMatrix dmatrix, List<ExperimentalFactor> factors ) {
 
-            ubic.basecode.io.writer.MatrixWriter<String, String> dem = new ubic.basecode.io.writer.MatrixWriter<>(
-                    out );
-            dem.writeMatrix( designMatrix, true );
-
-        } catch ( IOException e ) {
-            log.error( "An I/O error occurred when producing debugging output.", e );
-        }
-    }
+        List<ExperimentalFactor> result = new ArrayList<>();
 
-    /**
-     * Populate the interactionFactorLists and label2Factors
-     *
-     * @param interactionFactorLists gets populated
-     */
-    private void buildModelFormula( final DifferentialExpressionAnalysisConfig config,
-            final Map<String, Collection<ExperimentalFactor>> label2Factors,
-            final List<String[]> interactionFactorLists ) {
+        QuantitationType quantitationType = dmatrix.getQuantitationType();
+        ExperimentalFactor interceptFactor = this.determineInterceptFactor( factors, quantitationType );
 
         /*
-         * Add interaction terms
+         * Remove any constant factors, unless they are that intercept.
          */
-        boolean hasInteractionTerms = config.getInteractionsToInclude() != null && !config.getInteractionsToInclude().isEmpty();
+        for ( ExperimentalFactor f : factors ) {
 
-        if ( hasInteractionTerms ) {
-            for ( Collection<ExperimentalFactor> interactionTerms : config.getInteractionsToInclude() ) {
+            if ( f.getType().equals( FactorType.CONTINUOUS ) ) {
+                result.add( f );
+            } else if ( interceptFactor != null && interceptFactor.equals( f ) ) {
+                result.add( f );
+            } else {
 
-                List<String> interactionFactorNames = new ArrayList<>();
-                for ( ExperimentalFactor factor : interactionTerms ) {
-                    interactionFactorNames.add( DiffExAnalyzerUtils.nameForR( factor ) );
-                }
+                Collection<FactorValue> levels = new HashSet<>();
+                DiffExAnalyzerUtils.populateFactorValuesFromBASet( subset, f, levels );
 
-                interactionFactorLists.add( interactionFactorNames.toArray( new String[] {} ) );
+                if ( levels.size() > 1 ) {
+                    result.add( f );
+                } else {
+                    LinearModelAnalyzer.log.info( "Dropping " + f + " from subset" );
+                }
 
-                // In the ANOVA table.
-                String factTableLabel = StringUtils.join( interactionFactorNames, ":" );
-                label2Factors.put( factTableLabel, new HashSet<>() );
-                label2Factors.get( factTableLabel ).addAll( interactionTerms );
             }
-        }
-    }
 
-    private int countNumberOfGenesNotSeenAlready( @Nullable Collection<Gene> genesForProbe, Collection<Gene> seenGenes ) {
-        int numGenes = 0;
-        if ( genesForProbe != null ) {
-            for ( Gene g : genesForProbe ) {
-                if ( seenGenes.contains( g ) ) {
-                    continue;
-                }
-                numGenes++;
-            }
-            seenGenes.addAll( genesForProbe );
         }
 
-        return numGenes;
+        return result;
     }
 
     /**
+     * Perform a differential expression analysis.
+     *
      * @param bioAssaySet        source data, could be a SubSet
      * @param expressionData     data (for the subset, if it's a subset)
      * @param samplesUsed        samples analyzed
@@ -653,7 +457,6 @@ private DifferentialExpressionAnalysis doAnalysis( BioAssaySet bioAssaySet,
             Map<ExperimentalFactor, FactorValue> baselineConditions,
             @Nullable FactorValue subsetFactorValue,
             DifferentialExpressionAnalysisConfig config ) throws AnalysisException {
-
         if ( factors.isEmpty() ) {
             throw new IllegalArgumentException( "Must provide at least one factor" );
         }
@@ -671,7 +474,6 @@ private DifferentialExpressionAnalysis doAnalysis( BioAssaySet bioAssaySet,
             }
         }
 
-
         ExperimentalFactor interceptFactor = this.determineInterceptFactor( factors, quantitationType );
 
         /*
@@ -732,7 +534,7 @@ private DifferentialExpressionAnalysis doAnalysis( BioAssaySet bioAssaySet,
         Map<String, List<DifferentialExpressionAnalysisResult>> resultLists = new HashMap<>( properDesignMatrix.getTerms().size() );
         Map<String, List<Double>> pvaluesForQvalue = new HashMap<>( properDesignMatrix.getTerms().size() );
 
-        // We use the design matrix to ensure that we only consider terms that actually ended up in the model. 
+        // We use the design matrix to ensure that we only consider terms that actually ended up in the model.
         for ( String factorName : properDesignMatrix.getTerms() ) {
             if ( !label2Factors.containsKey( factorName ) ) continue; // so we skip the intercept
             log.info( "Setting up results for " + factorName );
@@ -840,37 +642,147 @@ private DifferentialExpressionAnalysis doAnalysis( BioAssaySet bioAssaySet,
                     Map<String, Double> interactionContrastCoeffs = lm.getContrastCoefficients( factorName );
                     Map<String, Double> interactionContrastPValues = lm.getContrastPValues( factorName );
 
-                    for ( String term : interactionContrastPValues.keySet() ) {
-                        Double contrastPvalue = interactionContrastPValues.get( term );
+                    for ( String term : interactionContrastPValues.keySet() ) {
+                        Double contrastPvalue = interactionContrastPValues.get( term );
+
+                        this.makeContrast( probeAnalysisResult, factorsForName, term, factorName, contrastPvalue,
+                                interactionContrastTStats, interactionContrastCoeffs );
+
+                    }
+                } else {
+                    throw new UnsupportedOperationException( "Handling more than two-way interactions is not implemented." );
+                }
+
+                probeAnalysisResult.setPvalue( this.nan2Null( overallPValue ) );
+                pvaluesForQvalue.get( factorName ).add( overallPValue );
+                resultLists.get( factorName ).add( probeAnalysisResult );
+            } // over terms
+
+            if ( ++processed % 1000 == 0 ) {
+                LinearModelAnalyzer.log.info( "Processed results for " + processed + " elements..." );
+            }
+        } // over probes
+
+        this.fillRanksAndQvalues( resultLists, pvaluesForQvalue );
+
+        DifferentialExpressionAnalysis expressionAnalysis = this
+                .makeAnalysisEntity( bioAssaySet, expressionData.getQuantitationType(), config, label2Factors,
+                        baselineConditions, interceptFactor, interactionFactorLists, oneSampleTTest, resultLists,
+                        subsetFactorValue, filterResultDescription );
+
+        LinearModelAnalyzer.log.info( "Analysis processing phase done ..." );
+
+        return expressionAnalysis;
+    }
+
+    private DoubleMatrix1D getLibrarySizes( DifferentialExpressionAnalysisConfig config,
+            ExpressionDataDoubleMatrix dmatrix ) {
+
+        DoubleMatrix1D librarySize = new DenseDoubleMatrix1D( dmatrix.columns() );
+        if ( config.isUseWeights() ) {
+            for ( int i = 0; i < dmatrix.columns(); i++ ) {
+                BioAssay ba = dmatrix.getBioAssayForColumn( i );
+
+                Long sequenceReadCount = ba.getSequenceReadCount();
+                if ( !ba.getIsOutlier() && ( sequenceReadCount == null || sequenceReadCount == 0 ) ) {
+                    // double check.
+                    double[] col = dmatrix.getColumnAsDoubles( i );
+                    double maxExpression = DescriptiveWithMissing.max( new DoubleArrayList( col ) );
+                    if ( maxExpression <= 0 ) {
+                        throw new IllegalStateException(
+                                "Sample has a null or zero read-count, isn't marked as an outlier, and max expression level is " + maxExpression
+                                        + ": " + ba );
+                    }
+                }
+
+                if ( sequenceReadCount == null ) sequenceReadCount = 0L;
+
+                librarySize.set( i, sequenceReadCount );
+            }
+        }
+        return librarySize;
+    }
+
+    /**
+     * @return boolean true if we need the intercept.
+     */
+    private boolean checkIfNeedToTreatAsIntercept( ExperimentalFactor experimentalFactor,
+            QuantitationType quantitationType ) {
+        if ( experimentalFactor.getFactorValues().size() == 1 ) {
+            if ( quantitationType.getIsRatio() ) {
+                return true;
+            }
+            throw new IllegalArgumentException(
+                    "Cannot deal with constant factors unless the data are ratiometric non-reference design" );
+        }
+        return false;
+    }
+
+    /**
+     * Create files that can be used in R to check the results.
+     */
+    private void outputForDebugging( ExpressionDataDoubleMatrix dmatrix,
+            ObjectMatrix<String, String, Object> designMatrix ) {
+        MatrixWriter mw = new MatrixWriter( entityUrlBuilder, buildInfo );
+        try ( FileWriter writer = new FileWriter( File.createTempFile( "data.", ".txt" ) );
+                FileWriter out = new FileWriter( File.createTempFile( "design.", ".txt" ) ) ) {
+
+            mw.write( dmatrix, ProcessedExpressionDataVector.class, writer );
+
+            ubic.basecode.io.writer.MatrixWriter<String, String> dem = new ubic.basecode.io.writer.MatrixWriter<>(
+                    out );
+            dem.writeMatrix( designMatrix, true );
+
+        } catch ( IOException e ) {
+            log.error( "An I/O error occurred when producing debugging output.", e );
+        }
+    }
+
+    /**
+     * Populate the interactionFactorLists and label2Factors
+     *
+     * @param interactionFactorLists gets populated
+     */
+    private void buildModelFormula( final DifferentialExpressionAnalysisConfig config,
+            final Map<String, Collection<ExperimentalFactor>> label2Factors,
+            final List<String[]> interactionFactorLists ) {
+
+        /*
+         * Add interaction terms
+         */
+        boolean hasInteractionTerms = config.getInteractionsToInclude() != null && !config.getInteractionsToInclude().isEmpty();
 
-                        this.makeContrast( probeAnalysisResult, factorsForName, term, factorName, contrastPvalue,
-                                interactionContrastTStats, interactionContrastCoeffs );
+        if ( hasInteractionTerms ) {
+            for ( Collection<ExperimentalFactor> interactionTerms : config.getInteractionsToInclude() ) {
 
-                    }
-                } else {
-                    throw new UnsupportedOperationException( "Handling more than two-way interactions is not implemented." );
+                List<String> interactionFactorNames = new ArrayList<>();
+                for ( ExperimentalFactor factor : interactionTerms ) {
+                    interactionFactorNames.add( DiffExAnalyzerUtils.nameForR( factor ) );
                 }
 
-                probeAnalysisResult.setPvalue( this.nan2Null( overallPValue ) );
-                pvaluesForQvalue.get( factorName ).add( overallPValue );
-                resultLists.get( factorName ).add( probeAnalysisResult );
-            } // over terms
+                interactionFactorLists.add( interactionFactorNames.toArray( new String[] {} ) );
 
-            if ( ++processed % 1000 == 0 ) {
-                LinearModelAnalyzer.log.info( "Processed results for " + processed + " elements..." );
+                // In the ANOVA table.
+                String factTableLabel = StringUtils.join( interactionFactorNames, ":" );
+                label2Factors.put( factTableLabel, new HashSet<>() );
+                label2Factors.get( factTableLabel ).addAll( interactionTerms );
             }
-        } // over probes
-
-        this.fillRanksAndQvalues( resultLists, pvaluesForQvalue );
-
-        DifferentialExpressionAnalysis expressionAnalysis = this
-                .makeAnalysisEntity( bioAssaySet, expressionData.getQuantitationType(), config, label2Factors,
-                        baselineConditions, interceptFactor, interactionFactorLists, oneSampleTTest, resultLists,
-                        subsetFactorValue, filterResultDescription );
+        }
+    }
 
-        LinearModelAnalyzer.log.info( "Analysis processing phase done ..." );
+    private int countNumberOfGenesNotSeenAlready( @Nullable Collection<Gene> genesForProbe, Collection<Gene> seenGenes ) {
+        int numGenes = 0;
+        if ( genesForProbe != null ) {
+            for ( Gene g : genesForProbe ) {
+                if ( seenGenes.contains( g ) ) {
+                    continue;
+                }
+                numGenes++;
+            }
+            seenGenes.addAll( genesForProbe );
+        }
 
-        return expressionAnalysis;
+        return numGenes;
     }
 
     private void warnForElement( CompositeSequence el, String s, int warned ) {
@@ -929,63 +841,6 @@ private Map<BioMaterial, Set<FactorValue>> getSampleToFactorValuesMap( Experimen
                         .collect( Collectors.toSet() ) ) );
     }
 
-    /**
-     * Remove all configurations that have to do with factors that aren't in the selected factors.
-     *
-     * @param factors the factors that will be included
-     * @return an updated config; the baselines are cleared; subset is cleared; interactions are only kept if
-     * they only
-     * involve the given factors.
-     */
-    private DifferentialExpressionAnalysisConfig fixConfigForSubset( List<ExperimentalFactor> factors,
-            FactorValue subsetFactorValue, DifferentialExpressionAnalysisConfig config ) {
-        DifferentialExpressionAnalysisConfig newConfig = new DifferentialExpressionAnalysisConfig( config );
-        newConfig.setSubsetFactor( null );
-        newConfig.setSubsetFactorValue( subsetFactorValue );
-        // remove any factors not in the given subset
-        Set<ExperimentalFactor> factorsSet = new HashSet<>( factors );
-        newConfig.getFactorsToInclude().removeIf( f -> !factorsSet.contains( f ) );
-        newConfig.getInteractionsToInclude().removeIf( fv -> !factorsSet.containsAll( fv ) );
-        return newConfig;
-    }
-
-    /**
-     * Remove factors which are no longer usable, based on the subset.
-     */
-    private List<ExperimentalFactor> fixFactorsForSubset( ExpressionExperimentSubSet eesubSet, ExpressionDataDoubleMatrix dmatrix,
-            List<ExperimentalFactor> factors ) {
-
-        List<ExperimentalFactor> result = new ArrayList<>();
-
-        QuantitationType quantitationType = dmatrix.getQuantitationType();
-        ExperimentalFactor interceptFactor = this.determineInterceptFactor( factors, quantitationType );
-
-        /*
-         * Remove any constant factors, unless they are that intercept.
-         */
-        for ( ExperimentalFactor f : factors ) {
-
-            if ( f.getType().equals( FactorType.CONTINUOUS ) ) {
-                result.add( f );
-            } else if ( interceptFactor != null && interceptFactor.equals( f ) ) {
-                result.add( f );
-            } else {
-
-                Collection<FactorValue> levels = new HashSet<>();
-                DiffExAnalyzerUtils.populateFactorValuesFromBASet( eesubSet, f, levels );
-
-                if ( levels.size() > 1 ) {
-                    result.add( f );
-                } else {
-                    LinearModelAnalyzer.log.info( "Dropping " + f + " from subset" );
-                }
-
-            }
-
-        }
-
-        return result;
-    }
 
     /**
      * Needed to compute the number of genes tested/detected.
@@ -1153,6 +1008,157 @@ private Set<ExpressionAnalysisResultSet> makeResultSets(
         return resultSets;
     }
 
+    /**
+     * Generate HitListSize entities that will be stored to count the number of diff. ex probes at various preset
+     * thresholds, to avoid wasting time generating these counts on the fly later. This is done automatically during
+     * analysis, so is just here to allow 'backfilling'.
+     *
+     * @param probeToGeneMap map
+     * @param results        results
+     * @return hit list sizes
+     */
+    private Set<HitListSize> computeHitListSizes( Collection<DifferentialExpressionAnalysisResult> results,
+            Map<CompositeSequence, Collection<Gene>> probeToGeneMap ) {
+        Set<HitListSize> hitListSizes = new HashSet<>();
+        StopWatch timer = new StopWatch();
+        timer.start();
+        double maxThreshold = MathUtil.max( LinearModelAnalyzer.qValueThresholdsForHitLists );
+
+        Collection<Gene> allGenes = new HashSet<>();
+        for ( Collection<Gene> genes : probeToGeneMap.values() ) {
+            allGenes.addAll( genes );
+        }
+
+        // maps from Doubles are a bit dodgy...
+        Map<Double, Integer> upCounts = new HashMap<>();
+        Map<Double, Integer> downCounts = new HashMap<>();
+        Map<Double, Integer> eitherCounts = new HashMap<>();
+
+        Map<Double, Integer> upCountGenes = new HashMap<>();
+        Map<Double, Integer> downCountGenes = new HashMap<>();
+        Map<Double, Integer> eitherCountGenes = new HashMap<>();
+
+        Collection<Gene> seenGenes = new HashSet<>();
+        for ( DifferentialExpressionAnalysisResult r : results ) {
+
+            Double corrP = r.getCorrectedPvalue();
+            if ( corrP == null || corrP > maxThreshold ) {
+                continue;
+            }
+
+            CompositeSequence probe = r.getProbe();
+            Collection<Gene> genesForProbe = probeToGeneMap.get( probe );
+            int numGenes = 0;
+            if ( genesForProbe != null ) {
+                numGenes = this.countNumberOfGenesNotSeenAlready( genesForProbe, seenGenes );
+            }
+
+            // if ( numGenes == 0 ) // This is okay; it might mean we already counted it as differentially expressed.
+
+            Collection<ContrastResult> crs = r.getContrasts();
+            boolean up = false;
+            boolean down = false;
+
+            /*
+             * We set up and down to be true (either or both) if at least on contrast is shown.
+             */
+            for ( ContrastResult cr : crs ) {
+                Double lf = cr.getLogFoldChange();
+                //noinspection StatementWithEmptyBody // Better readability
+                if ( lf == null ) {
+                    /*
+                     * A contrast which is actually not valid, so it won't be counted in the hit list.
+                     */
+                } else if ( lf < 0 ) {
+                    down = true;
+                } else if ( lf > 0 ) {
+                    up = true;
+                }
+            }
+
+            for ( double thresh : LinearModelAnalyzer.qValueThresholdsForHitLists ) {
+
+                if ( !upCounts.containsKey( thresh ) ) {
+                    upCounts.put( thresh, 0 );
+                    upCountGenes.put( thresh, 0 );
+                }
+                if ( !downCounts.containsKey( thresh ) ) {
+                    downCounts.put( thresh, 0 );
+                    downCountGenes.put( thresh, 0 );
+                }
+                if ( !eitherCounts.containsKey( thresh ) ) {
+                    eitherCounts.put( thresh, 0 );
+                    eitherCountGenes.put( thresh, 0 );
+                }
+
+                if ( corrP < thresh ) {
+                    if ( up ) {
+                        upCounts.put( thresh, upCounts.get( thresh ) + 1 );
+                        upCountGenes.put( thresh, upCountGenes.get( thresh ) + numGenes );
+                    }
+                    if ( down ) {
+                        downCounts.put( thresh, downCounts.get( thresh ) + 1 );
+                        downCountGenes.put( thresh, downCountGenes.get( thresh ) + numGenes );
+                    }
+
+                    eitherCounts.put( thresh, eitherCounts.get( thresh ) + 1 );
+                    eitherCountGenes.put( thresh, eitherCountGenes.get( thresh ) + numGenes );
+
+                }
+            }
+        }
+
+        for ( double thresh : LinearModelAnalyzer.qValueThresholdsForHitLists ) {
+            // Ensure we don't set values to null.
+            int up = upCounts.getOrDefault( thresh, 0 );
+            int down = downCounts.getOrDefault( thresh, 0 );
+            int either = eitherCounts.getOrDefault( thresh, 0 );
+
+            int upGenes = upCountGenes.getOrDefault( thresh, 0 );
+            int downGenes = downCountGenes.getOrDefault( thresh, 0 );
+            int eitherGenes = eitherCountGenes.getOrDefault( thresh, 0 );
+
+            assert !( allGenes.size() < upGenes || allGenes.size() < downGenes
+                    || allGenes.size() < eitherGenes ) : "There were more genes differentially expressed than exist in the experiment";
+
+            HitListSize upS = HitListSize.Factory.newInstance( thresh, up, Direction.UP, upGenes );
+            HitListSize downS = HitListSize.Factory.newInstance( thresh, down, Direction.DOWN, downGenes );
+            HitListSize eitherS = HitListSize.Factory.newInstance( thresh, either, Direction.EITHER, eitherGenes );
+
+            hitListSizes.add( upS );
+            hitListSizes.add( downS );
+            hitListSizes.add( eitherS );
+
+            assert upGenes <= eitherGenes : "More genes upregulated than upregulated+downregulated";
+            assert downGenes <= eitherGenes : "More genes downregulated than upregulated+downregulated";
+
+        }
+
+        if ( timer.getTime() > 1000 ) {
+            LinearModelAnalyzer.log.info( "Hitlist computation: " + timer.getTime() + " ms" );
+        }
+        return hitListSizes;
+    }
+
+    /**
+     * Utility method
+     *
+     * @param probeToGeneMap map
+     * @param resultList     result list
+     * @return number of genes tested
+     */
+    private int getNumberOfGenesTested( Collection<DifferentialExpressionAnalysisResult> resultList,
+            Map<CompositeSequence, Collection<Gene>> probeToGeneMap ) {
+        Collection<Gene> gs = new HashSet<>();
+        for ( DifferentialExpressionAnalysisResult d : resultList ) {
+            CompositeSequence probe = d.getProbe();
+            if ( probeToGeneMap.containsKey( probe ) ) {
+                gs.addAll( probeToGeneMap.get( probe ) );
+            }
+        }
+        return gs.size();
+    }
+
     /**
      * Add a contrast to the given result.
      */
@@ -1215,7 +1221,7 @@ private void makeContrast( DifferentialExpressionAnalysisResult probeAnalysisRes
 
             for ( ExperimentalFactor f : factorList ) {
                 for ( FactorValue fv : f.getFactorValues() ) {
-                    if ( fv.getId().equals( factorValueId ) ) {
+                    if ( Objects.equals( fv.getId(), factorValueId ) ) {
                         contrast.setFactorValue( fv );
                         break;
                     }
@@ -1237,7 +1243,7 @@ private void makeContrast( DifferentialExpressionAnalysisResult probeAnalysisRes
 
                 for ( ExperimentalFactor f : factorList ) {
                     for ( FactorValue fv : f.getFactorValues() ) {
-                        if ( fv.getId().equals( factorValueId ) ) {
+                        if ( Objects.equals( fv.getId(), factorValueId ) ) {
                             contrast.setSecondFactorValue( fv );
                             break;
                         }
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java
index 3e20bb400f..153612df01 100644
--- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java
+++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java
@@ -240,6 +240,10 @@ class Identifiers {
 
     Map<Long, Collection<AuditEvent>> getAuditEvents( Collection<Long> ids );
 
+    Collection<BioAssay> getBioAssays( ExpressionExperiment ee, boolean includeSubSets );
+
+    Collection<BioMaterial> getSamplesUsed( ExpressionExperiment ee, boolean includeSubSets );
+
     Collection<BioAssayDimension> getBioAssayDimensions( ExpressionExperiment expressionExperiment );
 
     /**
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java
index 5a72437049..5793fe7825 100644
--- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java
@@ -1411,6 +1411,40 @@ public Map<Long, Collection<AuditEvent>> getAuditEvents( Collection<Long> ids )
 
     }
 
+    @Override
+    public Collection<BioAssay> getBioAssays( ExpressionExperiment ee, boolean includeSubSets ) {
+        //noinspection unchecked
+        Set<BioAssay> results = new HashSet<>( ( List<BioAssay> ) getSessionFactory().getCurrentSession()
+                .createQuery( "select ba from ExpressionExperiment ee join ee.bioAssays ba where ee = :ee" )
+                .setParameter( "ee", ee )
+                .list() );
+        if ( includeSubSets ) {
+            //noinspection unchecked
+            results.addAll( ( List<BioAssay> ) getSessionFactory().getCurrentSession()
+                    .createQuery( "select ba from ExpressionExperimentSubSet eess join eess.bioAssays ba where eess.sourceExperiment = :ee" )
+                    .setParameter( "ee", ee )
+                    .list() );
+        }
+        return results;
+    }
+
+    @Override
+    public Collection<BioMaterial> getSamplesUsed( ExpressionExperiment ee, boolean includeSubSets ) {
+        //noinspection unchecked
+        Set<BioMaterial> results = new HashSet<>( ( List<BioMaterial> ) getSessionFactory().getCurrentSession()
+                .createQuery( "select bm from ExpressionExperiment ee join ee.bioAssays ba join ba.sampleUsed bm where ee = :ee" )
+                .setParameter( "ee", ee )
+                .list() );
+        if ( includeSubSets ) {
+            //noinspection unchecked
+            results.addAll( ( List<BioMaterial> ) getSessionFactory().getCurrentSession()
+                    .createQuery( "select bm from ExpressionExperimentSubSet eess join eess.bioAssays ba join ba.sampleUsed bm where eess.sourceExperiment = :ee" )
+                    .setParameter( "ee", ee )
+                    .list() );
+        }
+        return results;
+    }
+
     @Override
     public Collection<BioAssayDimension> getBioAssayDimensions( ExpressionExperiment expressionExperiment ) {
         //noinspection unchecked
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java
index af94f4116e..f5b21b089a 100644
--- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java
+++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java
@@ -664,6 +664,23 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm {
      */
     Map<Taxon, Long> getTaxaUsageFrequency( @Nullable Filters filters, @Nullable Set<Long> extraIds );
 
+    /**
+     * Obtain the assays used by the experiment.
+     *
+     * @param includeSubSets include assays that belong to subsets of the experiment.
+     * @see ExpressionExperiment#getBioAssays()
+     * @see ExpressionExperimentSubSet#getBioAssays()
+     */
+    Collection<BioAssay> getBioAssays( ExpressionExperiment ee, boolean includeSubSets );
+
+    /**
+     * Obtain the samples used by the assay of the experiment.
+     *
+     * @param includeSubSets include samples that are associated to assays that belong to subsets of the experiment.
+     * @see BioAssay#getSampleUsed()
+     */
+    Collection<BioMaterial> getSamplesUsed( ExpressionExperiment ee, boolean includeSubSets );
+
     /**
      * Obtain all the dimensions associated to the given experiment.
      * <p>
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java
index 70a421f5da..eb6e8a6fb2 100755
--- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java
@@ -1336,6 +1336,18 @@ public Map<Taxon, Long> getTaxaUsageFrequency( @Nullable Filters filters, @Nulla
         }
     }
 
+    @Override
+    @Transactional(readOnly = true)
+    public Collection<BioAssay> getBioAssays( ExpressionExperiment ee, boolean includeSubSets ) {
+        return expressionExperimentDao.getBioAssays( ee, includeSubSets );
+    }
+
+    @Override
+    @Transactional(readOnly = true)
+    public Collection<BioMaterial> getSamplesUsed( ExpressionExperiment ee, boolean includeSubSets ) {
+        return expressionExperimentDao.getSamplesUsed( ee, includeSubSets );
+    }
+
     @Override
     @Transactional(readOnly = true)
     public Collection<BioAssayDimension> getBioAssayDimensionsWithAssays( ExpressionExperiment expressionExperiment ) {

From 36efca6c7ae75b7fc8b9f9115ca063db4d343591 Mon Sep 17 00:00:00 2001
From: OganM <ogan.mancarcii@gmail.com>
Date: Fri, 6 Mar 2026 11:51:35 -0800
Subject: [PATCH 2/6] fix setting samplesUsed

---
 .../core/analysis/expression/diff/LinearModelAnalyzer.java    | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java
index cb13235520..5b6048d805 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java
@@ -239,10 +239,12 @@ private Collection<DifferentialExpressionAnalysis> doSubSetAnalysis( ExpressionE
             /*
              * make a EESubSet
              */
+            List<BioMaterial> bioMaterials = orderByExperimentalDesign( dmatrixBySubset.get( subsetFactorValue ), factors, null );
+
             String subsetName = FactorValueUtils.getSummaryString( subsetFactorValue );
             ExpressionExperimentSubSet eeSubSet = ExpressionExperimentSubSet.Factory.newInstance( subsetName, expressionExperiment );
             Collection<BioAssay> bioAssays = new HashSet<>();
-            for ( BioMaterial bm : samplesUsed ) {
+            for ( BioMaterial bm : bioMaterials ) {
                 bioAssays.addAll( bm.getBioAssaysUsedIn() );
             }
             eeSubSet.getBioAssays().addAll( bioAssays );

From 61a66e8b86b37a677dccedc00bfe457fa6a69333 Mon Sep 17 00:00:00 2001
From: OganM <ogan.mancarcii@gmail.com>
Date: Mon, 9 Mar 2026 15:00:22 -0700
Subject: [PATCH 3/6] suggested changes

---
 .../java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java | 2 +-
 .../src/main/java/ubic/gemma/cli/completion/CompletionType.java | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
index 9572094d72..57cbb1e372 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
@@ -246,7 +246,7 @@ protected void buildExperimentOptions( Options options ) {
 
         // destination (db, standard location or custom directory)
         options.addOption( "nodb", "no-db", false, "Do not persist diff. ex. results to the database and instead save them to the current directory (or the location defined by " + formatOption( options, DataFileOptionsUtils.OUTPUT_DIR_OPTION ) + ")." );
-        options.addOption( "nofiles", "no-files", false, "Don't create archive files after analysis. Default is to make them. This is incompatible with " + formatOption( options, "nodb" ) + " option to be set." );
+        options.addOption( "nofiles", "no-files", false, "Don't create archive files after analysis. Default is to make them. This is incompatible with " + formatOption( options, "nodb" ) + "." );
 
         // redo mode
         options.addOption( "redo", "redo", false,
diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/completion/CompletionType.java b/gemma-cli/src/main/java/ubic/gemma/cli/completion/CompletionType.java
index b60179429f..196fddcc5a 100644
--- a/gemma-cli/src/main/java/ubic/gemma/cli/completion/CompletionType.java
+++ b/gemma-cli/src/main/java/ubic/gemma/cli/completion/CompletionType.java
@@ -7,7 +7,6 @@ public enum CompletionType {
     DATASET_GROUP,
     EXTERNAL_DATABASE,
     DATASET,
-    SAMPLE,
     EXPERIMENTAL_FACTOR,
     /**
      * Complete experimental factors and interactions suitable for DEA.

From 1f2a8fe3911d83dcd9f7bc7b1df89c76b45f0de3 Mon Sep 17 00:00:00 2001
From: OganM <ogan.mancarcii@gmail.com>
Date: Mon, 9 Mar 2026 16:14:58 -0700
Subject: [PATCH 4/6] useWeights as a flag

---
 .../gemma/apps/DifferentialExpressionAnalysisCli.java    | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
index 57cbb1e372..6e4c683d99 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
@@ -172,6 +172,7 @@ private enum FactorSelectionMode {
     private Double filterMinUniqueValues;
     @Nullable
     private Double filterMinVariance;
+    private boolean useWeights;
 
     /**
      * Persist results to the database.
@@ -300,6 +301,10 @@ protected void buildExperimentOptions( Options options ) {
                 .type( Double.class )
                 .desc( "Minimum variance required for a design element to be included in the analysis. Defaults to " + DifferentialExpressionAnalysisFilter.DEFAULT_MINIMUM_VARIANCE + "." )
                 .get() );
+        options.addOption( Option.builder( "useWeights" )
+                .hasArg( false )
+                .desc( "Use weights" )
+                .get() );
 
         // delete mode
         options.addOption( "delete", "delete", false, "Remove all the existing analyses for the specified experiment(s). Use with care!" );
@@ -403,6 +408,7 @@ protected void processExperimentOptions( CommandLine commandLine ) throws ParseE
         // TODO: check if the analysis being redone is a subset analysis
         this.ignoreFailingSubsets = hasOption( commandLine, "ignoreFailingSubsets",
                 requires( anyOf( toBeSet( "subset" ), toBeSet( "redo" ) ) ) );
+        this.useWeights = commandLine.hasOption("useWeights");
     }
 
     @Override
@@ -475,7 +481,8 @@ protected void processExpressionExperiment( ExpressionExperiment ee ) {
         config.setPersist( this.persist );
         config.setMakeArchiveFile( this.persist && this.makeArchiveFiles );
         config.setIgnoreFailingSubsets( this.ignoreFailingSubsets );
-        config.setUseWeights( eeService.isRNASeq( ee ) );
+        // if useWeights flag is provided, force to true, otherwise check if RNAseq
+        config.setUseWeights( this.useWeights ? this.useWeights :  eeService.isRNASeq( ee ) );
 
         // sample selection
         if ( this.sampleIdentifiers != null ) {

From a9373de73f6e45ff6b368b8f4de324ee9be88fd0 Mon Sep 17 00:00:00 2001
From: OganM <ogan.mancarcii@gmail.com>
Date: Mon, 9 Mar 2026 18:39:02 -0700
Subject: [PATCH 5/6] actually accept samples

---
 .../ubic/gemma/apps/DifferentialExpressionAnalysisCli.java   | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
index 6e4c683d99..37495d48bc 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
@@ -236,7 +236,10 @@ protected void buildExperimentOptions( Options options ) {
                         + "If the experiment already has subsets for the factor, those will be reused. "
                         + "This is incompatible with -redo,--redo, -redoAnalysis,--redo-analysis or -redoSubset,--redo-subset." ).get() );
 
-        addSingleExperimentOption( options, Option.builder( "samples" ).longOpt( "samples" ).argName( "ID, name, accession" )
+        addSingleExperimentOption( options, Option.builder( "samples" )
+                .longOpt( "samples" )
+                .hasArgs()
+                .argName( "ID, name, accession" )
                 .desc( "ID, name or accession of samples to be included in the analysis. Defaults to all samples in the dataset being analyzed. Requires the " + formatOption( "nodb", "no-db" ) + " option to be set." ).get() );
 
         options.addOption( "usebatch", "use-batch-factor", false, "If a batch factor is available, use it. Otherwise, batch information can/will be ignored in the analysis. This is incompatible with " + formatOption( options, "factors" ) + ", -redo,--redo, -redoAnalysis,--redo-analysis and -redoSubset,--redo-subset." );

From f11133fb943af6474eeb9e3e7d2f12ca453dc817 Mon Sep 17 00:00:00 2001
From: OganM <ogan.mancarcii@gmail.com>
Date: Mon, 9 Mar 2026 20:00:35 -0700
Subject: [PATCH 6/6] an attempt to leverage bioAssays for the search for
 samples

---
 .../ubic/gemma/apps/DifferentialExpressionAnalysisCli.java  | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
index 37495d48bc..3fb6ddd7ed 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java
@@ -493,7 +493,11 @@ protected void processExpressionExperiment( ExpressionExperiment ee ) {
             for ( String sampleId : sampleIdentifiers ) {
                 // allow searching in subsets, those are used in single-cell diff ex. analysis that use sub-samples for
                 // representing pseudo-bulks
-                samplesToUse.add( entityLocator.locateSample( ee, sampleId, true ) );
+                BioMaterial sample = entityLocator.locateSample( ee, sampleId, true );
+                if (sample == null){
+                    sample = entityLocator.locateBioAssay( ee, sampleId, true ).getSampleUsed();
+                }
+                samplesToUse.add( sample );
             }
             config.setSamplesToInclude( samplesToUse );
         }