expressionExperiments = new TreeSet<>( Comparator.comparing( ExpressionExperiment::getId ) );
if ( all ) {
if ( useReferencesIfPossible ) {
@@ -426,46 +437,49 @@ protected void doAuthenticatedWork() throws Exception {
this.removeTroubledExperiments( expressionExperiments );
}
- expressionExperiments = preprocessBioAssaySets( expressionExperiments );
+ expressionExperiments = preprocessExpressionExperiments( expressionExperiments );
if ( expressionExperiments.isEmpty() ) {
throw new RuntimeException( "No expression experiments matched the given options." );
} else if ( expressionExperiments.size() == 1 ) {
- BioAssaySet ee = expressionExperiments.iterator().next();
+ ExpressionExperiment ee = expressionExperiments.iterator().next();
log.info( "Final dataset: " + formatExperiment( ee ) );
- processBioAssaySet( expressionExperiments.iterator().next() );
+ // ee (fetched above) is the sole element of the collection; reuse it instead of creating a second iterator
+ Assert.notNull( ee, "Cannot process a null ExpressionExperiment." );
+ processExpressionExperiment( ee );
} else {
if ( !singleExperimentOptionsUsed.isEmpty() ) {
throw new IllegalStateException( String.format( "There are single-experiment options used: %s, but more than one experiments was found.",
singleExperimentOptionsUsed.stream().map( o -> "-" + o ).collect( Collectors.joining( ", " ) ) ) );
}
log.info( String.format( "Final list: %d expression experiments", expressionExperiments.size() ) );
- processBioAssaySets( expressionExperiments );
+ processExpressionExperiments( expressionExperiments );
}
}
/**
- * Preprocess the set of {@link BioAssaySet} before invoking {@link #processBioAssaySets(Collection)} or
- * {@link #processBioAssaySet(BioAssaySet)}.
+ * Preprocess the set of {@link ExpressionExperiment} before invoking {@link #processExpressionExperiments(Collection)} or
+ * {@link #processExpressionExperiment(ExpressionExperiment)}.
*
* This can be an opportunity to filter or modify the set of experiments.
*/
- protected Collection preprocessBioAssaySets( Collection expressionExperiments ) {
+ protected Collection preprocessExpressionExperiments( Collection expressionExperiments ) {
return expressionExperiments;
}
/**
- * Process multiple {@link BioAssaySet}.
+ * Process multiple {@link ExpressionExperiment}.
*
* This only called if more than one experiment was found.
*/
- protected void processBioAssaySets( Collection expressionExperiments ) {
+ protected void processExpressionExperiments( Collection expressionExperiments ) {
setEstimatedMaxTasks( expressionExperiments.size() );
- for ( BioAssaySet bas : expressionExperiments ) {
+ for ( ExpressionExperiment ee : expressionExperiments ) {
try {
- processBioAssaySet( bas );
+ Assert.notNull( ee, "Cannot process a null ExpressionExperiment." );
+ processExpressionExperiment( ee );
} catch ( Exception e ) {
- addErrorObject( toBatchObject( bas ), e );
+ addErrorObject( toBatchObject( ee ), e );
if ( abortOnError ) {
throw new RuntimeException( "Aborted processing due to error.", e );
}
@@ -473,24 +487,6 @@ protected void processBioAssaySets( Collection expressionExperiment
}
}
- /**
- * Process a BioAssaySet.
- *
- * This method delegates to one of {@link #processExpressionExperiment(ExpressionExperiment)},
- * {@link #processExpressionExperimentSubSet(ExpressionExperimentSubSet)} or {@link #processOtherBioAssaySet(BioAssaySet)}.
- * @throws Exception if an error occurs, it will be collected via {@link #addErrorObject(Serializable, String, Throwable)}
- */
- protected void processBioAssaySet( BioAssaySet bas ) throws Exception {
- Assert.notNull( bas, "Cannot process a null BioAssaySet." );
- if ( bas instanceof ExpressionExperiment ) {
- processExpressionExperiment( ( ExpressionExperiment ) bas );
- } else if ( bas instanceof ExpressionExperimentSubSet ) {
- processExpressionExperimentSubSet( ( ExpressionExperimentSubSet ) bas );
- } else {
- processOtherBioAssaySet( bas );
- }
- }
-
/**
* Process an {@link ExpressionExperiment}.
*/
@@ -498,43 +494,18 @@ protected void processExpressionExperiment( ExpressionExperiment expressionExper
throw new UnsupportedOperationException( "This command line does support experiments." );
}
- /**
- * Process an {@link ExpressionExperimentSubSet}.
- */
- protected void processExpressionExperimentSubSet( @SuppressWarnings("unused") ExpressionExperimentSubSet expressionExperimentSubSet ) throws Exception {
- throw new UnsupportedOperationException( "This command line does support experiment subsets." );
- }
-
- /**
- * Process other kinds of {@link BioAssaySet} that are neither experiment nor subset.
- */
- protected void processOtherBioAssaySet( @SuppressWarnings("unused") BioAssaySet bas ) throws Exception {
- throw new UnsupportedOperationException( "This command line does support other kinds of BioAssaySet." );
- }
-
- @Override
protected final Serializable toBatchObject( @Nullable ExpressionExperiment object ) {
- return toBatchObject( ( BioAssaySet ) object );
- }
-
- protected final Serializable toBatchObject( @Nullable BioAssaySet object ) {
if ( object == null ) {
return null;
}
- if ( object instanceof ExpressionExperiment ) {
- if ( Hibernate.isInitialized( object ) ) {
- return ( ( ExpressionExperiment ) object ).getShortName();
- } else {
- return "ExpressionExperiment Id=" + object.getId();
- }
- } else if ( object instanceof ExpressionExperimentSubSet ) {
- return "ExpressionExperimentSubSet Id=" + object.getId();
+ if ( Hibernate.isInitialized( object ) ) {
+ return object.getShortName();
} else {
- return "BioAssaySet Id=" + object.getId();
+ return "ExpressionExperiment Id=" + object.getId();
}
}
- private void excludeFromFile( Collection expressionExperiments, Path excludeEeFileName ) throws IOException {
+ private void excludeFromFile( Collection expressionExperiments, Path excludeEeFileName ) throws IOException {
assert !expressionExperiments.isEmpty();
Collection excludeExperiments;
excludeExperiments = this.readExpressionExperimentListFile( excludeEeFileName );
@@ -553,7 +524,7 @@ private List experimentsFromCliList( String[] identifiers
return ees;
}
- private Set experimentsFromEeSet( String optionValue ) {
+ private Set experimentsFromEeSet( String optionValue ) {
Assert.isTrue( StringUtils.isNotBlank( optionValue ), "Please provide an eeset name" );
ExpressionExperimentSet eeSet;
try {
@@ -640,7 +611,7 @@ private Collection readExpressionExperimentListFile( Path
/**
* Obtain EEs that are troubled.
*/
- private void removeTroubledExperiments( Collection expressionExperiments ) {
+ private void removeTroubledExperiments( Collection expressionExperiments ) {
if ( expressionExperiments.isEmpty() ) {
log.warn( "No experiments to remove troubled from" );
return;
@@ -653,14 +624,7 @@ private void removeTroubledExperiments( Collection expressionExperi
AtomicInteger removedTroubledExperiments = new AtomicInteger();
expressionExperiments.removeIf( ee -> {
// for subsets, check source experiment troubled flag
- if ( ee instanceof ExpressionExperimentSubSet ) {
- if ( troubledIds.contains( ( ( ExpressionExperimentSubSet ) ee ).getSourceExperiment().getId() ) ) {
- removedTroubledExperiments.incrementAndGet();
- return true;
- } else {
- return false;
- }
- } else if ( troubledIds.contains( ee.getId() ) ) {
+ if ( troubledIds.contains( ee.getId() ) ) {
removedTroubledExperiments.incrementAndGet();
return true;
} else {
@@ -750,15 +714,11 @@ protected void setAbortOnError() {
*
* Use this for printing datasets if {@link #useReferencesIfPossible} is set to prevent {@link org.hibernate.LazyInitializationException}.
*/
- protected String formatExperiment( BioAssaySet bas ) {
+ protected String formatExperiment( ExpressionExperiment bas ) {
if ( Hibernate.isInitialized( bas ) ) {
return bas + " " + entityUrlBuilder.fromHostUrl().entity( bas ).web().toUriString();
- } else if ( bas instanceof ExpressionExperiment ) {
- return "ExpressionExperiment Id=" + bas.getId() + " " + entityUrlBuilder.fromHostUrl().entity( ( ExpressionExperiment ) bas ).web().toUriString();
- } else if ( bas instanceof ExpressionExperimentSubSet ) {
- return "ExpressionExperimentSubSet Id=" + bas.getId() + entityUrlBuilder.fromHostUrl().entity( ( ExpressionExperimentSubSet ) bas ).web().toUriString();
} else {
- return "BioAssaySet Id=" + bas.getId();
+ return "ExpressionExperiment Id=" + bas.getId() + " " + entityUrlBuilder.fromHostUrl().entity( bas ).web().toUriString();
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentPrimaryPubCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentPrimaryPubCli.java
index 0b30c8220d..c0a4d9aeb9 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentPrimaryPubCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentPrimaryPubCli.java
@@ -29,7 +29,6 @@
import ubic.gemma.core.loader.entrez.pubmed.ExpressionExperimentBibRefFinder;
import ubic.gemma.core.loader.entrez.pubmed.PubMedSearch;
import ubic.gemma.model.common.description.BibliographicReference;
-import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.persistence.persister.PersisterHelper;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
@@ -111,14 +110,14 @@ protected void processExperimentOptions( CommandLine commandLine ) throws ParseE
Collection failedEe;
@Override
- protected void processBioAssaySets( Collection expressionExperiments ) {
+ protected void processExpressionExperiments( Collection expressionExperiments ) {
// collect some statistics
nullPubCount = new ArrayList<>();
samePubCount = new ArrayList<>();
diffPubCount = new ArrayList<>();
failedEe = new ArrayList<>();
- super.processBioAssaySets( expressionExperiments );
+ super.processExpressionExperiments( expressionExperiments );
// print statistics
log.info( "\n\n========== Summary ==========" );
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentVectorsManipulatingCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentVectorsManipulatingCli.java
index 4154ff2491..23e2bf4344 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentVectorsManipulatingCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentVectorsManipulatingCli.java
@@ -97,7 +97,7 @@ protected void processExperimentVectorsOptions( CommandLine commandLine ) throws
}
@Override
- protected void processExpressionExperiment( ExpressionExperiment expressionExperiment ) {
+ protected void processExpressionExperiment( ExpressionExperiment expressionExperiment ) throws Exception {
Collection qts;
if ( qtIdentifier != null ) {
qts = Collections.singleton( entityLocator.locateQuantitationType( expressionExperiment, qtIdentifier, quantitationTypeService.getMappedDataVectorType( dataVectorType ) ) );
@@ -126,7 +126,7 @@ protected void processExpressionExperiment( ExpressionExperiment expressionExper
/**
* Process a set of vectors identified by a {@link QuantitationType}.
*/
- protected abstract void processExpressionExperimentVectors( ExpressionExperiment ee, QuantitationType qt );
+ protected abstract void processExpressionExperimentVectors( ExpressionExperiment ee, QuantitationType qt ) throws Exception;
private QuantitationType locatePreferredQuantitationType( ExpressionExperiment expressionExperiment, Class extends DataVector> dataVectorType ) {
if ( RawExpressionDataVector.class.isAssignableFrom( dataVectorType ) ) {
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/ExternalDatabaseAdderCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/ExternalDatabaseAdderCli.java
index a8c6f07e14..b9b6019219 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/ExternalDatabaseAdderCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/ExternalDatabaseAdderCli.java
@@ -22,13 +22,19 @@
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.cli.util.AbstractAuthenticatedCLI;
import ubic.gemma.cli.util.CLI;
+import ubic.gemma.cli.util.OptionsUtils;
import ubic.gemma.model.common.description.DatabaseType;
import ubic.gemma.model.common.description.ExternalDatabase;
import ubic.gemma.persistence.service.common.description.ExternalDatabaseService;
+import javax.annotation.Nullable;
+
+import static ubic.gemma.cli.util.OptionsUtils.getEnumOptionValue;
+
/**
* Add a new external database, but requires editing the code to do so. It can be done by SQL manually as well.
*
@@ -40,6 +46,8 @@ public class ExternalDatabaseAdderCli extends AbstractAuthenticatedCLI {
private ExternalDatabaseService externalDatabaseService;
private String name;
+ @Nullable
+ private String description;
private DatabaseType type;
@Override
@@ -60,18 +68,24 @@ public String getShortDesc() {
@Override
protected void buildOptions( Options options ) {
options.addOption( Option.builder( "n" ).longOpt( "name" ).hasArg().required().build() );
- options.addOption( Option.builder( "t" ).longOpt( "type" ).hasArg().required().build() );
+ options.addOption( Option.builder( "d" ).longOpt( "description" ).hasArg().build() );
+ OptionsUtils.addEnumOption( options, "t", "type", "Type of external database to create.", DatabaseType.class );
}
@Override
- protected void processOptions( CommandLine commandLine ) {
+ protected void processOptions( CommandLine commandLine ) throws ParseException {
this.name = commandLine.getOptionValue( "n" );
- this.type = DatabaseType.valueOf( commandLine.getOptionValue( "t" ).toUpperCase() );
+ this.description = commandLine.getOptionValue( "d" );
+ this.type = getEnumOptionValue( commandLine, "t" );
}
@Override
protected void doAuthenticatedWork() throws Exception {
- ExternalDatabase created = externalDatabaseService.create( ExternalDatabase.Factory.newInstance( name, type ) );
+ ExternalDatabase ed = ExternalDatabase.Factory.newInstance( name, type );
+ if ( description != null ) {
+ ed.setDescription( description );
+ }
+ ExternalDatabase created = externalDatabaseService.create( ed );
log.info( "Created " + created );
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/LinkAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/LinkAnalysisCli.java
index 9afe94a011..757ea1c788 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/LinkAnalysisCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/LinkAnalysisCli.java
@@ -42,7 +42,6 @@
import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.designElement.CompositeSequence;
-import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject;
import ubic.gemma.model.genome.Taxon;
@@ -391,11 +390,11 @@ protected void doAuthenticatedWork() throws Exception {
}
@Override
- protected Collection preprocessBioAssaySets( Collection expressionExperiments ) {
+ protected Collection preprocessExpressionExperiments( Collection expressionExperiments ) {
/*
* Do in decreasing order of size, to help capture more links earlier - reduces fragmentation.
*/
- List sees = new ArrayList<>( expressionExperiments );
+ List sees = new ArrayList<>( expressionExperiments );
if ( expressionExperiments.size() > 1 ) {
log.info( "Sorting data sets by number of samples, doing large data sets first." );
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/ListQuantitationTypesCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/ListQuantitationTypesCli.java
index 6efb8228dc..adb8adba16 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/ListQuantitationTypesCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/ListQuantitationTypesCli.java
@@ -45,7 +45,7 @@ public String getShortDesc() {
}
@Override
- protected void processExpressionExperiment( ExpressionExperiment expressionExperiment ) {
+ protected void processExpressionExperiment( ExpressionExperiment expressionExperiment ) throws Exception {
getCliContext().getOutputStream().println( formatExperiment( expressionExperiment ) );
super.processExpressionExperiment( expressionExperiment );
getCliContext().getOutputStream().println();
@@ -67,9 +67,17 @@ protected void processExpressionExperimentVectors( ExpressionExperiment ee, Quan
}
BioAssayDimension dimension;
SingleCellDimension scd;
+ SingleCellExpressionExperimentService.SingleCellDimensionInitializationConfig initializationConfig = SingleCellExpressionExperimentService.SingleCellDimensionInitializationConfig.builder()
+ .includeBioAssays( true )
+ .includeCtas( true )
+ .includeClcs( true )
+ .includeProtocol( true )
+ .includeCharacteristics( true )
+ .includeIndices( false )
+ .build();
if ( ( dimension = eeService.getBioAssayDimension( ee, qt ) ) != null ) {
getCliContext().getOutputStream().println( "\t\t" + dimension );
- } else if ( ( scd = singleCellExpressionExperimentService.getSingleCellDimensionWithoutCellIds( ee, qt, true, true, true, true, false ) ) != null ) {
+ } else if ( ( scd = singleCellExpressionExperimentService.getSingleCellDimensionWithoutCellIds( ee, qt, initializationConfig ) ) != null ) {
getCliContext().getOutputStream().println( "\t\t" + scd );
try ( Stream cellIds = singleCellExpressionExperimentService.streamCellIds( ee, qt, true ) ) {
if ( cellIds != null ) {
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/LockExpressionDataFileCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/LockExpressionDataFileCli.java
index a983b27420..0d09134396 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/LockExpressionDataFileCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/LockExpressionDataFileCli.java
@@ -4,7 +4,6 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import ubic.gemma.core.analysis.service.ExpressionDataFileService;
-import ubic.gemma.core.analysis.service.ExpressionDataFileUtils;
import ubic.gemma.core.util.locking.FileLockInfoUtils;
import ubic.gemma.core.util.locking.FileLockManager;
import ubic.gemma.core.util.locking.LockedPath;
@@ -16,6 +15,8 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
+import static ubic.gemma.core.analysis.service.ExpressionDataFileUtils.getExpressionExperimentMetadataDirname;
+
/**
* This CLI allows one to lock an experiment data or metadata file.
* @author poirigui
@@ -96,7 +97,7 @@ private void logLockStatus( ExpressionExperiment ee ) {
Path p;
if ( metadata ) {
p = metadataDir
- .resolve( ExpressionDataFileUtils.getEEFolderName( ee ) )
+ .resolve( getExpressionExperimentMetadataDirname( ee ) )
.resolve( filename );
} else {
p = dataDir.resolve( filename );
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentPrivateCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentPrivateCli.java
index 427e397cf1..f4f97dc2a2 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentPrivateCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentPrivateCli.java
@@ -3,7 +3,7 @@
import gemma.gsec.SecurityService;
import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.model.common.auditAndSecurity.eventType.MakePrivateEvent;
-import ubic.gemma.model.expression.experiment.BioAssaySet;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
public class MakeExperimentPrivateCli extends ExpressionExperimentManipulatingCLI {
@@ -21,7 +21,7 @@ public String getShortDesc() {
}
@Override
- protected void processBioAssaySet( BioAssaySet ee ) {
+ protected void processExpressionExperiment( ExpressionExperiment ee ) {
securityService.makePrivate( ee );
this.auditTrailService.addUpdateEvent( ee, MakePrivateEvent.class, "Made private from command line" );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentsPublicCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentsPublicCli.java
index 7a3d524f36..6beddf0990 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentsPublicCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentsPublicCli.java
@@ -17,7 +17,7 @@
import gemma.gsec.SecurityService;
import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.model.common.auditAndSecurity.eventType.MakePublicEvent;
-import ubic.gemma.model.expression.experiment.BioAssaySet;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
/**
* Make data sets public. You must be the owner of the experiment to do this.
@@ -40,7 +40,7 @@ public String getShortDesc() {
}
@Override
- protected void processBioAssaySet( BioAssaySet ee ) {
+ protected void processExpressionExperiment( ExpressionExperiment ee ) {
securityService.makePublic( ee );
this.auditTrailService.addUpdateEvent( ee, MakePublicEvent.class, "Made public from command line" );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqBatchInfoCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqBatchInfoCli.java
index fb8f93f71e..a4ab7847e5 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqBatchInfoCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqBatchInfoCli.java
@@ -18,7 +18,6 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import ubic.gemma.core.analysis.preprocess.batcheffects.BatchInfoPopulationService;
-import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import java.util.Collection;
@@ -53,9 +52,9 @@ protected void buildExperimentOptions( Options options ) {
}
@Override
- protected Collection preprocessBioAssaySets( Collection expressionExperiments ) {
+ protected Collection preprocessExpressionExperiments( Collection expressionExperiments ) {
log.info( "Checking folders for existing experiments in " + fastqRootDir );
- return super.preprocessBioAssaySets( expressionExperiments );
+ return super.preprocessExpressionExperiments( expressionExperiments );
}
@Override
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqDataAddCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqDataAddCli.java
index 745d1a5ecf..9f4a921805 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqDataAddCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqDataAddCli.java
@@ -29,7 +29,6 @@
import ubic.gemma.model.common.quantitationtype.StandardQuantitationType;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.bioAssay.BioAssay;
-import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import java.io.IOException;
@@ -161,11 +160,11 @@ protected void processExperimentOptions( CommandLine commandLine ) throws ParseE
}
@Override
- protected void processBioAssaySets( Collection bas ) {
+ protected void processExpressionExperiments( Collection bas ) {
if ( !justbackfillLog2cpm ) {
throw new IllegalArgumentException( "Sorry, can only process one experiment with this tool, unless -log2cpm is used." );
}
- super.processBioAssaySets( bas );
+ super.processExpressionExperiments( bas );
}
@Override
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/RawExpressionDataWriterCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/RawExpressionDataWriterCli.java
index c3cb8f383a..92dd8c0232 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/RawExpressionDataWriterCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/RawExpressionDataWriterCli.java
@@ -8,7 +8,6 @@
import ubic.gemma.cli.util.OptionsUtils;
import ubic.gemma.core.analysis.service.ExpressionDataFileService;
import ubic.gemma.core.analysis.service.ExpressionDataFileUtils;
-import ubic.gemma.core.util.locking.FileLockManager;
import ubic.gemma.core.util.locking.LockedPath;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.common.quantitationtype.ScaleType;
@@ -21,6 +20,7 @@
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
@@ -34,9 +34,6 @@ public class RawExpressionDataWriterCli extends ExpressionExperimentVectorsManip
@Autowired
private ExpressionDataFileService expressionDataFileService;
- @Autowired
- private FileLockManager fileLockManager;
-
@Nullable
private String[] samples;
@@ -64,7 +61,7 @@ public String getShortDesc() {
@Override
protected void buildExperimentVectorsOptions( Options options ) {
- addExpressionDataFileOptions( options, "raw data" );
+ addExpressionDataFileOptions( options, "raw data", true );
addSingleExperimentOption( options, Option.builder( "samples" ).longOpt( "samples" ).hasArg().valueSeparator( ',' ).desc( "List of sample identifiers to slice. This is incompatible with -standardLocation/--standard-location." ).build() );
OptionsUtils.addEnumOption( options, "scaleType", "scale-type", "Scale type to use for the data. This is incompatible with -standardLocation/--standard-location.", ScaleType.class );
addForceOption( options );
@@ -72,7 +69,7 @@ protected void buildExperimentVectorsOptions( Options options ) {
@Override
protected void processExperimentVectorsOptions( CommandLine commandLine ) throws ParseException {
- this.result = getExpressionDataFileResult( commandLine );
+ this.result = getExpressionDataFileResult( commandLine, true );
this.samples = commandLine.getOptionValues( "samples" );
this.scaleType = OptionsUtils.getEnumOptionValue( commandLine, "scaleType" );
if ( this.result.isStandardLocation() && this.samples != null ) {
@@ -142,9 +139,9 @@ protected void processExpressionExperimentVectors( ExpressionExperiment ee, Quan
private Writer openOutputFile( Path fileName ) throws IOException {
if ( fileName.toString().endsWith( ".gz" ) ) {
- return new OutputStreamWriter( new GZIPOutputStream( fileLockManager.newOutputStream( fileName ) ), StandardCharsets.UTF_8 );
+ return new OutputStreamWriter( new GZIPOutputStream( Files.newOutputStream( fileName ) ), StandardCharsets.UTF_8 );
} else {
- return fileLockManager.newBufferedWriter( fileName );
+ return Files.newBufferedWriter( fileName );
}
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataAggregatorCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataAggregatorCli.java
index e07ff4ad1c..4423335b27 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataAggregatorCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataAggregatorCli.java
@@ -41,10 +41,13 @@ public class SingleCellDataAggregatorCli extends ExpressionExperimentVectorsMani
private static final String
CTA_OPTION = "cta",
CLC_OPTION = "clc",
+ MASK_OPTION = "mask",
+ NO_MASK_OPTION = "noMask",
FACTOR_OPTION = "factor",
MAKE_PREFERRED_OPTION = "p",
SKIP_POST_PROCESSING_OPTION = "nopost",
ADJUST_LIBRARY_SIZES_OPTION = "adjustLibrarySizes",
+ INCLUDE_MASKED_CELLS_IN_LIBRARY_SIZE_OPTION = "includeMaskedCellsInLibrarySize",
ALLOW_UNMAPPED_CHARACTERISTICS_OPTION = "allowUnmappedCharacteristics",
ALLOW_UNMAPPED_FACTOR_VALUES_OPTION = "allowUnmappedFactorValues",
MAPPING_FILE_OPTION = "mappingFile",
@@ -73,11 +76,15 @@ public class SingleCellDataAggregatorCli extends ExpressionExperimentVectorsMani
private String factorName;
@Nullable
private Path mappingFile;
+ @Nullable
+ private String maskIdentifier;
+ private boolean noMask;
private boolean allowUnmappedCharacteristics;
private boolean allowUnmappedFactorValues;
private boolean makePreferred;
private boolean skipPostProcessing;
private boolean adjustLibrarySizes;
+ private boolean includeMaskedCellsInLibrarySize;
private boolean redo;
@Nullable
private String redoQt;
@@ -106,12 +113,15 @@ public String getShortDesc() {
protected void buildExperimentVectorsOptions( Options options ) {
options.addOption( CTA_OPTION, "cell-type-assignment", true, "Name of the cell type assignment to use (defaults to the preferred one). Incompatible with -" + CLC_OPTION + "." );
addSingleExperimentOption( options, CLC_OPTION, "cell-level-characteristics", true, "Identifier of the cell-level characteristics to use. Incompatible with -" + CTA_OPTION + "." );
+ addSingleExperimentOption( options, MASK_OPTION, "mask", true, "Identifier of the cell-level characteristics to use to mask. Defaults to auto-detecting the mask." );
+ addSingleExperimentOption( options, NO_MASK_OPTION, "no-mask", false, "Do not use a mask if one is auto-detected for aggregating single-cell data. Incompatible with -" + MASK_OPTION + "." );
options.addOption( FACTOR_OPTION, "factor", true, "Identifier of the factor to use (defaults to the cell type factor)" );
addSingleExperimentOption( options, Option.builder( MAPPING_FILE_OPTION ).longOpt( "mapping-file" ).hasArg().type( Path.class ).desc( "File containing explicit mapping between cell-level characteristics and factor values" ).build() );
options.addOption( ALLOW_UNMAPPED_CHARACTERISTICS_OPTION, "allow-unmapped-characteristics", false, "Allow unmapped characteristics from the cell-level characteristics." );
options.addOption( ALLOW_UNMAPPED_FACTOR_VALUES_OPTION, "allow-unmapped-factor-values", false, "Allow unmapped factor values from the experimental factor." );
options.addOption( MAKE_PREFERRED_OPTION, "make-preferred", false, "Make the resulting aggregated data the preferred raw data for the experiment." );
options.addOption( ADJUST_LIBRARY_SIZES_OPTION, false, "Adjust library sizes for the resulting aggregated assays." );
+ options.addOption( INCLUDE_MASKED_CELLS_IN_LIBRARY_SIZE_OPTION, "include-masked-cells-in-library-size", false, "Include masked cells in the library size calculation. By default they are excluded as if they were simply filtered out." );
options.addOption( REDO_OPTION, "redo", false, "Redo the aggregation." );
// a string is fine to use when bulk-processing
options.addOption( REDO_QT_OPTION, "redo-quantitation-type", true, "Quantitation to re-aggregate, defaults to the preferred one. Requires the -" + REDO_OPTION + " flag. Incompatible with -" + REDO_DIMENSION_OPTION + "." );
@@ -127,6 +137,8 @@ protected void processExperimentVectorsOptions( CommandLine commandLine ) throws
if ( ctaIdentifier != null && clcIdentifier != null ) {
throw new ParseException( "Only one of -cta or -clc can be set at a time." );
}
+ maskIdentifier = getOptionValue( commandLine, MASK_OPTION, requires( toBeUnset( NO_MASK_OPTION ) ) );
+ noMask = commandLine.hasOption( NO_MASK_OPTION );
factorName = commandLine.getOptionValue( FACTOR_OPTION );
allowUnmappedCharacteristics = commandLine.hasOption( ALLOW_UNMAPPED_CHARACTERISTICS_OPTION );
allowUnmappedFactorValues = commandLine.hasOption( ALLOW_UNMAPPED_FACTOR_VALUES_OPTION );
@@ -135,6 +147,7 @@ protected void processExperimentVectorsOptions( CommandLine commandLine ) throws
makePreferred = commandLine.hasOption( MAKE_PREFERRED_OPTION );
skipPostProcessing = commandLine.hasOption( SKIP_POST_PROCESSING_OPTION );
adjustLibrarySizes = commandLine.hasOption( ADJUST_LIBRARY_SIZES_OPTION );
+ includeMaskedCellsInLibrarySize = commandLine.hasOption( INCLUDE_MASKED_CELLS_IN_LIBRARY_SIZE_OPTION );
redo = commandLine.hasOption( REDO_OPTION );
redoQt = getOptionValue( commandLine, REDO_QT_OPTION,
requires( allOf( toBeSet( REDO_OPTION ), toBeUnset( REDO_DIMENSION_OPTION ) ) ) );
@@ -159,6 +172,17 @@ protected void processExpressionExperimentVectors( ExpressionExperiment expressi
.orElseThrow( () -> new IllegalStateException( finalExpressionExperiment + " does not have a preferred cell-type assignment for " + qt + "." ) );
}
+ CellLevelCharacteristics mask;
+ if ( noMask ) {
+ mask = null;
+ } else if ( maskIdentifier != null ) {
+ mask = entityLocator.locateCellLevelCharacteristics( expressionExperiment, qt, maskIdentifier );
+ } else {
+ log.info( "Auto-detecting the mask for " + expressionExperiment + " and " + qt + "..." );
+ mask = singleCellExpressionExperimentService.getCellLevelMask( expressionExperiment, qt )
+ .orElse( null );
+ }
+
ExpressionExperiment finalExpressionExperiment1 = expressionExperiment;
ExperimentalFactor cellTypeFactor;
@@ -175,8 +199,10 @@ protected void processExpressionExperimentVectors( ExpressionExperiment expressi
.build();
AggregateConfig config = AggregateConfig.builder()
+ .mask( mask )
.makePreferred( makePreferred )
.adjustLibrarySizes( adjustLibrarySizes )
+ .includeMaskedCellsInLibrarySize( includeMaskedCellsInLibrarySize )
.build();
QuantitationType newQt;
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataDownloaderCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataDownloaderCli.java
index 3d3f37637b..3e777ebee7 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataDownloaderCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataDownloaderCli.java
@@ -385,6 +385,13 @@ protected void doWork() throws Exception {
additionalSupplementaryFiles.addAll( detector.getAdditionalSupplementaryFiles( series, sample ) );
}
if ( skipDownload ) {
+ // emulate the behavior of the MEX downloader, which is to raise an unsupported
+ // exception if MEX data is found at the series-level
+ if ( detectedDataType.equalsIgnoreCase( "MEX" ) ) {
+ if ( detector.hasSingleCellDataInSeries( series, SingleCellDataType.MEX ) ) {
+ throw new UnsupportedOperationException( "MEX files were found, but single-cell data is not supported at the series level." );
+ }
+ }
addSuccessObject( geoAccession, "Download was skipped." );
} else {
if ( dataType != null && supplementaryFile != null ) {
@@ -418,8 +425,14 @@ protected void doWork() throws Exception {
}
}
Collection sraAccessions = new ArrayList<>();
- if ( detector.hasSingleCellDataInSra( series, sraAccessions ) ) {
+ Collection otherDataInSra = new ArrayList<>();
+ if ( detector.hasSingleCellDataInSra( series, sraAccessions, otherDataInSra ) ) {
dataInSra = String.join( "|", sraAccessions );
+ } else if ( !otherDataInSra.isEmpty() ) {
+ dataInSra = String.join( "|", otherDataInSra );
+ comment = "Data found in SRA might not be single-cell data.";
+ } else {
+ log.warn( "No data found in SRA for " + geoAccession + "." );
}
} catch ( Exception e ) {
addErrorObject( geoAccession, e );
@@ -493,10 +506,14 @@ private CSVPrinter getSummaryOutputFilePrinter() throws IOException {
return null;
}
CSVFormat.Builder csvFormatBuilder = CSVFormat.TDF.builder();
- if ( !resume ) {
- csvFormatBuilder.setHeader( SUMMARY_HEADER );
+ if ( resume ) {
+ return csvFormatBuilder.get()
+ .print( Files.newBufferedWriter( summaryOutputFile, StandardOpenOption.APPEND ) );
+ } else {
+ return csvFormatBuilder.setHeader( SUMMARY_HEADER )
+ .get()
+ .print( Files.newBufferedWriter( summaryOutputFile ) );
}
- return csvFormatBuilder.get().print( Files.newBufferedWriter( summaryOutputFile, resume ? StandardOpenOption.APPEND : StandardOpenOption.CREATE ) );
}
@Nullable
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataLoaderCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataLoaderCli.java
index 8c7d6cbc51..c5ac342cd9 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataLoaderCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataLoaderCli.java
@@ -18,13 +18,14 @@
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics;
import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment;
-import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.file.Path;
+import java.util.Arrays;
import java.util.Collection;
+import java.util.List;
import java.util.Map;
import static ubic.gemma.cli.util.EntityOptionsUtils.addGenericPlatformOption;
@@ -54,9 +55,13 @@ public class SingleCellDataLoaderCli extends ExpressionExperimentManipulatingCLI
private static final String
CELL_TYPE_ASSIGNMENT_FILE_OPTION = "ctaFile",
CELL_TYPE_ASSIGNMENT_NAME_OPTION = "ctaName",
+ CELL_TYPE_ASSIGNMENT_DESCRIPTION_OPTION = "ctaDescription",
CELL_TYPE_ASSIGNMENT_PROTOCOL_NAME_OPTION = "ctaProtocol",
- PREFERRED_CELL_TYPE_ASSIGNMENT = "preferredCta",
+ REPLACE_CELL_TYPE_ASSIGNMENT_OPTION = "replaceCta",
+ PREFERRED_CELL_TYPE_ASSIGNMENT_OPTION = "preferredCta",
+ OTHER_CELL_LEVEL_CHARACTERISTICS_NAME = "clcName",
OTHER_CELL_LEVEL_CHARACTERISTICS_FILE = "clcFile",
+ REPLACE_OTHER_CELL_LEVEL_CHARACTERISTICS_OPTION = "replaceClc",
INFER_SAMPLES_FROM_CELL_IDS_OVERLAP_OPTION = "inferSamplesFromCellIdsOverlap",
IGNORE_UNMATCHED_CELL_IDS_OPTION = "ignoreUnmatchedCellIds";
@@ -129,10 +134,16 @@ enum Mode {
@Nullable
private String cellTypeAssignmentName;
@Nullable
+ private String cellTypeAssignmentDescription;
+ @Nullable
private String cellTypeAssignmentProtocolName;
private boolean preferredCellTypeAssignment;
+ private boolean replaceExistingCellTypeAssignments;
@Nullable
private Path otherCellLevelCharacteristicsFile;
+ @Nullable
+ private List otherCellLevelCharacteristicsNames;
+ private boolean replaceExistingOtherCellLevelCharacteristics;
private boolean inferSamplesFromCellIdsOverlap;
private boolean ignoreUnmatchedCellIds;
@Nullable
@@ -165,6 +176,11 @@ enum Mode {
private boolean mexAllowMappingDesignElementsToGeneSymbols;
private boolean mexUseDoublePrecision;
+ // options for streaming vectors when writing data to disk
+ private boolean useStreaming;
+ private int fetchSize;
+ private boolean useCursorFetchIfSupported;
+
@Nullable
@Override
public String getCommandName() {
@@ -188,7 +204,7 @@ protected void buildExperimentOptions( Options options ) {
.longOpt( "data-path" )
.hasArg()
.type( Path.class )
- .desc( "Load single-cell data from the given path instead of looking up the download directory. For AnnData and Seurat Disk, it is a file. For MEX it is a directory. Requires the -" + DATA_TYPE_OPTION + " option to be set." )
+ .desc( "Load single-cell data from the given path instead of looking up the download directory. For AnnData and Seurat Disk, it is a file. For MEX it is a directory. Requires the " + formatOption( options, DATA_TYPE_OPTION ) + " option to be set." )
.build() );
addGenericPlatformOption( options, PLATFORM_OPTION, "platform", "Target platform (must already exist in the system)" );
options.addOption( QT_NAME_OPTION, "quantitation-type-name", true, "Quantitation type to import (optional, use if more than one is present in data)" );
@@ -211,28 +227,32 @@ protected void buildExperimentOptions( Options options ) {
options.addOption( Option.builder( CELL_TYPE_ASSIGNMENT_FILE_OPTION )
.longOpt( "cell-type-assignment-file" )
.hasArg().type( Path.class )
- .desc( "Path to a cell type assignment file. If missing, cell type importing will be delegated to a specific loader. For AnnData, you must supply the -" + ANNDATA_CELL_TYPE_FACTOR_NAME_OPTION + " option." )
+ .desc( "Path to a cell type assignment file. If missing, cell type importing will be delegated to the loader implementation." )
.build() );
- options.addOption( CELL_TYPE_ASSIGNMENT_NAME_OPTION, "cell-type-assignment-name", true, "Name to use for the cell type assignment. This require the -" + CELL_TYPE_ASSIGNMENT_FILE_OPTION + " option to be set." );
+ options.addOption( CELL_TYPE_ASSIGNMENT_NAME_OPTION, "cell-type-assignment-name", true, "Name to use for the cell type assignment. The " + formatOption( options, CELL_TYPE_ASSIGNMENT_FILE_OPTION ) + " option must be set." );
+ options.addOption( CELL_TYPE_ASSIGNMENT_DESCRIPTION_OPTION, "cell-type-assignment-description", true, "Description to use for the cell type assignment. The " + formatOption( options, CELL_TYPE_ASSIGNMENT_FILE_OPTION ) + " option must be set." );
options.addOption( Option.builder( CELL_TYPE_ASSIGNMENT_PROTOCOL_NAME_OPTION )
.longOpt( "cell-type-assignment-protocol" ).hasArg()
.converter( EnumeratedByCommandStringConverter.of( CompletionUtils.generateCompleteCommand( CompletionType.PROTOCOL ) ) )
- .desc( "An identifier for a protocol describing the cell type assignment. This require the -" + CELL_TYPE_ASSIGNMENT_FILE_OPTION + " option to be set." )
+ .desc( "An identifier for a protocol describing the cell type assignment. This requires the " + formatOption( options, CELL_TYPE_ASSIGNMENT_FILE_OPTION ) + " option to be set." )
.build() );
- options.addOption( PREFERRED_CELL_TYPE_ASSIGNMENT, "preferred-cell-type-assignment", false, "Make the cell type assignment the preferred one." );
+ options.addOption( REPLACE_CELL_TYPE_ASSIGNMENT_OPTION, "replace-cell-type-assignment", false, String.format( "Replace an existing cell type assignment with the same name. The %s and %s options must be set.", formatOption( options, CELL_TYPE_ASSIGNMENT_FILE_OPTION ), formatOption( options, CELL_TYPE_ASSIGNMENT_NAME_OPTION ) ) );
+ options.addOption( PREFERRED_CELL_TYPE_ASSIGNMENT_OPTION, "preferred-cell-type-assignment", false, "Make the cell type assignment the preferred one. The " + formatOption( options, CELL_TYPE_ASSIGNMENT_FILE_OPTION ) + " option must be set." );
options.addOption( Option.builder( OTHER_CELL_LEVEL_CHARACTERISTICS_FILE )
.longOpt( "cell-level-characteristics-file" )
.hasArg().type( Path.class )
.desc( "Path to a file containing additional cell-level characteristics to import." )
.build() );
+ options.addOption( Option.builder( OTHER_CELL_LEVEL_CHARACTERISTICS_NAME ).longOpt( "cell-level-characteristics-name" )
+ .hasArgs()
+ .valueSeparator( ',' )
+ .desc( "Name to use for the CLC. If the file contains more than one CLC, multiple names can be provided using ',' as a delimiter." )
+ .build() );
+ options.addOption( REPLACE_OTHER_CELL_LEVEL_CHARACTERISTICS_OPTION, "replace-cell-level-characteristics", false,
+ String.format( "Replace existing cell-level characteristics with the same names. The %s and %s options must be set.", formatOption( options, OTHER_CELL_LEVEL_CHARACTERISTICS_FILE ), formatOption( options, OTHER_CELL_LEVEL_CHARACTERISTICS_NAME ) ) );
options.addOption( INFER_SAMPLES_FROM_CELL_IDS_OVERLAP_OPTION, "infer-samples-from-cell-ids-overlap", false, "Infer sample names from cell IDs overlap." );
options.addOption( IGNORE_UNMATCHED_CELL_IDS_OPTION, "ignore-unmatched-cell-ids", false, "Ignore unmatched cell IDs when loading cell type assignments and other cell-level characteristics." );
- options.addOption( Option.builder( SEQUENCING_METADATA_FILE_OPTION )
- .longOpt( "sequencing-metadata-file" )
- .hasArg().type( Path.class )
- .desc( "Path to a file containing sequencing metadata to import. These values will override defaults set by -" + SEQUENCING_READ_LENGTH_OPTION + " and -" + SEQUENCING_IS_PAIRED_OPTION + "." )
- .build() );
options.addOption( Option.builder( SEQUENCING_READ_LENGTH_OPTION )
.longOpt( "sequencing-read-length" )
.hasArg().type( Integer.class )
@@ -243,11 +263,16 @@ protected void buildExperimentOptions( Options options ) {
"Indicate that the sequencing data is paired.",
SEQUENCING_IS_SINGLE_END_OPTION, "sequencing-is-single-end",
"Indicate that the sequencing data is single-end." );
+ options.addOption( Option.builder( SEQUENCING_METADATA_FILE_OPTION )
+ .longOpt( "sequencing-metadata-file" )
+ .hasArg().type( Path.class )
+ .desc( "Path to a file containing sequencing metadata to import. These values will override defaults set by " + formatOption( options, SEQUENCING_READ_LENGTH_OPTION ) + " and " + formatOption( options, SEQUENCING_IS_PAIRED_OPTION ) + "." )
+ .build() );
// for AnnData
options.addOption( ANNDATA_SAMPLE_FACTOR_NAME_OPTION, "anndata-sample-factor-name", true, "Name of the factor used for the sample name." );
- options.addOption( ANNDATA_CELL_TYPE_FACTOR_NAME_OPTION, "anndata-cell-type-factor-name", true, "Name of the factor used for the cell type, incompatible with -" + CELL_TYPE_ASSIGNMENT_FILE_OPTION + "." );
- options.addOption( ANNDATA_IGNORE_CELL_TYPE_FACTOR_OPTION, "anndata-ignore-cell-type-factor", false, "Do not attempt to load a cell type factor. Incompatible with " + ANNDATA_CELL_TYPE_FACTOR_NAME_OPTION + "." );
+ options.addOption( ANNDATA_CELL_TYPE_FACTOR_NAME_OPTION, "anndata-cell-type-factor-name", true, "Name of the factor used for the cell type, incompatible with " + formatOption( options, CELL_TYPE_ASSIGNMENT_FILE_OPTION ) + "." );
+ options.addOption( ANNDATA_IGNORE_CELL_TYPE_FACTOR_OPTION, "anndata-ignore-cell-type-factor", false, "Do not attempt to load a cell type factor. Incompatible with " + formatOption( options, ANNDATA_CELL_TYPE_FACTOR_NAME_OPTION ) + "." );
options.addOption( ANNDATA_UNKNOWN_CELL_TYPE_INDICATOR_OPTION, "anndata-unknown-cell-type-indicator", true, "Indicator used for missing cell type. Defaults to using the standard -1 categorical code." );
OptionsUtils.addAutoOption( options,
ANNDATA_USE_RAW_X_OPTION, "anndata-use-raw-x", "Use raw.X",
@@ -262,6 +287,10 @@ protected void buildExperimentOptions( Options options ) {
MEX_KEEP_EMPTY_CELLS_OPTION, "mex-keep-empty-cells", "Keep empty cells when loading MEX data." );
options.addOption( MEX_ALLOW_MAPPING_DESIGN_ELEMENTS_TO_GENE_SYMBOLS_OPTION, "mex-allow-mapping-design-elements-to-gene-symbols", false, "Allow mapping probe names to gene symbols when loading MEX data (i.e. the second column in features.tsv.gz)." );
options.addOption( MEX_USE_DOUBLE_PRECISION_OPTION, "mex-use-double-precision", false, "Use double precision (i.e. double and long) for storing vectors" );
+
+ options.addOption( "noStreaming", "no-streaming", false, "Use in-memory storage instead of streaming for retrieving and writing vectors." );
+ options.addOption( Option.builder( "fetchSize" ).longOpt( "fetch-size" ).hasArg( true ).type( Integer.class ).desc( "Fetch size to use when retrieving vectors, incompatible with " + formatOption( options, "noStreaming" ) + "." ).build() );
+ options.addOption( "noCursorFetch", "no-cursor-fetch", false, "Disable cursor fetching on the database server and produce results immediately. This is incompatible with " + formatOption( options, "noStreaming" ) + "." );
}
@Override
@@ -306,16 +335,31 @@ protected void processExperimentOptions( CommandLine commandLine ) throws ParseE
renamingFile = commandLine.getParsedOptionValue( RENAMING_FILE_OPTION );
ignoreSamplesLackingData = commandLine.hasOption( IGNORE_SAMPLES_LACKING_DATA_OPTION );
+ // CTAs
cellTypeAssignmentFile = commandLine.getParsedOptionValue( CELL_TYPE_ASSIGNMENT_FILE_OPTION );
cellTypeAssignmentName = getOptionValue( commandLine, CELL_TYPE_ASSIGNMENT_NAME_OPTION, requires( toBeSet( CELL_TYPE_ASSIGNMENT_FILE_OPTION ) ) );
+ cellTypeAssignmentDescription = getOptionValue( commandLine, CELL_TYPE_ASSIGNMENT_DESCRIPTION_OPTION, requires( toBeSet( CELL_TYPE_ASSIGNMENT_FILE_OPTION ) ) );
cellTypeAssignmentProtocolName = getOptionValue( commandLine, CELL_TYPE_ASSIGNMENT_PROTOCOL_NAME_OPTION, requires( toBeSet( CELL_TYPE_ASSIGNMENT_FILE_OPTION ) ) );
- preferredCellTypeAssignment = hasOption( commandLine, PREFERRED_CELL_TYPE_ASSIGNMENT, requires( toBeSet( CELL_TYPE_ASSIGNMENT_FILE_OPTION ) ) );
+ preferredCellTypeAssignment = hasOption( commandLine, PREFERRED_CELL_TYPE_ASSIGNMENT_OPTION, requires( toBeSet( CELL_TYPE_ASSIGNMENT_FILE_OPTION ) ) );
+ replaceExistingCellTypeAssignments = hasOption( commandLine, REPLACE_CELL_TYPE_ASSIGNMENT_OPTION, requires( allOf( toBeSet( CELL_TYPE_ASSIGNMENT_FILE_OPTION ), toBeSet( CELL_TYPE_ASSIGNMENT_NAME_OPTION ) ) ) );
+
+ // CLCs
+ if ( commandLine.hasOption( OTHER_CELL_LEVEL_CHARACTERISTICS_NAME ) ) {
+ otherCellLevelCharacteristicsNames = Arrays.asList( commandLine.getOptionValues( OTHER_CELL_LEVEL_CHARACTERISTICS_NAME ) );
+ } else {
+ otherCellLevelCharacteristicsNames = null;
+ }
otherCellLevelCharacteristicsFile = commandLine.getParsedOptionValue( OTHER_CELL_LEVEL_CHARACTERISTICS_FILE );
+ replaceExistingOtherCellLevelCharacteristics = hasOption( commandLine, REPLACE_OTHER_CELL_LEVEL_CHARACTERISTICS_OPTION,
+ requires( allOf( toBeSet( OTHER_CELL_LEVEL_CHARACTERISTICS_FILE ), toBeSet( OTHER_CELL_LEVEL_CHARACTERISTICS_NAME ) ) ) );
+
+ // applies to both cell type assignments and other cell-level characteristics
inferSamplesFromCellIdsOverlap = hasOption( commandLine, INFER_SAMPLES_FROM_CELL_IDS_OVERLAP_OPTION,
requires( anyOf( toBeSet( CELL_TYPE_ASSIGNMENT_FILE_OPTION ), toBeSet( OTHER_CELL_LEVEL_CHARACTERISTICS_FILE ) ) ) );
ignoreUnmatchedCellIds = hasOption( commandLine, IGNORE_UNMATCHED_CELL_IDS_OPTION,
requires( anyOf( toBeSet( CELL_TYPE_ASSIGNMENT_FILE_OPTION ), toBeSet( OTHER_CELL_LEVEL_CHARACTERISTICS_FILE ) ) ) );
+ // sequencing metadata
sequencingMetadataFile = commandLine.getParsedOptionValue( SEQUENCING_METADATA_FILE_OPTION );
sequencingReadLength = commandLine.getParsedOptionValue( SEQUENCING_READ_LENGTH_OPTION );
sequencingIsPaired = OptionsUtils.getAutoOptionValue( commandLine, SEQUENCING_IS_PAIRED_OPTION, SEQUENCING_IS_SINGLE_END_OPTION );
@@ -338,6 +382,16 @@ protected void processExperimentOptions( CommandLine commandLine ) throws ParseE
mexAllowMappingDesignElementsToGeneSymbols = commandLine.hasOption( MEX_ALLOW_MAPPING_DESIGN_ELEMENTS_TO_GENE_SYMBOLS_OPTION );
mexUseDoublePrecision = commandLine.hasOption( MEX_USE_DOUBLE_PRECISION_OPTION );
}
+
+ if ( commandLine.hasOption( "noStreaming" ) && commandLine.hasOption( "fetchSize" ) ) {
+ throw new ParseException( "Cannot use -noStreaming/--no-streaming and -fetchSize/--fetch-size at the same time." );
+ }
+ if ( commandLine.hasOption( "noStreaming" ) && commandLine.hasOption( "noCursorFetch" ) ) {
+ throw new ParseException( "Cannot use -noStreaming/--no-streaming and -noCursorFetch/--no-cursor-fetch at the same time." );
+ }
+ this.useStreaming = !commandLine.hasOption( "noStreaming" );
+ this.fetchSize = commandLine.getParsedOptionValue( "fetchSize", 30 );
+ this.useCursorFetchIfSupported = !commandLine.hasOption( "noCursorFetch" );
}
private void rejectInvalidOptionsForDataType( CommandLine commandLine, @Nullable SingleCellDataType dataType ) throws ParseException {
@@ -357,11 +411,11 @@ private void rejectInvalidOptionsForDataType( CommandLine commandLine, @Nullable
}
@Override
- protected void processBioAssaySets( Collection expressionExperiments ) {
+ protected void processExpressionExperiments( Collection expressionExperiments ) {
if ( dataPath != null || qtName != null || cellTypeAssignmentFile != null || otherCellLevelCharacteristicsFile != null || sequencingMetadataFile != null ) {
throw new IllegalArgumentException( "Cannot specify a data path, quantitation type name, cell type assignment file, cell-level characteristics file or sequencing metadata file when processing more than one experiment." );
}
- super.processBioAssaySets( expressionExperiments );
+ super.processExpressionExperiments( expressionExperiments );
}
@Override
@@ -404,7 +458,7 @@ protected void processExpressionExperiment( ExpressionExperiment ee ) {
}
if ( qt.getIsSingleCellPreferred() ) {
log.info( "Generating MEX data files for preferred QT: " + qt + "..." );
- try ( LockedPath lockedPath = expressionDataFileService.writeOrLocateMexSingleCellExpressionData( ee, qt, -1, true ) ) {
+ try ( LockedPath lockedPath = expressionDataFileService.writeOrLocateMexSingleCellExpressionData( ee, qt, useStreaming ? fetchSize : -1, useCursorFetchIfSupported, true ) ) {
log.info( "Generated MEX data file for " + qt + " at " + lockedPath.getPath() + "." );
} catch ( IOException e ) {
throw new RuntimeException( "Failed to generate MEX data files for " + qt + ".", e );
@@ -482,18 +536,18 @@ private SingleCellDataLoaderConfig getConfigForDataType( @Nullable SingleCellDat
if ( cellTypeAssignmentFile != null ) {
configBuilder
.cellTypeAssignmentFile( cellTypeAssignmentFile )
+ .cellTypeAssignmentName( cellTypeAssignmentName )
+ .cellTypeAssignmentDescription( cellTypeAssignmentDescription )
+ .cellTypeAssignmentProtocol( cellTypeAssignmentProtocolName != null ? entityLocator.locateProtocol( cellTypeAssignmentProtocolName ) : null )
+ .replaceExistingCellTypeAssignment( replaceExistingCellTypeAssignments )
.markSingleCellTypeAssignmentAsPreferred( preferredCellTypeAssignment );
- if ( cellTypeAssignmentName != null ) {
- configBuilder
- .cellTypeAssignmentName( cellTypeAssignmentName );
- }
- if ( cellTypeAssignmentProtocolName != null ) {
- configBuilder
- .cellTypeAssignmentProtocol( entityLocator.locateProtocol( cellTypeAssignmentProtocolName ) );
- }
}
+
if ( otherCellLevelCharacteristicsFile != null ) {
- configBuilder.otherCellLevelCharacteristicsFile( otherCellLevelCharacteristicsFile );
+ configBuilder
+ .otherCellLevelCharacteristicsFile( otherCellLevelCharacteristicsFile )
+ .otherCellLevelCharacteristicsNames( otherCellLevelCharacteristicsNames )
+ .replaceExistingOtherCellLevelCharacteristics( replaceExistingOtherCellLevelCharacteristics );
}
// infer only on-demand
configBuilder.inferSamplesFromCellIdsOverlap( inferSamplesFromCellIdsOverlap );
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataWriterCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataWriterCli.java
index f4b117bdfc..26248b1f4a 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataWriterCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataWriterCli.java
@@ -14,7 +14,6 @@
import ubic.gemma.core.datastructure.matrix.ExpressionDataIntegerMatrix;
import ubic.gemma.core.datastructure.matrix.io.MatrixWriter;
import ubic.gemma.core.util.BuildInfo;
-import ubic.gemma.core.util.locking.FileLockManager;
import ubic.gemma.core.util.locking.LockedPath;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.common.quantitationtype.ScaleType;
@@ -54,6 +53,7 @@ public SingleCellDataWriterCli() {
enum MatrixFormat {
TABULAR,
+ CELL_BROWSER,
MEX,
CELL_IDS
}
@@ -67,18 +67,19 @@ enum MatrixFormat {
@Autowired
private ExpressionDataFileService expressionDataFileService;
- @Autowired
- private FileLockManager fileLockManager;
-
@Autowired
private BuildInfo buildInfo;
private MatrixFormat format;
@Nullable
private ScaleType scaleType;
+ private boolean useBioAssayIds;
+ private boolean useRawColumnNames;
private boolean useEnsemblIds;
private boolean useStreaming;
private int fetchSize;
+ private boolean useCursorFetchIfSupported;
+ private boolean autoFlush;
private ExpressionDataFileResult result;
@Nullable
@@ -110,11 +111,15 @@ public String getShortDesc() {
protected void buildExperimentVectorsOptions( Options options ) {
addEnumOption( options, "format", "format", "Format to write the matrix for (defaults to tabular)", MatrixFormat.class );
addEnumOption( options, "scaleType", "scale-type", "Scale type to use when generating data to disk. This is incompatible with -standardLocation/--standard-location.", ScaleType.class );
+ options.addOption( "useBioAssayIds", "use-bioassay-ids", false, "Use BioAssay IDs instead of their names (only for CELL_BROWSER output)." );
+ options.addOption( "useRawColumnNames", "use-raw-column-names", false, "Use raw column names instead of R-friendly ones (only for CELL_BROWSER output)." );
options.addOption( "useEnsemblIds", "use-ensembl-ids", false, "Use Ensembl IDs instead of official gene symbols (only for MEX output). This is incompatible with -standardLocation/--standard-location." );
- options.addOption( "noStreaming", "no-streaming", false, "Use in-memory storage instead streaming for retrieving and writing vectors (defaults to false)" );
+ options.addOption( "noStreaming", "no-streaming", false, "Use in-memory storage instead of streaming for retrieving and writing vectors." );
options.addOption( Option.builder( "fetchSize" ).longOpt( "fetch-size" ).hasArg( true ).type( Integer.class ).desc( "Fetch size to use when retrieving vectors, incompatible with -noStreaming/--no-streaming." ).build() );
+ options.addOption( "noCursorFetch", "no-cursor-fetch", false, "Disable cursor fetching on the database server and produce results immediately. This is incompatible with -noStreaming/--no-streaming." );
+ options.addOption( "noAutoFlush", "no-auto-flush", false, "Do not flush the output stream after writing each vector." );
- addExpressionDataFileOptions( options, "single-cell expression data" );
+ addExpressionDataFileOptions( options, "single-cell expression data", true );
// slicing individual samples
addSingleExperimentOption( options, Option.builder( "samples" )
@@ -136,21 +141,43 @@ protected void buildExperimentVectorsOptions( Options options ) {
@Override
protected void processExperimentVectorsOptions( CommandLine commandLine ) throws ParseException {
- this.useEnsemblIds = commandLine.hasOption( "useEnsemblIds" );
if ( commandLine.hasOption( "noStreaming" ) && commandLine.hasOption( "fetchSize" ) ) {
throw new ParseException( "Cannot use -noStreaming/--no-streaming and -fetchSize/--fetch-size at the same time." );
}
+ if ( commandLine.hasOption( "noStreaming" ) && commandLine.hasOption( "noCursorFetch" ) ) {
+ throw new ParseException( "Cannot use -noStreaming/--no-streaming and -noCursorFetch/--no-cursor-fetch at the same time." );
+ }
this.useStreaming = !commandLine.hasOption( "noStreaming" );
this.fetchSize = commandLine.getParsedOptionValue( "fetchSize", 30 );
+ this.useCursorFetchIfSupported = !commandLine.hasOption( "noCursorFetch" );
+ this.autoFlush = !commandLine.hasOption( "noAutoFlush" );
if ( commandLine.hasOption( "format" ) ) {
this.format = getEnumOptionValue( commandLine, "format" );
} else {
this.format = MatrixFormat.TABULAR;
}
+ if ( commandLine.hasOption( "useEnsemblIds" ) ) {
+ if ( this.format != MatrixFormat.MEX ) {
+ throw new ParseException( "Cannot use -useEnsemblIds with other formats than MEX." );
+ }
+ this.useEnsemblIds = true;
+ }
+ if ( commandLine.hasOption( "useBioAssayIds" ) ) {
+ if ( this.format != MatrixFormat.CELL_BROWSER ) {
+ throw new ParseException( "Cannot use -useBioAssayIds with other formats than CELL_BROWSER." );
+ }
+ this.useBioAssayIds = commandLine.hasOption( "useBioAssayIds" );
+ }
+ if ( commandLine.hasOption( "useRawColumnNames" ) ) {
+ if ( this.format != MatrixFormat.CELL_BROWSER ) {
+ throw new ParseException( "Cannot use -useRawColumnNames with other formats than CELL_BROWSER." );
+ }
+ this.useRawColumnNames = commandLine.hasOption( "useRawColumnNames" );
+ }
if ( commandLine.hasOption( "scaleType" ) ) {
this.scaleType = getEnumOptionValue( commandLine, "scaleType" );
}
- this.result = getExpressionDataFileResult( commandLine );
+ this.result = getExpressionDataFileResult( commandLine, true );
if ( this.result.isStandardLocation() && scaleType != null ) {
throw new ParseException( "Cannot use -standardLocation/--standard-location and -scaleType/--scale-type at the same time." );
}
@@ -270,12 +297,12 @@ private int aggregate( ExpressionExperiment ee, QuantitationType qt, @Nullable L
long numberOfVectors = singleCellExpressionExperimentService.getNumberOfSingleCellDataVectors( ee, qt );
Stream scVecs;
if ( assays != null ) {
- scVecs = singleCellExpressionExperimentService.streamSingleCellDataVectors( ee, assays, qt, fetchSize, true, config );
+ scVecs = singleCellExpressionExperimentService.streamSingleCellDataVectors( ee, assays, qt, fetchSize, useCursorFetchIfSupported, true, config );
} else {
- scVecs = singleCellExpressionExperimentService.streamSingleCellDataVectors( ee, qt, fetchSize, true, config );
+ scVecs = singleCellExpressionExperimentService.streamSingleCellDataVectors( ee, qt, fetchSize, useCursorFetchIfSupported, true, config );
}
vecs = scVecs
- .peek( createStreamMonitor( getClass().getName(), 100, numberOfVectors ) )
+ .peek( createStreamMonitor( ee, qt, getClass().getName(), 100, numberOfVectors ) )
.map( createAggregator( aggregationMethod, cellLevelCharacteristics, aggregateUnknownCharacteristics ) )
.collect( Collectors.toList() );
} else {
@@ -295,7 +322,7 @@ private int aggregate( ExpressionExperiment ee, QuantitationType qt, @Nullable L
matrix = new ExpressionDataDoubleMatrix( vecs );
}
MatrixWriter matrixWriter = new MatrixWriter( entityUrlBuilder, buildInfo );
- matrixWriter.setAutoFlush( true );
+ matrixWriter.setAutoFlush( autoFlush );
matrixWriter.setScaleType( scaleType );
return matrixWriter.write( matrix, writer );
}
@@ -313,12 +340,12 @@ private int slice( ExpressionExperiment ee, QuantitationType qt ) throws IOExcep
} else if ( result.isStandardOutput() ) {
fileName = null;
try ( Writer writer = new OutputStreamWriter( getCliContext().getOutputStream(), StandardCharsets.UTF_8 ) ) {
- return expressionDataFileService.writeTabularSingleCellExpressionData( ee, assays, qt, scaleType, useStreaming ? fetchSize : -1, writer, true );
+ return expressionDataFileService.writeTabularSingleCellExpressionData( ee, assays, qt, scaleType, useStreaming ? fetchSize : -1, useCursorFetchIfSupported, writer, autoFlush );
}
} else {
fileName = result.getOutputFile( getDataOutputFilename( ee, assays, qt, ExpressionDataFileUtils.TABULAR_SC_DATA_SUFFIX ) );
try ( Writer writer = Files.newBufferedWriter( fileName ) ) {
- return expressionDataFileService.writeTabularSingleCellExpressionData( ee, assays, qt, scaleType, useStreaming ? fetchSize : -1, writer, true );
+ return expressionDataFileService.writeTabularSingleCellExpressionData( ee, assays, qt, scaleType, useStreaming ? fetchSize : -1, useCursorFetchIfSupported, writer, autoFlush );
}
}
case MEX:
@@ -331,7 +358,7 @@ private int slice( ExpressionExperiment ee, QuantitationType qt ) throws IOExcep
} else {
fileName = result.getOutputFile( getDataOutputFilename( ee, assays, qt, ExpressionDataFileUtils.MEX_SC_DATA_SUFFIX ) );
assert fileName != null;
- return expressionDataFileService.writeMexSingleCellExpressionData( ee, assays, qt, scaleType, useEnsemblIds, useStreaming ? fetchSize : -1, isForce(), fileName );
+ return expressionDataFileService.writeMexSingleCellExpressionData( ee, assays, qt, scaleType, useEnsemblIds, useStreaming ? fetchSize : -1, useCursorFetchIfSupported, isForce(), fileName, autoFlush );
}
default:
throw new IllegalArgumentException( "Unsupported format: " + format );
@@ -342,24 +369,38 @@ private int raw( ExpressionExperiment ee, QuantitationType qt ) throws IOExcepti
switch ( format ) {
case TABULAR:
if ( result.isStandardLocation() ) {
- try ( LockedPath path = expressionDataFileService.writeOrLocateTabularSingleCellExpressionData( ee, qt, useStreaming ? fetchSize : -1, isForce() ) ) {
+ try ( LockedPath path = expressionDataFileService.writeOrLocateTabularSingleCellExpressionData( ee, qt, useStreaming ? fetchSize : -1, useCursorFetchIfSupported, isForce() ) ) {
fileName = path.getPath();
return 0;
}
} else if ( result.isStandardOutput() ) {
fileName = null;
try ( Writer writer = new OutputStreamWriter( getCliContext().getOutputStream(), StandardCharsets.UTF_8 ) ) {
- return expressionDataFileService.writeTabularSingleCellExpressionData( ee, qt, scaleType, useStreaming ? fetchSize : -1, writer, true );
+ return expressionDataFileService.writeTabularSingleCellExpressionData( ee, qt, scaleType, useStreaming ? fetchSize : -1, useCursorFetchIfSupported, writer, autoFlush );
}
} else {
fileName = result.getOutputFile( getDataOutputFilename( ee, qt, ExpressionDataFileUtils.TABULAR_SC_DATA_SUFFIX ) );
try ( Writer writer = new OutputStreamWriter( openOutputFile( fileName ), StandardCharsets.UTF_8 ) ) {
- return expressionDataFileService.writeTabularSingleCellExpressionData( ee, qt, scaleType, useStreaming ? fetchSize : -1, writer, true );
+ return expressionDataFileService.writeTabularSingleCellExpressionData( ee, qt, scaleType, useStreaming ? fetchSize : -1, useCursorFetchIfSupported, writer, autoFlush );
+ }
+ }
+ case CELL_BROWSER:
+ if ( result.isStandardLocation() ) {
+ throw new UnsupportedOperationException( "Writing Cell Browser-compatible data to the standard location is not supported." );
+ } else if ( result.isStandardOutput() ) {
+ fileName = null;
+ try ( Writer writer = new OutputStreamWriter( getCliContext().getOutputStream(), StandardCharsets.UTF_8 ) ) {
+ return expressionDataFileService.writeCellBrowserSingleCellExpressionData( ee, qt, scaleType, useBioAssayIds, useRawColumnNames, useStreaming ? fetchSize : -1, useCursorFetchIfSupported, writer, autoFlush );
+ }
+ } else {
+ fileName = result.getOutputFile( getDataOutputFilename( ee, qt, ExpressionDataFileUtils.CELL_BROWSER_SC_DATA_SUFFIX ) );
+ try ( Writer writer = new OutputStreamWriter( openOutputFile( fileName ), StandardCharsets.UTF_8 ) ) {
+ return expressionDataFileService.writeCellBrowserSingleCellExpressionData( ee, qt, scaleType, useBioAssayIds, useRawColumnNames, useStreaming ? fetchSize : -1, useCursorFetchIfSupported, writer, autoFlush );
}
}
case MEX:
if ( result.isStandardLocation() ) {
- try ( LockedPath path = expressionDataFileService.writeOrLocateMexSingleCellExpressionData( ee, qt, useStreaming ? fetchSize : -1, isForce() ) ) {
+ try ( LockedPath path = expressionDataFileService.writeOrLocateMexSingleCellExpressionData( ee, qt, useStreaming ? fetchSize : -1, useCursorFetchIfSupported, isForce() ) ) {
fileName = path.getPath();
return 0;
}
@@ -370,7 +411,7 @@ private int raw( ExpressionExperiment ee, QuantitationType qt ) throws IOExcepti
} else {
fileName = result.getOutputFile( getDataOutputFilename( ee, qt, ExpressionDataFileUtils.MEX_SC_DATA_SUFFIX ) );
assert fileName != null;
- return expressionDataFileService.writeMexSingleCellExpressionData( ee, qt, scaleType, useEnsemblIds, useStreaming ? fetchSize : -1, isForce(), fileName );
+ return expressionDataFileService.writeMexSingleCellExpressionData( ee, qt, scaleType, useEnsemblIds, useStreaming ? fetchSize : -1, useCursorFetchIfSupported, isForce(), fileName, autoFlush );
}
case CELL_IDS:
if ( result.isStandardLocation() ) {
@@ -387,10 +428,10 @@ private int raw( ExpressionExperiment ee, QuantitationType qt ) throws IOExcepti
}
} else {
fileName = result.getOutputFile( getDataOutputFilename( ee, qt, ".cellIds.txt.gz" ) );
- try ( PrintStream printStream = new PrintStream( openOutputFile( fileName ), true, StandardCharsets.UTF_8.name() );
+ try ( PrintStream printStream = new PrintStream( openOutputFile( fileName ), autoFlush, StandardCharsets.UTF_8.name() );
Stream stream = singleCellExpressionExperimentService.streamCellIds( ee, qt, true ) ) {
if ( stream != null ) {
- stream.forEach( getCliContext().getOutputStream()::println );
+ stream.forEach( printStream::println );
return 0;
} else {
throw new RuntimeException( "Could not find cell IDs for " + qt + "." );
@@ -404,9 +445,9 @@ private int raw( ExpressionExperiment ee, QuantitationType qt ) throws IOExcepti
private OutputStream openOutputFile( Path fileName ) throws IOException {
if ( fileName.toString().endsWith( ".gz" ) ) {
- return new GZIPOutputStream( fileLockManager.newOutputStream( fileName ) );
+ return new GZIPOutputStream( Files.newOutputStream( fileName ) );
} else {
- return fileLockManager.newOutputStream( fileName );
+ return Files.newOutputStream( fileName );
}
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/SplitExperimentCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/SplitExperimentCli.java
index 7f3ce73aa7..e3769d411e 100644
--- a/gemma-cli/src/main/java/ubic/gemma/apps/SplitExperimentCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/apps/SplitExperimentCli.java
@@ -25,10 +25,7 @@
import org.apache.commons.cli.ParseException;
import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.core.analysis.preprocess.SplitExperimentService;
-import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils;
-import ubic.gemma.model.expression.experiment.ExperimentalFactor;
-import ubic.gemma.model.expression.experiment.ExperimentalFactorValueObject;
-import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.persistence.service.expression.experiment.ExperimentalFactorService;
import java.util.Collection;
@@ -112,7 +109,7 @@ private ExperimentalFactor guessFactor( ExpressionExperiment ee ) {
ExperimentalFactorValueObject fvo = new ExperimentalFactorValueObject( experimentalFactor );
// do not attempt to switch on 'batch'
- if ( ExperimentalDesignUtils.isBatchFactor( experimentalFactor ) ) {
+ if ( ExperimentFactorUtils.isBatchFactor( experimentalFactor ) ) {
continue;
}
@@ -137,7 +134,7 @@ private ExperimentalFactor guessFactor( ExpressionExperiment ee ) {
throw new IllegalArgumentException( "Factor with id=" + factorId + " does not belong to " + ee );
}
- if ( ExperimentalDesignUtils.isBatchFactor( factor ) ) {
+ if ( ExperimentFactorUtils.isBatchFactor( factor ) ) {
throw new IllegalArgumentException( "Selected factor looks like batch, split not allowed, choose another factor instead" );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/completion/BashCompletionGenerator.java b/gemma-cli/src/main/java/ubic/gemma/cli/completion/BashCompletionGenerator.java
index 8eac9c81ff..8ed39fe2ab 100644
--- a/gemma-cli/src/main/java/ubic/gemma/cli/completion/BashCompletionGenerator.java
+++ b/gemma-cli/src/main/java/ubic/gemma/cli/completion/BashCompletionGenerator.java
@@ -8,7 +8,7 @@
import java.util.*;
import java.util.stream.Collectors;
-import static ubic.gemma.cli.util.ShellUtils.quoteIfNecessary;
+import static ubic.gemma.core.util.ShellUtils.quoteIfNecessary;
public class BashCompletionGenerator extends AbstractCompletionGenerator {
diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/completion/FishCompletionGenerator.java b/gemma-cli/src/main/java/ubic/gemma/cli/completion/FishCompletionGenerator.java
index 50c8f558cc..99f0b4d9d9 100644
--- a/gemma-cli/src/main/java/ubic/gemma/cli/completion/FishCompletionGenerator.java
+++ b/gemma-cli/src/main/java/ubic/gemma/cli/completion/FishCompletionGenerator.java
@@ -18,7 +18,7 @@
import java.util.Set;
import java.util.stream.Collectors;
-import static ubic.gemma.cli.util.ShellUtils.quoteIfNecessary;
+import static ubic.gemma.core.util.ShellUtils.quoteIfNecessary;
/**
* Generates fish completion script.
diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/main/GemmaCLI.java b/gemma-cli/src/main/java/ubic/gemma/cli/main/GemmaCLI.java
index 7639d49117..f6738e3062 100644
--- a/gemma-cli/src/main/java/ubic/gemma/cli/main/GemmaCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/cli/main/GemmaCLI.java
@@ -33,6 +33,7 @@
import ubic.gemma.cli.util.*;
import ubic.gemma.core.context.SpringContextUtils;
import ubic.gemma.core.util.BuildInfo;
+import ubic.gemma.core.util.ShellUtils;
import ubic.gemma.core.util.concurrent.ThreadUtils;
import javax.annotation.Nullable;
diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocatorImpl.java b/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocatorImpl.java
index 6add859d43..c99f4f41e6 100644
--- a/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocatorImpl.java
+++ b/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocatorImpl.java
@@ -5,6 +5,7 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.util.Assert;
+import ubic.gemma.core.util.ShellUtils;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis;
import ubic.gemma.model.common.Describable;
import ubic.gemma.model.common.Identifiable;
diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/util/OptionsUtils.java b/gemma-cli/src/main/java/ubic/gemma/cli/util/OptionsUtils.java
index 3ffda7df77..bb38ba6ad5 100644
--- a/gemma-cli/src/main/java/ubic/gemma/cli/util/OptionsUtils.java
+++ b/gemma-cli/src/main/java/ubic/gemma/cli/util/OptionsUtils.java
@@ -17,6 +17,7 @@
import java.util.function.Predicate;
import java.util.stream.Collectors;
+import static java.util.Objects.requireNonNull;
import static org.apache.commons.lang3.StringUtils.appendIfMissing;
public class OptionsUtils {
@@ -110,15 +111,23 @@ public Date apply( String string ) throws ParseException {
* Use {@link #getAutoOptionValue(CommandLine, String, String)} to retrieve its value later on.
*/
public static void addAutoOption( Options options, String optionName, String longOptionName, String description, String noOptionName, String longNoOptionName, String noDescription ) {
- options.addOption( optionName, longOptionName, false, description + " This option is incompatible with -" + noOptionName + "/--" + longNoOptionName + ". Default is to auto-detect." );
- options.addOption( noOptionName, longNoOptionName, false, noDescription + " This option is incompatible with -" + optionName + "/--" + longOptionName + ". Default is to auto-detect." );
+ options.addOption( optionName, longOptionName, false, description + " This option is incompatible with " + formatOption( noOptionName, longNoOptionName ) + ". Default is to auto-detect." );
+ options.addOption( noOptionName, longNoOptionName, false, noDescription + " This option is incompatible with " + formatOption( optionName, longOptionName ) + ". Default is to auto-detect." );
+ }
+
+ /**
+ * Format an option for display.
+ */
+ public static String formatOption( Options options, String optionName ) {
+ return formatOption( requireNonNull( options.getOption( optionName ),
+ () -> "No option with name " + optionName + " exists. Is it possible it hasn't been declared yet?" ) );
}
@Nullable
public static Boolean getAutoOptionValue( CommandLine commandLine, String optionName, String noOptionName ) throws org.apache.commons.cli.ParseException {
if ( commandLine.hasOption( optionName ) && commandLine.hasOption( noOptionName ) ) {
- throw new org.apache.commons.cli.ParseException( String.format( "Cannot specify -%s and -%s at the same time.",
- optionName, noOptionName ) );
+ throw new org.apache.commons.cli.ParseException( String.format( "Cannot specify %s and %s at the same time.",
+ formatOption( commandLine, optionName ), formatOption( commandLine, noOptionName ) ) );
}
if ( commandLine.hasOption( optionName ) ) {
return true;
@@ -207,8 +216,7 @@ public static String[] getOptionValues( CommandLine commandLine, String
* @see #getOptionValue(CommandLine, String, Predicate)
*/
@Nullable
- public static T
- getParsedOptionValue( CommandLine commandLine, String optionName, Predicate predicate ) throws
+ public static T getParsedOptionValue( CommandLine commandLine, String optionName, Predicate predicate ) throws
org.apache.commons.cli.ParseException {
if ( hasOption( commandLine, optionName, predicate ) ) {
return commandLine.getParsedOptionValue( optionName );
@@ -303,11 +311,18 @@ private static String formatOption( CommandLine cl, String optionName ) {
return Arrays.stream( cl.getOptions() )
.filter( o -> o.getOpt().equals( optionName ) )
.findFirst()
- .map( Option::getLongOpt )
- .map( longOpt -> "-" + optionName + "/" + "--" + longOpt )
+ .map( OptionsUtils::formatOption )
.orElse( "-" + optionName );
}
+ private static String formatOption( Option opt ) {
+ return formatOption( opt.getOpt(), opt.getLongOpt() );
+ }
+
+ private static String formatOption( String opt, @Nullable String longOpt ) {
+ return "-" + opt + ( longOpt != null ? ",--" + longOpt : "" );
+ }
+
private static String formatPredicates( Predicate[] predicates, CommandLine cl, String w,
int depth ) {
String s = Arrays.stream( predicates )
diff --git a/gemma-cli/src/test/java/ubic/gemma/apps/ExpressionExperimentManipulatingCLITest.java b/gemma-cli/src/test/java/ubic/gemma/apps/ExpressionExperimentManipulatingCLITest.java
index 88762bc4c0..f314d328ca 100644
--- a/gemma-cli/src/test/java/ubic/gemma/apps/ExpressionExperimentManipulatingCLITest.java
+++ b/gemma-cli/src/test/java/ubic/gemma/apps/ExpressionExperimentManipulatingCLITest.java
@@ -133,7 +133,7 @@ public void testGetExperimentDataFileResult() {
ExpressionExperimentManipulatingCLI cli = new ExpressionExperimentManipulatingCLI() {
@Override
protected void buildExperimentOptions( Options options ) {
- addExpressionDataFileOptions( options, "test data" );
+ addExpressionDataFileOptions( options, "test data", true );
}
};
assertThat( cli )
diff --git a/gemma-cli/src/test/java/ubic/gemma/cli/util/test/CliAssert.java b/gemma-cli/src/test/java/ubic/gemma/cli/util/test/CliAssert.java
index ffb0baa463..c6221bdb07 100644
--- a/gemma-cli/src/test/java/ubic/gemma/cli/util/test/CliAssert.java
+++ b/gemma-cli/src/test/java/ubic/gemma/cli/util/test/CliAssert.java
@@ -6,8 +6,8 @@
import org.assertj.core.api.ByteArrayAssert;
import org.assertj.core.description.Description;
import ubic.gemma.cli.util.CLI;
-import ubic.gemma.cli.util.ShellUtils;
import ubic.gemma.cli.util.TestCliContext;
+import ubic.gemma.core.util.ShellUtils;
import java.io.*;
import java.util.Arrays;
diff --git a/gemma-cli/update-completion-scripts.sh b/gemma-cli/update-completion-scripts.sh
index 1c023a035d..4bd560cac0 100755
--- a/gemma-cli/update-completion-scripts.sh
+++ b/gemma-cli/update-completion-scripts.sh
@@ -11,8 +11,7 @@ fi
project_dir=$(dirname "$0")
gemma_cli_bin="$project_dir"/target/appassembler/bin/gemma-cli
-# TODO: remove gemma-cli-sc once merged in development
-gemma_cli_aliases="gemma-cli gemma-cli-staging gemma-cli-sc"
+gemma_cli_aliases="gemma-cli gemma-cli-staging"
if [ ! -f "$gemma_cli_bin" ]; then
echo "The $gemma_cli_bin executable does not exist. Building..."
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java
index 8b90feacc4..96ade8eaa4 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java
@@ -185,8 +185,12 @@ private void addAnalysisObj( ExpressionExperiment ee, FilterConfig filterConfig,
analysis.setExperimentAnalyzed( ee );
analysis.setName( ee.getShortName() + " link analysis" );
- analysis.getProtocol().setDescription(
- analysis.getProtocol().getDescription() + "# FilterConfig:\n" + filterConfig.toString() );
+ if ( analysis.getProtocol() != null ) {
+ analysis.getProtocol().setDescription(
+ analysis.getProtocol().getDescription() + "# FilterConfig:\n" + filterConfig.toString() );
+ } else {
+ log.warn( analysis + " has no protocol object associated, cannot append the filter configuration." );
+ }
la.setAnalysisObj( analysis );
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/BaselineSelection.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/BaselineSelection.java
index c30aa5a21e..2b542b09bf 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/BaselineSelection.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/BaselineSelection.java
@@ -19,10 +19,13 @@
package ubic.gemma.core.analysis.expression.diff;
+import lombok.extern.apachecommons.CommonsLog;
import ubic.gemma.model.common.description.Characteristic;
-import ubic.gemma.model.expression.experiment.FactorValue;
-import ubic.gemma.model.expression.experiment.Statement;
+import ubic.gemma.model.common.measurement.MeasurementUtils;
+import ubic.gemma.model.expression.biomaterial.BioMaterial;
+import ubic.gemma.model.expression.experiment.*;
+import javax.annotation.Nullable;
import java.util.*;
import static org.apache.commons.lang3.StringUtils.normalizeSpace;
@@ -32,6 +35,7 @@
*
* @author paul
*/
+@CommonsLog
public class BaselineSelection {
// see bug 4316. This term is "control"
@@ -155,4 +159,191 @@ private static boolean isForcedBaseline( Statement stmt ) {
|| BaselineSelection.FORCED_BASELINE_VALUE_URI.equalsIgnoreCase( stmt.getSecondObjectUri() );
}
+ /**
+ * Identify the FactorValue that should be treated as 'Baseline' for each of the given factors. This is done
+ * heuristically, and if all else fails we choose arbitrarily. For continuous factors, the minimum value is treated
+ * as baseline.
+ *
+ * @param factors factors
+ * @param samplesUsed These are used to make sure we don't bother using factor values as baselines if they are not
+ * used by any of the samples. This is important for subsets. If null, this is ignored.
+ * @return map of factors to the baseline factorvalue for that factor.
+ */
+ public static Map getBaselineLevels( Collection factors, @Nullable List samplesUsed ) {
+
+ Map result = new HashMap<>();
+
+ for ( ExperimentalFactor factor : factors ) {
+
+ if ( factor.getFactorValues().isEmpty() ) {
+ throw new IllegalStateException( "Factor has no factor values: " + factor );
+ }
+
+ if ( factor.getType().equals( FactorType.CONTINUOUS ) ) {
+ // then there is no baseline, but we'll take the minimum value.
+ TreeMap sortedVals = new TreeMap<>();
+ for ( FactorValue fv : factor.getFactorValues() ) {
+
+ /*
+ * Check that this factor value is used by at least one of the given samples. Only matters if this
+ * is a subset of the full data set.
+ */
+ if ( samplesUsed != null && !BaselineSelection.used( fv, samplesUsed ) ) {
+ // this factorValue cannot be a candidate baseline for this subset.
+ continue;
+ }
+
+ if ( fv.getMeasurement() == null || fv.getMeasurement().getValue() == null ) {
+ // throw new IllegalStateException( "Continuous factors should have Measurements as values" );
+ // This can happen if a value is missing, as nothing would be added to the BioMaterial.
+ BaselineSelection.log.warn( "No value for continuous factor " + factor + " for a sample, will treat as NaN" );
+ sortedVals.put( Double.NaN, fv );
+ continue;
+ }
+
+ if ( fv.getMeasurement().getValue().isEmpty() ) {
+ BaselineSelection.log.warn( "No value for continuous factor " + factor + " for a sample, will treat as NaN" );
+ sortedVals.put( Double.NaN, fv );
+ continue;
+ }
+
+ double v = MeasurementUtils.measurement2double( fv.getMeasurement() );
+ sortedVals.put( v, fv );
+ }
+
+ if ( sortedVals.isEmpty() ) {
+ BaselineSelection.log.warn( "No values for continuous factor " + factor );
+ continue;
+ }
+ result.put( factor, sortedVals.firstEntry().getValue() );
+
+ } else {
+
+ for ( FactorValue fv : factor.getFactorValues() ) {
+
+ /*
+ * Check that this factor value is used by at least one of the given samples. Only matters if this
+ * is a subset of the full data set.
+ */
+ if ( samplesUsed != null && !BaselineSelection.used( fv, samplesUsed ) ) {
+ // this factorValue cannot be a candidate baseline for this subset.
+ continue;
+ }
+
+ if ( isForcedBaseline( fv ) ) {
+ BaselineSelection.log.debug( "Baseline chosen: " + fv );
+ result.put( factor, fv );
+ break;
+ }
+
+ if ( isBaselineCondition( fv ) ) {
+ if ( result.containsKey( factor ) ) {
+ BaselineSelection.log.warn( "A second potential baseline was found for " + factor + ": " + fv );
+ continue;
+ }
+ BaselineSelection.log.debug( "Baseline chosen: " + fv );
+ result.put( factor, fv );
+ }
+ }
+
+ if ( !result.containsKey( factor ) ) { // fallback
+ FactorValue arbitraryBaselineFV = null;
+
+ if ( samplesUsed != null ) {
+ // make sure we choose a fv that is actually used (see above for non-arbitrary case)
+ for ( FactorValue fv : factor.getFactorValues() ) {
+ for ( BioMaterial bm : samplesUsed ) {
+ for ( FactorValue bfv : bm.getAllFactorValues() ) {
+ if ( fv.equals( bfv ) ) {
+ arbitraryBaselineFV = fv;
+ break;
+ }
+ }
+ if ( arbitraryBaselineFV != null )
+ break;
+ }
+ if ( arbitraryBaselineFV != null )
+ break;
+ }
+
+ if ( arbitraryBaselineFV == null ) {
+ // If we get here, we had passed in the samples in consideration but none had a value assigned.
+ throw new IllegalStateException(
+ "None of the samplesUsed have a value for factor: " + factor + " (" + factor
+ .getFactorValues().size() + " factor values) - ensure samples are assigned this factor" );
+ }
+
+ } else {
+ // I'm not sure the use case of this line but it would only be used if we didn't pass in any samples to consider.
+ arbitraryBaselineFV = factor.getFactorValues().iterator().next();
+ }
+
+ // There's no need to log this for batch factors, they are inherently arbitrary and only used
+ // during batch correction.
+ if ( !ExperimentFactorUtils.isBatchFactor( factor ) ) {
+ BaselineSelection.log.info( "Falling back on choosing baseline arbitrarily: " + arbitraryBaselineFV );
+ }
+ result.put( factor, arbitraryBaselineFV );
+ }
+ }
+ }
+
+ return result;
+ }
+
+ /**
+ * Identify the FactorValue that should be treated as 'Baseline' for each of the given factors. This is done
+ * heuristically, and if all else fails we choose arbitrarily.
+ *
+ * @param factors factors
+ * @return map
+ */
+ public static Map getBaselineLevels( Collection factors ) {
+ return getBaselineLevels( factors, null );
+ }
+
+ public static Map getBaselineConditions( List samplesUsed,
+ List factors ) {
+ Map baselineConditions = getBaselineLevels( factors, samplesUsed );
+
+ /*
+ * For factors that don't have an obvious baseline, use the first factorvalue.
+ */
+ Collection factorValuesOfFirstSample = samplesUsed.iterator().next().getAllFactorValues();
+ for ( ExperimentalFactor factor : factors ) {
+ if ( !baselineConditions.containsKey( factor ) ) {
+
+ for ( FactorValue biomf : factorValuesOfFirstSample ) {
+ /*
+ * the first biomaterial has the values used as baseline
+ */
+ if ( biomf.getExperimentalFactor().equals( factor ) ) {
+ baselineConditions.put( factor, biomf );
+ }
+ }
+ }
+ }
+
+ /*
+ * TODO: for OrganismPart (etc) we should allow there to be no baseline but use the global mean as the reference
+ * point.
+ */
+
+ return baselineConditions;
+ }
+
+ /**
+ * @return true if the factorvalue is used by at least one of the samples.
+ */
+ @SuppressWarnings("BooleanMethodIsAlwaysInverted") // Better semantics
+ private static boolean used( FactorValue fv, List samplesUsed ) {
+ for ( BioMaterial bm : samplesUsed ) {
+ for ( FactorValue bfv : bm.getAllFactorValues() ) {
+ if ( fv.equals( bfv ) ) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DiffExAnalyzerUtils.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DiffExAnalyzerUtils.java
index ed5434abec..43ab4d4fb6 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DiffExAnalyzerUtils.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DiffExAnalyzerUtils.java
@@ -6,6 +6,8 @@
import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.basecode.dataStructure.matrix.ObjectMatrix;
+import ubic.basecode.dataStructure.matrix.ObjectMatrixImpl;
+import ubic.gemma.model.common.measurement.MeasurementUtils;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
@@ -16,15 +18,17 @@
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.Writer;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
+import java.util.*;
import java.util.stream.Collectors;
@CommonsLog
public class DiffExAnalyzerUtils {
+ public static final String BIO_MATERIAL_RNAME_PREFIX = "biomat_";
+ public static final String FACTOR_RNAME_PREFIX = "fact.";
+ public static final String FACTOR_VALUE_RNAME_PREFIX = "fv_";
+ public static final String FACTOR_VALUE_BASELINE_SUFFIX = "_base";
+
/**
* This bioAssayDimension shouldn't get persisted; it is only for dealing with subset diff ex. analyses.
*
@@ -270,7 +274,7 @@ public static AnalysisType determineAnalysisType( BioAssaySet bioAssaySet, Colle
return null;
}
- if ( ExperimentalDesignUtils.isBatchFactor( f ) ) {
+ if ( ExperimentFactorUtils.isBatchFactor( f ) ) {
log.info( "One of the two factors is 'batch', not using it for an interaction" );
okForInteraction = false;
}
@@ -401,4 +405,128 @@ private static String formatFactor( ExperimentalFactor ef ) {
private static String formatFactorValue( FactorValue fv ) {
return FactorValueUtils.getSummaryString( fv );
}
+
+ /**
+ * Build a design matrix for the given factors and samples.
+ * @param factors factors
+ * @param samplesUsed the samples used
+ * @param allowMissingValues whether to allow missing values, if set to true, the returned matrix may contain nulls
+ * @return the experimental design matrix
+ * @throws IllegalStateException if missing values are found and allowMissingValues is false
+ */
+ public static ObjectMatrix buildDesignMatrix( List factors,
+ List samplesUsed, boolean allowMissingValues ) {
+ ObjectMatrix designMatrix = new ObjectMatrixImpl<>( samplesUsed.size(), factors.size() );
+ designMatrix.setColumnNames( factors );
+ designMatrix.setRowNames( samplesUsed );
+ populateDesignMatrix( designMatrix, factors, samplesUsed, BaselineSelection.getBaselineConditions( samplesUsed, factors ), allowMissingValues );
+ return designMatrix;
+ }
+
+ /**
+ * Build an R-friendly design matrix.
+ *
+ * Rows and columns use names derived from {@link #nameForR(BioMaterial)}, {@link #nameForR(ExperimentalFactor)} and
+ * {@link #nameForR(FactorValue, boolean)} such that the resulting matrix can be passed to R for analysis. It is
+ * otherwise identical to {@link #buildDesignMatrix(List, List, boolean)}.
+ */
+ public static ObjectMatrix buildRDesignMatrix( List factors,
+ List samplesUsed, boolean allowMissingValues ) {
+ return buildRDesignMatrix( factors, samplesUsed, BaselineSelection.getBaselineConditions( samplesUsed, factors ), allowMissingValues );
+ }
+
+ /**
+ * A variant of {@link #buildRDesignMatrix(List, List, boolean)} that allows for reusing baselines for repeated
+ * calls. This is used for subset analysis.
+ */
+ public static ObjectMatrix buildRDesignMatrix( List factors,
+ List samplesUsed, Map baselines, boolean allowMissingValues ) {
+ ObjectMatrix designMatrix = new ObjectMatrixImpl<>( samplesUsed.size(), factors.size() );
+ designMatrix.setColumnNames( factors.stream().map( DiffExAnalyzerUtils::nameForR ).collect( Collectors.toList() ) );
+ designMatrix.setRowNames( samplesUsed.stream().map( DiffExAnalyzerUtils::nameForR ).collect( Collectors.toList() ) );
+ populateDesignMatrix( designMatrix, factors, samplesUsed, baselines, allowMissingValues );
+ return designMatrix;
+ }
+
+ private static void populateDesignMatrix( ObjectMatrix, ?, Object> designMatrix, List factors, List samplesUsed, Map baselines, boolean allowMissingValues ) {
+ Map> factorValueMap = ExperimentalDesignUtils.getFactorValueMap( factors, samplesUsed );
+ for ( int i = 0; i < samplesUsed.size(); i++ ) {
+ BioMaterial samp = samplesUsed.get( i );
+ for ( int j = 0; j < factors.size(); j++ ) {
+ ExperimentalFactor factor = factors.get( j );
+ Object value = DiffExAnalyzerUtils.extractFactorValueForSample( samp, factor, factorValueMap, baselines.get( factor ) );
+ // if the value is null, we have to skip this factor, actually, but we do it later.
+ if ( !allowMissingValues && value == null ) {
+ // FIXME: This error could be worked around when we are doing SampleCoexpression. A legitimate
+ // reason is when we have a DEExclude factor and some samples lack any value for one of the
+ // other factors. We could detect this but it's kind of complicated, rare, and would only
+ // apply for that case.
+ throw new IllegalStateException( samp + " does not have a value for " + factor + "." );
+ }
+ designMatrix.set( i, j, value );
+ }
+ }
+ }
+
+ /**
+ * Extract the "value" of a factor for a sample.
+ *
+ * @param sample sample
+ * @param factor factor to extract a value for
+ * @param baseline the baseline to use (iff the factor is categorical)
+ * @return a double for a continuous factor (or null if the measurement is not set), a string for a categorical
+ * factor, or null if the factor is not set for the given sample
+ * @throws IllegalStateException if there is more than one factor value assigned to a given sample
+ */
+ @Nullable
+ private static Object extractFactorValueForSample( BioMaterial sample, ExperimentalFactor factor, Map> factorValueMap, @Nullable FactorValue baseline ) {
+ Assert.isTrue( factor.getType().equals( FactorType.CONTINUOUS ) || baseline != null,
+ "There is no baseline defined for " + factor + "." );
+ FactorValue factorValue = factorValueMap.get( factor ).get( sample );
+ if ( factorValue == null ) {
+ return null;
+ }
+ return extractFactorValue( factorValue, factor.getType().equals( FactorType.CONTINUOUS ), baseline != null && baseline.equals( factorValue ) );
+ }
+
+ private static Object extractFactorValue( FactorValue factorValue, boolean isContinuous, boolean isBaseline ) {
+ if ( isContinuous ) {
+ if ( factorValue.getMeasurement() == null ) {
+ throw new IllegalStateException( "Measurement is null for continuous factor value " + factorValue + "." );
+ }
+ return MeasurementUtils.measurement2double( factorValue.getMeasurement() );
+ } else {
+ /*
+ * We always use a dummy value. It's not as human-readable but at least we're sure it is unique and
+ * R-compliant. (assuming the fv is persistent!)
+ */
+ return nameForR( factorValue, isBaseline );
+ }
+ }
+
+ /**
+ * Create a name for a sample suitable for R.
+ */
+ public static String nameForR( BioMaterial sample ) {
+ Assert.notNull( sample.getId(), "Sample must have an ID to have a R-suitable name." );
+ return BIO_MATERIAL_RNAME_PREFIX + sample.getId();
+ }
+
+ /**
+ * Create a name for the factor that is suitable for R.
+ */
+ public static String nameForR( ExperimentalFactor experimentalFactor ) {
+ Assert.notNull( experimentalFactor.getId(), "Factor must have an ID to have a R-suitable name." );
+ return FACTOR_RNAME_PREFIX + experimentalFactor.getId();
+ }
+
+ /**
+ * Create a name for the factor value that is suitable for R.
+ */
+ public static String nameForR( FactorValue fv, boolean isBaseline ) {
+ Assert.isTrue( fv.getExperimentalFactor().getType() == FactorType.CATEGORICAL || !isBaseline,
+ "Continuous factors cannot have a baseline." );
+ Assert.notNull( fv.getId(), "Factor value must have an ID to have a R-suitable name." );
+ return FACTOR_VALUE_RNAME_PREFIX + fv.getId() + ( isBaseline ? FACTOR_VALUE_BASELINE_SUFFIX : "" );
+ }
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalysisHelperServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalysisHelperServiceImpl.java
index a2e301d085..e09c570408 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalysisHelperServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalysisHelperServiceImpl.java
@@ -28,7 +28,6 @@
import ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet;
import ubic.gemma.persistence.persister.Persister;
import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService;
-import ubic.gemma.persistence.service.analysis.expression.diff.ExpressionAnalysisResultSetDao;
/**
* Transactional methods for dealing with differential expression analyses.
@@ -48,7 +47,9 @@ public class DifferentialExpressionAnalysisHelperServiceImpl implements Differen
@Override
@Transactional
public DifferentialExpressionAnalysis persistStub( DifferentialExpressionAnalysis entity ) {
- entity.setProtocol( ( Protocol ) persisterHelper.persist( entity.getProtocol() ) );
+ if ( entity.getProtocol() != null ) {
+ entity.setProtocol( ( Protocol ) persisterHelper.persist( entity.getProtocol() ) );
+ }
// Sometimes we have made a new EESubSet as part of the analysis.
if ( ExpressionExperimentSubSet.class.isAssignableFrom( entity.getExperimentAnalyzed().getClass() )
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java
index 56b8ca463a..8ed3a634ad 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java
@@ -105,6 +105,15 @@ public class LinearModelAnalyzer implements DiffExAnalyzer {
private static final String EXCLUDE_WARNING = "Found Factor Value with DE_Exclude characteristic. Skipping current subset.";
+ /**
+ * This keeps factors ordered the same way that they appear in the design matrix.
+ *
+ * Factors are compared by the string returned by {@code DiffExAnalyzerUtils.nameForR}, in natural (lexicographic)
+ * order, which is not the same as ordering by numeric ID.
+ */
+ private static final Comparator FACTOR_COMPARATOR =
+ Comparator.comparing( DiffExAnalyzerUtils::nameForR, Comparator.naturalOrder() );
+
+ /**
+ * Orders samples by the string returned by {@code DiffExAnalyzerUtils.nameForR}, in natural (lexicographic) order.
+ *
+ * NOTE(review): where this replaces a comparison on the numeric sample ID, the resulting order differs (e.g. an
+ * ID of 10 sorts before an ID of 2) - confirm this is intended.
+ */
+ private static final Comparator SAMPLE_COMPARATOR =
+ Comparator.comparing( DiffExAnalyzerUtils::nameForR, Comparator.naturalOrder() );
+
@Autowired
private CompositeSequenceService compositeSequenceService;
@Autowired
@@ -285,7 +294,9 @@ public Collection run( ExpressionExperiment expr
/*
* Initialize our matrix and factor lists...
*/
- List factors = ExperimentalDesignUtils.getOrderedFactors( config.getFactorsToInclude() );
+ List factors = config.getFactorsToInclude().stream()
+ .sorted( FACTOR_COMPARATOR )
+ .collect( Collectors.toList() );
/*
* FIXME this is the place to strip put the outliers.
@@ -295,7 +306,7 @@ public Collection run( ExpressionExperiment expr
dmatrix = new ExpressionDataDoubleMatrix( dmatrix, samplesUsed,
createBADMap( samplesUsed ) ); // enforce ordering
- Map baselineConditions = ExperimentalDesignUtils
+ Map baselineConditions = BaselineSelection
.getBaselineConditions( samplesUsed, factors );
dropIncompleteFactors( samplesUsed, factors );
@@ -318,10 +329,12 @@ public Collection run( ExpressionExperiment ee,
Assert.isTrue( !subsets.isEmpty(), "No subsets provided" );
Assert.isTrue( config.getSubsetFactor().getFactorValues().containsAll( subsets.keySet() ), "Subsets must use factor values from " + config.getSubsetFactor() + "." );
Assert.isTrue( subsets.values().stream().allMatch( ss -> ss.getSourceExperiment().equals( ee ) ), "Subsets must use " + ee + " as source experiment." );
- List factors = ExperimentalDesignUtils.getOrderedFactors( config.getFactorsToInclude() );
+ List factors = config.getFactorsToInclude().stream()
+ .sorted( FACTOR_COMPARATOR )
+ .collect( Collectors.toList() );
List samplesUsed = orderByExperimentalDesign( dmatrix, factors, null );
Map dmatrixBySubSet = makeSubSetMatrices( dmatrix, samplesUsed, factors, config.getSubsetFactor() );
- Map baselineConditions = ExperimentalDesignUtils
+ Map baselineConditions = BaselineSelection
.getBaselineConditions( samplesUsed, factors );
dropIncompleteFactors( samplesUsed, factors );
return doSubSetAnalysis( subsets, dmatrixBySubSet, factors, baselineConditions, config );
@@ -454,7 +467,7 @@ public DifferentialExpressionAnalysis run( ExpressionExperimentSubSet subset, Ex
List samplesInSubset = subset.getBioAssays().stream()
.map( BioAssay::getSampleUsed )
- .sorted( Comparator.comparing( BioMaterial::getId ) )
+ .sorted( SAMPLE_COMPARATOR )
.collect( Collectors.toList() );
FactorValue subsetFactorValue = config.getSubsetFactorValue();
@@ -469,10 +482,12 @@ public DifferentialExpressionAnalysis run( ExpressionExperimentSubSet subset, Ex
ExpressionDataDoubleMatrix subsetMatrix = new ExpressionDataDoubleMatrix( dmatrix, samplesInSubset,
createBADMap( samplesInSubset ) );
- List factors = ExperimentalDesignUtils.getOrderedFactors( config.getFactorsToInclude() );
+ List factors = config.getFactorsToInclude().stream()
+ .sorted( FACTOR_COMPARATOR )
+ .collect( Collectors.toList() );
List subsetFactors = fixFactorsForSubset( subset, dmatrix, factors );
- Map baselineConditions = ExperimentalDesignUtils
+ Map baselineConditions = BaselineSelection
.getBaselineConditions( samplesInSubset, factors );
dropIncompleteFactors( samplesInSubset, factors );
@@ -603,7 +618,7 @@ private void buildModelFormula( final DifferentialExpressionAnalysisConfig confi
List interactionFactorNames = new ArrayList<>();
for ( ExperimentalFactor factor : interactionTerms ) {
- interactionFactorNames.add( ExperimentalDesignUtils.nameForR( factor ) );
+ interactionFactorNames.add( DiffExAnalyzerUtils.nameForR( factor ) );
}
interactionFactorLists.add( interactionFactorNames.toArray( new String[] {} ) );
@@ -671,7 +686,7 @@ private DifferentialExpressionAnalysis doAnalysis( BioAssaySet bioAssaySet,
* Build our factor terms, with interactions handled specially
*/
List interactionFactorLists = new ArrayList<>();
- ObjectMatrix designMatrix = ExperimentalDesignUtils
+ ObjectMatrix designMatrix = DiffExAnalyzerUtils
.buildRDesignMatrix( factors, samplesUsed, baselineConditions, false );
config.addBaseLineFactorValues( baselineConditions );
@@ -878,10 +893,10 @@ private void warnForElement( CompositeSequence el, String s, int warned ) {
private void dropIncompleteFactors( List samplesUsed, List factors ) {
factors.removeIf( f -> {
- if ( ExperimentalDesignUtils.isComplete( f, samplesUsed ) ) {
+ if ( isComplete( f, samplesUsed ) ) {
return false; // keep
}
- String samplesWithMissingValues = ExperimentalDesignUtils.getSampleToFactorValuesMap( f, samplesUsed )
+ String samplesWithMissingValues = getSampleToFactorValuesMap( f, samplesUsed )
.entrySet().stream().filter( e -> e.getValue().isEmpty() )
.map( Map.Entry::getKey )
.map( BioMaterial::getName )
@@ -892,6 +907,35 @@ private void dropIncompleteFactors( List samplesUsed, List samplesUsed ) {
+ // NOTE(review): the condition rejects a single sample, but the message says "at least one" - confirm which is intended
+ Assert.isTrue( samplesUsed.size() > 1, "At least one sample must be supplied." );
+ for ( BioMaterial samp : samplesUsed ) {
+ // the factor is incomplete as soon as one sample has no factor value belonging to it
+ if ( samp.getAllFactorValues().stream()
+ .noneMatch( fv -> fv.getExperimentalFactor().equals( factor ) ) ) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Create a sample to factor value mapping.
+ *
+ * Under normal circumstances, there should be only one factor value per sample.
+ *
+ * @param factor the factor whose values are collected
+ * @param samplesUsed the samples to map
+ * @return a map from each sample to the set of its factor values that belong to the given factor; the set is
+ * empty for a sample that has no value for the factor
+ */
+ private Map> getSampleToFactorValuesMap( ExperimentalFactor factor, Collection samplesUsed ) {
+ // Collectors.toMap() rejects duplicate keys, so each sample is expected to appear only once in samplesUsed
+ return samplesUsed.stream()
+ .collect( Collectors.toMap( bm -> bm, bm -> bm.getAllFactorValues().stream()
+ .filter( fv -> fv.getExperimentalFactor().equals( factor ) )
+ .collect( Collectors.toSet() ) ) );
+ }
+
/**
* Remove all configurations that have to do with factors that aren't in the selected factors.
*
@@ -1031,7 +1075,7 @@ private void fillRanksAndQvalues(
private Map> getRNames( List factors ) {
final Map> label2Factors = new LinkedHashMap<>();
for ( ExperimentalFactor experimentalFactor : factors ) {
- label2Factors.computeIfAbsent( ExperimentalDesignUtils.nameForR( experimentalFactor ), k -> new HashSet<>() )
+ label2Factors.computeIfAbsent( DiffExAnalyzerUtils.nameForR( experimentalFactor ), k -> new HashSet<>() )
.add( experimentalFactor );
}
return label2Factors;
@@ -1146,7 +1190,7 @@ private void makeContrast( DifferentialExpressionAnalysisResult probeAnalysisRes
*/
contrast.setLogFoldChange( this.nan2Null( coefficient ) );
- if ( term.contains( ExperimentalDesignUtils.FACTOR_VALUE_RNAME_PREFIX ) ) {
+ if ( term.contains( DiffExAnalyzerUtils.FACTOR_VALUE_RNAME_PREFIX ) ) {
// otherwise, it's continuous, and
// we don't put in a
// factorvalue.
@@ -1168,7 +1212,7 @@ private void makeContrast( DifferentialExpressionAnalysisResult probeAnalysisRes
try {
factorValueId = Long.parseLong(
- firstTerm.replace( factorNames[0] + ExperimentalDesignUtils.FACTOR_VALUE_RNAME_PREFIX, "" ) );
+ firstTerm.replace( factorNames[0] + DiffExAnalyzerUtils.FACTOR_VALUE_RNAME_PREFIX, "" ) );
} catch ( NumberFormatException e ) {
throw new RuntimeException( "Failed to parse: " + firstTerm + " into a factorvalue id" );
}
@@ -1190,7 +1234,7 @@ private void makeContrast( DifferentialExpressionAnalysisResult probeAnalysisRes
try {
factorValueId = Long.parseLong( secondTerm
- .replace( factorNames[1] + ExperimentalDesignUtils.FACTOR_VALUE_RNAME_PREFIX, "" ) );
+ .replace( factorNames[1] + DiffExAnalyzerUtils.FACTOR_VALUE_RNAME_PREFIX, "" ) );
} catch ( NumberFormatException e ) {
throw new RuntimeException( "Failed to parse: " + secondTerm + " into a factorvalue id" );
}
@@ -1250,8 +1294,8 @@ private DesignMatrix makeDesignMatrix( ObjectMatrix desi
if ( ef.getType().equals( FactorType.CONTINUOUS ) ) {
continue;
}
- String factorName = ExperimentalDesignUtils.nameForR( ef );
- String baselineFactorValue = ExperimentalDesignUtils.nameForR( baselineConditions.get( ef ), true );
+ String factorName = DiffExAnalyzerUtils.nameForR( ef );
+ String baselineFactorValue = DiffExAnalyzerUtils.nameForR( baselineConditions.get( ef ), true );
/*
* If this is a subset, it is possible the baseline chosen is not eligible for the subset.
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessorServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessorServiceImpl.java
index a2fd7c59cd..a00ab88612 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessorServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessorServiceImpl.java
@@ -30,6 +30,7 @@
import ubic.gemma.core.analysis.preprocess.svd.SVDService;
import ubic.gemma.core.analysis.report.ExpressionExperimentReportService;
import ubic.gemma.core.analysis.service.ExpressionDataFileService;
+import ubic.gemma.core.datastructure.matrix.BulkExpressionDataMatrixUtils;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis;
import ubic.gemma.model.common.auditAndSecurity.eventType.BatchCorrectionEvent;
@@ -129,7 +130,7 @@ private void batchCorrect( ExpressionExperiment ee ) throws PreprocessingExcepti
ExpressionDataDoubleMatrix correctedData = this.getCorrectedData( ee, vecs );
// Convert to vectors (persist QT)
- int replaced = processedExpressionDataVectorService.replaceProcessedDataVectors( ee, correctedData.toProcessedDataVectors(), false );
+ int replaced = processedExpressionDataVectorService.replaceProcessedDataVectors( ee, BulkExpressionDataMatrixUtils.toVectors( correctedData, ProcessedExpressionDataVector.class ), false );
auditTrailService.addUpdateEvent( ee, BatchCorrectionEvent.class, String.format( "ComBat batch correction, vectors were replaced with %d batch-corrected ones.", replaced ) );
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/SplitExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/SplitExperimentServiceImpl.java
index a571bfe2c1..09abd7d3c1 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/SplitExperimentServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/SplitExperimentServiceImpl.java
@@ -27,6 +27,7 @@
import org.springframework.stereotype.Service;
import org.springframework.util.Assert;
import ubic.gemma.core.analysis.service.ExpressionDataFileService;
+import ubic.gemma.core.datastructure.matrix.BulkExpressionDataMatrixUtils;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.core.datastructure.matrix.ExpressionDataMatrix;
import ubic.gemma.core.datastructure.matrix.MultiAssayBulkExpressionDataMatrix;
@@ -107,7 +108,7 @@ public ExpressionExperimentSet split( ExpressionExperiment toSplit, Experimental
throw new IllegalArgumentException( "Cannot split experiments that are on more than one platform" );
}
- if ( ExperimentalDesignUtils.isBatchFactor( splitOn ) ) {
+ if ( ExperimentFactorUtils.isBatchFactor( splitOn ) ) {
throw new IllegalArgumentException( "Do not split experiments on 'batch'" );
}
@@ -282,7 +283,7 @@ public ExpressionExperimentSet split( ExpressionExperiment toSplit, Experimental
ExpressionDataDoubleMatrix expressionDataMatrix = new ExpressionDataDoubleMatrix( ( ExpressionDataDoubleMatrix ) qt2mat.get( qt ),
bms, newBAD );
- Collection rawDataVectors = expressionDataMatrix.toRawDataVectors();
+ Collection rawDataVectors = BulkExpressionDataMatrixUtils.toVectors( expressionDataMatrix, RawExpressionDataVector.class );
for ( RawExpressionDataVector v : rawDataVectors ) {
v.setQuantitationType( clonedQt );
v.setExpressionExperiment( split );
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/VectorMergingServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/VectorMergingServiceImpl.java
index 7ae17126f4..c987200cf8 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/VectorMergingServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/VectorMergingServiceImpl.java
@@ -43,7 +43,7 @@
import java.util.*;
-import static ubic.gemma.core.analysis.preprocess.convert.QuantitationTypeConversionUtils.getDefaultValue;
+import static ubic.gemma.model.common.quantitationtype.QuantitationTypeUtils.getDefaultValue;
/**
* Tackles the problem of concatenating DesignElementDataVectors for a single experiment. This is necessary When a study
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java
index 64c7aca174..a0831c8bb0 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java
@@ -83,7 +83,7 @@ private static Collection factorBatchConfoundTest( BioAssaySet ee
// FV -> index
Map batchIndexes = new HashMap<>();
for ( ExperimentalFactor ef : bioMaterialFactorMap.keySet() ) {
- if ( ExperimentalDesignUtils.isBatchFactor( ef ) ) {
+ if ( ExperimentFactorUtils.isBatchFactor( ef ) ) {
batchFactor = ef;
Map factorValueById = IdentifiableUtils.getIdMap( ef.getFactorValues() );
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationHelperServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationHelperServiceImpl.java
index 953c67a26d..5bf5aa73cb 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationHelperServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationHelperServiceImpl.java
@@ -67,7 +67,7 @@ public class BatchInfoPopulationHelperServiceImpl implements BatchInfoPopulation
private static final String FASTQ_HEADER_EXTRACTION_FAILURE_INDICATOR = "FAILURE";
@Autowired
- private BioMaterialService bioMaterialService = null;
+ private BioMaterialService bioMaterialService;
@Autowired
private ExperimentalDesignService experimentalDesignService;
@@ -86,7 +86,7 @@ public ExperimentalFactor createRnaSeqBatchFactor( ExpressionExperiment ee, Map<
*/
Map> batchIdToHeaders;
try {
- batchIdToHeaders = this.convertHeadersToBatches( ee, headers.values() );
+ batchIdToHeaders = convertHeadersToBatches( ee, headers.values() );
} catch ( FASTQHeadersPresentButNotUsableException e ) {
log.info( "Batches unable to be determined from headers: " + ee );
this.auditTrailService.addUpdateEvent( ee, UninformativeFASTQHeadersForBatchingEvent.class, "Batches unable to be determined", "RNA-seq experiment, FASTQ headers and platform not informative for batches" );
@@ -283,7 +283,7 @@ Map> convertDatesToBatches( List allDates, List> convertHeadersToBatches( ExpressionExperiment ee, Collection headers ) throws FASTQHeadersPresentButNotUsableException, SingletonBatchesException {
+ private static Map> convertHeadersToBatches( ExpressionExperiment ee, Collection headers ) throws FASTQHeadersPresentButNotUsableException, SingletonBatchesException {
Map> result = new LinkedHashMap<>();
Map> goodHeaderSampleInfos = new HashMap<>();
@@ -402,7 +402,7 @@ private Map> convertHeadersToBatches( ExpressionExper
/**
* For tests only.
+ *
+ * Delegates to the overload that takes an experiment, passing a new instance obtained from
+ * {@code ExpressionExperiment.Factory}.
*/
- Map> convertHeadersToBatches( Collection headers ) {
+ static Map> convertHeadersToBatches( Collection headers ) {
return convertHeadersToBatches( ExpressionExperiment.Factory.newInstance(), headers );
}
@@ -420,7 +420,7 @@ Map> convertHeadersToBatches( Collection head
* @return Map of batches (represented by the appropriate FastqHeaderData) to samples that are in the
* batch.
*/
- private Map> batch( ExpressionExperiment ee, Map> batchInfos, int numSamples ) {
+ private static Map> batch( ExpressionExperiment ee, Map> batchInfos, int numSamples ) {
int numBatches = batchInfos.size();
@@ -466,7 +466,7 @@ private Map> batch( ExpressionExperiment ee,
* RNAseq: Update the batch info with a lower resolution. This is only effective if we have a usable header for all
* samples.
*/
- private Map> dropResolution( Map> batchInfos ) {
+ private static Map> dropResolution( Map> batchInfos ) {
Map> result = new HashMap<>();
for ( FastqHeaderData fhd : batchInfos.keySet() ) {
@@ -513,7 +513,7 @@ private Map> dropResolution( Map
+ * Exposed for testing, do not use in production code.
+ */
+ void setFastqHeadersDir( Path fastqHeadersDir );
}
\ No newline at end of file
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java
index 0b1c56f981..7201510ea9 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java
@@ -14,14 +14,14 @@
*/
package ubic.gemma.core.analysis.preprocess.batcheffects;
+import lombok.Setter;
+import lombok.extern.apachecommons.CommonsLog;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
-import ubic.gemma.core.config.Settings;
import ubic.gemma.core.loader.expression.geo.fetcher.RawDataFetcher;
import ubic.gemma.model.common.auditAndSecurity.AuditEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationFetchingEvent;
@@ -31,10 +31,7 @@
import ubic.gemma.model.common.description.DatabaseEntry;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
-import ubic.gemma.model.expression.experiment.ExperimentalDesign;
-import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils;
-import ubic.gemma.model.expression.experiment.ExperimentalFactor;
-import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService;
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService;
import ubic.gemma.persistence.service.expression.bioAssay.BioAssayService;
@@ -43,8 +40,9 @@
import java.io.BufferedReader;
import java.io.File;
-import java.io.FileReader;
import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.*;
/**
@@ -53,6 +51,7 @@
* @author paul
*/
@Service
+@CommonsLog
public class BatchInfoPopulationServiceImpl implements BatchInfoPopulationService {
/**
@@ -66,12 +65,7 @@ public class BatchInfoPopulationServiceImpl implements BatchInfoPopulationServic
/**
* we have files named like GSE1234.fastq-headers-table.txt; specified in our RNA-seq pipelineF
*/
- private static final String FASTQHEADERSFILE_SUFFIX = ".fastq-headers-table.txt";
- /**
- *
- */
- private static final String GEMMA_FASTQ_HEADERS_DIR_CONFIG = "gemma.fastq.headers.dir";
- private static final Log log = LogFactory.getLog( BatchInfoPopulationServiceImpl.class );
+ static final String FASTQHEADERSFILE_SUFFIX = ".fastq-headers-table.txt";
@Autowired
private AuditEventService auditEventService;
@@ -88,6 +82,10 @@ public class BatchInfoPopulationServiceImpl implements BatchInfoPopulationServic
@Autowired
private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService;
+ @Setter
+ @Value("${gemma.fastq.headers.dir}")
+ private Path fastqHeadersDir;
+
@Override
@Transactional
public void fillBatchInformation( ExpressionExperiment ee, boolean force ) throws BatchInfoPopulationException {
@@ -133,38 +131,6 @@ public void fillBatchInformation( ExpressionExperiment ee, boolean force ) throw
}
}
- /**
- * Exposed for testing
- *
- * @param accession GEO accession
- * @return map of GEO id to headers, including the platform ID
- */
- Map readFastqHeaders( String accession ) throws IOException {
- Map result = new HashMap<>();
- File headerFile = new File( Settings.getString( GEMMA_FASTQ_HEADERS_DIR_CONFIG ) + File.separator
- + accession + FASTQHEADERSFILE_SUFFIX );
- try ( BufferedReader br = new BufferedReader( new FileReader( headerFile ) ) ) {
- String line;
- while ( ( line = br.readLine() ) != null ) {
-
- String[] fields = StringUtils.split( line, "\t" );
-
- if ( fields.length < 5 ) {
- continue;
- }
-
- String geoID = fields[0];
- String geoPlatformID = fields[2]; // we may use this if the headers are not usable.
- String headers = fields[4]; // this may be FAILURE (possibly more than once)
-
- result.put( geoID, geoPlatformID + MULTIFASTQHEADER_DELIMITER + headers );
- }
-
- }
-
- return result;
- }
-
/**
* Currently only supports GEO
*
@@ -291,12 +257,10 @@ private Map assignRawHeadersToSamples( ExpressionExperiment
ba.setFastqHeaders( h );
- /*
- * TODO we could use this as an opportunity to update the "original platform" if it is not populated
- */
- if ( ba.getOriginalPlatform() == null ) {
-
- }
+ // TODO we could use this as an opportunity to update the "original platform" if it is not populated
+ // if ( ba.getOriginalPlatform() == null ) {
+ //
+ // }
// Note: for microarray processing dates, we persist in the Biomaterialservice.associateBatchFactor.
// The difference for RNAseq is that we want to store the entire header, which includes parts that are not needed for the batch information.
@@ -307,19 +271,6 @@ private Map assignRawHeadersToSamples( ExpressionExperiment
return headers;
}
- /**
- */
- private File locateFASTQheadersForBatchInfo( String accession ) {
- String fhd = Settings.getString( GEMMA_FASTQ_HEADERS_DIR_CONFIG );
-
- if ( StringUtils.isBlank( fhd ) ) {
- throw new IllegalStateException( "You must configure the path to extracted headers directory (" + GEMMA_FASTQ_HEADERS_DIR_CONFIG + ")" );
- }
-
- return new File( fhd + File.separator
- + accession + FASTQHEADERSFILE_SUFFIX );
- }
-
/**
* @param ee ee
* @param rnaSeq if the data set is RNAseq
@@ -378,14 +329,43 @@ private boolean needToRun( ExpressionExperiment ee, boolean rnaSeq ) {
private Map readFastqHeaders( ExpressionExperiment ee ) throws IOException {
String accession = Objects.requireNonNull( ee.getAccession(), String.format( "%s does not have an accession", ee ) )
.getAccession();
- File headerFile = locateFASTQheadersForBatchInfo( accession );
+ // NOTE(review): the removed locateFASTQheadersForBatchInfo() rejected a blank headers directory with an explicit
+ // message; a null fastqHeadersDir would fail here on resolve() instead - confirm the property is always set
+ Path headerFile = fastqHeadersDir.resolve( accession + FASTQHEADERSFILE_SUFFIX );
- if ( !headerFile.canRead() ) {
+ if ( !Files.isReadable( headerFile ) ) {
throw new IOException( "No header file for " + ee );
}
- return readFastqHeaders( accession );
+ return readFastqHeaders( headerFile );
+ }
+
+ /**
+ * Parse a tab-delimited FASTQ headers file.
+ *
+ * Lines with fewer than five fields are skipped. If a GEO id appears on more than one line, the last line wins.
+ *
+ * Exposed for testing
+ *
+ * @param headerFile path to the header file to parse
+ * @return map of GEO id to headers, including the platform ID; each value is the platform ID (third field),
+ * {@code MULTIFASTQHEADER_DELIMITER} and the headers (fifth field)
+ * @throws IOException if the file cannot be opened or read
+ */
+ static Map readFastqHeaders( Path headerFile ) throws IOException {
+ Map result = new HashMap<>();
+ // Files.newBufferedReader( Path ) decodes the file as UTF-8
+ try ( BufferedReader br = Files.newBufferedReader( headerFile ) ) {
+ String line;
+ while ( ( line = br.readLine() ) != null ) {
+
+ // NOTE(review): StringUtils.split() treats adjacent separators as one, so an empty column would shift the
+ // field indices used below - confirm that empty columns cannot occur in these files
+ String[] fields = StringUtils.split( line, "\t" );
+
+ // skip lines that do not have all the columns read below
+ if ( fields.length < 5 ) {
+ continue;
+ }
+ String geoID = fields[0];
+ String geoPlatformID = fields[2]; // we may use this if the headers are not usable.
+ String headers = fields[4]; // this may be FAILURE (possibly more than once)
+
+ // a later line with the same GEO id replaces the earlier entry
+ result.put( geoID, geoPlatformID + MULTIFASTQHEADER_DELIMITER + headers );
+ }
+
+ }
+
+ return result;
}
/**
@@ -396,11 +376,16 @@ private Map readFastqHeaders( ExpressionExperiment ee ) throws I
private void removeExistingBatchFactor( ExpressionExperiment ee ) {
ExperimentalDesign ed = ee.getExperimentalDesign();
+ if ( ed == null ) {
+ log.warn( ee + " does not have an experimental design, cannot remove batch factor." );
+ return;
+ }
+
ExperimentalFactor toRemove = null;
for ( ExperimentalFactor ef : ed.getExperimentalFactors() ) {
- if ( ExperimentalDesignUtils.isBatchFactor( ef ) ) {
+ if ( ExperimentFactorUtils.isBatchFactor( ef ) ) {
toRemove = ef;
break;
/*
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ComBat.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ComBat.java
index 64a822ca6b..66011c71be 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ComBat.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ComBat.java
@@ -45,10 +45,10 @@
import ubic.gemma.core.util.concurrent.Executors;
import java.awt.*;
-import java.io.File;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
@@ -154,15 +154,15 @@ public void plot( String filePrefix ) {
ghtheoryT.fill( n );
}
XYSeries ghtheory = ghtheoryT.plot();
- File tmpfile;
+ Path tmpfile;
try {
- tmpfile = File.createTempFile( filePrefix + ".gammahat.histogram.", ".png" );
+ tmpfile = Files.createTempFile( filePrefix + ".gammahat.histogram.", ".png" );
ComBat.log.info( tmpfile );
} catch ( IOException e ) {
throw new RuntimeException( e );
}
- try ( OutputStream os = new FileOutputStream( tmpfile ) ) {
+ try ( OutputStream os = Files.newOutputStream( tmpfile ) ) {
this.writePlot( os, ghplot, ghtheory );
/*
@@ -181,10 +181,10 @@ public void plot( String filePrefix ) {
}
XYSeries dhtheory = deltaHatT.plot();
- tmpfile = File.createTempFile( filePrefix + ".deltahat.histogram.", ".png" );
+ tmpfile = Files.createTempFile( filePrefix + ".deltahat.histogram.", ".png" );
ComBat.log.info( tmpfile );
- try ( OutputStream os2 = new FileOutputStream( tmpfile ) ) {
+ try ( OutputStream os2 = Files.newOutputStream( tmpfile ) ) {
this.writePlot( os2, dhplot, dhtheory );
}
} catch ( IOException e ) {
@@ -586,9 +586,9 @@ private DoubleMatrix2D getBatchData( DoubleMatrix2D sdata, String batchId ) {
private DoubleMatrix2D getBatchDesign( String batchId ) {
Collection sampleNames = batches.get( batchId );
- DoubleMatrix2D result = new DenseDoubleMatrix2D( sampleNames.size(), batches.keySet().size() );
+ DoubleMatrix2D result = new DenseDoubleMatrix2D( sampleNames.size(), batches.size() );
- for ( int j = 0; j < batches.keySet().size(); j++ ) {
+ for ( int j = 0; j < batches.size(); j++ ) {
int i = 0;
for ( C sname : sampleNames ) {
@@ -669,7 +669,7 @@ private void initPartA() {
}
}
- numBatches = batches.keySet().size();
+ numBatches = batches.size();
numProbes = y.rows();
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceImpl.java
index bdef1f4506..81664c2847 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceImpl.java
@@ -43,8 +43,8 @@
import java.util.*;
import java.util.stream.Collectors;
+import static ubic.gemma.core.analysis.expression.diff.DiffExAnalyzerUtils.buildDesignMatrix;
import static ubic.gemma.core.analysis.preprocess.batcheffects.BatchEffectUtils.getBatchEffectType;
-import static ubic.gemma.model.expression.experiment.ExperimentalDesignUtils.buildDesignMatrix;
/**
* Methods for correcting batch effects.
@@ -260,7 +260,7 @@ public ExperimentalFactor getBatchFactor( ExpressionExperiment ee ) {
ExperimentalFactor batch = null;
for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) {
- if ( ExperimentalDesignUtils.isBatchFactor( ef ) ) {
+ if ( ExperimentFactorUtils.isBatchFactor( ef ) ) {
batch = ef;
break;
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java
index 965b0ea364..0d9a94bbd3 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java
@@ -10,10 +10,7 @@
import ubic.gemma.model.common.auditAndSecurity.AuditEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.*;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
-import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils;
-import ubic.gemma.model.expression.experiment.ExperimentalFactor;
-import ubic.gemma.model.expression.experiment.ExpressionExperiment;
-import ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet;
+import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
@@ -218,7 +215,7 @@ public BatchEffectDetails getBatchEffectDetails( ExpressionExperiment ee ) {
ExperimentalFactor ef = ee.getExperimentalDesign().getExperimentalFactors()
.stream()
- .filter( ExperimentalDesignUtils::isBatchFactor )
+ .filter( ExperimentFactorUtils::isBatchFactor )
.findFirst()
.orElse( null );
@@ -307,7 +304,7 @@ private boolean hasBatchFactor( ExpressionExperiment ee ) {
ee = expressionExperimentService.thawLiter( ee );
if ( ee.getExperimentalDesign() != null ) {
for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) {
- if ( ExperimentalDesignUtils.isBatchFactor( ef ) ) {
+ if ( ExperimentFactorUtils.isBatchFactor( ef ) ) {
return true;
}
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/QuantitationTypeConversionUtils.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/QuantitationTypeConversionUtils.java
index 19fd24a208..2fc4900e3a 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/QuantitationTypeConversionUtils.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/QuantitationTypeConversionUtils.java
@@ -22,7 +22,6 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.BeanUtils;
-import org.springframework.util.Assert;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.basecode.math.MatrixStats;
import ubic.gemma.core.analysis.preprocess.detect.InferredQuantitationMismatchException;
@@ -37,10 +36,8 @@
import ubic.gemma.model.expression.designElement.CompositeSequence;
import javax.annotation.CheckReturnValue;
-import javax.annotation.Nonnull;
import java.beans.PropertyDescriptor;
import java.util.*;
-import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.stream.Collectors;
@@ -75,7 +72,7 @@ public class QuantitationTypeConversionUtils {
*
* @param dmatrix matrix
* @return ee data double matrix
- * @throws QuantitationTypeDetectionException if data cannot be converted to log2 scale
+ * @throws QuantitationTypeConversionException if data cannot be converted to log2 scale
*/
public static ExpressionDataDoubleMatrix filterAndLog2Transform( ExpressionDataDoubleMatrix dmatrix ) throws QuantitationTypeConversionException {
dmatrix = QuantitationTypeConversionUtils.ensureLog2Scale( dmatrix );
@@ -125,13 +122,14 @@ public static ExpressionDataDoubleMatrix filterAndLog2Transform( ExpressionDataD
*/
@CheckReturnValue
public static ExpressionDataDoubleMatrix ensureLog2Scale( ExpressionDataDoubleMatrix dmatrix, boolean ignoreQuantitationMismatch ) throws QuantitationTypeDetectionException, QuantitationTypeConversionException {
- QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
- if ( quantitationType == null ) {
+ QuantitationType quantitationType;
+ if ( dmatrix.getQuantitationTypes().size() > 1 ) {
+ quantitationType = QuantitationTypeUtils.mergeQuantitationTypes( dmatrix.getQuantitationTypes() );
+ } else if ( !dmatrix.getQuantitationTypes().isEmpty() ) {
+ quantitationType = dmatrix.getQuantitationTypes().iterator().next();
+ } else {
throw new IllegalArgumentException( "Expression data matrix lacks a quantitation type." );
}
- if ( isHeterogeneous( dmatrix ) ) {
- throw new IllegalArgumentException( "Transforming a dataset to log2 scale with mixed quantitation types is not supported." );
- }
if ( quantitationType.getGeneralType() != GeneralType.QUANTITATIVE ) {
throw new IllegalArgumentException( "Only quantitative data is supported on a log2 scale." );
}
@@ -232,6 +230,7 @@ public static ExpressionDataDoubleMatrix ensureLog2Scale( ExpressionDataDoubleMa
.peek( qt -> {
qt.setType( finalType );
qt.setScale( ScaleType.LOG2 );
+ QuantitationTypeUtils.appendToDescription( qt, "Data was converted from " + quantitationType.getScale() + " to " + ScaleType.LOG2 + "." );
} )
.collect( Collectors.toList() );
@@ -262,69 +261,6 @@ public static ExpressionDataDoubleMatrix ensureLog2Scale( ExpressionDataDoubleMa
}
}
- /**
- * Check if an expression data matrix has heterogeneous quantitations.
- *
- * This happens when data from multiple platforms are mixed together. If the data is transformed in the same way,
- * it's generally okay to mix them together.
- */
- private static boolean isHeterogeneous( ExpressionDataDoubleMatrix expressionData ) {
- QuantitationType firstQt = expressionData.getQuantitationTypes().iterator().next();
- if ( firstQt == null ) {
- throw new IllegalArgumentException( "At least one quantitation type is needed." );
- }
- for ( QuantitationType qt : expressionData.getQuantitationTypes() ) {
- if ( qt.getRepresentation() != firstQt.getRepresentation()
- || qt.getGeneralType() != firstQt.getGeneralType()
- || qt.getType() != firstQt.getType()
- || qt.getScale() != firstQt.getScale()
- || qt.getIsNormalized() != firstQt.getIsNormalized()
- || qt.getIsBackground() != firstQt.getIsBackground()
- || qt.getIsBackgroundSubtracted() != firstQt.getIsBackgroundSubtracted()
- || qt.getIsBatchCorrected() != firstQt.getIsBatchCorrected() ) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * Merge a given collection of quantitation types.
- *
- * @throws IllegalStateException if the QTs are incompatible
- */
- public static QuantitationType mergeQuantitationTypes( Collection quantitationTypes ) {
- Assert.isTrue( quantitationTypes.size() > 1, "Two or more quantitation types are needed for merging." );
- QuantitationType qt = new QuantitationType();
- qt.setName( "Merged from " + quantitationTypes.size() + " quantitation types" );
- qt.setDescription( quantitationTypes.stream().map( QuantitationType::toString ).collect( Collectors.joining( "\n" ) ) );
- qt.setGeneralType( getUniqueQuantitationTypeField( quantitationTypes, QuantitationType::getGeneralType ) );
- qt.setType( getUniqueQuantitationTypeField( quantitationTypes, QuantitationType::getType ) );
- qt.setScale( getUniqueQuantitationTypeField( quantitationTypes, QuantitationType::getScale ) );
- qt.setRepresentation( getUniqueQuantitationTypeField( quantitationTypes, QuantitationType::getRepresentation ) );
- qt.setIsRatio( getUniqueQuantitationTypeField( quantitationTypes, QuantitationType::getIsRatio ) );
- qt.setIsRecomputedFromRawData( quantitationTypes.stream().allMatch( QuantitationType::getIsRecomputedFromRawData ) );
- if ( quantitationTypes.stream().anyMatch( QuantitationType::getIsNormalized ) ) {
- throw new IllegalStateException( "One more quantitation types were normalized, cannot merge them. Use getQuantitationTypes() instead." );
- }
- if ( quantitationTypes.stream().anyMatch( QuantitationType::getIsBatchCorrected ) ) {
- throw new IllegalStateException( "One more quantitation types were batch-corrected, cannot merge them. Use getQuantitationTypes() instead." );
- }
- // TODO: background, backgroundSubtracted?
- return qt;
- }
-
- private static S getUniqueQuantitationTypeField( Collection quantitationTypes, Function a ) {
- Set uv = quantitationTypes.stream()
- .map( a )
- .collect( Collectors.toSet() );
- if ( uv.size() > 1 ) {
- throw new IllegalStateException( "There is more than one quantitation type in this matrix, use getQuantitationTypes() instead." );
- } else {
- return uv.iterator().next();
- }
- }
-
/**
* Convert a collection of vectors.
*
@@ -333,7 +269,7 @@ private static S getUniqueQuantitationTypeField( Collection Collection convertVectors( Collection vectors, Function createQtFunc, BiConsumer doToVector, Class vectorType ) {
+ static Collection convertVectors( Collection vectors, Function createQtFunc, DoToVectorFunction doToVector, Class vectorType ) throws QuantitationTypeConversionException {
ArrayList result = new ArrayList<>( vectors.size() );
Map convertedQts = new HashMap<>();
String[] ignoredProperties = getDataVectorIgnoredProperties( vectorType );
@@ -344,22 +280,32 @@ public static Collection convertVectors( Collection
return result;
}
-
/**
* Convert a single vector.
*/
- public static T convertVector( T vector, Function createQtFunc, BiConsumer doToVector, Class vectorType ) {
+ static T convertVector( T vector, Function createQtFunc, DoToVectorFunction doToVector, Class vectorType ) throws QuantitationTypeConversionException {
return createVector( vector, vectorType, createQtFunc.apply( vector.getQuantitationType() ), doToVector, getDataVectorIgnoredProperties( vectorType ) );
}
- private static T createVector( T vector, Class vectorType, QuantitationType convertedQt, BiConsumer doToVector, String[] ignoredProperties ) {
+ private static T createVector( T vector, Class vectorType, QuantitationType convertedQt, DoToVectorFunction doToVector, String[] ignoredProperties ) throws QuantitationTypeConversionException {
T convertedVector = BeanUtils.instantiate( vectorType );
BeanUtils.copyProperties( vector, convertedVector, ignoredProperties );
convertedVector.setQuantitationType( convertedQt );
- doToVector.accept( convertedVector, vector );
+ doToVector.doToVector( convertedVector, vector );
return convertedVector;
}
+ interface DoToVectorFunction {
+
+ /**
+ * Populate the converted vector from the original vector.
+ * @param convertedVector the new vector being converted with the converted {@link QuantitationType}
+ * @param vector the original vector
+ * @throws QuantitationTypeConversionException if the original vector cannot be converted
+ */
+ void doToVector( T convertedVector, T vector ) throws QuantitationTypeConversionException;
+ }
+
/**
* List of properties to copy over when converting a vector to a different QT.
*/
@@ -373,29 +319,4 @@ private static String[] getDataVectorIgnoredProperties( Class extends DataVect
return ignoredPropertiesList.toArray( new String[0] );
}
- /**
- * Obtain the default to use for a given quantitation type if no value was provided.
- */
- @Nonnull
- public static Object getDefaultValue( QuantitationType quantitationType ) {
- PrimitiveType pt = quantitationType.getRepresentation();
- switch ( pt ) {
- case DOUBLE:
- return Double.NaN;
- case FLOAT:
- return Float.NaN;
- case STRING:
- return "";
- case CHAR:
- return ( char ) 0;
- case INT:
- return 0;
- case LONG:
- return 0L;
- case BOOLEAN:
- return false;
- default:
- throw new UnsupportedOperationException( "Missing values in data vectors of type " + quantitationType + " is not supported." );
- }
- }
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/RepresentationConversionUtils.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/RepresentationConversionUtils.java
index d4aebd2872..f861722160 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/RepresentationConversionUtils.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/RepresentationConversionUtils.java
@@ -1,12 +1,12 @@
package ubic.gemma.core.analysis.preprocess.convert;
-import org.apache.commons.lang3.StringUtils;
import ubic.gemma.model.common.quantitationtype.PrimitiveType;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.bioAssayData.DataVector;
import java.util.Collection;
+import static ubic.gemma.model.common.quantitationtype.QuantitationTypeUtils.appendToDescription;
import static ubic.gemma.persistence.util.ByteArrayUtils.doubleArrayToBytes;
/**
@@ -18,25 +18,18 @@ public class RepresentationConversionUtils {
/**
* Convert a collection of vectors to a desired representation.
*/
- public static Collection convertVectors( Collection vectors, PrimitiveType toRepresentation, Class vectorType ) {
+ public static Collection convertVectors( Collection vectors, PrimitiveType toRepresentation, Class vectorType ) throws QuantitationTypeConversionException {
return QuantitationTypeConversionUtils.convertVectors( vectors, qt -> getConvertedQuantitationType( qt, toRepresentation ), ( vec, origVec ) -> vec.setData( convertData( origVec, toRepresentation ) ), vectorType );
}
private static QuantitationType getConvertedQuantitationType( QuantitationType qt, PrimitiveType toRepresentation ) {
QuantitationType quantitationType = QuantitationType.Factory.newInstance( qt );
- String description;
- if ( StringUtils.isNotBlank( qt.getDescription() ) ) {
- description = StringUtils.appendIfMissing( StringUtils.strip( qt.getDescription() ), "." ) + " ";
- } else {
- description = "";
- }
- description += "Data was converted from " + qt.getRepresentation() + " to " + toRepresentation + ".";
- quantitationType.setDescription( description );
+ appendToDescription( quantitationType, "Data was converted from " + qt.getRepresentation() + " to " + toRepresentation + "." );
quantitationType.setRepresentation( toRepresentation );
return quantitationType;
}
- private static byte[] convertData( DataVector vector, PrimitiveType to ) {
+ private static byte[] convertData( DataVector vector, PrimitiveType to ) throws UnsupportedQuantitationRepresentationConversionException {
PrimitiveType from = vector.getQuantitationType().getRepresentation();
if ( from == to ) {
return vector.getData();
@@ -57,15 +50,15 @@ private static byte[] convertData( DataVector vector, PrimitiveType to ) {
case STRING:
return convertFromString( vector.getDataAsStrings(), to );
default:
- throw unsupportedConversion( from, to );
+ throw new UnsupportedQuantitationRepresentationConversionException( from, to );
}
}
- private static byte[] convertFromDouble( double[] dataAsDoubles, PrimitiveType to ) {
- throw unsupportedConversion( PrimitiveType.DOUBLE, to );
+ private static byte[] convertFromDouble( double[] dataAsDoubles, PrimitiveType to ) throws UnsupportedQuantitationRepresentationConversionException {
+ throw new UnsupportedQuantitationRepresentationConversionException( PrimitiveType.DOUBLE, to );
}
- private static byte[] convertFromFloat( float[] dataAsFloats, PrimitiveType to ) {
+ private static byte[] convertFromFloat( float[] dataAsFloats, PrimitiveType to ) throws UnsupportedQuantitationRepresentationConversionException {
if ( to == PrimitiveType.DOUBLE ) {
double[] result = new double[dataAsFloats.length];
for ( int i = 0; i < dataAsFloats.length; i++ ) {
@@ -73,10 +66,10 @@ private static byte[] convertFromFloat( float[] dataAsFloats, PrimitiveType to )
}
return doubleArrayToBytes( result );
}
- throw unsupportedConversion( PrimitiveType.FLOAT, to );
+ throw new UnsupportedQuantitationRepresentationConversionException( PrimitiveType.FLOAT, to );
}
- private static byte[] convertFromLong( long[] dataAsLongs, PrimitiveType to ) {
+ private static byte[] convertFromLong( long[] dataAsLongs, PrimitiveType to ) throws UnsupportedQuantitationRepresentationConversionException {
if ( to == PrimitiveType.DOUBLE ) {
double[] result = new double[dataAsLongs.length];
for ( int i = 0; i < dataAsLongs.length; i++ ) {
@@ -84,10 +77,10 @@ private static byte[] convertFromLong( long[] dataAsLongs, PrimitiveType to ) {
}
return doubleArrayToBytes( result );
}
- throw unsupportedConversion( PrimitiveType.LONG, to );
+ throw new UnsupportedQuantitationRepresentationConversionException( PrimitiveType.LONG, to );
}
- private static byte[] convertFromInt( int[] dataAsInts, PrimitiveType to ) {
+ private static byte[] convertFromInt( int[] dataAsInts, PrimitiveType to ) throws UnsupportedQuantitationRepresentationConversionException {
if ( to == PrimitiveType.DOUBLE ) {
double[] result = new double[dataAsInts.length];
for ( int i = 0; i < dataAsInts.length; i++ ) {
@@ -95,18 +88,18 @@ private static byte[] convertFromInt( int[] dataAsInts, PrimitiveType to ) {
}
return doubleArrayToBytes( result );
}
- throw unsupportedConversion( PrimitiveType.INT, to );
+ throw new UnsupportedQuantitationRepresentationConversionException( PrimitiveType.INT, to );
}
- private static byte[] convertFromBoolean( boolean[] dataAsBooleans, PrimitiveType to ) {
- throw unsupportedConversion( PrimitiveType.BOOLEAN, to );
+ private static byte[] convertFromBoolean( boolean[] dataAsBooleans, PrimitiveType to ) throws UnsupportedQuantitationRepresentationConversionException {
+ throw new UnsupportedQuantitationRepresentationConversionException( PrimitiveType.BOOLEAN, to );
}
- private static byte[] convertFromChar( char[] dataAsChars, PrimitiveType to ) {
- throw unsupportedConversion( PrimitiveType.CHAR, to );
+ private static byte[] convertFromChar( char[] dataAsChars, PrimitiveType to ) throws UnsupportedQuantitationRepresentationConversionException {
+ throw new UnsupportedQuantitationRepresentationConversionException( PrimitiveType.CHAR, to );
}
- private static byte[] convertFromString( String[] dataAsStrings, PrimitiveType to ) {
+ private static byte[] convertFromString( String[] dataAsStrings, PrimitiveType to ) throws UnsupportedQuantitationRepresentationConversionException {
if ( to == PrimitiveType.DOUBLE ) {
double[] resultAsDoubles = new double[dataAsStrings.length];
for ( int i = 0; i < dataAsStrings.length; i++ ) {
@@ -118,10 +111,6 @@ private static byte[] convertFromString( String[] dataAsStrings, PrimitiveType t
}
return doubleArrayToBytes( resultAsDoubles );
}
- throw unsupportedConversion( PrimitiveType.STRING, to );
- }
-
- private static UnsupportedOperationException unsupportedConversion( PrimitiveType from, PrimitiveType to ) {
- throw new UnsupportedOperationException( "Converting data from " + from + " to " + to + " is not supported." );
+ throw new UnsupportedQuantitationRepresentationConversionException( PrimitiveType.STRING, to );
}
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/ScaleTypeConversionUtils.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/ScaleTypeConversionUtils.java
index e097d6119c..718ea52c2d 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/ScaleTypeConversionUtils.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/ScaleTypeConversionUtils.java
@@ -1,6 +1,5 @@
package ubic.gemma.core.analysis.preprocess.convert;
-import org.apache.commons.lang3.StringUtils;
import ubic.gemma.model.common.quantitationtype.PrimitiveType;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.common.quantitationtype.ScaleType;
@@ -8,7 +7,8 @@
import ubic.gemma.model.expression.bioAssayData.DataVector;
import java.util.Collection;
-import java.util.function.Function;
+
+import static ubic.gemma.model.common.quantitationtype.QuantitationTypeUtils.appendToDescription;
/**
* Convert {@link DataVector} to different {@link ScaleType}.
@@ -23,20 +23,13 @@ public class ScaleTypeConversionUtils {
private static final ThreadLocal ONE_FLOAT_VALUE = ThreadLocal.withInitial( () -> new float[1] );
private static final ThreadLocal ONE_DOUBLE_VALUE = ThreadLocal.withInitial( () -> new double[1] );
- public static Collection convertVectors( Collection vectors, ScaleType toScale, Class vectorType ) {
+ public static Collection convertVectors( Collection vectors, ScaleType toScale, Class vectorType ) throws QuantitationTypeConversionException {
return QuantitationTypeConversionUtils.convertVectors( vectors, qt -> getConvertedQuantitationType( qt, toScale ), ( vec, origVec ) -> vec.setDataAsDoubles( convertData( origVec, toScale ) ), vectorType );
}
private static QuantitationType getConvertedQuantitationType( QuantitationType qt, ScaleType toScale ) {
QuantitationType quantitationType = QuantitationType.Factory.newInstance( qt );
- String description;
- if ( StringUtils.isNotBlank( qt.getDescription() ) ) {
- description = StringUtils.appendIfMissing( StringUtils.strip( qt.getDescription() ), "." ) + " ";
- } else {
- description = "";
- }
- description += "Data was converted from " + qt.getScale() + " to " + toScale + ".";
- quantitationType.setDescription( description );
+ appendToDescription( quantitationType, "Data was converted from " + qt.getScale() + " to " + toScale + "." );
quantitationType.setScale( toScale );
quantitationType.setRepresentation( PrimitiveType.DOUBLE );
return quantitationType;
@@ -48,7 +41,7 @@ private static QuantitationType getConvertedQuantitationType( QuantitationType q
* For efficiency, thread-local variables are used. Once you are done converting scalars, make sure to clean-up
* those variables with {@link #clearScalarConversionThreadLocalStorage()}.
*/
- public static double convertScalar( Number val, QuantitationType qt, ScaleType scaleType ) {
+ public static double convertScalar( Number val, QuantitationType qt, ScaleType scaleType ) throws UnsupportedQuantitationScaleConversionException {
if ( qt.getScale() == scaleType ) {
return val.doubleValue();
}
@@ -87,9 +80,9 @@ public static void clearScalarConversionThreadLocalStorage() {
* Convert a vector to the target scale.
* @param scaleType the target scale, or null to keep the original scale
* @throws IllegalArgumentException if the conversion is not possible
- * @throws UnsupportedOperationException if the conversion is not supported
+ * @throws UnsupportedQuantitationTypeConversionException if the representation or scale conversion is not supported
*/
- public static double[] convertData( DataVector vec, ScaleType scaleType ) {
+ public static double[] convertData( DataVector vec, ScaleType scaleType ) throws UnsupportedQuantitationTypeConversionException {
switch ( vec.getQuantitationType().getRepresentation() ) {
case FLOAT:
return convertData( vec.getDataAsFloats(), vec.getQuantitationType(), scaleType );
@@ -100,21 +93,21 @@ public static double[] convertData( DataVector vec, ScaleType scaleType ) {
case LONG:
return convertData( vec.getDataAsLongs(), scaleType );
default:
- throw new UnsupportedOperationException( "Conversion of " + vec.getQuantitationType().getRepresentation() + " is not supported." );
+ throw new UnsupportedQuantitationTypeConversionException( "Conversion of " + vec.getQuantitationType().getRepresentation() + " to " + scaleType + " is not supported." );
}
}
/**
* Convert a vector of float data to the target scale.
*/
- public static double[] convertData( float[] vec, QuantitationType quantitationType, ScaleType scaleType ) {
+ public static double[] convertData( float[] vec, QuantitationType quantitationType, ScaleType scaleType ) throws UnsupportedQuantitationScaleConversionException {
return convertData( float2double( vec ), quantitationType.getType(), quantitationType.getScale(), scaleType );
}
/**
* Convert a vector of double data to the target scale.
*/
- public static double[] convertData( double[] vec, QuantitationType quantitationType, ScaleType scaleType ) {
+ public static double[] convertData( double[] vec, QuantitationType quantitationType, ScaleType scaleType ) throws UnsupportedQuantitationScaleConversionException {
return convertData( vec, quantitationType.getType(), quantitationType.getScale(), scaleType );
}
@@ -123,7 +116,7 @@ public static double[] convertData( double[] vec, QuantitationType quantitationT
*
* The type and scale are assumed to be counts.
*/
- public static double[] convertData( int[] vec, ScaleType scaleType ) {
+ public static double[] convertData( int[] vec, ScaleType scaleType ) throws UnsupportedQuantitationScaleConversionException {
if ( scaleType == ScaleType.LINEAR || scaleType == ScaleType.COUNT ) {
return int2double( vec );
}
@@ -143,7 +136,7 @@ public static double[] convertData( int[] vec, ScaleType scaleType ) {
}
break;
default:
- throw new UnsupportedOperationException( "Cannot rescale counting data on to a " + scaleType + " scale." );
+ throw new UnsupportedQuantitationScaleConversionException( ScaleType.COUNT, scaleType );
}
return result;
}
@@ -178,7 +171,7 @@ public static double[] convertData( long[] vec, ScaleType scaleType ) {
return result;
}
- public static double[] convertData( double[] vec, StandardQuantitationType fromType, ScaleType fromScale, ScaleType scaleType ) {
+ public static double[] convertData( double[] vec, StandardQuantitationType fromType, ScaleType fromScale, ScaleType scaleType ) throws UnsupportedQuantitationScaleConversionException {
if ( fromScale == scaleType ) {
return vec;
}
@@ -248,7 +241,7 @@ public static double[] convertData( double[] vec, StandardQuantitationType fromT
}
return unscaled;
default:
- throw new UnsupportedOperationException( "Cannot rescale data on to a " + scaleType + " scale." );
+ throw new UnsupportedQuantitationScaleConversionException( fromScale, scaleType );
}
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/UnsupportedQuantitationRepresentationConversionException.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/UnsupportedQuantitationRepresentationConversionException.java
new file mode 100644
index 0000000000..c8966eaf01
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/UnsupportedQuantitationRepresentationConversionException.java
@@ -0,0 +1,14 @@
+package ubic.gemma.core.analysis.preprocess.convert;
+
+import ubic.gemma.model.common.quantitationtype.PrimitiveType;
+
+/**
+ * Exception raised when data in a given representation cannot be converted to another representation.
+ * @author poirigui
+ */
+public class UnsupportedQuantitationRepresentationConversionException extends UnsupportedQuantitationTypeConversionException {
+
+ public UnsupportedQuantitationRepresentationConversionException( PrimitiveType from, PrimitiveType to ) {
+ super( "Converting data from " + from + " to " + to + " is not supported." );
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/UnsupportedQuantitationScaleConversionException.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/UnsupportedQuantitationScaleConversionException.java
index ae9e4f0043..ab5206ea66 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/UnsupportedQuantitationScaleConversionException.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/UnsupportedQuantitationScaleConversionException.java
@@ -26,7 +26,7 @@
*
* @author ptan
*/
-public class UnsupportedQuantitationScaleConversionException extends QuantitationTypeConversionException {
+public class UnsupportedQuantitationScaleConversionException extends UnsupportedQuantitationTypeConversionException {
public UnsupportedQuantitationScaleConversionException( ScaleType sourceScaleType, ScaleType targetScaleType ) {
super( String.format( "Cannot transform data from %s to %s scale.", sourceScaleType, targetScaleType ) );
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/UnsupportedQuantitationTypeConversionException.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/UnsupportedQuantitationTypeConversionException.java
new file mode 100644
index 0000000000..adbef5b8de
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/convert/UnsupportedQuantitationTypeConversionException.java
@@ -0,0 +1,12 @@
+package ubic.gemma.core.analysis.preprocess.convert;
+
+/**
+ * Exception raised when data from a given quantitation type cannot be converted to another quantitation type.
+ * @author poirigui
+ */
+public class UnsupportedQuantitationTypeConversionException extends QuantitationTypeConversionException {
+
+ public UnsupportedQuantitationTypeConversionException( String message ) {
+ super( message );
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/svd/SVDServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/svd/SVDServiceImpl.java
index ae2e9b0a45..f1301008ee 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/svd/SVDServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/svd/SVDServiceImpl.java
@@ -47,7 +47,7 @@
import java.util.*;
-import static ubic.gemma.model.expression.experiment.ExperimentalDesignUtils.measurement2double;
+import static ubic.gemma.model.common.measurement.MeasurementUtils.measurement2double;
/**
* Perform SVD on expression data and store the results.
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/DatabaseViewGeneratorImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/DatabaseViewGeneratorImpl.java
index a3b6431b34..7b85781385 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/DatabaseViewGeneratorImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/DatabaseViewGeneratorImpl.java
@@ -24,7 +24,7 @@
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
-import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils;
+import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.model.analysis.expression.diff.ContrastResult;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult;
@@ -32,10 +32,6 @@
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.designElement.CompositeSequence;
-import ubic.gemma.model.expression.experiment.ExperimentalFactor;
-import ubic.gemma.model.expression.experiment.ExpressionExperiment;
-import ubic.gemma.model.expression.experiment.FactorValue;
-import ubic.gemma.model.expression.experiment.FactorValueUtils;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService;
@@ -280,7 +276,7 @@ private void generateDifferentialExpressionView( Integer limit, Collection> blatQuery( Collection sequences,
throws IOException;
/**
- * @return the blatScoreThreshold
- */
- double getBlatScoreThreshold();
-
- /**
- * @param blatScoreThreshold the blatScoreThreshold to set
+ * Set the blat score threshold to use.
+ *
+ * Defaults to {@link #DEFAULT_BLAT_SCORE_THRESHOLD}.
*/
void setBlatScoreThreshold( double blatScoreThreshold );
-
- /**
- * @return Returns the gfClientExe.
- */
- String getGfClientExe();
-
- /**
- * @return Returns the gfServerExe.
- */
- String getGfServerExe();
-
- /**
- * @return Returns the host.
- */
- String getHost();
-
- /**
- * @return Returns the humanServerPort.
- */
- int getHumanServerPort();
-
- /**
- * @return Returns the mouseServerPort.
- */
- int getMouseServerPort();
-
- /**
- * @return Returns the ratServerPort.
- */
- int getRatServerPort();
-
- /**
- * @return Returns the seqDir.
- */
- String getSeqDir();
-
- /**
- * @param genome genome
- * @return Returns the seqFiles.
- */
- String getSeqFiles( BlattableGenome genome );
-
- /**
- * @param inputStream to the Blat output file in psl format
- * @param taxon taxon
- * @return processed results.
- * @throws IOException when there are IO problems.
- */
- List processPsl( InputStream inputStream, Taxon taxon ) throws IOException;
-
- /**
- * Start the server, if the port isn't already being used. If the port is in use, we assume it is a gfServer.
- *
- * @param genome genome
- * @param port port
- * @throws IOException when there are IO problems.
- */
- void startServer( BlattableGenome genome, int port ) throws IOException;
-
- /**
- * Stop the gfServer, if it was started by this.
- *
- * @param port port
- */
- void stopServer( int port );
-
}
\ No newline at end of file
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/RepeatScan.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/RepeatScan.java
index 0a70f35733..4a79e54953 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/RepeatScan.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/RepeatScan.java
@@ -18,33 +18,43 @@
*/
package ubic.gemma.core.analysis.sequence;
+import lombok.extern.apachecommons.CommonsLog;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import ubic.gemma.core.loader.genome.FastaParser;
import ubic.gemma.core.profiling.StopWatchUtils;
-import ubic.gemma.core.util.concurrent.GenericStreamConsumer;
+import ubic.gemma.core.util.ShellUtils;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.model.genome.biosequence.BioSequence;
-import ubic.gemma.core.config.Settings;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
+import java.util.concurrent.TimeUnit;
/**
- * Scan sequences for repeats
+ * Scan sequences for repeats using RepeatMasker.
*
* @author pavlidis
*/
+@CommonsLog
public class RepeatScan {
- private static final String REPEAT_MASKER_CONFIG_PARAM = "repeatMasker.exe";
private static final int UPDATE_INTERVAL_MS = 1000 * 60 * 2;
- private static final Log log = LogFactory.getLog( RepeatScan.class.getName() );
- private static final String REPEAT_MASKER = Settings.getString( RepeatScan.REPEAT_MASKER_CONFIG_PARAM );
+
+ private final String repeatMaskerExe;
+
+ public RepeatScan( String repeatMaskerExe ) {
+ this.repeatMaskerExe = repeatMaskerExe;
+ }
/**
* @param sequences sequences
@@ -52,10 +62,10 @@ public class RepeatScan {
* @return Sequences which were updated.
*/
public Collection processRepeatMaskerOutput( Collection sequences,
- String outputSequencePath ) {
+ Path outputSequencePath ) {
FastaParser parser = new FastaParser();
try {
- parser.parse( outputSequencePath );
+ parser.parse( outputSequencePath.toFile() );
} catch ( IOException e ) {
throw new RuntimeException( e );
}
@@ -101,7 +111,7 @@ public Collection processRepeatMaskerOutput( Collection processRepeatMaskerOutput( Collection repeatScan( Collection sequences ) {
try {
- if ( sequences.size() == 0 ) {
+ if ( sequences.isEmpty() ) {
RepeatScan.log.warn( "No sequences to test" );
return sequences;
}
- File querySequenceFile = File.createTempFile( "repmask", ".fa" );
- SequenceWriter.writeSequencesToFile( sequences, querySequenceFile );
+ Path querySequenceFile = Files.createTempFile( "repmask", ".fa" );
+ SequenceWriter.writeSequencesToFile( sequences, querySequenceFile.toFile() );
Taxon taxon = sequences.iterator().next().getTaxon();
- this.execRepeatMasker( querySequenceFile, taxon );
-
- final String outputSequencePath = querySequenceFile.getParent() + File.separatorChar + querySequenceFile.getName() + ".masked";
+ Path outputSequencePath = this.execRepeatMasker( querySequenceFile, taxon );
// final String outputScorePath = querySequenceFile.getParent() + File.separatorChar
// + querySequenceFile.getName() + ".masked";
- File output = new File( outputSequencePath );
- if ( !output.exists() ) {
+ if ( !Files.exists( outputSequencePath ) ) {
this.handleNoOutputCondition( querySequenceFile, outputSequencePath );
return new HashSet<>();
}
@@ -141,94 +148,77 @@ public Collection repeatScan( Collection sequences ) {
double computeFractionMasked( BioSequence maskedSeq ) {
// count fraction of masked bases.
int origLength = maskedSeq.getSequence().length();
- int unmaskedBases = maskedSeq.getSequence().replaceAll( "[a-z]", "" ).length();
-
- return ( origLength - unmaskedBases ) / ( double ) origLength;
- }
-
- private void checkForExe() {
- if ( RepeatScan.REPEAT_MASKER == null ) {
- throw new IllegalStateException( "Repeatmasker executable could not be found. Make sure you correctly set "
- + RepeatScan.REPEAT_MASKER_CONFIG_PARAM );
+ int masked = 0;
+ for ( char c : maskedSeq.getSequence().toCharArray() ) {
+ if ( Character.isLowerCase( c ) ) {
+ masked++;
+ }
}
+ return ( double ) masked / ( double ) origLength;
}
/**
- * Run repeatmasker using a call to exec().
+ * Run RepeatMasker using a call to exec().
*
* @param querySequenceFile file
- * @param taxon taxon
+ * @param taxon taxon
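+ * @return the path where RepeatMasker is expected to write the masked sequences (the query file name with a {@code .masked} suffix); the file may not exist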
*/
- private void execRepeatMasker( File querySequenceFile, Taxon taxon ) throws IOException {
-
- this.checkForExe();
-
- final String cmd = RepeatScan.REPEAT_MASKER + " -parallel 8 -xsmall -species " + taxon.getCommonName() + " "
- + querySequenceFile.getAbsolutePath();// FIXME use -dir option to put output where we want; see https://github.com/PavlidisLab/Gemma/issues/53;
- RepeatScan.log.info( "Running repeatmasker like this: " + cmd );
-
- final Process run = Runtime.getRuntime().exec( cmd );
-
- // to ensure that we aren't left waiting for these streams
- GenericStreamConsumer gscErr = new GenericStreamConsumer( run.getErrorStream() );
- GenericStreamConsumer gscIn = new GenericStreamConsumer( run.getInputStream() );
- gscErr.start();
- gscIn.start();
-
+ private Path execRepeatMasker( Path querySequenceFile, Taxon taxon ) throws IOException {
+ String[] cmd = new String[] { repeatMaskerExe, "-parallel", "8", "-xsmall",
+ "-species", taxon.getCommonName(),
+ // FIXME use -dir option to put output where we want; see https://github.com/PavlidisLab/Gemma/issues/53;
+ querySequenceFile.toString() };
+ RepeatScan.log.info( "Running RepeatMasker like this: " + ShellUtils.join( cmd ) );
+
+ final Process run = new ProcessBuilder( cmd )
+ // to ensure that we aren't left waiting for these streams
+ // TODO: switch to Redirect.DISCARD for Java 9+
+ .redirectOutput( ProcessBuilder.Redirect.appendTo( new File( "/dev/null" ) ) )
+ .redirectError( ProcessBuilder.Redirect.PIPE )
+ .start();
+
+ // wait for RepeatMasker to exit, logging the elapsed time at every update interval
+ StopWatch overallWatch = StopWatch.createStarted();
try {
-
- int exitVal = Integer.MIN_VALUE;
-
- // wait...
- StopWatch overallWatch = new StopWatch();
- overallWatch.start();
-
- while ( exitVal == Integer.MIN_VALUE ) {
- try {
- exitVal = run.exitValue();
- } catch ( IllegalThreadStateException e ) {
- // okay, still waiting.
- }
- Thread.sleep( RepeatScan.UPDATE_INTERVAL_MS );
+ while ( !run.waitFor( RepeatScan.UPDATE_INTERVAL_MS, TimeUnit.MILLISECONDS ) ) {
String minutes = StopWatchUtils.getMinutesElapsed( overallWatch );
- RepeatScan.log.info( "Repeatmasker: " + minutes + " minutes elapsed" );
+ RepeatScan.log.info( "RepeatMasker: " + minutes + " minutes elapsed" );
}
-
- overallWatch.stop();
- String minutes = StopWatchUtils.getMinutesElapsed( overallWatch );
- RepeatScan.log.info( "Repeatmasker took a total of " + minutes + " minutes" );
-
- // int exitVal = run.waitFor();
-
- RepeatScan.log.debug( "Repeatmasker exit value=" + exitVal );
} catch ( InterruptedException e ) {
+ Thread.currentThread().interrupt();
throw new RuntimeException( e );
}
- RepeatScan.log.debug( "Repeatmasker Success" );
+ int exitVal = run.exitValue();
+ if ( exitVal != 0 ) {
+ String errorMessage = StringUtils.strip( IOUtils.toString( run.getErrorStream(), StandardCharsets.UTF_8 ) );
+ throw new RuntimeException( "RepeatMasker failed with exit value " + exitVal + ":\n" + errorMessage );
+ }
+
+ overallWatch.stop();
+ String minutes = StopWatchUtils.getMinutesElapsed( overallWatch );
+ RepeatScan.log.info( "RepeatMasker took a total of " + minutes + " minutes" );
+ RepeatScan.log.debug( "RepeatMasker Success" );
+ return querySequenceFile.resolveSibling( querySequenceFile.getFileName().toString() + ".masked" );
}
- private void handleNoOutputCondition( File querySequenceFile, final String outputSequencePath ) throws IOException {
+ private void handleNoOutputCondition( Path querySequenceFile, Path outputSequencePath ) throws IOException {
// this happens if there were no repeats to mask. Check to make sure.
- final String outputSummary = querySequenceFile.getParent() + File.separatorChar + querySequenceFile.getName() + ".out";
- if ( !( new File( outputSummary ) ).exists() ) {
+ final Path outputSummary = querySequenceFile.resolveSibling( querySequenceFile.getFileName() + ".out" );
+ if ( !Files.exists( outputSummary ) ) {
// okay, something is wrong for sure.
- throw new RuntimeException(
- "Repeatmasker seems to have failed, it left no useful output (looking for " + outputSequencePath
- + " or " + outputSummary );
+ throw new RuntimeException( String.format( "RepeatMasker seems to have failed, it left no useful output (looking for %s or %s)",
+ outputSequencePath, outputSummary ) );
}
- InputStream is = new FileInputStream( outputSummary );
- try (BufferedReader br = new BufferedReader( new InputStreamReader( is ) )) {
+ try ( BufferedReader br = Files.newBufferedReader( outputSummary ) ) {
String nothingFound = "There were no repetitive sequences detected";
String line = br.readLine();
if ( line == null || line.startsWith( nothingFound ) ) {
RepeatScan.log.info( "There were no repeats found" );
} else {
- RepeatScan.log
- .warn( "Something might have gone wrong with repeatmasking. The output file reads: " + line );
+ RepeatScan.log.warn( "Something might have gone wrong with RepeatMasker. The output file reads: " + line );
}
}
-
}
-
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlat.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlat.java
index 51cf727ff8..b36bf8a8a0 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlat.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlat.java
@@ -18,40 +18,52 @@
*/
package ubic.gemma.core.analysis.sequence;
-import org.apache.commons.configuration2.ex.ConfigurationException;
+import lombok.Getter;
+import lombok.Setter;
+import lombok.extern.apachecommons.CommonsLog;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
+import org.springframework.util.Assert;
import ubic.gemma.core.config.Settings;
import ubic.gemma.core.loader.genome.BlatResultParser;
import ubic.gemma.core.profiling.StopWatchUtils;
-import ubic.gemma.core.util.concurrent.Executors;
-import ubic.gemma.core.util.concurrent.GenericStreamConsumer;
+import ubic.gemma.core.util.ShellUtils;
import ubic.gemma.model.common.description.DatabaseType;
import ubic.gemma.model.common.description.ExternalDatabase;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.model.genome.biosequence.BioSequence;
import ubic.gemma.model.genome.sequenceAnalysis.BlatResult;
-import java.io.*;
+import javax.annotation.Nullable;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
import java.net.Socket;
-import java.net.UnknownHostException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.*;
-import java.util.concurrent.*;
+import java.util.concurrent.TimeUnit;
/**
* Class to manage the gfServer and run BLAT searches. Delegates to the command-line shell to run blat.
*
* @author pavlidis
*/
-@SuppressWarnings("unused") // Possible external use
+@Getter
+@CommonsLog
public class ShellDelegatingBlat implements Blat {
+ /**
+ * Interval in milliseconds to report on BLAT progress by peeking at its output file.
+ */
private static final int BLAT_UPDATE_INTERVAL_MS = 1000 * 30;
- private static final Log log = LogFactory.getLog( ShellDelegatingBlat.class );
/**
* Minimum alignment length for retention.
*/
@@ -60,45 +72,61 @@ public class ShellDelegatingBlat implements Blat {
* Strings of As or Ts at the start or end of a sequence longer than this will be stripped off prior to analysis.
*/
private static final int POLY_AT_THRESHOLD = 5;
- private static final String os = System.getProperty( "os.name" ).toLowerCase();
- private double blatScoreThreshold = Blat.DEFAULT_BLAT_SCORE_THRESHOLD;
- private boolean doShutdown = true;
+
+ private static final int STEPSIZE = 7;
+
// typical values.
- private String gfClientExe = "/cygdrive/c/cygwin/usr/local/bin/gfClient.exe";
- private String gfServerExe = "/cygdrive/c/cygwin/usr/local/bin/gfServer.exe";
- private String host = "localhost";
- private int humanSensitiveServerPort;
- private String humanSeqFiles;
- private int humanServerPort;
- private int mouseSensitiveServerPort;
- private String mouseSeqFiles;
- private int mouseServerPort;
- private int ratSensitiveServerPort;
- private String ratSeqFiles;
- private int ratServerPort;
- private String seqDir = "/";
+ private final String gfClientExe;
+ private final String gfServerExe;
+ private final String host;
+ private final int humanSensitiveServerPort;
+ private final String[] humanSeqFiles;
+ private final int humanServerPort;
+ private final int mouseSensitiveServerPort;
+ private final String[] mouseSeqFiles;
+ private final int mouseServerPort;
+ private final int ratSensitiveServerPort;
+ private final String[] ratSeqFiles;
+ private final int ratServerPort;
+ private final Path seqDir;
+ private final Path tmpDir;
+
+ @Setter
+ private double blatScoreThreshold = Blat.DEFAULT_BLAT_SCORE_THRESHOLD;
+
+ @Nullable
private Process serverProcess;
+ private String serverHost;
+ private int serverPort;
/**
* Create a blat object with settings read from the config file.
*/
public ShellDelegatingBlat() {
- try {
- this.init();
- } catch ( ConfigurationException e ) {
- throw new RuntimeException( "Could not load configuration", e );
+ ShellDelegatingBlat.log.debug( "Reading global config" );
+ this.humanServerPort = Settings.getInt( "gfClient.humanServerPort" );
+ this.mouseServerPort = Settings.getInt( "gfClient.mouseServerPort" );
+ this.ratServerPort = Settings.getInt( "gfClient.ratServerPort" );
+ this.humanSensitiveServerPort = Settings.getInt( "gfClient.sensitive.humanServerPort" );
+ this.mouseSensitiveServerPort = Settings.getInt( "gfClient.sensitive.mouseServerPort" );
+ this.ratSensitiveServerPort = Settings.getInt( "gfClient.sensitive.ratServerPort" );
+ this.host = Settings.getString( "gfClient.host" );
+ this.seqDir = Paths.get( Settings.getString( "gfClient.seqDir" ) );
+ this.tmpDir = Paths.get( Settings.getDownloadPath() );
+ this.mouseSeqFiles = Settings.getStringArray( "gfClient.mouse.seqFiles" );
+ this.ratSeqFiles = Settings.getStringArray( "gfClient.rat.seqFiles" );
+ this.humanSeqFiles = Settings.getStringArray( "gfClient.human.seqFiles" );
+ this.gfClientExe = Settings.getString( "gfClient.exe" );
+ this.gfServerExe = Settings.getString( "gfServer.exe" );
+ if ( gfServerExe == null ) {
+ /*
+ * This won't ever really work -- it's left over from earlier iterations.
+ */
+ ShellDelegatingBlat.log
+ .warn( "You will not be able to start the server: gfServer.exe is not set in config" );
}
}
- public ShellDelegatingBlat( String host, int humanServerPort, String seqDir ) {
-
- if ( host == null || humanServerPort <= 0 || seqDir == null )
- throw new IllegalArgumentException( "All values must be non-null" );
- this.host = host;
- this.humanServerPort = humanServerPort;
- this.seqDir = seqDir;
- }
-
public static ExternalDatabase getSearchedGenome( Taxon taxon ) {
BlattableGenome genome = ShellDelegatingBlat.inferBlatDatabase( taxon );
ExternalDatabase searchedDatabase = ExternalDatabase.Factory.newInstance();
@@ -109,14 +137,12 @@ public static ExternalDatabase getSearchedGenome( Taxon taxon ) {
private static BlattableGenome inferBlatDatabase( Taxon taxon ) {
assert taxon != null;
-
BlattableGenome bg;
-
- if ( taxon.getNcbiId() == 10090 || taxon.getCommonName().equals( "mouse" ) ) {
+ if ( Objects.equals( taxon.getNcbiId(), 10090 ) || Objects.equals( taxon.getCommonName(), "mouse" ) ) {
bg = BlattableGenome.MOUSE;
- } else if ( taxon.getNcbiId() == 10116 || taxon.getCommonName().equals( "rat" ) ) {
+ } else if ( Objects.equals( taxon.getNcbiId(), 10116 ) || Objects.equals( taxon.getCommonName(), "rat" ) ) {
bg = BlattableGenome.RAT;
- } else if ( taxon.getNcbiId() == 9606 || taxon.getCommonName().equals( "human" ) ) {
+ } else if ( Objects.equals( taxon.getNcbiId(), 9606 ) || Objects.equals( taxon.getCommonName(), "human" ) ) {
bg = BlattableGenome.HUMAN;
} else {
throw new UnsupportedOperationException( "Cannot determine which database to search for " + taxon );
@@ -139,18 +165,18 @@ public List blatQuery( BioSequence b, Taxon taxon, boolean sensitive
assert seqDir != null;
// write the sequence to a temporary file.
String seqName = b.getName().replaceAll( " ", "_" );
- File querySequenceFile = File.createTempFile( seqName, ".fa" );
+ Path querySequenceFile = Files.createTempFile( seqName, ".fa" );
- try ( BufferedWriter out = new BufferedWriter( new FileWriter( querySequenceFile ) ) ) {
+ try ( BufferedWriter out = Files.newBufferedWriter( querySequenceFile ) ) {
String trimmed = SequenceManipulation
.stripPolyAorT( b.getSequence(), ShellDelegatingBlat.POLY_AT_THRESHOLD );
out.write( ">" + seqName + "\n" + trimmed );
- ShellDelegatingBlat.log.info( "Wrote sequence to " + querySequenceFile.getPath() );
+ ShellDelegatingBlat.log.info( "Wrote sequence to " + querySequenceFile );
}
- String outputPath = this.getTmpPslFilePath( seqName );
+ Path outputPath = this.getTmpPslFilePath( seqName );
- List results = this
- .gfClient( querySequenceFile, outputPath, this.choosePortForQuery( taxon, sensitive ) );
+ int portToUse = this.choosePortForQuery( taxon, sensitive );
+ List results = execGfClient( querySequenceFile, outputPath, portToUse, taxon );
ExternalDatabase searchedDatabase = ShellDelegatingBlat.getSearchedGenome( taxon );
for ( BlatResult result : results ) {
@@ -167,26 +193,18 @@ public Map> blatQuery( Collection seq
Taxon taxon ) throws IOException {
Map> results = new HashMap<>();
- File querySequenceFile = File.createTempFile( "sequences-for-blat", ".fa" );
- int count = SequenceWriter.writeSequencesToFile( sequences, querySequenceFile );
+ Path querySequenceFile = Files.createTempFile( "sequences-for-blat", ".fa" );
+ int count = SequenceWriter.writeSequencesToFile( sequences, querySequenceFile.toFile() );
if ( count == 0 ) {
- if ( !querySequenceFile.delete() ) {
- throw new IOException( "Could not delete file " + querySequenceFile.getPath() );
- }
+ Files.delete( querySequenceFile );
throw new IllegalArgumentException( "No sequences!" );
}
- String outputPath = this.getTmpPslFilePath( "blat-output" );
+ Path outputPath = this.getTmpPslFilePath( "blat-output" );
- Integer port = this.choosePortForQuery( taxon, sensitive );
+ int port = this.choosePortForQuery( taxon, sensitive );
- if ( port == null ) {
- throw new IllegalStateException(
- "Could not locate port for BLAT with settings taxon=" + taxon + ", sensitive=" + sensitive
- + ", check your configuration." );
- }
-
- Collection