From f3ff5d4fcbfdb640717f274ec33c31e60e69ce11 Mon Sep 17 00:00:00 2001 From: OganM Date: Wed, 11 Jun 2025 15:00:49 -0700 Subject: [PATCH 001/129] filter categories based on their presence in the BioMaterial factorValues --- .../experiment/ExpressionExperimentController.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java index 26b28bdc1f..4b65e01c47 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java @@ -445,8 +445,12 @@ public Collection getDesignMatrixRows( EntityDelegat .stream() .filter( factor -> !ExperimentalDesignUtils.isBatchFactor( factor ) && factor.getType() != FactorType.CONTINUOUS - // cell type factors apply to sub-biomaterials, so they don't make sense to display - && ( factor.getCategory() == null || !CharacteristicUtils.hasCategory( factor.getCategory(), Categories.CELL_TYPE ) ) ) + && ee.getBioAssays().stream() + .map( BioAssay::getSampleUsed ) + .map( BioMaterial::getFactorValues ) + .flatMap( Collection::stream ) + .map( FactorValue::getExperimentalFactor ) + .anyMatch( factor::equals ) ) .collect( Collectors.toSet() ); CountingMap assayCount = new CountingMap<>(); From 1697ca310f5e3eac8bf7e01bf1e83822cdb3789d Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 20 Jun 2025 22:10:58 -0700 Subject: [PATCH 002/129] Fix typo in env.BRANCH_NAME --- .jenkins/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/Jenkinsfile b/.jenkins/Jenkinsfile index 09f04eb9d8..5ed2f36829 100644 --- a/.jenkins/Jenkinsfile +++ b/.jenkins/Jenkinsfile @@ -48,7 +48,7 @@ pipeline { gemmaVersion = sh 
script: 'mvn help:evaluate -Dexpression=project.version -q -DforceStdout', returnStdout: true baseCodeVersion = sh script: 'mvn help:evaluate -Dartifact=baseCode:baseCode -Dexpression=project.version -q -DforceStdout', returnStdout: true buildHash = env.GIT_COMMIT - def productionBuild = env.BRNACH_NAME == 'master' + def productionBuild = env.BRANCH_NAME == 'master' def supportBuild = env.BRANCH_NAME.startsWith('support-') def stagingBuild = env.BRANCH_NAME =~ '^' + params.STAGING_BRANCH + '-.*' if (productionBuild) { From 2140530fa8589c188702fdcb2cf797bc46b0ad74 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 20 Jun 2025 23:23:06 -0700 Subject: [PATCH 003/129] Update versions for hotfix --- gemma-cli/pom.xml | 2 +- gemma-core/pom.xml | 2 +- gemma-rest/pom.xml | 2 +- gemma-web/pom.xml | 2 +- pom.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gemma-cli/pom.xml b/gemma-cli/pom.xml index 92ae875b86..96621244e9 100644 --- a/gemma-cli/pom.xml +++ b/gemma-cli/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.32.1 + 1.32.2-SNAPSHOT 4.0.0 gemma-cli diff --git a/gemma-core/pom.xml b/gemma-core/pom.xml index bd31721930..467284ee2c 100644 --- a/gemma-core/pom.xml +++ b/gemma-core/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.32.1 + 1.32.2-SNAPSHOT 4.0.0 gemma-core diff --git a/gemma-rest/pom.xml b/gemma-rest/pom.xml index a5c9179152..918aa4bfba 100644 --- a/gemma-rest/pom.xml +++ b/gemma-rest/pom.xml @@ -5,7 +5,7 @@ gemma gemma - 1.32.1 + 1.32.2-SNAPSHOT 4.0.0 diff --git a/gemma-web/pom.xml b/gemma-web/pom.xml index fd35605f65..0dd8ab964b 100644 --- a/gemma-web/pom.xml +++ b/gemma-web/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.32.1 + 1.32.2-SNAPSHOT 4.0.0 gemma-web diff --git a/pom.xml b/pom.xml index bf2b539741..7e539b1ae2 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ Gemma gemma gemma - 1.32.1 + 1.32.2-SNAPSHOT 2005 The Gemma Project for meta-analysis of genomics data https://gemma.msl.ubc.ca From 55aab9a49615207a1c0c025fd9866e3cdd26be2e Mon 
Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Sat, 21 Jun 2025 09:50:55 -0700 Subject: [PATCH 004/129] Make the GENE2CS update more robust when related logic fails Always write the ExternalDatabase update first so that it will not be ignored if the write to disk or admin email fails. Do not rethrow an exception when writing the update status to disk fails. --- .../maintenance/TableMaintenanceUtilImpl.java | 67 +++++++++++-------- .../maintenance/TableMaintenanceUtilTest.java | 14 ++-- 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java index 703d913fc1..0dd6ade9f9 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java @@ -20,7 +20,7 @@ package ubic.gemma.persistence.service.maintenance; import io.micrometer.core.annotation.Timed; -import org.apache.commons.io.FileUtils; +import org.apache.commons.io.file.PathUtils; import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -32,7 +32,6 @@ import org.springframework.util.Assert; import ubic.gemma.core.util.MailEngine; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; -import ubic.gemma.model.common.auditAndSecurity.Auditable; import ubic.gemma.model.common.auditAndSecurity.eventType.ArrayDesignGeneMappingEvent; import ubic.gemma.model.common.description.ExternalDatabase; import ubic.gemma.model.common.description.ExternalDatabases; @@ -177,7 +176,6 @@ public int updateGene2CsEntries() { @Transactional @Timed public int updateGene2CsEntries( boolean force ) { - Gene2CsStatus updatedStatus = null; try { String annotation; if ( ( annotation = 
needsToRefreshGene2Cs( force ) ) == null ) { @@ -190,20 +188,18 @@ public int updateGene2CsEntries( boolean force ) { annotation += "\n\n" + "Updated " + updated + " entries."; } TableMaintenanceUtilImpl.log.info( String.format( "Done regenerating the GENE2CS table; %d entries were updated.", updated ) ); - updatedStatus = this.writeUpdateStatus( annotation, null ); + Gene2CsStatus updatedStatus; + updatedStatus = createUpdateStatus( annotation, null ); + updateGene2csExternalDatabaseLastUpdated( updatedStatus ); + writeGene2CsUpdateStatusToDisk( updatedStatus ); + sendGene2CsUpdateStatusAdminEmail( updatedStatus ); return updated; } catch ( Exception e ) { - updatedStatus = this.writeUpdateStatus( "An error occurred while attempting to update the GENE2CS table.", e ); + Gene2CsStatus updatedStatus; + updatedStatus = createUpdateStatus( "An error occurred while attempting to update the GENE2CS table.", e ); + writeGene2CsUpdateStatusToDisk( updatedStatus ); + sendGene2CsUpdateStatusAdminEmail( updatedStatus ); throw e; - } finally { - if ( updatedStatus != null ) { - if ( sendEmail ) { - mailEngine.sendAdminMessage( "Gene2Cs update status.", "Gene2Cs updating was run.\n" + updatedStatus.getAnnotation() ); - } - if ( updatedStatus.getError() == null ) { - this.updateGene2csExternalDatabaseLastUpdated( updatedStatus ); - } - } } } @@ -396,33 +392,50 @@ private Gene2CsStatus getLastGene2CsUpdateStatus() { } } - /** - * @param annotation extra text that describes the status - */ - private Gene2CsStatus writeUpdateStatus( String annotation, @Nullable Exception e ) { + private Gene2CsStatus createUpdateStatus( String annotation, @Nullable Exception e ) { Gene2CsStatus status = new Gene2CsStatus(); Calendar c = Calendar.getInstance(); Date date = c.getTime(); status.setLastUpdate( date ); status.setError( e ); status.setAnnotation( annotation ); + return status; + } + + /** + * Update the last updated date of the GENE2CS {@link ExternalDatabase}. 
+ */ + private void updateGene2csExternalDatabaseLastUpdated( Gene2CsStatus status ) { + ExternalDatabase ed = externalDatabaseService.findByNameWithAuditTrail( ExternalDatabases.GENE2CS ); + if ( ed == null ) { + log.error( String.format( "External database with name %s is missing, no audit event will be recorded.", ExternalDatabases.GENE2CS ) ); + return; + } + externalDatabaseService.updateReleaseLastUpdated( ed, status.getAnnotation(), status.getLastUpdate() ); + } + + /** + * Write a GENE2CS update status to disk. + */ + private void writeGene2CsUpdateStatusToDisk( Gene2CsStatus status ) { try { - FileUtils.forceMkdirParent( gene2CsInfoPath.toFile() ); + PathUtils.createParentDirectories( gene2CsInfoPath ); try ( ObjectOutputStream oos = new ObjectOutputStream( Files.newOutputStream( gene2CsInfoPath ) ) ) { oos.writeObject( status ); } } catch ( IOException e2 ) { - throw new RuntimeException( "Failed to update gene2cs update status.", e2 ); + log.error( "Failed to update gene2cs update status.", e2 ); + // not rethrowing, or else the update itself would be rolled back } - return status; } - private void updateGene2csExternalDatabaseLastUpdated( Gene2CsStatus status ) { - ExternalDatabase ed = externalDatabaseService.findByNameWithAuditTrail( ExternalDatabases.GENE2CS ); - if ( ed != null ) { - externalDatabaseService.updateReleaseLastUpdated( ed, status.getAnnotation(), status.getLastUpdate() ); - } else { - log.warn( String.format( "External database with name %s is missing, no audit event will be recorded.", ExternalDatabases.GENE2CS ) ); + /** + * Send an email to the admin with the status of the GENE2CS update. 
+ */ + private void sendGene2CsUpdateStatusAdminEmail( Gene2CsStatus updatedStatus ) { + if ( !sendEmail ) { + return; } + mailEngine.sendAdminMessage( "Gene2Cs update status.", "Gene2Cs updating was run.\n" + updatedStatus.getAnnotation() ); } } diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilTest.java index fae5ff6ead..5c9c3f635f 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilTest.java @@ -22,7 +22,6 @@ import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.common.description.ExternalDatabaseService; -import java.io.File; import java.io.IOException; import java.io.ObjectOutputStream; import java.nio.file.Files; @@ -113,12 +112,12 @@ public void setUp() throws IOException { } @After - public void tearDown() { + public void tearDown() throws IOException { reset( externalDatabaseService, sessionFactory, session, query ); - File f = gene2csInfoPath.toFile(); - if ( f.exists() ) { - assertThat( f.delete() ).isTrue(); - assertThat( f.getParentFile().delete() ).isTrue(); + Path f = gene2csInfoPath; + if ( Files.exists( f ) ) { + Files.delete( f ); + Files.delete( f.getParent() ); } } @@ -139,8 +138,7 @@ public void test() { public void testUpdateWhenTableIsFresh() throws IOException { Gene2CsStatus status = new Gene2CsStatus(); status.setLastUpdate( new Date() ); // now! 
so nothing can be newer - File statusFile = gene2csInfoPath.toFile(); - try ( ObjectOutputStream out = new ObjectOutputStream( Files.newOutputStream( statusFile.toPath() ) ) ) { + try ( ObjectOutputStream out = new ObjectOutputStream( Files.newOutputStream( gene2csInfoPath ) ) ) { out.writeObject( status ); } tableMaintenanceUtil.updateGene2CsEntries(); From b8a6f07504030760978d7af8624122743b6f9718 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Sat, 21 Jun 2025 12:22:48 -0700 Subject: [PATCH 005/129] Add a dependency between sessionFactory and dataSourceInitializer In tests, this ensures that the database is always initialized before any test that uses the SessionFactory runs. --- .../resources/ubic/gemma/applicationContext-dataSource.xml | 4 ++++ .../resources/ubic/gemma/applicationContext-hibernate.xml | 2 +- .../ubic/gemma/applicationContext-dataSourceInitializer.xml | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml b/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml index 6e56d5dc61..fbd5b928d2 100644 --- a/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml +++ b/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml @@ -24,6 +24,10 @@ + + + + diff --git a/gemma-core/src/main/resources/ubic/gemma/applicationContext-hibernate.xml b/gemma-core/src/main/resources/ubic/gemma/applicationContext-hibernate.xml index 43065b1ef5..f9180867be 100644 --- a/gemma-core/src/main/resources/ubic/gemma/applicationContext-hibernate.xml +++ b/gemma-core/src/main/resources/ubic/gemma/applicationContext-hibernate.xml @@ -53,7 +53,7 @@ + depends-on="ehcache,dataSourceInitializer"> diff --git a/gemma-core/src/test/resources/ubic/gemma/applicationContext-dataSourceInitializer.xml b/gemma-core/src/test/resources/ubic/gemma/applicationContext-dataSourceInitializer.xml index dbe71af719..5cf1d447f5 100644 --- 
a/gemma-core/src/test/resources/ubic/gemma/applicationContext-dataSourceInitializer.xml +++ b/gemma-core/src/test/resources/ubic/gemma/applicationContext-dataSourceInitializer.xml @@ -17,7 +17,8 @@ - + From 1ee58da1b191f7ef89190d23aa40ab79ff76f155 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Sat, 21 Jun 2025 12:11:18 -0700 Subject: [PATCH 006/129] Remove MailEngine.getAdminEmailAddress() --- .../main/java/ubic/gemma/core/util/MailEngine.java | 5 ----- .../java/ubic/gemma/core/util/MailEngineImpl.java | 5 ----- .../java/ubic/gemma/core/util/package-info.java | 7 +++++++ .../java/ubic/gemma/core/util/MailEngineTest.java | 1 - .../common/auditAndSecurity/SecurityController.java | 13 ++++++++----- 5 files changed, 15 insertions(+), 16 deletions(-) create mode 100644 gemma-core/src/main/java/ubic/gemma/core/util/package-info.java diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/MailEngine.java b/gemma-core/src/main/java/ubic/gemma/core/util/MailEngine.java index 4a4815eee0..6074e8ac9b 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/util/MailEngine.java +++ b/gemma-core/src/main/java/ubic/gemma/core/util/MailEngine.java @@ -21,11 +21,6 @@ */ public interface MailEngine { - /** - * Return the admin email address used for {@link #sendAdminMessage(String, String)} - */ - String getAdminEmailAddress(); - /** * Send an email message to the administrator. 
*/ diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/MailEngineImpl.java b/gemma-core/src/main/java/ubic/gemma/core/util/MailEngineImpl.java index b6ae06bf47..b2ef754945 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/util/MailEngineImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/util/MailEngineImpl.java @@ -58,11 +58,6 @@ public class MailEngineImpl implements MailEngine { @Value("${gemma.support.email}") private String supportEmailAddress; - @Override - public String getAdminEmailAddress() { - return adminEmailAddress; - } - /** * Sends a message to the gemma administrator as defined in the Gemma.properties file */ diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/package-info.java b/gemma-core/src/main/java/ubic/gemma/core/util/package-info.java new file mode 100644 index 0000000000..de02ba9f61 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/util/package-info.java @@ -0,0 +1,7 @@ +/** + * @author poirigui + */ +@ParametersAreNonnullByDefault +package ubic.gemma.core.util; + +import javax.annotation.ParametersAreNonnullByDefault; \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/MailEngineTest.java b/gemma-core/src/test/java/ubic/gemma/core/util/MailEngineTest.java index 47a669553f..4e8bc628fa 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/MailEngineTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/MailEngineTest.java @@ -12,7 +12,6 @@ import org.springframework.mail.MailSender; import org.springframework.mail.SimpleMailMessage; import org.springframework.test.context.ContextConfiguration; -import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; import ubic.gemma.core.context.TestComponent; import ubic.gemma.core.util.test.BaseTest; import ubic.gemma.core.util.test.TestPropertyPlaceholderConfigurer; diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/common/auditAndSecurity/SecurityController.java 
b/gemma-web/src/main/java/ubic/gemma/web/controller/common/auditAndSecurity/SecurityController.java index a5508699c5..a24cccc1c1 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/common/auditAndSecurity/SecurityController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/common/auditAndSecurity/SecurityController.java @@ -32,8 +32,8 @@ import org.springframework.security.core.userdetails.UserDetails; import org.springframework.security.core.userdetails.UsernameNotFoundException; import org.springframework.stereotype.Controller; -import ubic.gemma.core.security.authentication.UserManager; import ubic.gemma.core.util.MailEngine; +import ubic.gemma.core.security.authentication.UserManager; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.analysis.expression.diff.GeneDifferentialExpressionMetaAnalysis; import ubic.gemma.model.common.Describable; @@ -79,6 +79,8 @@ public class SecurityController { @Value("${gemma.hosturl}") private String hostUrl; + @Value("${gemma.admin.email}") + private String adminEmailAddress; public boolean addUserToGroup( String userName, String groupName ) { @@ -115,10 +117,11 @@ public boolean addUserToGroup( String userName, String groupName ) { if ( StringUtils.isNotBlank( emailAddress ) ) { SecurityController.log.debug( "Sending email notification to " + emailAddress ); String manageGroupsUrl = hostUrl + servletContext.getContextPath() + "/manageGroups.html"; - String body = userTakingAction.getUserName() + " has added you to the group '" + groupName - + "'.\nTo view groups you belong to, visit " + manageGroupsUrl - + "\n\nIf you believe you received this email in error, contact " + mailEngine.getAdminEmailAddress() - + "."; + String body = String.format( "%s has added you to the group '%s'.\n" + + "To view groups you belong to, visit %s\n" + + "\n" + + "If you believe you received this email in error, contact %s.", + userTakingAction.getUserName(), groupName, 
manageGroupsUrl, adminEmailAddress ); mailEngine.sendMessage( emailAddress, "You have been added to a group on Gemma", body ); } From 4bccec8cba7c881c15764368fccd2e80e56fd9fd Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 23 Jun 2025 12:31:37 -0700 Subject: [PATCH 007/129] Fix missing class in dummy dataSourceInitializer --- .../resources/ubic/gemma/applicationContext-dataSource.xml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml b/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml index fbd5b928d2..98f3030e3e 100644 --- a/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml +++ b/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml @@ -26,9 +26,7 @@ - - - + Date: Sun, 22 Jun 2025 19:28:51 -0700 Subject: [PATCH 008/129] Improvements for Fastacmd, RepeatScan and ShellDelegatingBlat and AffyPowerToolsProbesetSummarize Improve usages of the Process API: - use waitFor() with a timeout instead of Thread.sleep() - replace GenericStreamConsumer and ParsingStreamConsumer with ProcessBuilder and appropriate stream redirections - always specify arguments as an array of strings. - always pipe the standard error and include it in the exception message Replace File with Path. Eliminate native BLAT. It is unused and if we ever want to add it back, it will have to be updated anyway.
--- .../gemma/apps/ArrayDesignRepeatScanCli.java | 9 +- .../ArrayDesignSequenceAssociationCli.java | 6 +- .../gemma/core/analysis/sequence/Blat.java | 78 +-- .../core/analysis/sequence/RepeatScan.java | 160 +++-- .../sequence/ShellDelegatingBlat.java | 586 ++++++++---------- .../java/ubic/gemma/core/config/Settings.java | 4 + .../AffyPowerToolsProbesetSummarize.java | 85 ++- .../ArrayDesignSequenceProcessingService.java | 52 +- ...ayDesignSequenceProcessingServiceImpl.java | 48 +- .../gemma/core/loader/genome/FastaCmd.java | 9 - .../core/loader/genome/SimpleFastaCmd.java | 249 ++++---- .../concurrent/GenericStreamConsumer.java | 65 -- .../concurrent/ParsingStreamConsumer.java | 48 -- gemma-core/src/main/native/.cvsignore | 0 gemma-core/src/main/native/Blat.c | 26 - gemma-core/src/main/native/README | 33 - gemma-core/src/main/native/include/Blat.h | 22 - gemma-core/src/main/native/include/gfClient.h | 9 - gemma-core/src/main/native/makefile | 9 - gemma-core/src/main/native/test/README | 2 - gemma-core/src/main/native/test/makefile | 15 - gemma-core/src/main/native/test/test.fa | 11 - gemma-core/src/main/native/test/testBlat.c | 12 - .../analysis/sequence/RepeatScanTest.java | 33 +- .../sequence/ShellDelegatingBlatTest.java | 54 ++ ...ompositeSequenceGeneMapperServiceTest.java | 2 +- ...DesignSequenceAlignmentandMappingTest.java | 3 +- ...ayDesignSequenceProcessorFastacmdTest.java | 5 +- .../ArrayDesignSequenceProcessorTest.java | 9 +- .../expression/arrayDesign/MockBlat.java | 63 -- .../expression/arrayDesign/MockFastaCmd.java | 34 +- .../loader/genome/SimpleFastaCmdTest.java | 68 +- .../gemma/core/util/test/Assumptions.java | 18 + 33 files changed, 675 insertions(+), 1152 deletions(-) delete mode 100644 gemma-core/src/main/java/ubic/gemma/core/util/concurrent/GenericStreamConsumer.java delete mode 100644 gemma-core/src/main/java/ubic/gemma/core/util/concurrent/ParsingStreamConsumer.java delete mode 100644 gemma-core/src/main/native/.cvsignore delete mode 100644 
gemma-core/src/main/native/Blat.c delete mode 100644 gemma-core/src/main/native/README delete mode 100644 gemma-core/src/main/native/include/Blat.h delete mode 100644 gemma-core/src/main/native/include/gfClient.h delete mode 100644 gemma-core/src/main/native/makefile delete mode 100644 gemma-core/src/main/native/test/README delete mode 100644 gemma-core/src/main/native/test/makefile delete mode 100644 gemma-core/src/main/native/test/test.fa delete mode 100644 gemma-core/src/main/native/test/testBlat.c create mode 100644 gemma-core/src/test/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlatTest.java diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/ArrayDesignRepeatScanCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/ArrayDesignRepeatScanCli.java index bba1009d00..d430ef4b88 100644 --- a/gemma-cli/src/main/java/ubic/gemma/apps/ArrayDesignRepeatScanCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/apps/ArrayDesignRepeatScanCli.java @@ -30,6 +30,8 @@ import ubic.gemma.model.genome.biosequence.BioSequence; import ubic.gemma.persistence.service.genome.biosequence.BioSequenceService; +import javax.annotation.Nullable; +import java.nio.file.Path; import java.util.Collection; import java.util.Date; @@ -43,13 +45,14 @@ public class ArrayDesignRepeatScanCli extends ArrayDesignSequenceManipulatingCli @Autowired private BioSequenceService bsService; - private String inputFileName; + @Nullable + private Path inputFileName; @Override protected void buildOptions( Options options ) { super.buildOptions( options ); Option fileOption = Option.builder( "f" ).hasArg().argName( ".out file" ) - .desc( "RepeatScan file to use as input" ).longOpt( "file" ).build(); + .desc( "RepeatScan file to use as input" ).longOpt( "file" ).type( Path.class ).build(); options.addOption( fileOption ); } @@ -57,7 +60,7 @@ protected void buildOptions( Options options ) { protected void processOptions( CommandLine commandLine ) throws ParseException { super.processOptions( commandLine ); if ( 
commandLine.hasOption( 'f' ) ) { - this.inputFileName = commandLine.getOptionValue( 'f' ); + this.inputFileName = commandLine.getParsedOptionValue( 'f' ); } } diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/ArrayDesignSequenceAssociationCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/ArrayDesignSequenceAssociationCli.java index 4c7a6bc65a..59fd0dd22a 100644 --- a/gemma-cli/src/main/java/ubic/gemma/apps/ArrayDesignSequenceAssociationCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/apps/ArrayDesignSequenceAssociationCli.java @@ -142,7 +142,7 @@ protected void doAuthenticatedWork() throws Exception { // this is kind of an oddball function of this tool. if ( this.sequenceId != null ) { BioSequence updated = arrayDesignSequenceProcessingService.processSingleAccession( this.sequenceId, - new String[] { "nt", "est_others", "est_human", "est_mouse" }, null, force ); + new String[] { "nt", "est_others", "est_human", "est_mouse" }, force ); if ( updated != null ) { log.info( "Updated or created " + updated ); } @@ -206,7 +206,7 @@ protected void doAuthenticatedWork() throws Exception { String[] databases = chooseBLASTdbs( taxon ); arrayDesignSequenceProcessingService.processArrayDesign( arrayDesign, idFileIs, - databases, null, taxon, force ); + databases, taxon, force ); this.audit( arrayDesign, "Sequences identifiers from file: " + idFile ); } @@ -216,7 +216,7 @@ protected void doAuthenticatedWork() throws Exception { String[] databases = chooseBLASTdbs( taxon ); arrayDesignSequenceProcessingService.processArrayDesign( arrayDesign, - databases, null, force ); + databases, force ); this.audit( arrayDesign, "Sequence looked up from BLAST databases" ); } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/Blat.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/Blat.java index e53ad6b611..ca3abd7d2e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/Blat.java +++ 
b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/Blat.java @@ -1,6 +1,5 @@ package ubic.gemma.core.analysis.sequence; -import ubic.gemma.core.analysis.sequence.ShellDelegatingBlat.BlattableGenome; import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.biosequence.BioSequence; import ubic.gemma.model.genome.sequenceAnalysis.BlatResult; @@ -11,7 +10,6 @@ import java.util.List; import java.util.Map; -@SuppressWarnings("unused") // Possible external use public interface Blat { /** @@ -21,7 +19,6 @@ public interface Blat { * @see BlatResult */ double DEFAULT_BLAT_SCORE_THRESHOLD = 0.7; - double STEPSIZE = 7; /** * Run a BLAT search using the gfClient. @@ -57,78 +54,9 @@ Map> blatQuery( Collection sequences, throws IOException; /** - * @return the blatScoreThreshold - */ - double getBlatScoreThreshold(); - - /** - * @param blatScoreThreshold the blatScoreThreshold to set + * Set the blat score threshold to use. + *

+ * Defaults to {@link #DEFAULT_BLAT_SCORE_THRESHOLD}. */ void setBlatScoreThreshold( double blatScoreThreshold ); - - /** - * @return Returns the gfClientExe. - */ - String getGfClientExe(); - - /** - * @return Returns the gfServerExe. - */ - String getGfServerExe(); - - /** - * @return Returns the host. - */ - String getHost(); - - /** - * @return Returns the humanServerPort. - */ - int getHumanServerPort(); - - /** - * @return Returns the mouseServerPort. - */ - int getMouseServerPort(); - - /** - * @return Returns the ratServerPort. - */ - int getRatServerPort(); - - /** - * @return Returns the seqDir. - */ - String getSeqDir(); - - /** - * @param genome genome - * @return Returns the seqFiles. - */ - String getSeqFiles( BlattableGenome genome ); - - /** - * @param inputStream to the Blat output file in psl format - * @param taxon taxon - * @return processed results. - * @throws IOException when there are IO problems. - */ - List processPsl( InputStream inputStream, Taxon taxon ) throws IOException; - - /** - * Start the server, if the port isn't already being used. If the port is in use, we assume it is a gfServer. - * - * @param genome genome - * @param port port - * @throws IOException when there are IO problems. - */ - void startServer( BlattableGenome genome, int port ) throws IOException; - - /** - * Stop the gfServer, if it was started by this. 
- * - * @param port port - */ - void stopServer( int port ); - } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/RepeatScan.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/RepeatScan.java index 0a70f35733..ca63c82697 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/RepeatScan.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/RepeatScan.java @@ -18,44 +18,48 @@ */ package ubic.gemma.core.analysis.sequence; +import lombok.extern.apachecommons.CommonsLog; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import ubic.gemma.core.config.Settings; import ubic.gemma.core.loader.genome.FastaParser; import ubic.gemma.core.profiling.StopWatchUtils; -import ubic.gemma.core.util.concurrent.GenericStreamConsumer; import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.biosequence.BioSequence; -import ubic.gemma.core.config.Settings; -import java.io.*; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.TimeUnit; /** - * Scan sequences for repeats + * Scan sequences for repeats using RepeatMasker. 
* * @author pavlidis */ +@CommonsLog public class RepeatScan { private static final String REPEAT_MASKER_CONFIG_PARAM = "repeatMasker.exe"; - private static final int UPDATE_INTERVAL_MS = 1000 * 60 * 2; - private static final Log log = LogFactory.getLog( RepeatScan.class.getName() ); private static final String REPEAT_MASKER = Settings.getString( RepeatScan.REPEAT_MASKER_CONFIG_PARAM ); + private static final int UPDATE_INTERVAL_MS = 1000 * 60 * 2; + /** * @param sequences sequences * @param outputSequencePath in FASTA format * @return Sequences which were updated. */ public Collection processRepeatMaskerOutput( Collection sequences, - String outputSequencePath ) { + Path outputSequencePath ) { FastaParser parser = new FastaParser(); try { - parser.parse( outputSequencePath ); + parser.parse( outputSequencePath.toFile() ); } catch ( IOException e ) { throw new RuntimeException( e ); } @@ -101,7 +105,7 @@ public Collection processRepeatMaskerOutput( Collection processRepeatMaskerOutput( Collection repeatScan( Collection sequences ) { try { - if ( sequences.size() == 0 ) { + if ( sequences.isEmpty() ) { RepeatScan.log.warn( "No sequences to test" ); return sequences; } - File querySequenceFile = File.createTempFile( "repmask", ".fa" ); - SequenceWriter.writeSequencesToFile( sequences, querySequenceFile ); + Path querySequenceFile = Files.createTempFile( "repmask", ".fa" ); + SequenceWriter.writeSequencesToFile( sequences, querySequenceFile.toFile() ); Taxon taxon = sequences.iterator().next().getTaxon(); - this.execRepeatMasker( querySequenceFile, taxon ); - - final String outputSequencePath = querySequenceFile.getParent() + File.separatorChar + querySequenceFile.getName() + ".masked"; + Path outputSequencePath = this.execRepeatMasker( querySequenceFile, taxon ); // final String outputScorePath = querySequenceFile.getParent() + File.separatorChar // + querySequenceFile.getName() + ".masked"; - File output = new File( outputSequencePath ); - if ( !output.exists() ) { 
+ if ( !Files.exists( outputSequencePath ) ) { this.handleNoOutputCondition( querySequenceFile, outputSequencePath ); return new HashSet<>(); } @@ -141,94 +142,81 @@ public Collection repeatScan( Collection sequences ) { double computeFractionMasked( BioSequence maskedSeq ) { // count fraction of masked bases. int origLength = maskedSeq.getSequence().length(); - int unmaskedBases = maskedSeq.getSequence().replaceAll( "[a-z]", "" ).length(); - - return ( origLength - unmaskedBases ) / ( double ) origLength; - } - - private void checkForExe() { - if ( RepeatScan.REPEAT_MASKER == null ) { - throw new IllegalStateException( "Repeatmasker executable could not be found. Make sure you correctly set " - + RepeatScan.REPEAT_MASKER_CONFIG_PARAM ); + int masked = 0; + for ( char c : maskedSeq.getSequence().toCharArray() ) { + if ( Character.isLowerCase( c ) ) { + masked++; + } } + return ( double ) masked / ( double ) origLength; } /** - * Run repeatmasker using a call to exec(). + * Run RepeatMasker using a call to exec(). 
* * @param querySequenceFile file - * @param taxon taxon + * @param taxon taxon + * @return */ - private void execRepeatMasker( File querySequenceFile, Taxon taxon ) throws IOException { - - this.checkForExe(); - - final String cmd = RepeatScan.REPEAT_MASKER + " -parallel 8 -xsmall -species " + taxon.getCommonName() + " " - + querySequenceFile.getAbsolutePath();// FIXME use -dir option to put output where we want; see https://github.com/PavlidisLab/Gemma/issues/53; - RepeatScan.log.info( "Running repeatmasker like this: " + cmd ); - - final Process run = Runtime.getRuntime().exec( cmd ); - - // to ensure that we aren't left waiting for these streams - GenericStreamConsumer gscErr = new GenericStreamConsumer( run.getErrorStream() ); - GenericStreamConsumer gscIn = new GenericStreamConsumer( run.getInputStream() ); - gscErr.start(); - gscIn.start(); - + private Path execRepeatMasker( Path querySequenceFile, Taxon taxon ) throws IOException { + if ( RepeatScan.REPEAT_MASKER == null ) { + throw new IllegalStateException( "RepeatMasker executable could not be found. Make sure you correctly set " + + RepeatScan.REPEAT_MASKER_CONFIG_PARAM ); + } + String[] cmd = new String[] { RepeatScan.REPEAT_MASKER, "-parallel", "8", "-xsmall", + "-species", taxon.getCommonName(), + // FIXME use -dir option to put output where we want; see https://github.com/PavlidisLab/Gemma/issues/53; + querySequenceFile.toString() }; + RepeatScan.log.info( "Running RepeatMasker like this: " + Arrays.toString( cmd ) ); + + final Process run = new ProcessBuilder( cmd ) + // to ensure that we aren't left waiting for these streams + // TODO: switch to Redirect.DISCARD for Java 9+ + .redirectOutput( ProcessBuilder.Redirect.appendTo( new File( "/dev/null" ) ) ) + .redirectError( ProcessBuilder.Redirect.PIPE ) + .start(); + + // wait... + StopWatch overallWatch = StopWatch.createStarted(); try { - - int exitVal = Integer.MIN_VALUE; - - // wait... 
- StopWatch overallWatch = new StopWatch(); - overallWatch.start(); - - while ( exitVal == Integer.MIN_VALUE ) { - try { - exitVal = run.exitValue(); - } catch ( IllegalThreadStateException e ) { - // okay, still waiting. - } - Thread.sleep( RepeatScan.UPDATE_INTERVAL_MS ); + while ( !run.waitFor( RepeatScan.UPDATE_INTERVAL_MS, TimeUnit.MILLISECONDS ) ) { String minutes = StopWatchUtils.getMinutesElapsed( overallWatch ); - RepeatScan.log.info( "Repeatmasker: " + minutes + " minutes elapsed" ); + RepeatScan.log.info( "RepeatMasker: " + minutes + " minutes elapsed" ); } - - overallWatch.stop(); - String minutes = StopWatchUtils.getMinutesElapsed( overallWatch ); - RepeatScan.log.info( "Repeatmasker took a total of " + minutes + " minutes" ); - - // int exitVal = run.waitFor(); - - RepeatScan.log.debug( "Repeatmasker exit value=" + exitVal ); } catch ( InterruptedException e ) { + Thread.currentThread().interrupt(); throw new RuntimeException( e ); } - RepeatScan.log.debug( "Repeatmasker Success" ); + int exitVal = run.exitValue(); + if ( exitVal != 0 ) { + String errorMessage = StringUtils.strip( IOUtils.toString( run.getErrorStream(), StandardCharsets.UTF_8 ) ); + throw new RuntimeException( "RepeatMasker failed with exit value " + exitVal + ":\n" + errorMessage ); + } + + overallWatch.stop(); + String minutes = StopWatchUtils.getMinutesElapsed( overallWatch ); + RepeatScan.log.info( "RepeatMasker took a total of " + minutes + " minutes" ); + RepeatScan.log.debug( "RepeatMasker Success" ); + return querySequenceFile.resolveSibling( querySequenceFile.getFileName().toString() + ".masked" ); } - private void handleNoOutputCondition( File querySequenceFile, final String outputSequencePath ) throws IOException { + private void handleNoOutputCondition( Path querySequenceFile, Path outputSequencePath ) throws IOException { // this happens if there were no repeats to mask. Check to make sure. 
- final String outputSummary = querySequenceFile.getParent() + File.separatorChar + querySequenceFile.getName() + ".out"; - if ( !( new File( outputSummary ) ).exists() ) { + final Path outputSummary = querySequenceFile.resolveSibling( querySequenceFile.getFileName() + ".out" ); + if ( !Files.exists( outputSummary ) ) { // okay, something is wrong for sure. - throw new RuntimeException( - "Repeatmasker seems to have failed, it left no useful output (looking for " + outputSequencePath - + " or " + outputSummary ); + throw new RuntimeException( String.format( "RepeatMasker seems to have failed, it left no useful output (looking for %s or %s)", + outputSequencePath, outputSummary ) ); } - InputStream is = new FileInputStream( outputSummary ); - try (BufferedReader br = new BufferedReader( new InputStreamReader( is ) )) { + try ( BufferedReader br = Files.newBufferedReader( outputSummary ) ) { String nothingFound = "There were no repetitive sequences detected"; String line = br.readLine(); if ( line == null || line.startsWith( nothingFound ) ) { RepeatScan.log.info( "There were no repeats found" ); } else { - RepeatScan.log - .warn( "Something might have gone wrong with repeatmasking. The output file reads: " + line ); + RepeatScan.log.warn( "Something might have gone wrong with RepeatMasker. 
The output file reads: " + line ); } } - } - } diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlat.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlat.java index 51cf727ff8..428c1a6ad2 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlat.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlat.java @@ -18,40 +18,51 @@ */ package ubic.gemma.core.analysis.sequence; -import org.apache.commons.configuration2.ex.ConfigurationException; +import lombok.Getter; +import lombok.Setter; +import lombok.extern.apachecommons.CommonsLog; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.springframework.util.Assert; import ubic.gemma.core.config.Settings; import ubic.gemma.core.loader.genome.BlatResultParser; import ubic.gemma.core.profiling.StopWatchUtils; -import ubic.gemma.core.util.concurrent.Executors; -import ubic.gemma.core.util.concurrent.GenericStreamConsumer; import ubic.gemma.model.common.description.DatabaseType; import ubic.gemma.model.common.description.ExternalDatabase; import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.biosequence.BioSequence; import ubic.gemma.model.genome.sequenceAnalysis.BlatResult; -import java.io.*; +import javax.annotation.Nullable; +import java.io.BufferedWriter; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; import java.net.Socket; -import java.net.UnknownHostException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.*; -import java.util.concurrent.*; +import 
java.util.concurrent.TimeUnit; /** * Class to manage the gfServer and run BLAT searches. Delegates to the command-line shell to run blat. * * @author pavlidis */ -@SuppressWarnings("unused") // Possible external use +@Getter +@CommonsLog public class ShellDelegatingBlat implements Blat { + /** + * Interval in milliseconds to report on BLAT progress by peeking at its output file. + */ private static final int BLAT_UPDATE_INTERVAL_MS = 1000 * 30; - private static final Log log = LogFactory.getLog( ShellDelegatingBlat.class ); /** * Minimum alignment length for retention. */ @@ -60,45 +71,61 @@ public class ShellDelegatingBlat implements Blat { * Strings of As or Ts at the start or end of a sequence longer than this will be stripped off prior to analysis. */ private static final int POLY_AT_THRESHOLD = 5; - private static final String os = System.getProperty( "os.name" ).toLowerCase(); - private double blatScoreThreshold = Blat.DEFAULT_BLAT_SCORE_THRESHOLD; - private boolean doShutdown = true; + + private static final int STEPSIZE = 7; + // typical values. 
- private String gfClientExe = "/cygdrive/c/cygwin/usr/local/bin/gfClient.exe"; - private String gfServerExe = "/cygdrive/c/cygwin/usr/local/bin/gfServer.exe"; - private String host = "localhost"; - private int humanSensitiveServerPort; - private String humanSeqFiles; - private int humanServerPort; - private int mouseSensitiveServerPort; - private String mouseSeqFiles; - private int mouseServerPort; - private int ratSensitiveServerPort; - private String ratSeqFiles; - private int ratServerPort; - private String seqDir = "/"; + private final String gfClientExe; + private final String gfServerExe; + private final String host; + private final int humanSensitiveServerPort; + private final String[] humanSeqFiles; + private final int humanServerPort; + private final int mouseSensitiveServerPort; + private final String[] mouseSeqFiles; + private final int mouseServerPort; + private final int ratSensitiveServerPort; + private final String[] ratSeqFiles; + private final int ratServerPort; + private final Path seqDir; + private final Path tmpDir; + + @Setter + private double blatScoreThreshold = Blat.DEFAULT_BLAT_SCORE_THRESHOLD; + + @Nullable private Process serverProcess; + private String serverHost; + private int serverPort; /** * Create a blat object with settings read from the config file. 
*/ public ShellDelegatingBlat() { - try { - this.init(); - } catch ( ConfigurationException e ) { - throw new RuntimeException( "Could not load configuration", e ); + ShellDelegatingBlat.log.debug( "Reading global config" ); + this.humanServerPort = Settings.getInt( "gfClient.humanServerPort" ); + this.mouseServerPort = Settings.getInt( "gfClient.mouseServerPort" ); + this.ratServerPort = Settings.getInt( "gfClient.ratServerPort" ); + this.humanSensitiveServerPort = Settings.getInt( "gfClient.sensitive.humanServerPort" ); + this.mouseSensitiveServerPort = Settings.getInt( "gfClient.sensitive.mouseServerPort" ); + this.ratSensitiveServerPort = Settings.getInt( "gfClient.sensitive.ratServerPort" ); + this.host = Settings.getString( "gfClient.host" ); + this.seqDir = Paths.get( Settings.getString( "gfClient.seqDir" ) ); + this.tmpDir = Paths.get( Settings.getDownloadPath() ); + this.mouseSeqFiles = Settings.getStringArray( "gfClient.mouse.seqFiles" ); + this.ratSeqFiles = Settings.getStringArray( "gfClient.rat.seqFiles" ); + this.humanSeqFiles = Settings.getStringArray( "gfClient.human.seqFiles" ); + this.gfClientExe = Settings.getString( "gfClient.exe" ); + this.gfServerExe = Settings.getString( "gfServer.exe" ); + if ( gfServerExe == null ) { + /* + * This won't ever really work -- it's left over from earlier iterations. 
+ */ + ShellDelegatingBlat.log + .warn( "You will not be able to start the server: gfServer.exe is not set in config" ); } } - public ShellDelegatingBlat( String host, int humanServerPort, String seqDir ) { - - if ( host == null || humanServerPort <= 0 || seqDir == null ) - throw new IllegalArgumentException( "All values must be non-null" ); - this.host = host; - this.humanServerPort = humanServerPort; - this.seqDir = seqDir; - } - public static ExternalDatabase getSearchedGenome( Taxon taxon ) { BlattableGenome genome = ShellDelegatingBlat.inferBlatDatabase( taxon ); ExternalDatabase searchedDatabase = ExternalDatabase.Factory.newInstance(); @@ -109,14 +136,12 @@ public static ExternalDatabase getSearchedGenome( Taxon taxon ) { private static BlattableGenome inferBlatDatabase( Taxon taxon ) { assert taxon != null; - BlattableGenome bg; - - if ( taxon.getNcbiId() == 10090 || taxon.getCommonName().equals( "mouse" ) ) { + if ( Objects.equals( taxon.getNcbiId(), 10090 ) || Objects.equals( taxon.getCommonName(), "mouse" ) ) { bg = BlattableGenome.MOUSE; - } else if ( taxon.getNcbiId() == 10116 || taxon.getCommonName().equals( "rat" ) ) { + } else if ( Objects.equals( taxon.getNcbiId(), 10116 ) || Objects.equals( taxon.getCommonName(), "rat" ) ) { bg = BlattableGenome.RAT; - } else if ( taxon.getNcbiId() == 9606 || taxon.getCommonName().equals( "human" ) ) { + } else if ( Objects.equals( taxon.getNcbiId(), 9606 ) || Objects.equals( taxon.getCommonName(), "human" ) ) { bg = BlattableGenome.HUMAN; } else { throw new UnsupportedOperationException( "Cannot determine which database to search for " + taxon ); @@ -139,18 +164,18 @@ public List blatQuery( BioSequence b, Taxon taxon, boolean sensitive assert seqDir != null; // write the sequence to a temporary file. 
String seqName = b.getName().replaceAll( " ", "_" ); - File querySequenceFile = File.createTempFile( seqName, ".fa" ); + Path querySequenceFile = Files.createTempFile( seqName, ".fa" ); - try ( BufferedWriter out = new BufferedWriter( new FileWriter( querySequenceFile ) ) ) { + try ( BufferedWriter out = Files.newBufferedWriter( querySequenceFile ) ) { String trimmed = SequenceManipulation .stripPolyAorT( b.getSequence(), ShellDelegatingBlat.POLY_AT_THRESHOLD ); out.write( ">" + seqName + "\n" + trimmed ); - ShellDelegatingBlat.log.info( "Wrote sequence to " + querySequenceFile.getPath() ); + ShellDelegatingBlat.log.info( "Wrote sequence to " + querySequenceFile ); } - String outputPath = this.getTmpPslFilePath( seqName ); + Path outputPath = this.getTmpPslFilePath( seqName ); - List results = this - .gfClient( querySequenceFile, outputPath, this.choosePortForQuery( taxon, sensitive ) ); + int portToUse = this.choosePortForQuery( taxon, sensitive ); + List results = execGfClient( querySequenceFile, outputPath, portToUse, taxon ); ExternalDatabase searchedDatabase = ShellDelegatingBlat.getSearchedGenome( taxon ); for ( BlatResult result : results ) { @@ -167,26 +192,18 @@ public Map> blatQuery( Collection seq Taxon taxon ) throws IOException { Map> results = new HashMap<>(); - File querySequenceFile = File.createTempFile( "sequences-for-blat", ".fa" ); - int count = SequenceWriter.writeSequencesToFile( sequences, querySequenceFile ); + Path querySequenceFile = Files.createTempFile( "sequences-for-blat", ".fa" ); + int count = SequenceWriter.writeSequencesToFile( sequences, querySequenceFile.toFile() ); if ( count == 0 ) { - if ( !querySequenceFile.delete() ) { - throw new IOException( "Could not delete file " + querySequenceFile.getPath() ); - } + Files.delete( querySequenceFile ); throw new IllegalArgumentException( "No sequences!" 
); } - String outputPath = this.getTmpPslFilePath( "blat-output" ); + Path outputPath = this.getTmpPslFilePath( "blat-output" ); - Integer port = this.choosePortForQuery( taxon, sensitive ); + int port = this.choosePortForQuery( taxon, sensitive ); - if ( port == null ) { - throw new IllegalStateException( - "Could not locate port for BLAT with settings taxon=" + taxon + ", sensitive=" + sensitive - + ", check your configuration." ); - } - - Collection rawResults = this.gfClient( querySequenceFile, outputPath, port ); + Collection rawResults = execGfClient( querySequenceFile, outputPath, port, taxon ); ShellDelegatingBlat.log.info( "Got " + rawResults.size() + " raw blat results" ); @@ -203,9 +220,7 @@ public Map> blatQuery( Collection seq results.get( query ).add( blatResult ); } - if ( !querySequenceFile.delete() ) { - throw new IOException( "Could not delete file " + querySequenceFile.getPath() ); - } + Files.delete( querySequenceFile ); return results; } @@ -215,67 +230,13 @@ public Map> blatQuery( Collection seq return this.blatQuery( sequences, false, taxon ); } - @Override - public double getBlatScoreThreshold() { - return this.blatScoreThreshold; - } - - @Override - public void setBlatScoreThreshold( double blatScoreThreshold ) { - this.blatScoreThreshold = blatScoreThreshold; - } - - @Override - public String getGfClientExe() { - return this.gfClientExe; - } - - @Override - public String getGfServerExe() { - return this.gfServerExe; - } - - @Override - public String getHost() { - return this.host; - } - - @Override - public int getHumanServerPort() { - return this.humanServerPort; - } - - @Override - public int getMouseServerPort() { - return this.mouseServerPort; - } - - @Override - public int getRatServerPort() { - return this.ratServerPort; - } - - @Override - public String getSeqDir() { - return this.seqDir; - } - - @Override - public String getSeqFiles( BlattableGenome genome ) { - switch ( genome ) { - case HUMAN: - return this.humanSeqFiles; - case 
MOUSE: - return this.mouseSeqFiles; - case RAT: - return this.ratSeqFiles; - default: - return this.humanSeqFiles; - - } - } - - @Override + /** + * Process the output of a BLAT search in psl format. + * @param inputStream to the Blat output file in psl format + * @param taxon taxon + * @return processed results. + * @throws IOException when there are IO problems. + */ public List processPsl( InputStream inputStream, Taxon taxon ) throws IOException { if ( inputStream.available() == 0 ) { @@ -293,61 +254,135 @@ public List processPsl( InputStream inputStream, Taxon taxon ) throw return brp.getResults(); } - @Override - public void startServer( BlattableGenome genome, int port ) throws IOException { - try ( Socket socket = new Socket( host, port ) ) { - ShellDelegatingBlat.log.info( "There is already a server on port " + port ); - this.doShutdown = false; - } catch ( UnknownHostException e ) { - throw new RuntimeException( "Unknown host " + host, e ); + /** + * Start the server, if the port isn't already being used. If the port is in use, we assume it is a gfServer. + * + * @param genome genome + * @param waitForFullInitialization if true, wait for the server to be fully initialized before returning, otherwise + * return immediately after starting the server. + * @throws IOException when there are IO problems. + */ + public synchronized void startServer( BlattableGenome genome, boolean sensitive, boolean waitForFullInitialization ) throws IOException { + Assert.state( serverProcess == null || !serverProcess.isAlive() ); + if ( sensitive ) { + // TODO: implement sensitive searches + throw new UnsupportedOperationException( "Sensitive BLAT searches are not supported by this implementation." ); + } + int port = getPort( genome, sensitive ); + // check if a server is already running + try ( Socket ignored = new Socket( host, port ) ) { + throw new RuntimeException( "There is already a gfServer listening on " + host + ":" + port + "." 
); } catch ( IOException e ) { - String cmd = - this.getGfServerExe() + " -canStop -stepSize=" + Blat.STEPSIZE + " start " + this.getHost() + " " - + port + " " + this.getSeqFiles( genome ); - ShellDelegatingBlat.log.info( "Starting gfServer with command " + cmd ); - this.serverProcess = Runtime.getRuntime().exec( cmd, null, new File( this.getSeqDir() ) ); - - try { - Thread.sleep( 100 ); - int exit = serverProcess.exitValue(); - if ( exit != 0 ) { - throw new IOException( "Could not start server" ); + // ignore all other errors, the blat server is probably not running + } + String[] cmd = ArrayUtils.addAll( new String[] { + gfServerExe, "-stepSize=" + STEPSIZE, "start", this.host, String.valueOf( port ) }, this.getSeqFiles( genome ) ); + ShellDelegatingBlat.log.info( "Starting gfServer with command " + String.join( " ", cmd ) + "..." ); + this.serverProcess = new ProcessBuilder( cmd ) + .directory( seqDir.toFile() ) + .redirectOutput( ProcessBuilder.Redirect.INHERIT ) + .redirectError( ProcessBuilder.Redirect.PIPE ) + .start(); + this.serverHost = host; + this.serverPort = port; + + // wait a little bit to see if the server fails early (i.e. incorrect parameters) + try { + if ( serverProcess.waitFor( 100, TimeUnit.MILLISECONDS ) ) { + String errorMessage = StringUtils.strip( IOUtils.toString( serverProcess.getErrorStream(), StandardCharsets.UTF_8 ) ); + throw new RuntimeException( "Could not start gfServer (exit value=" + serverProcess.exitValue() + "):\n" + errorMessage ); + } + } catch ( InterruptedException e ) { + Thread.currentThread().interrupt(); + throw new RuntimeException( e ); + } + + if ( waitForFullInitialization ) { + log.info( "Waiting for gfServer to be fully initialized on " + serverHost + ":" + serverPort + "..." ); + while ( true ) { + if ( isServerReachable( serverHost, serverPort ) ) { + log.info( "gfServer is listening on " + serverHost + ":" + serverPort + "." 
); + break; } - } catch ( InterruptedException e1 ) { - Thread.currentThread().interrupt(); - ShellDelegatingBlat.log.info( "Server seems to have started" ); - } catch ( IllegalThreadStateException e1 ) { - ShellDelegatingBlat.log.info( "Server seems to have started" ); } + } + } + + /** + * Check if the gfServer is running. + */ + public boolean isServerRunning() { + return serverProcess != null && serverProcess.isAlive() && isServerReachable( serverHost, serverPort ); + } + + /** + * Check if the gfServer for a given genome is reachable. + */ + public boolean isServerReachable( BlattableGenome genome, boolean sensitive ) { + return isServerReachable( host, getPort( genome, sensitive ) ); + } + /** + * Check if a gfServer is reachable. + */ + private boolean isServerReachable( String host, int port ) { + // try to connect to the server to ensure it is running + try ( Socket ignored = new Socket( host, port ) ) { + return true; + } catch ( IOException e ) { + return false; + // ignore all other errors, the blat server is probably not running } } - @Override - public void stopServer( int port ) { - if ( !doShutdown ) { + /** + * Stop the gfServer, if it was started by this. + */ + public synchronized void stopServer() { + if ( serverProcess == null ) { + log.warn( "gfServer was not started, nothing to stop." ); + return; + } else if ( !serverProcess.isAlive() ) { + log.info( "gfServer is not running, nothing to stop." ); return; } - ShellDelegatingBlat.log.info( "Shutting down gfServer" ); - if ( serverProcess == null ) - return; - // serverProcess.destroy(); + ShellDelegatingBlat.log.info( "Shutting down gfServer at " + serverHost + ":" + serverPort + "..." 
); + try { - // this doesn't work unless the server was invoked with the option "-canStop" - Process server = Runtime.getRuntime() - .exec( this.getGfServerExe() + " stop " + this.getHost() + " " + port ); - server.waitFor(); - int exit = server.exitValue(); - ShellDelegatingBlat.log.info( "Server on port " + port + " shut down with exit value " + exit ); - } catch ( InterruptedException | IOException e ) { - ShellDelegatingBlat.log.error( e, e ); + // gracefully stop the server + serverProcess.destroy(); + // give the server 30 seconds to shut down + if ( serverProcess.waitFor( 30, TimeUnit.SECONDS ) ) { + int serverExitCode = serverProcess.exitValue(); + // 143 is the exit code for SIGTERM, which is what destroy() sends + if ( serverExitCode == 0 || serverExitCode == 143 ) { + ShellDelegatingBlat.log.info( "gfServer on port " + serverPort + " shut down with exit value " + serverExitCode ); + } else { + String errorMessage; + try { + errorMessage = IOUtils.toString( serverProcess.getErrorStream(), StandardCharsets.UTF_8 ); + } catch ( IOException e ) { + errorMessage = "Could not read error stream from gfServer process."; + } + ShellDelegatingBlat.log.info( "gfServer on port " + serverPort + " shut down with exit value " + serverExitCode + "\n" + errorMessage ); + } + } else { + log.warn( "gfServer did not shut down in time, killing it..." ); + serverProcess.destroyForcibly(); + } + } catch ( InterruptedException e ) { + Thread.currentThread().interrupt(); + throw new RuntimeException( e ); } - } - private Integer choosePortForQuery( Taxon taxon, boolean sensitive ) { + private int choosePortForQuery( Taxon taxon, boolean sensitive ) { BlattableGenome genome = ShellDelegatingBlat.inferBlatDatabase( taxon ); + return getPort( genome, sensitive ); + } + + private int getPort( BlattableGenome genome, boolean sensitive ) { switch ( genome ) { case MOUSE: return sensitive ? 
mouseSensitiveServerPort : mouseServerPort; @@ -356,13 +391,29 @@ private Integer choosePortForQuery( Taxon taxon, boolean sensitive ) { case HUMAN: default: return sensitive ? humanSensitiveServerPort : humanServerPort; + } + } + + private String[] getSeqFiles( BlattableGenome genome ) { + switch ( genome ) { + case HUMAN: + return this.humanSeqFiles; + case MOUSE: + return this.mouseSeqFiles; + case RAT: + return this.ratSeqFiles; + default: + return this.humanSeqFiles; } } - private void cleanUpTmpFiles( File querySequenceFile, String outputPath ) { - if ( !querySequenceFile.delete() || !( new File( outputPath ) ).delete() ) { - ShellDelegatingBlat.log.warn( "Could not clean up temporary files." ); + private void cleanUpTmpFiles( Path querySequenceFile, Path outputPath ) { + try { + Files.deleteIfExists( querySequenceFile ); + Files.deleteIfExists( outputPath ); + } catch ( IOException e ) { + ShellDelegatingBlat.log.warn( "Could not clean up temporary files.", e ); } } @@ -373,174 +424,64 @@ private void cleanUpTmpFiles( File querySequenceFile, String outputPath ) { * @param outputPath output path * @return collection of blat results */ - private List execGfClient( File querySequenceFile, String outputPath, int portToUse ) + private List execGfClient( Path querySequenceFile, Path outputPath, int portToUse, Taxon taxon ) throws IOException { - final String cmd = - gfClientExe + " -nohead -minScore=" + ShellDelegatingBlat.MIN_SCORE + " " + host + " " + portToUse + " " - + seqDir + " " + querySequenceFile.getAbsolutePath() + " " + outputPath; - ShellDelegatingBlat.log.info( cmd ); - - final Process run = Runtime.getRuntime().exec( cmd ); - - // to ensure that we aren't left waiting for these streams - GenericStreamConsumer gscErr = new GenericStreamConsumer( run.getErrorStream() ); - GenericStreamConsumer gscIn = new GenericStreamConsumer( run.getInputStream() ); - gscErr.start(); - gscIn.start(); - + StopWatch overallWatch = StopWatch.createStarted(); + + final 
String[] cmd = new String[] { + gfClientExe, "-nohead", "-minScore=" + ShellDelegatingBlat.MIN_SCORE, host, String.valueOf( portToUse ), + seqDir.toString(), querySequenceFile.toString(), outputPath.toString() }; + ShellDelegatingBlat.log.info( String.join( " ", cmd ) ); + final Process run = new ProcessBuilder( cmd ) + // to ensure that we aren't left waiting for these streams + // TODO: switch to Redirect.DISCARD for Java 9+ + .redirectOutput( ProcessBuilder.Redirect.appendTo( new File( "/dev/null" ) ) ) + .redirectError( ProcessBuilder.Redirect.PIPE ) + .start(); + // wait... try { - - int exitVal = Integer.MIN_VALUE; - - // wait... - StopWatch overallWatch = new StopWatch(); - overallWatch.start(); - - while ( exitVal == Integer.MIN_VALUE ) { - try { - exitVal = run.exitValue(); - } catch ( IllegalThreadStateException e ) { - // okay, still - // waiting. - } - Thread.sleep( ShellDelegatingBlat.BLAT_UPDATE_INTERVAL_MS ); + while ( !run.waitFor( ShellDelegatingBlat.BLAT_UPDATE_INTERVAL_MS, TimeUnit.MILLISECONDS ) ) { // I hope this is okay... - this.outputFile( outputPath, overallWatch ); + this.checkOutputFile( outputPath, overallWatch ); } - - overallWatch.stop(); - String minutes = StopWatchUtils.getMinutesElapsed( overallWatch ); - ShellDelegatingBlat.log.info( "Blat took a total of " + minutes + " minutes" ); - - // int exitVal = run.waitFor(); - - ShellDelegatingBlat.log.debug( "blat exit value=" + exitVal ); } catch ( InterruptedException e ) { + Thread.currentThread().interrupt(); throw new RuntimeException( e ); } - ShellDelegatingBlat.log.debug( "GfClient Success" ); - return this.processPsl( outputPath, null ); - } - - /** - * Get a temporary file name. 
- * - * @throws IOException if there is an IO problem while accessing the file - */ - private String getTmpPslFilePath( String base ) throws IOException { - File tmpDir = new File( Settings.getDownloadPath() ); - if ( StringUtils.isBlank( base ) ) { - return File.createTempFile( "blat-output", ".psl", tmpDir ).getPath(); + int exitVal = run.exitValue(); + if ( exitVal != 0 ) { + String errorMessage = StringUtils.strip( IOUtils.toString( run.getErrorStream(), StandardCharsets.UTF_8 ) ); + throw new RuntimeException( "gfClient exited with " + exitVal + ":\n" + errorMessage ); } - return File.createTempFile( base, ".psl", tmpDir ).getPath(); - } - - /** - * @param querySequenceFile query sequence file - * @param outputPath output path - * @return processed results. - * @throws IOException if there is an IO problem while accessing the file - */ - private List gfClient( File querySequenceFile, String outputPath, int portToUse ) - throws IOException { - // if ( hasNativeLibrary ) return jniGfClientCall( querySequenceFile, outputPath, portToUse ); - return this.execGfClient( querySequenceFile, outputPath, portToUse ); + overallWatch.stop(); + ShellDelegatingBlat.log.info( "Blat query took a total of " + overallWatch ); + ShellDelegatingBlat.log.debug( "GfClient Success" ); + return this.processPsl( outputPath, taxon ); } - private native void GfClientCall( String h, String p, String dir, String input, String output ); - - private void init() throws ConfigurationException { - ShellDelegatingBlat.log.debug( "Reading global config" ); - this.humanServerPort = Settings.getInt( "gfClient.humanServerPort" ); - this.mouseServerPort = Settings.getInt( "gfClient.mouseServerPort" ); - this.ratServerPort = Settings.getInt( "gfClient.ratServerPort" ); - - this.humanSensitiveServerPort = Settings.getInt( "gfClient.sensitive.humanServerPort" ); - this.mouseSensitiveServerPort = Settings.getInt( "gfClient.sensitive.mouseServerPort" ); - this.ratSensitiveServerPort = Settings.getInt( 
"gfClient.sensitive.ratServerPort" ); - this.host = Settings.getString( "gfClient.host" ); - this.seqDir = Settings.getString( "gfClient.seqDir" ); - this.mouseSeqFiles = Settings.getString( "gfClient.mouse.seqFiles" ); - this.ratSeqFiles = Settings.getString( "gfClient.rat.seqFiles" ); - this.humanSeqFiles = Settings.getString( "gfClient.human.seqFiles" ); - this.gfClientExe = Settings.getString( "gfClient.exe" ); - this.gfServerExe = Settings.getString( "gfServer.exe" ); - - if ( gfServerExe == null ) { - /* - * This won't ever really work -- it's left over from earlier iterations. - */ + private synchronized void checkOutputFile( final Path outputPath, StopWatch overallWatch ) { + try { + long size = Files.size( outputPath ); + NumberFormat nf = new DecimalFormat(); + nf.setMaximumFractionDigits( 2 ); + String minutes = StopWatchUtils.getMinutesElapsed( overallWatch ); ShellDelegatingBlat.log - .warn( "You will not be able to start the server: gfServer.exe is not set in config" ); - } - - if ( gfClientExe == null && ShellDelegatingBlat.os.startsWith( "windows" ) ) { - throw new ConfigurationException( "BLAT client calls will not work under windows." ); + .info( "BLAT output so far: " + nf.format( size / 1024.0 ) + " kb (" + minutes + " minutes elapsed)" ); + } catch ( IOException e ) { + ShellDelegatingBlat.log.warn( "Failed to check BLAT output file: " + outputPath, e ); } - } /** - * @param querySequenceFile query sequence file - * @param outputPath output path - * @return processed results. + * Get a temporary file name. 
+ * + * @throws IOException if there is an IO problem while accessing the file */ - private Collection jniGfClientCall( final File querySequenceFile, final String outputPath, - final int portToUse ) throws IOException { - try { - ShellDelegatingBlat.log.debug( "Starting blat run" ); - - ExecutorService executor = Executors.newSingleThreadExecutor(); - Future future = executor.submit( () -> { - ShellDelegatingBlat.this - .GfClientCall( host, Integer.toString( portToUse ), seqDir, querySequenceFile.getPath(), - outputPath ); - } ); - executor.shutdown(); - - // wait... - StopWatch overallWatch = new StopWatch(); - overallWatch.start(); - - while ( !future.isDone() ) { - try { - future.get( ShellDelegatingBlat.BLAT_UPDATE_INTERVAL_MS, java.util.concurrent.TimeUnit.MILLISECONDS ); - } catch ( TimeoutException e ) { - log.info( "Waiting for blat to finish..." ); - continue; - } catch ( InterruptedException ie ) { - future.cancel( true ); - throw new RuntimeException( ie ); - } catch ( ExecutionException e ) { - throw new RuntimeException( e ); - } - this.outputFile( outputPath, overallWatch ); - } - - overallWatch.stop(); - String minutes = StopWatchUtils.getMinutesElapsed( overallWatch ); - ShellDelegatingBlat.log.info( "Blat took a total of " + minutes + " minutes" ); - - } catch ( UnsatisfiedLinkError e ) { - ShellDelegatingBlat.log.error( e, e ); - ShellDelegatingBlat.log.info( "Falling back on exec()" ); - this.execGfClient( querySequenceFile, outputPath, portToUse ); - } - return this.processPsl( outputPath, null ); - } - - private synchronized void outputFile( final String outputPath, StopWatch overallWatch ) { - File outputFile = new File( outputPath ); - Long size = outputFile.length(); - NumberFormat nf = new DecimalFormat(); - nf.setMaximumFractionDigits( 2 ); - String minutes = StopWatchUtils.getMinutesElapsed( overallWatch ); - ShellDelegatingBlat.log - .info( "BLAT output so far: " + nf.format( size / 1024.0 ) + " kb (" + minutes + " minutes elapsed)" ); - 
+ private Path getTmpPslFilePath( String base ) throws IOException { + return Files.createTempFile( tmpDir, StringUtils.isBlank( base ) ? "blat-output" : base, ".psl" ); } /** @@ -548,17 +489,16 @@ private synchronized void outputFile( final String outputPath, StopWatch overall * @param taxon taxon (optional, can be null) * @return processed results. */ - private List processPsl( String filePath, Taxon taxon ) throws IOException { + private List processPsl( Path filePath, Taxon taxon ) throws IOException { ShellDelegatingBlat.log.debug( "Processing " + filePath ); BlatResultParser brp = new BlatResultParser(); brp.setTaxon( taxon ); brp.setScoreThreshold( this.blatScoreThreshold ); - brp.parse( filePath ); + brp.parse( filePath.toFile() ); return brp.getResults(); } public enum BlattableGenome { HUMAN, MOUSE, RAT } - } diff --git a/gemma-core/src/main/java/ubic/gemma/core/config/Settings.java b/gemma-core/src/main/java/ubic/gemma/core/config/Settings.java index c1a5d8971f..64c9c62fad 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/config/Settings.java +++ b/gemma-core/src/main/java/ubic/gemma/core/config/Settings.java @@ -168,4 +168,8 @@ public static String getString( String key ) { public static String getString( String key, String defaultValue ) { return Settings.config.getString( key, defaultValue ); } + + public static String[] getStringArray( String key ) { + return Settings.config.getStringArray( key ); + } } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/AffyPowerToolsProbesetSummarize.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/AffyPowerToolsProbesetSummarize.java index edf5563d26..c4ccd84d7e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/AffyPowerToolsProbesetSummarize.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/AffyPowerToolsProbesetSummarize.java @@ -17,6 +17,8 @@ import org.apache.commons.collections4.CollectionUtils; 
import org.apache.commons.configuration2.PropertiesConfiguration; import org.apache.commons.configuration2.ex.ConfigurationException; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; @@ -28,7 +30,6 @@ import ubic.basecode.util.FileTools; import ubic.gemma.core.config.Settings; import ubic.gemma.core.profiling.StopWatchUtils; -import ubic.gemma.core.util.concurrent.GenericStreamConsumer; import ubic.gemma.model.common.quantitationtype.*; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; @@ -37,8 +38,11 @@ import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import javax.annotation.Nullable; import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.*; +import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -423,13 +427,13 @@ private File findCdf( ArrayDesign ad ) { * /bigscratch/GSE123/*.CEL * * - * @param targetPlatform ad - * @param cdfFileName e g. HG-U133A_2.cdf - * @param celfiles celfiles - * @param outputPath path + * @param targetPlatform ad + * @param cdfFileName e g. HG-U133A_2.cdf + * @param celfiles celfiles + * @param outputPath path * @return string */ - private String getCDFCommand( ArrayDesign targetPlatform, String cdfFileName, List celfiles, + private String[] getCDFCommand( ArrayDesign targetPlatform, @Nullable String cdfFileName, List celfiles, String outputPath ) { String toolPath = Settings.getString( "affy.power.tools.exec" ); @@ -457,8 +461,7 @@ private String getCDFCommand( ArrayDesign targetPlatform, String cdfFileName, Li * HG_U95C.CDF.gz, Mouse430_2.cdf.gz etc. 
*/ - return toolPath + " -a " + AffyPowerToolsProbesetSummarize.METHOD + " -d " + cdfFullPath + " -o " + outputPath - + " " + StringUtils.join( celfiles, " " ); + return ArrayUtils.addAll( new String[] { toolPath, "-a", AffyPowerToolsProbesetSummarize.METHOD, "-d", cdfFullPath, "-o", outputPath }, celfiles.toArray( new String[0] ) ); } /** @@ -466,7 +469,7 @@ private String getCDFCommand( ArrayDesign targetPlatform, String cdfFileName, Li * @param accessionsOfInterest Used for multiplatform studies; if null, ignored * @return strings */ - private List getCelFiles( Collection files, Collection accessionsOfInterest ) { + private List getCelFiles( Collection files, @Nullable Collection accessionsOfInterest ) { Set celfiles = new HashSet<>(); for ( File f : files ) { @@ -514,12 +517,12 @@ private List getCelFiles( Collection files, Collection acc * http://media.affymetrix.com/support/developer/powertools/changelog/apt-probeset-summarize.html * http://bib.oxfordjournals.org/content/early/2011/04/15/bib.bbq086.full * - * @param ad ad - * @param celfiles celfiles - * @param outputPath directory + * @param ad ad + * @param celfiles celfiles + * @param outputPath directory * @return string or null if not found.s */ - private String getMPSCommand( ArrayDesign ad, List celfiles, String outputPath ) { + private String[] getMPSCommand( ArrayDesign ad, List celfiles, String outputPath ) { /* * Get the pgf, clf, mps file for this platform. qc probesets: optional. 
*/ @@ -550,8 +553,9 @@ private String getMPSCommand( ArrayDesign ad, List celfiles, String outp this.checkFileReadable( mps ); this.checkFileReadable( qcc ); - return toolPath + " -a " + AffyPowerToolsProbesetSummarize.METHOD + " -p " + pgf + " -c " + clf + " -m " + mps - + " -o " + outputPath + " --qc-probesets " + qcc + " " + StringUtils.join( celfiles, " " ); + return ArrayUtils.addAll( new String[] { toolPath, "-a", AffyPowerToolsProbesetSummarize.METHOD, + "-p", pgf, "-c", clf, "-m", mps, "-o", outputPath, "--qc-probesets", qcc }, + celfiles.toArray( new String[0] ) ); } private String getOutputFilePath( ExpressionExperiment ee ) { @@ -585,7 +589,7 @@ private Collection tryRun( ExpressionExperiment ee, Arr List celFiles = this.getCelFiles( files, accessionsOfInterest ); AffyPowerToolsProbesetSummarize.log.info( "Located " + celFiles.size() + " cel files" ); String outputPath = this.getOutputFilePath( ee ); - String cmd; + String[] cmd; // look for a CDF first. File cdf = this.findCdf( targetPlatform ); @@ -607,44 +611,31 @@ private Collection tryRun( ExpressionExperiment ee, Arr AffyPowerToolsProbesetSummarize.log.info( "Original platform: " + originalPlatform + "; Target platform (apt-probeset-summarize will be called with): " + targetPlatform ); - AffyPowerToolsProbesetSummarize.log.info( "Running: " + cmd ); - - int exitVal = Integer.MIN_VALUE; + AffyPowerToolsProbesetSummarize.log.info( "Running: " + Arrays.toString( cmd ) ); StopWatch overallWatch = new StopWatch(); overallWatch.start(); try { - final Process run = Runtime.getRuntime().exec( cmd ); - GenericStreamConsumer gscErr = new GenericStreamConsumer( run.getErrorStream(), true ); - GenericStreamConsumer gscIn = new GenericStreamConsumer( run.getInputStream() ); - gscErr.start(); - gscIn.start(); - - int i = 0; - while ( exitVal == Integer.MIN_VALUE ) { - try { - exitVal = run.exitValue(); - } catch ( IllegalThreadStateException e ) { - // okay, still waiting. 
- } - Thread.sleep( 1000 ); - - if ( ++i % AffyPowerToolsProbesetSummarize.AFFY_UPDATE_INTERVAL_S == 0 ) { - File outputFile = new File( outputPath + File.separator + "apt-probeset-summarize.log" ); - Long size = outputFile.length(); - - String minutes = StopWatchUtils.getMinutesElapsed( overallWatch ); - AffyPowerToolsProbesetSummarize.log - .info( String.format( "apt-probeset-summarize logging output so far: %.2f", size / 1024.0 ) - + " kb (" + minutes + " minutes elapsed)" ); - } - + final Process run = new ProcessBuilder( cmd ) + // TODO: switch to Redirect.DISCARD from Java 9 + .redirectOutput( ProcessBuilder.Redirect.appendTo( new File( "/dev/null" ) ) ) + .redirectError( ProcessBuilder.Redirect.PIPE ) + .start(); + + while ( !run.waitFor( AFFY_UPDATE_INTERVAL_S, TimeUnit.SECONDS ) ) { + File outputFile = new File( outputPath + File.separator + "apt-probeset-summarize.log" ); + long size = outputFile.length(); + String minutes = StopWatchUtils.getMinutesElapsed( overallWatch ); + AffyPowerToolsProbesetSummarize.log + .info( String.format( "apt-probeset-summarize logging output so far: %.2f", size / 1024.0 ) + + " kb (" + minutes + " minutes elapsed)" ); } - if ( exitVal > 0 ) { - AffyPowerToolsProbesetSummarize.log - .warn( "apt-probeset-summarize exit value was non-zero: " + exitVal ); + int exitVal = run.exitValue(); + if ( exitVal != 0 ) { + String errorMessage = StringUtils.strip( IOUtils.toString( run.getErrorStream(), StandardCharsets.UTF_8 ) ); + throw new RuntimeException( "apt-probeset-summarize exit value was non-zero: " + exitVal + "\n" + errorMessage ); } overallWatch.stop(); diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessingService.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessingService.java index c857d88e01..0f3bb50b13 100644 --- 
a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessingService.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessingService.java @@ -154,25 +154,22 @@ Collection processArrayDesign( ArrayDesign arrayDesign, InputStream * This method ALWAYS clobbers the BioSequence associations that are associated with the array design (at least, if * any of the probe identifiers in the file given match the array design). * + * @param arrayDesign plaftorm * @param sequenceIdentifierFile Sequence file has two columns: column 1 is a probe id, column 2 is a genbank - * accession or sequence name, delimited by tab. Sequences will be fetched from BLAST databases if possible; - * ones missing will be sought directly in Gemma. - * @param force If true, if an existing BioSequence that matches is found in the system, any existing sequence - * information in the BioSequence will be overwritten. - * @param arrayDesign plaftorm - * @param taxon taxon - * @param blastDbHome blast db home - * @param databaseNames database names + * accession or sequence name, delimited by tab. Sequences will be fetched from BLAST databases if possible; + * ones missing will be sought directly in Gemma. + * @param databaseNames database names + * @param taxon taxon + * @param force If true, if an existing BioSequence that matches is found in the system, any existing sequence + * information in the BioSequence will be overwritten. * @return bio sequences * @throws IOException when IO problems occur. 
*/ Collection processArrayDesign( ArrayDesign arrayDesign, InputStream sequenceIdentifierFile, - String[] databaseNames, String blastDbHome, Taxon taxon, boolean force ) throws IOException; + String[] databaseNames, Taxon taxon, boolean force ) throws IOException; Collection processArrayDesign( ArrayDesign arrayDesign, InputStream sequenceIdentifierFile, - String[] databaseNames, String blastDbHome, Taxon taxon, boolean force, FastaCmd fc ) throws IOException; - - Collection processArrayDesign( ArrayDesign arrayDesign, String[] databaseNames, boolean force ); + String[] databaseNames, Taxon taxon, boolean force, FastaCmd fc ) throws IOException; /** * For the case where the sequences are retrieved simply by the Genbank accession. For this to work, the array @@ -182,41 +179,36 @@ Collection processArrayDesign( ArrayDesign arrayDesign, InputStream * sequence is based on that, not the one provided in the Biosequence; if it differs it will be replaced. This * happens when the Genbank accession is for a Refseq (for example) but the actual clone on the array is from IMAGE. * + * @param arrayDesign platform * @param databaseNames the names of the BLAST-formatted databases to search (e.g., nt, est_mouse) - * @param blastDbHome where to find the blast databases for sequence retrieval - * @param force If true, then when an existing BioSequence contains a non-empty sequence value, it will be - * overwritten with a new one. - * @param arrayDesign platform + * @param force If true, then when an existing BioSequence contains a non-empty sequence value, it will be + * overwritten with a new one. * @return bio sequences */ - Collection processArrayDesign( ArrayDesign arrayDesign, String[] databaseNames, String blastDbHome, - boolean force ); + Collection processArrayDesign( ArrayDesign arrayDesign, String[] databaseNames, boolean force ); /** * Provided primarily for testing. 
* + * @param arrayDesign platform * @param databaseNames the names of the BLAST-formatted databases to search (e.g., nt, est_mouse) - * @param blastDbHome where to find the blast databases for sequence retrieval - * @param force If true, then when an existing BioSequence contains a non-empty sequence value, it will be - * overwritten with a new one. - * @param arrayDesign platform - * @param fc fasta command + * @param force If true, then when an existing BioSequence contains a non-empty sequence value, it will be + * overwritten with a new one. + * @param fc fasta command * @return bio sequences */ - Collection processArrayDesign( ArrayDesign arrayDesign, String[] databaseNames, String blastDbHome, - boolean force, FastaCmd fc ); + Collection processArrayDesign( ArrayDesign arrayDesign, String[] databaseNames, boolean force, FastaCmd fc ); /** * Update a single sequence in the system. * - * @param force If true, if an existing BioSequence that matches if found in the system, any existing sequence - * information in the BioSequence will be overwritten. + * @param sequenceId sequence id * @param databaseNames database names - * @param blastDbHome blast db home - * @param sequenceId sequence id + * @param force If true, if an existing BioSequence that matches if found in the system, any existing sequence + * information in the BioSequence will be overwritten. * @return persistent BioSequence. 
*/ - BioSequence processSingleAccession( String sequenceId, String[] databaseNames, String blastDbHome, boolean force ); + BioSequence processSingleAccession( String sequenceId, String[] databaseNames, boolean force ); Taxon validateTaxon( Taxon taxon, ArrayDesign arrayDesign ) throws IllegalArgumentException; diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessingServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessingServiceImpl.java index 19636d8248..9983d24794 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessingServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessingServiceImpl.java @@ -23,7 +23,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.security.access.method.P; import org.springframework.stereotype.Component; import ubic.gemma.core.analysis.report.ArrayDesignReportService; import ubic.gemma.core.analysis.sequence.SequenceManipulation; @@ -44,6 +43,7 @@ import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.genome.biosequence.BioSequenceService; +import javax.annotation.Nullable; import java.io.*; import java.util.*; @@ -402,8 +402,8 @@ public Collection processArrayDesign( ArrayDesign arrayDesign, Inpu @Override public Collection processArrayDesign( ArrayDesign arrayDesign, InputStream sequenceIdentifierFile, - String[] databaseNames, String blastDbHome, Taxon taxon, boolean force ) throws IOException { - return this.processArrayDesign( arrayDesign, sequenceIdentifierFile, databaseNames, blastDbHome, taxon, force, + String[] databaseNames, Taxon taxon, boolean force ) throws IOException { + return 
this.processArrayDesign( arrayDesign, sequenceIdentifierFile, databaseNames, taxon, force, null ); } @@ -412,7 +412,7 @@ public Collection processArrayDesign( ArrayDesign arrayDesign, Inpu */ @Override public Collection processArrayDesign( ArrayDesign arrayDesign, InputStream sequenceIdentifierFile, - String[] databaseNames, String blastDbHome, Taxon taxon, boolean force, FastaCmd fc ) throws IOException { + String[] databaseNames, Taxon taxon, boolean force, FastaCmd fc ) throws IOException { this.checkForCompositeSequences( arrayDesign ); Map probe2acc = this.parseAccessionFile( sequenceIdentifierFile ); @@ -434,7 +434,7 @@ public Collection processArrayDesign( ArrayDesign arrayDesign, Inpu fc = new SimpleFastaCmd(); Collection retrievedSequences = this - .searchBlastDbs( databaseNames, blastDbHome, notFound, fc ); + .searchBlastDbs( databaseNames, notFound, fc ); // map of accessions to sequence. Map found = this @@ -493,18 +493,12 @@ public Collection processArrayDesign( ArrayDesign arrayDesign, Inpu @Override public Collection processArrayDesign( ArrayDesign arrayDesign, String[] databaseNames, boolean force ) { - return this.processArrayDesign( arrayDesign, databaseNames, null, force ); + return this.processArrayDesign( arrayDesign, databaseNames, force, null ); } @Override public Collection processArrayDesign( ArrayDesign arrayDesign, String[] databaseNames, - String blastDbHome, boolean force ) { - return this.processArrayDesign( arrayDesign, databaseNames, blastDbHome, force, null ); - } - - @Override - public Collection processArrayDesign( ArrayDesign arrayDesign, String[] databaseNames, - String blastDbHome, boolean force, FastaCmd fc ) { + boolean force, @Nullable FastaCmd fc ) { Map accessionsToFetch = this.initializeFetchList( arrayDesign, force ); @@ -525,7 +519,7 @@ public Collection processArrayDesign( ArrayDesign arrayDesign, Stri if ( fc == null ) fc = new SimpleFastaCmd(); Collection retrievedSequences = this - .searchBlastDbs( databaseNames, 
blastDbHome, notFound, fc ); + .searchBlastDbs( databaseNames, notFound, fc ); Map found = this .findOrUpdateSequences( accessionsToFetch, retrievedSequences, taxaOnArray, force ); @@ -547,14 +541,14 @@ public Collection processArrayDesign( ArrayDesign arrayDesign, Stri /** * Update a single sequence in the system. * - * @param force If true, if an existing BioSequence that matches if found in the system, any existing sequence - * information in the BioSequence will be overwritten. + * @param force If true, if an existing BioSequence that matches if found in the system, any existing sequence + * information in the BioSequence will be overwritten. * @return persistent BioSequence. */ @Override - public BioSequence processSingleAccession( String sequenceId, String[] databaseNames, String blastDbHome, + public BioSequence processSingleAccession( String sequenceId, String[] databaseNames, boolean force ) { - BioSequence found = this.searchBlastDbs( databaseNames, blastDbHome, sequenceId, new SimpleFastaCmd() ); + BioSequence found = this.searchBlastDbs( databaseNames, sequenceId, new SimpleFastaCmd() ); if ( found == null ) return null; return this.createOrUpdateGenbankSequence( found, force ); @@ -1039,17 +1033,12 @@ private Collection processOligoDesign( ArrayDesign arrayDesign, Inp return res; } - private Collection searchBlastDbs( String[] databaseNames, String blastDbHome, + private Collection searchBlastDbs( String[] databaseNames, Collection accessionsToFetch, FastaCmd fc ) { Collection retrievedSequences = new HashSet<>(); for ( String dbName : databaseNames ) { - Collection moreBioSequences; - if ( blastDbHome != null ) { - moreBioSequences = fc.getBatchAccessions( accessionsToFetch, dbName, blastDbHome ); - } else { - moreBioSequences = fc.getBatchAccessions( accessionsToFetch, dbName ); - } + Collection moreBioSequences = fc.getBatchAccessions( accessionsToFetch, dbName ); if ( ArrayDesignSequenceProcessingServiceImpl.log.isDebugEnabled() ) 
ArrayDesignSequenceProcessingServiceImpl.log @@ -1066,16 +1055,11 @@ private Collection searchBlastDbs( String[] databaseNames, String b /** * Search for a single accession */ - private BioSequence searchBlastDbs( String[] databaseNames, String blastDbHome, String accessionToFetch, + private BioSequence searchBlastDbs( String[] databaseNames, String accessionToFetch, FastaCmd fc ) { for ( String dbName : databaseNames ) { - BioSequence moreBioSequence; - if ( blastDbHome != null ) { - moreBioSequence = fc.getByAccession( accessionToFetch, dbName, blastDbHome ); - } else { - moreBioSequence = fc.getByAccession( accessionToFetch, dbName, null ); - } + BioSequence moreBioSequence = fc.getByAccession( accessionToFetch, dbName ); if ( moreBioSequence != null ) return moreBioSequence; } diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/genome/FastaCmd.java b/gemma-core/src/main/java/ubic/gemma/core/loader/genome/FastaCmd.java index c47e73e796..f40c8f6444 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/genome/FastaCmd.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/genome/FastaCmd.java @@ -38,13 +38,4 @@ public interface FastaCmd { Collection getBatchAccessions( Collection accessions, String database ); Collection getBatchIdentifiers( Collection identifiers, String database ); - - BioSequence getByAccession( String accession, String database, String blastHome ); - - BioSequence getByIdentifier( int identifier, String database, String blastHome ); - - Collection getBatchAccessions( Collection accessions, String database, String blastHome ); - - Collection getBatchIdentifiers( Collection identifiers, String database, String blastHome ); - } diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/genome/SimpleFastaCmd.java b/gemma-core/src/main/java/ubic/gemma/core/loader/genome/SimpleFastaCmd.java index 9ff14f6aa4..03e951a7ce 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/genome/SimpleFastaCmd.java +++ 
b/gemma-core/src/main/java/ubic/gemma/core/loader/genome/SimpleFastaCmd.java @@ -18,15 +18,17 @@ */ package ubic.gemma.core.loader.genome; +import lombok.extern.apachecommons.CommonsLog; +import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import ubic.gemma.core.util.concurrent.GenericStreamConsumer; -import ubic.gemma.core.util.concurrent.ParsingStreamConsumer; -import ubic.gemma.model.genome.biosequence.BioSequence; import ubic.gemma.core.config.Settings; +import ubic.gemma.model.genome.biosequence.BioSequence; -import java.io.*; +import java.io.BufferedWriter; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.Collection; /** @@ -35,95 +37,72 @@ * * @author pavlidis */ +@CommonsLog public class SimpleFastaCmd implements FastaCmd { // this name should be eventually changed to blastdbCmd.exe, since NCBI BLAST changed the name of the program. 
- public static final String FASTA_CMD_ENV_VAR = "fastaCmd.exe"; - - private static final Log log = LogFactory.getLog( SimpleFastaCmd.class.getName() ); - private static final String blastDbHome = System.getenv( "BLASTDB" ); - private static String fastaCmdExecutable = Settings.getString( SimpleFastaCmd.FASTA_CMD_ENV_VAR ); - private String dbOption = "d"; - private String queryOption = "s"; - private String entryBatchOption = "i"; - - public SimpleFastaCmd() { - super(); - - if ( System.getProperty( "os.name" ) != null && System.getProperty( "os.name" ).startsWith( "Windows" ) - && !SimpleFastaCmd.fastaCmdExecutable.endsWith( "\"" ) ) { - SimpleFastaCmd.fastaCmdExecutable = StringUtils.strip( SimpleFastaCmd.fastaCmdExecutable, "\"\'" ); - SimpleFastaCmd.fastaCmdExecutable = "\"" + SimpleFastaCmd.fastaCmdExecutable + "\""; - } - - if ( SimpleFastaCmd.fastaCmdExecutable.contains( "blastdbcmd" ) ) { - dbOption = "db"; - queryOption = "entry"; - entryBatchOption = "entry_batch"; + public static final String FASTA_CMD_CONFIG_NAME = "fastaCmd.exe"; + + private static String FASTA_CMD_EXE = Settings.getString( FASTA_CMD_CONFIG_NAME ); + + private static final String DB_OPTION; + private static final String QUERY_OPTION; + private static final String ENTRY_BATCH_OPTION; + + static { + if ( FASTA_CMD_EXE.endsWith( "blastdbcmd" ) ) { + log.debug( "Detected that blastdbcmd is being used, setting options accordingly." ); + DB_OPTION = "db"; + QUERY_OPTION = "entry"; + ENTRY_BATCH_OPTION = "entry_batch"; + } else { + log.debug( "Detected that fastacmd is being used, setting options accordingly." 
); + DB_OPTION = "d"; + QUERY_OPTION = "s"; + ENTRY_BATCH_OPTION = "i"; } } - @Override - public BioSequence getByAccession( String accession, String database ) { - return this.getByAccession( accession, database, SimpleFastaCmd.blastDbHome ); - } + private String blastHome = System.getenv( "BLASTDB" ); - @Override - public BioSequence getByIdentifier( int identifier, String database ) { - try { - return this.getSingle( String.valueOf( identifier ), database, SimpleFastaCmd.blastDbHome ); - } catch ( IOException e ) { - throw new RuntimeException( e ); - } + public void setBlastHome( String blastDbHome ) { + this.blastHome = blastDbHome; } @Override - public Collection getBatchAccessions( Collection accessions, String database ) { - return this.getBatchAccessions( accessions, database, SimpleFastaCmd.blastDbHome ); - } - - @Override - public Collection getBatchIdentifiers( Collection identifiers, String database ) { - return this.getBatchIdentifiers( identifiers, database, SimpleFastaCmd.blastDbHome ); - } - - @Override - public BioSequence getByAccession( String accession, String database, String blastHome ) { + public BioSequence getByAccession( String accession, String database ) { try { - return this.getSingle( accession, database, blastHome ); + return getSingle( accession, database ); } catch ( IOException e ) { throw new RuntimeException( e ); } } @Override - public BioSequence getByIdentifier( int identifier, String database, String blastHome ) { + public BioSequence getByIdentifier( int identifier, String database ) { try { - return this.getSingle( String.valueOf( identifier ), database, blastHome ); + return getSingle( String.valueOf( identifier ), database ); } catch ( IOException e ) { throw new RuntimeException( e ); } } @Override - public Collection getBatchAccessions( Collection accessions, String database, - String blastHome ) { + public Collection getBatchAccessions( Collection accessions, String database ) { try { - return this.getMultiple( 
accessions, database, blastHome ); + return getMultiple( accessions, database ); } catch ( IOException e ) { throw new RuntimeException( e ); } } @Override - public Collection getBatchIdentifiers( Collection identifiers, String database, - String blastHome ) { + public Collection getBatchIdentifiers( Collection identifiers, String database ) { try { - return this.getMultiple( identifiers, database, blastHome ); + return getMultiple( identifiers, database ); } catch ( IOException e ) { throw new RuntimeException( e ); } - } /** @@ -131,77 +110,36 @@ public Collection getBatchIdentifiers( Collection identifi * * @param keys keys * @param database database - * @param blastHome blast home * @return bio sequences * @throws IOException when there are IO problems */ - private Collection getMultiple( Collection keys, String database, String blastHome ) + private Collection getMultiple( Collection keys, String database ) throws IOException { - - if ( StringUtils.isBlank( SimpleFastaCmd.fastaCmdExecutable ) ) - throw new IllegalStateException( "No blastdbcmd executable: You must set " + SimpleFastaCmd.FASTA_CMD_ENV_VAR - + " in your environment." 
); - - if ( blastHome == null ) { - throw new IllegalArgumentException( - "No blast database location specified, you must set this in your environment" ); - } - File tmp = File.createTempFile( "sequenceIds", ".txt" ); - try ( Writer tmpOut = new FileWriter( tmp ) ) { - - for ( Object object : keys ) { - if ( object instanceof String ) { - String acc = object.toString().replaceFirst( "\\.[0-9]+", "" ); - tmpOut.write( acc + "\n" ); - - } else { - tmpOut.write( object.toString() + "\n" ); - } - } - } - String[] opts = new String[] { "BLASTDB=" + blastHome }; - String[] command = new String[] { SimpleFastaCmd.fastaCmdExecutable, "-long_seqids", "-target_only", "-" + dbOption, database, "-" + entryBatchOption, tmp.getAbsolutePath() }; - SimpleFastaCmd.log.info( command ); - Process pr; - SimpleFastaCmd.log.info( "BLASTDB=" + blastHome ); - pr = Runtime.getRuntime().exec( command, opts ); - - // EntityUtils.deleteFile( tmp ); - return this.getSequencesFromFastaCmdOutput( pr ); - - } - - private Collection getSequencesFromFastaCmdOutput( Process pr ) { - - try ( final InputStream is = new BufferedInputStream( pr.getInputStream() ); - InputStream err = pr.getErrorStream() ) { - - final FastaParser parser = new FastaParser(); - - ParsingStreamConsumer sg = new ParsingStreamConsumer<>( parser, is ); - GenericStreamConsumer gsc = new GenericStreamConsumer( err, true ); - sg.start(); - gsc.start(); - int exitVal = Integer.MIN_VALUE; - - while ( exitVal == Integer.MIN_VALUE ) { - - try { - exitVal = pr.exitValue(); - } catch ( IllegalThreadStateException e ) { - // okay, still waiting. 
+ checkBlastConfig(); + Path tmp = Files.createTempFile( "sequenceIds", ".txt" ); + try { + try ( BufferedWriter tmpOut = Files.newBufferedWriter( tmp ) ) { + for ( Object object : keys ) { + if ( object instanceof String ) { + String acc = object.toString().replaceFirst( "\\.[0-9]+", "" ); + tmpOut.write( acc + "\n" ); + } else { + tmpOut.write( object.toString() + "\n" ); + } } - Thread.sleep( 200 ); - - SimpleFastaCmd.log - .debug( "fastacmd exit value=" + exitVal ); // often nonzero if some sequences are not found. - } - Thread.sleep( 200 ); - return parser.getResults(); - - } catch ( Exception e ) { - throw new RuntimeException( e ); + String[] command = new String[] { SimpleFastaCmd.FASTA_CMD_EXE, "-long_seqids", "-target_only", + "-" + DB_OPTION, database, "-" + ENTRY_BATCH_OPTION, tmp.toString() }; + SimpleFastaCmd.log.info( String.join( " ", command ) ); + ProcessBuilder pb = new ProcessBuilder( command ) + .redirectOutput( ProcessBuilder.Redirect.PIPE ) + .redirectError( ProcessBuilder.Redirect.PIPE ); + SimpleFastaCmd.log.info( "BLASTDB=" + blastHome ); + pb.environment().put( "BLASTDB", blastHome ); + Process pr = pb.start(); + return getSequencesFromFastaCmdOutput( pr ); + } finally { + Files.delete( tmp ); } } @@ -210,23 +148,56 @@ private Collection getSequencesFromFastaCmdOutput( Process pr ) { * @param database db * @throws IOException io problems */ - private BioSequence getSingle( String key, String database, String blastHome ) throws IOException { - if ( blastHome == null ) { - blastHome = SimpleFastaCmd.blastDbHome; - } - String[] opts = new String[] { "BLASTDB=" + blastHome }; - String[] command = new String[] { SimpleFastaCmd.fastaCmdExecutable, "-long_seqids", "-target_only", "-" + dbOption, database, "-" + queryOption, key }; - Process pr = Runtime.getRuntime().exec( command, opts ); - log.info( StringUtils.join( opts, " " ) ); - SimpleFastaCmd.log.info( command ); - Collection sequences = this.getSequencesFromFastaCmdOutput( pr ); - if ( 
sequences.size() == 0 ) { + private BioSequence getSingle( String key, String database ) throws IOException { + checkBlastConfig(); + String[] command = new String[] { SimpleFastaCmd.FASTA_CMD_EXE, "-long_seqids", "-target_only", + "-" + DB_OPTION, database, "-" + QUERY_OPTION, key }; + SimpleFastaCmd.log.info( String.join( " ", command ) ); + ProcessBuilder pb = new ProcessBuilder( command ) + .redirectOutput( ProcessBuilder.Redirect.PIPE ) + .redirectError( ProcessBuilder.Redirect.PIPE ); + SimpleFastaCmd.log.info( "BLASTDB=" + blastHome ); + pb.environment().put( "BLASTDB", blastHome ); + Process pr = pb.start(); + Collection sequences = getSequencesFromFastaCmdOutput( pr ); + if ( sequences.isEmpty() ) { return null; - } - if ( sequences.size() == 1 ) { + } else if ( sequences.size() == 1 ) { return sequences.iterator().next(); + } else { + throw new IllegalStateException( "Got more than one sequence!" ); } - throw new IllegalStateException( "Got more than one sequence!" ); } + private void checkBlastConfig() { + if ( StringUtils.isBlank( SimpleFastaCmd.FASTA_CMD_EXE ) ) + throw new IllegalStateException( "No blastdbcmd executable: You must set " + SimpleFastaCmd.FASTA_CMD_CONFIG_NAME + + " in your environment." ); + if ( blastHome == null ) { + throw new IllegalArgumentException( "No blast database location specified, you must set the BLASTDB environment variable or use setBlastHome()." 
); + } + } + + private Collection getSequencesFromFastaCmdOutput( Process pr ) { + try { + final FastaParser parser = new FastaParser(); + parser.parse( pr.getInputStream() ); + int exitVal = pr.waitFor(); + if ( exitVal != 0 ) { + // check standard error stream for specific error messages + String errorMessage = StringUtils.strip( IOUtils.toString( pr.getErrorStream(), StandardCharsets.UTF_8 ) ); + if ( errorMessage.contains( "Entry or entries not found in BLAST database" ) || errorMessage.contains( "Skipped" ) ) { + log.warn( "There are warnings in " + FASTA_CMD_EXE + " output:\n" + errorMessage ); + return parser.getResults(); + } + throw new RuntimeException( FASTA_CMD_EXE + " exit value=" + exitVal + " " + errorMessage ); + } + return parser.getResults(); + } catch ( IOException e ) { + throw new RuntimeException( e ); + } catch ( InterruptedException e ) { + Thread.currentThread().interrupt(); + throw new RuntimeException( e ); + } + } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/concurrent/GenericStreamConsumer.java b/gemma-core/src/main/java/ubic/gemma/core/util/concurrent/GenericStreamConsumer.java deleted file mode 100644 index 42017b20e3..0000000000 --- a/gemma-core/src/main/java/ubic/gemma/core/util/concurrent/GenericStreamConsumer.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * The Gemma project - * - * Copyright (c) 2006 University of British Columbia - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ -package ubic.gemma.core.util.concurrent; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; - -/** - * See http://www.javaworld.com/javaworld/jw-12-2000/jw-1229-traps.html - * - * @author pavlidis - */ -public class GenericStreamConsumer extends Thread { - private final InputStream is; - private final boolean printToSderr; - - public GenericStreamConsumer( InputStream is ) { - this.is = is; - this.printToSderr = false; - } - - /** - * - * @param is input stream - * @param printToSderr (default=false) - */ - public GenericStreamConsumer( InputStream is, boolean printToSderr ) { - this.is = is; - this.printToSderr = printToSderr; - } - - @Override - public void run() { - try { - InputStreamReader isr = new InputStreamReader( is ); - BufferedReader br = new BufferedReader( isr ); - String line; - while ( ( line = br.readLine() ) != null ) { - if ( printToSderr ) { - System.err.println( line ); - } - } - } catch ( IOException ioe ) { - ioe.printStackTrace(); - } - } -} \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/concurrent/ParsingStreamConsumer.java b/gemma-core/src/main/java/ubic/gemma/core/util/concurrent/ParsingStreamConsumer.java deleted file mode 100644 index 786863c13a..0000000000 --- a/gemma-core/src/main/java/ubic/gemma/core/util/concurrent/ParsingStreamConsumer.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * The Gemma project - * - * Copyright (c) 2006 University of British Columbia - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - * - */ -package ubic.gemma.core.util.concurrent; - -import ubic.gemma.core.loader.util.parser.Parser; - -import java.io.IOException; -import java.io.InputStream; - -/** - * See http://www.javaworld.com/javaworld/jw-12-2000/jw-1229-traps.html - * - * @author pavlidis - */ -public class ParsingStreamConsumer extends Thread { - private final InputStream is; - private final Parser parser; - - public ParsingStreamConsumer( Parser parser, InputStream is ) { - this.is = is; - this.parser = parser; - } - - @Override - public void run() { - try { - parser.parse( is ); - } catch ( IOException e ) { - throw new RuntimeException( e ); - } - } -} diff --git a/gemma-core/src/main/native/.cvsignore b/gemma-core/src/main/native/.cvsignore deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/gemma-core/src/main/native/Blat.c b/gemma-core/src/main/native/Blat.c deleted file mode 100644 index 3b44d24dea..0000000000 --- a/gemma-core/src/main/native/Blat.c +++ /dev/null @@ -1,26 +0,0 @@ -#include "include/gfClient.h" -#include -#include - -/* - * - */ -JNIEXPORT jobject JNICALL Java_ubic_gemma_apps_Blat_GfClientCall - (JNIEnv * env, jobject obj, jstring jhostname, jstring jport, - jstring jseqDir, jstring jqueryFile, jstring joutputFile) -{ - - const char *hostname = (*env)->GetStringUTFChars (env, jhostname, 0); - const char *port = (*env)->GetStringUTFChars (env, jport, 0); - const char *seqDir = (*env)->GetStringUTFChars (env, jseqDir, 0); - const char *queryFile = (*env)->GetStringUTFChars (env, jqueryFile, 0); - const char *outputFile = (*env)->GetStringUTFChars (env, joutputFile, 0); - gfClient (hostname, port, seqDir, queryFile, outputFile, "dna", "dna"); - - (*env)->ReleaseStringUTFChars (env, jhostname, hostname); - (*env)->ReleaseStringUTFChars (env, jport, port); - (*env)->ReleaseStringUTFChars (env, jseqDir, seqDir); - (*env)->ReleaseStringUTFChars 
(env, jqueryFile, queryFile); - (*env)->ReleaseStringUTFChars (env, joutputFile, outputFile); - return; -} diff --git a/gemma-core/src/main/native/README b/gemma-core/src/main/native/README deleted file mode 100644 index 814b3a6572..0000000000 --- a/gemma-core/src/main/native/README +++ /dev/null @@ -1,33 +0,0 @@ -libtool --mode=link gcc -shared -g -O -o libBlat.la Blat.c /usr/local/lib/libGfClient.la -rpath /usr/java/jdk1.5.0_03/jre/lib/i386 -I${JAVA_HOME}/include/ -I${JAVA_HOME}/include/linux -L/usr/local/lib -libtool --mode=install install -c libBlat.la /usr/java/jdk1.5.0_03/jre/lib/i386/libBlat.la - -libtool --mode=link gcc -shared -g -O -o libBlat.la Blat.c /usr/local/lib/libGfClient.a -rpath /usr/local/lib -I${JAVA_HOME}/include/ -I${JAVA_HOME}/include/linux -L/usr/local/lib -libtool --mode=install install -c libBlat.la /usr/local/lib/libBlat.la - - - -libtool --mode=link gcc -shared -g -O -o libBlat.la Blat.c gfClient.lo -rpath /usr/java/jdk1.5.0_03/jre/lib/i386 -I${JAVA_HOME}/include/ -I${JAVA_HOME}/include/linux -L/usr/local/lib - - - ----------------------------------------------------------------------- -Libraries have been installed in: - /usr/java/jdk1.5.0_03/jre/lib/i386 - -If you ever happen to want to link against installed libraries -in a given directory, LIBDIR, you must either use libtool, and -specify the full pathname of the library, or use the `-LLIBDIR' -flag during linking and do at least one of the following: - - add LIBDIR to the `LD_LIBRARY_PATH' environment variable - during execution - - add LIBDIR to the `LD_RUN_PATH' environment variable - during linking - - use the `-Wl,--rpath -Wl,LIBDIR' linker flag - - have your system administrator add LIBDIR to `/etc/ld.so.conf' - -See any operating system documentation about shared libraries for -more information, such as the ld(1) and ld.so(8) manual pages. 
----------------------------------------------------------------------- - - - diff --git a/gemma-core/src/main/native/include/Blat.h b/gemma-core/src/main/native/include/Blat.h deleted file mode 100644 index 867e016eff..0000000000 --- a/gemma-core/src/main/native/include/Blat.h +++ /dev/null @@ -1,22 +0,0 @@ -/* DO NOT EDIT THIS FILE - it is machine generated */ -#include -/* Header for class ubic_gemma_apps_Blat */ - -#ifndef _Included_ubi_gemma_apps_Blat -#define _Included_ubic_gemma_apps_Blat -#ifdef __cplusplus -extern "C" { -#endif -/* Inaccessible static: log */ -/* Inaccessible static: class_000240 */ -/* - * Class: ubic_gemma_apps_Blat - * Method: GfClientCall - * Signature: (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V - */ -JNIEXPORT void JNICALL Java_ubic_gemma_apps_Blat_GfClientCall (JNIEnv *, jobject, jstring, jstring, jstring, jstring, jstring); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/gemma-core/src/main/native/include/gfClient.h b/gemma-core/src/main/native/include/gfClient.h deleted file mode 100644 index 81389c9104..0000000000 --- a/gemma-core/src/main/native/include/gfClient.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef GFCLIENT_H -#define GFCLIENT_H -#include - -void gfClient(const char *hostName, const char *portName, const char *tseqDir, const char *inName, const char *outName, const char *qType, const char *tType); - -#endif /* GFCLIENT_H */ - - diff --git a/gemma-core/src/main/native/makefile b/gemma-core/src/main/native/makefile deleted file mode 100644 index 3738a906fd..0000000000 --- a/gemma-core/src/main/native/makefile +++ /dev/null @@ -1,9 +0,0 @@ -blat: - libtool --mode=compile gcc -c -O -o Blat.o Blat.c -I../include/ -I${JAVA_HOME}/include/ -I${JAVA_HOME}/include/linux - - -lib: blat - libtool --mode=link gcc -g -O -o libBlat.la Blat.lo -rpath /usr/local/lib -lGfClient - -install: - libtool --mode=install install -c libBlat.la /usr/local/lib/libBlat.la diff --git 
a/gemma-core/src/main/native/test/README b/gemma-core/src/main/native/test/README deleted file mode 100644 index dc8910adda..0000000000 --- a/gemma-core/src/main/native/test/README +++ /dev/null @@ -1,2 +0,0 @@ -This contains C code to test the function of the native code independently of Java - diff --git a/gemma-core/src/main/native/test/makefile b/gemma-core/src/main/native/test/makefile deleted file mode 100644 index ff6a605381..0000000000 --- a/gemma-core/src/main/native/test/makefile +++ /dev/null @@ -1,15 +0,0 @@ -all: O exe test - -O: - libtool --mode=compile gcc -g -O -c -o testBlat.o testBlat.c - -exe: O - libtool --mode=link gcc -g -O -o testBlat testBlat.o -lBlat - - -clean: - rm *.o *.lo - rm -rf ./lib - -test: exe - ./testBlat localhost 177778 test.fa diff --git a/gemma-core/src/main/native/test/test.fa b/gemma-core/src/main/native/test/test.fa deleted file mode 100644 index 879a4f7253..0000000000 --- a/gemma-core/src/main/native/test/test.fa +++ /dev/null @@ -1,11 +0,0 @@ ->testSequence -agtagtaggggattggttatgaggctagcataataatccaggcaagaggtaatcaggtttccatgagattggcaacaaaggg -actgaagtgaccagcattgcttatgcctggaagatttggaaaatgaggcatcaaaacgacaatgtctgaaggagtagcaggt -tatgggggaagtggatgggtttggaattggacatgtaagtgttacatgctggttagatatccagggaaagatacctagtaag -tagctggaagtatggatctggggcttaaaggagcacttcagtaattctttacataaaggcagtacagccatgaaaatagatg -atgttgccagagtaagtgtggggagagagaaggtcaaggacaaaaatgtggagagtacttgcttagaaagggtggaggggcc -atcaaaggagtcagaagcagcagttagagaagtagaaagaggagagtagcagcgtggtacactgaggtcaCCGGGGGAGGAG -AGAGGACGCAGCCAGCCACAGAACAGATGCATCCTCTAGGGCTAGAGGGTCCTGAAAGCTCCGAGAGTAATTCTCATGTGCA -TTTAGGTTTGGGAATAGATCACTGTTAATTCAACAGAGAAATGAAAGAAGAGAAGGTTCGGTGGGGTCCAGCCATGCCCTGT -TACGTGGAATTTTTTCCCTAAGGGTGTGGTCCCCTCCCCTACAGCTCGTCTTTTGGAGGGCTGGTCCAGGCTCCTCTAAGCC -ATGACGCCGGCTGAGGATCAGCGGTTGGTGTACATGATCTCCTCAGCCTTGCCCGTTGTCC diff --git a/gemma-core/src/main/native/test/testBlat.c b/gemma-core/src/main/native/test/testBlat.c deleted file mode 100644 index 49f75efc63..0000000000 --- 
a/gemma-core/src/main/native/test/testBlat.c +++ /dev/null @@ -1,12 +0,0 @@ -#include "../include/Blat.h" -#include "../include/gfClient.h" - -int main (int argc, char **argv) { - if (argc != 4) { - return 255; - } - gfClient(argv[1], argv[2], "./", argv[3], "/tmp/outfile.tmp", "dna", "dna") ; - return 1; - -} - diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/sequence/RepeatScanTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/sequence/RepeatScanTest.java index 2fa198cae1..4e6892c492 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/sequence/RepeatScanTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/sequence/RepeatScanTest.java @@ -1,8 +1,8 @@ /* * The Gemma project - * + * * Copyright (c) 2007 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -18,17 +18,38 @@ */ package ubic.gemma.core.analysis.sequence; -import junit.framework.TestCase; +import org.junit.Test; +import ubic.gemma.core.config.Settings; +import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.biosequence.BioSequence; +import java.util.Collections; + +import static org.junit.Assert.assertEquals; +import static ubic.gemma.core.util.test.Assumptions.assumeThatExecutableExists; + /** * @author pavlidis * */ -public class RepeatScanTest extends TestCase { +public class RepeatScanTest { + + private static final String repeatMaskerExe = Settings.getString( "repeatMasker.exe" ); + + @Test + public void testRepeatScan() { + assumeThatExecutableExists( repeatMaskerExe ); + Taxon taxon = Taxon.Factory.newInstance( "human" ); + BioSequence b = BioSequence.Factory.newInstance( "test", taxon ); + b.setSequence( "AAAaaaaAAAAaaa" ); + RepeatScan r = new RepeatScan(); + r.repeatScan( Collections.singleton( b ) ); + } - public final void testFraction() { - BioSequence b = 
BioSequence.Factory.newInstance(); + @Test + public void testFraction() { + Taxon taxon = Taxon.Factory.newInstance( "human" ); + BioSequence b = BioSequence.Factory.newInstance( "test", taxon ); b.setSequence( "AAAaaaaAAAAaaa" ); RepeatScan r = new RepeatScan(); double d = r.computeFractionMasked( b ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlatTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlatTest.java new file mode 100644 index 0000000000..14f3b6dd5d --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlatTest.java @@ -0,0 +1,54 @@ +package ubic.gemma.core.analysis.sequence; + +import org.junit.After; +import org.junit.Ignore; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import ubic.gemma.core.util.test.category.SlowTest; +import ubic.gemma.model.genome.Taxon; +import ubic.gemma.model.genome.biosequence.BioSequence; + +import java.io.IOException; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assume.assumeTrue; +import static ubic.gemma.core.util.test.Assumptions.assumeThatExecutableExists; + +public class ShellDelegatingBlatTest { + + private ShellDelegatingBlat sdb; + + @After + public void shutdownBlatServer() { + if ( sdb != null ) { + sdb.stopServer(); + } + } + + @Test + public void testClient() throws IOException { + sdb = new ShellDelegatingBlat(); + assumeThatExecutableExists( sdb.getGfClientExe() ); + assumeTrue( "The gfServer for human is not reachable.", sdb.isServerReachable( ShellDelegatingBlat.BlattableGenome.HUMAN, false ) ); + Taxon taxon = Taxon.Factory.newInstance( "human" ); + BioSequence bs = BioSequence.Factory.newInstance( "bs1", taxon ); + bs.setSequence( "GTCCTCGGAACCAGGACCTCGGCGTGGCCTAGCG" ); + sdb.blatQuery( bs ); + } + + @Test + @Category(SlowTest.class) + @Ignore("This works, but it is way too slow.") + public void testServer() throws IOException { + 
sdb = new ShellDelegatingBlat(); + assumeThatExecutableExists( sdb.getGfClientExe() ); + assumeThatExecutableExists( sdb.getGfServerExe() ); + // this is very slow... + sdb.startServer( ShellDelegatingBlat.BlattableGenome.HUMAN, false, true ); + assertTrue( sdb.isServerRunning() ); + Taxon taxon = Taxon.Factory.newInstance( "human" ); + BioSequence bs = BioSequence.Factory.newInstance( "bs1", taxon ); + bs.setSequence( "GTCCTCGGAACCAGGACCTCGGCGTGGCCTAGCG" ); + sdb.blatQuery( bs ); + } +} \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/service/CompositeSequenceGeneMapperServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/service/CompositeSequenceGeneMapperServiceTest.java index 690d4ec3e0..32ab89f153 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/service/CompositeSequenceGeneMapperServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/service/CompositeSequenceGeneMapperServiceTest.java @@ -69,7 +69,7 @@ public class CompositeSequenceGeneMapperServiceTest extends AbstractGeoServiceTest { private final String arrayAccession = "GPL96"; - private final Blat blat = new ShellDelegatingBlat(); + private final ShellDelegatingBlat blat = new ShellDelegatingBlat(); private final String csName = "117_at";// "218120_s_at"; private final String geneOfficialSymbol = "HSPA6";// "HMOX2"; private ArrayDesign ad = null; diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceAlignmentandMappingTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceAlignmentandMappingTest.java index 2fca9c347d..d2b044c3c2 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceAlignmentandMappingTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceAlignmentandMappingTest.java @@ -22,7 +22,6 @@ import org.junit.Test; 
import org.junit.experimental.categories.Category; import org.springframework.beans.factory.annotation.Autowired; -import ubic.basecode.util.FileTools; import ubic.gemma.core.analysis.sequence.Blat; import ubic.gemma.core.util.test.category.GoldenPathTest; import ubic.gemma.core.util.test.category.SlowTest; @@ -52,7 +51,7 @@ public final void testProcessArrayDesign() throws Exception { ad = arrayDesignService.thaw( ad ); Collection seqs = app.processArrayDesign( ad, new String[] { "testblastdb", "testblastdbPartTwo" }, - FileTools.resourceToPath( "/data/loader/genome/blast" ), true, + true, new MockFastaCmd( ad.getPrimaryTaxon() ) ); assertNotNull( seqs ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorFastacmdTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorFastacmdTest.java index b57acea5c5..2ab6d07ce9 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorFastacmdTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorFastacmdTest.java @@ -21,7 +21,6 @@ import org.junit.Test; import org.junit.experimental.categories.Category; import org.springframework.beans.factory.annotation.Autowired; -import ubic.basecode.util.FileTools; import ubic.gemma.core.loader.genome.SimpleFastaCmd; import ubic.gemma.core.loader.util.TestUtils; import ubic.gemma.core.util.test.category.SlowTest; @@ -58,7 +57,7 @@ public void testProcessArrayDesignWithFastaCmdFetch() throws Exception { // finally the real business. There are 243 sequences on the array. 
Collection res = app .processArrayDesign( ad, new String[] { "testblastdb", "testblastdbPartTwo" }, - FileTools.resourceToPath( "/data/loader/genome/blast" ), false ); + false ); if ( res != null ) { if ( res.size() == 242 ) { log.warn( @@ -87,7 +86,7 @@ public void testProcessArrayDesignWithFastaCmdFetch() throws Exception { } private boolean fastaCmdExecutableExists() { - String fastacmdExe = Settings.getString( SimpleFastaCmd.FASTA_CMD_ENV_VAR ); + String fastacmdExe = Settings.getString( SimpleFastaCmd.FASTA_CMD_CONFIG_NAME ); if ( fastacmdExe == null ) { log.warn( "No fastacmd executable is configured, skipping test" ); return false; diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorTest.java index 9ceec762b0..f3219ce1ed 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorTest.java @@ -24,7 +24,6 @@ import org.junit.Ignore; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; -import ubic.basecode.util.FileTools; import ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest; import ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal; import ubic.gemma.core.loader.expression.geo.service.GeoService; @@ -124,7 +123,7 @@ public void testAssignSequencesToDesignElementsMissingSequence() throws Exceptio @Test @Ignore("See https://github.com/PavlidisLab/Gemma/issues/1082 for details") public void testFetchAndLoadWithIdentifiers() throws Exception { - String fastacmdExe = Settings.getString( SimpleFastaCmd.FASTA_CMD_ENV_VAR ); + String fastacmdExe = Settings.getString( SimpleFastaCmd.FASTA_CMD_CONFIG_NAME ); Assume.assumeTrue( "No fastacmd executable is configured, skipping 
test.", fastacmdExe != null ); File fi = new File( fastacmdExe ); @@ -145,7 +144,7 @@ public void testFetchAndLoadWithIdentifiers() throws Exception { .getResourceAsStream( "/data/loader/expression/arrayDesign/identifierTest.txt" ) ) { Collection res = app .processArrayDesign( result, f, new String[] { "testblastdb", "testblastdbPartTwo" }, - FileTools.resourceToPath( "/data/loader/genome/blast" ), taxon, true ); + taxon, true ); assertNotNull( res ); for ( BioSequence sequence : res ) { assertNotNull( sequence.getSequence() ); @@ -158,7 +157,7 @@ public void testFetchAndLoadWithIdentifiers() throws Exception { @Test public void testFetchAndLoadWithSequences() throws Exception { - String fastacmdExe = Settings.getString( SimpleFastaCmd.FASTA_CMD_ENV_VAR ); + String fastacmdExe = Settings.getString( SimpleFastaCmd.FASTA_CMD_CONFIG_NAME ); Assume.assumeTrue( "No fastacmd executable is configured, skipping test.", fastacmdExe == null ); geoService.setGeoDomainObjectGenerator( new GeoDomainObjectGeneratorLocal( this.getTestFileBasePath() ) ); @@ -171,7 +170,7 @@ public void testFetchAndLoadWithSequences() throws Exception { try { Collection res = app .processArrayDesign( result, new String[] { "testblastdb", "testblastdbPartTwo" }, - FileTools.resourceToPath( "/data/loader/genome/blast" ), false ); + false ); assertNotNull( res ); for ( BioSequence sequence : res ) { assertNotNull( sequence.getSequence() ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/MockBlat.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/MockBlat.java index eba6df0afa..969cadf945 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/MockBlat.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/MockBlat.java @@ -1,7 +1,6 @@ package ubic.gemma.core.loader.expression.arrayDesign; import ubic.gemma.core.analysis.sequence.Blat; -import 
ubic.gemma.core.analysis.sequence.ShellDelegatingBlat.BlattableGenome; import ubic.gemma.core.util.test.PersistentDummyObjectHelper; import ubic.gemma.model.genome.Chromosome; import ubic.gemma.model.genome.PhysicalLocation; @@ -9,7 +8,6 @@ import ubic.gemma.model.genome.biosequence.BioSequence; import ubic.gemma.model.genome.sequenceAnalysis.BlatResult; -import java.io.InputStream; import java.util.*; class MockBlat implements Blat { @@ -75,69 +73,8 @@ public Map> blatQuery( Collection seq return this.blatQuery( sequences, false, t ); } - @Override - public double getBlatScoreThreshold() { - return 0; - } - @Override public void setBlatScoreThreshold( double blatScoreThreshold ) { } - - @Override - public String getGfClientExe() { - return null; - } - - @Override - public String getGfServerExe() { - return null; - } - - @Override - public String getHost() { - return null; - } - - @Override - public int getHumanServerPort() { - return 0; - } - - @Override - public int getMouseServerPort() { - return 0; - } - - @Override - public int getRatServerPort() { - return 0; - } - - @Override - public String getSeqDir() { - return null; - } - - @Override - public String getSeqFiles( BlattableGenome genome ) { - return null; - } - - @Override - public List processPsl( InputStream inputStream, Taxon t ) { - return null; - } - - @Override - public void startServer( BlattableGenome genome, int port ) { - - } - - @Override - public void stopServer( int port ) { - - } - } \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/MockFastaCmd.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/MockFastaCmd.java index 66e1bb2077..e2cce0ad99 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/MockFastaCmd.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/MockFastaCmd.java @@ -21,53 +21,31 @@ public MockFastaCmd( Taxon t ) { @Override public 
BioSequence getByAccession( String accession, String database ) { - return this.getSingle( accession, database, null ); + return this.getSingle( accession, database ); } @Override public BioSequence getByIdentifier( int identifier, String database ) { - return this.getSingle( identifier, database, null ); + return this.getSingle( identifier, database ); } @Override public Collection getBatchAccessions( Collection accessions, String database ) { - return this.getMultiple( accessions, database, null ); + return this.getMultiple( accessions, database ); } @Override public Collection getBatchIdentifiers( Collection identifiers, String database ) { - return this.getMultiple( identifiers, database, null ); - } - - @Override - public BioSequence getByAccession( String accession, String database, String blastHome ) { - return this.getSingle( accession, database, blastHome ); - } - - @Override - public BioSequence getByIdentifier( int identifier, String database, String blastHome ) { - return this.getSingle( identifier, database, blastHome ); - } - - @Override - public Collection getBatchAccessions( Collection accessions, String database, - String blastHome ) { - return this.getMultiple( accessions, database, blastHome ); - } - - @Override - public Collection getBatchIdentifiers( Collection identifiers, String database, - String blastHome ) { - return this.getMultiple( identifiers, database, blastHome ); + return this.getMultiple( identifiers, database ); } @SuppressWarnings("unused") - private BioSequence getSingle( Object accession, String database, String blastHome ) { + private BioSequence getSingle( Object accession, String database ) { return this.makeSequence( accession ); } @SuppressWarnings("unused") - private Collection getMultiple( Collection accessions, String database, String blastHome ) { + private Collection getMultiple( Collection accessions, String database ) { Collection results = new HashSet<>(); for ( Object object : accessions ) { BioSequence result = 
this.makeSequence( object ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/genome/SimpleFastaCmdTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/genome/SimpleFastaCmdTest.java index 3fb3b013df..4c68b77069 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/genome/SimpleFastaCmdTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/genome/SimpleFastaCmdTest.java @@ -18,18 +18,18 @@ */ package ubic.gemma.core.loader.genome; -import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import ubic.basecode.util.FileTools; -import ubic.gemma.model.genome.biosequence.BioSequence; import ubic.gemma.core.config.Settings; +import ubic.gemma.model.genome.biosequence.BioSequence; -import java.io.File; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collection; import static org.junit.Assert.*; +import static ubic.gemma.core.util.test.Assumptions.assumeThatExecutableExists; /** * @author pavlidis @@ -37,22 +37,26 @@ public class SimpleFastaCmdTest { private static final String TESTBLASTDB = "testblastdb"; - private String testBlastDbPath; + private static String testBlastDbPath; + + @BeforeClass + public static void checkFastaCmdExecutableExists() throws URISyntaxException { + assumeThatExecutableExists( Settings.getString( SimpleFastaCmd.FASTA_CMD_CONFIG_NAME ) ); + testBlastDbPath = FileTools.resourceToPath( "/data/loader/genome/blast" ); + } // Test may need to be disabled because it fails in continuum, sometimes (unpredictable) @Test public void testGetMultiple() { - if ( this.fastaCmdExecutableNotExists() ) { - return; - } SimpleFastaCmd fastaCmd = new SimpleFastaCmd(); + fastaCmd.setBlastHome( testBlastDbPath ); Collection input = new ArrayList<>(); input.add( 1435867 ); input.add( 1435868 ); Collection bs = fastaCmd - .getBatchIdentifiers( input, SimpleFastaCmdTest.TESTBLASTDB, testBlastDbPath ); + .getBatchIdentifiers( input, SimpleFastaCmdTest.TESTBLASTDB ); assertNotNull( 
bs ); assertEquals( 2, bs.size() ); } @@ -60,27 +64,23 @@ public void testGetMultiple() { // Test may need to be disabled because it fails in continuum, sometimes (unpredictable) @Test public void testGetMultipleAcc() { - if ( this.fastaCmdExecutableNotExists() ) { - return; - } SimpleFastaCmd fastaCmd = new SimpleFastaCmd(); + fastaCmd.setBlastHome( testBlastDbPath ); Collection input = new ArrayList<>(); input.add( "AA000002.1" ); input.add( "AA000003.1" ); Collection bs = fastaCmd - .getBatchAccessions( input, SimpleFastaCmdTest.TESTBLASTDB, testBlastDbPath ); + .getBatchAccessions( input, SimpleFastaCmdTest.TESTBLASTDB ); assertNotNull( bs ); assertEquals( 2, bs.size() ); } @Test public void testGetMultipleAccSomeNotFound() { - if ( this.fastaCmdExecutableNotExists() ) { - return; - } SimpleFastaCmd fastaCmd = new SimpleFastaCmd(); + fastaCmd.setBlastHome( testBlastDbPath ); Collection input = new ArrayList<>(); input.add( "FAKE.2" ); @@ -89,19 +89,17 @@ public void testGetMultipleAccSomeNotFound() { input.add( "AA000003.1" ); Collection bs = fastaCmd - .getBatchAccessions( input, SimpleFastaCmdTest.TESTBLASTDB, testBlastDbPath ); + .getBatchAccessions( input, SimpleFastaCmdTest.TESTBLASTDB ); + fastaCmd.setBlastHome( testBlastDbPath ); assertNotNull( bs ); assertEquals( 2, bs.size() ); } @Test public void testGetSingle() { - if ( this.fastaCmdExecutableNotExists() ) { - return; - } - SimpleFastaCmd fastaCmd = new SimpleFastaCmd(); - BioSequence bs = fastaCmd.getByIdentifier( 1435867, SimpleFastaCmdTest.TESTBLASTDB, testBlastDbPath ); + fastaCmd.setBlastHome( testBlastDbPath ); + BioSequence bs = fastaCmd.getByIdentifier( 1435867, SimpleFastaCmdTest.TESTBLASTDB ); assertNotNull( bs ); String expected = "CCACCTTTCCCTCCACTCCTCACGTTCTCACCTGTAAAGCGTCCCTCCCTCATCCCCATGCCCCCTTACCCTGCAGGGTA" + "GAGTAGGCTAGAAACCAGAGAGCTCCAAGCTCCATCTGTGGAGAGGTGCCATCCTTGGGCTGCAGAGAGAGGAGAATTTG" @@ -114,13 +112,11 @@ public void testGetSingle() { @Test public void testGetSingleAcc() { - if 
( this.fastaCmdExecutableNotExists() ) { - return; - } SimpleFastaCmd fastaCmd = new SimpleFastaCmd(); + fastaCmd.setBlastHome( testBlastDbPath ); String accession = "AA000002"; - BioSequence bs = fastaCmd.getByAccession( accession, SimpleFastaCmdTest.TESTBLASTDB, testBlastDbPath ); + BioSequence bs = fastaCmd.getByAccession( accession, SimpleFastaCmdTest.TESTBLASTDB ); assertNotNull( "fastacmd failed to find " + accession, bs ); String expected = "CCACCTTTCCCTCCACTCCTCACGTTCTCACCTGTAAAGCGTCCCTCCCTCATCCCCATGCCCCCTTACCCTGCAGGGTA" + "GAGTAGGCTAGAAACCAGAGAGCTCCAAGCTCCATCTGTGGAGAGGTGCCATCCTTGGGCTGCAGAGAGAGGAGAATTTG" @@ -133,28 +129,10 @@ public void testGetSingleAcc() { @Test public void testGetSingleAccNotFound() { - if ( this.fastaCmdExecutableNotExists() ) { - return; - } SimpleFastaCmd fastaCmd = new SimpleFastaCmd(); + fastaCmd.setBlastHome( testBlastDbPath ); - BioSequence bs = fastaCmd.getByAccession( "FAKE.1", SimpleFastaCmdTest.TESTBLASTDB, testBlastDbPath ); + BioSequence bs = fastaCmd.getByAccession( "FAKE.1", SimpleFastaCmdTest.TESTBLASTDB ); assertNull( bs ); } - - @Before - public void setup() throws URISyntaxException { - testBlastDbPath = FileTools.resourceToPath( "/data/loader/genome/blast" ); - } - - private boolean fastaCmdExecutableNotExists() { - - String fastacmdExe = Settings.getString( SimpleFastaCmd.FASTA_CMD_ENV_VAR ); - if ( fastacmdExe == null ) { - return true; - } - - File fi = new File( fastacmdExe ); - return !fi.canRead(); - } } diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/Assumptions.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/Assumptions.java index 58456dae55..9f74ff0cb9 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/Assumptions.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/Assumptions.java @@ -18,6 +18,24 @@ */ public class Assumptions { + public static void assumeThatExecutableExists( String executable ) { + if ( executable.contains( "/" ) ) { + assumeTrue( 
Files.isExecutable( Paths.get( executable ) ) ); + } else { + boolean found = false; + String pathEnv = System.getenv( "PATH" ); + if ( pathEnv != null ) { + for ( String p : pathEnv.split( ":" ) ) { + if ( Files.isExecutable( Paths.get( p, executable ) ) ) { + found = true; + break; + } + } + } + assumeTrue( "Executable " + executable + " not found in $PATH " + pathEnv + "", found ); + } + } + /** * Assume that a certain amount of memory is available. * @param jvm whether to consider the free JVM memory or the system free memory From f4a2b29ef8c5d3ba6c33736ad0869cd4e33b19ed Mon Sep 17 00:00:00 2001 From: OganM Date: Mon, 23 Jun 2025 18:47:45 -0700 Subject: [PATCH 009/129] keep default indentation for diff exp trees --- gemma-web/src/main/webapp/styles/ext-overrides.css | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gemma-web/src/main/webapp/styles/ext-overrides.css b/gemma-web/src/main/webapp/styles/ext-overrides.css index b2971068fd..578790049e 100644 --- a/gemma-web/src/main/webapp/styles/ext-overrides.css +++ b/gemma-web/src/main/webapp/styles/ext-overrides.css @@ -214,6 +214,4 @@ font: normal 12px tahoma, arial, helvetica, sans-serif; } -.x-tree-elbow-end{ - display:none -} + From 7d16343abcfb4840eb6b0efb3003b83080380ad3 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 23 Jun 2025 19:06:31 -0700 Subject: [PATCH 010/129] Remove serialVersionUID in non-serializable models --- .../analysis/expression/diff/ExpressionAnalysisResultSet.java | 1 - .../ubic/gemma/model/common/auditAndSecurity/UserGroup.java | 4 ---- .../ubic/gemma/model/expression/arrayDesign/ArrayDesign.java | 4 ---- .../model/expression/experiment/ExpressionExperiment.java | 2 -- .../main/java/ubic/gemma/model/genome/gene/GeneProduct.java | 4 ---- .../src/main/java/ubic/gemma/model/genome/gene/GeneSet.java | 1 - 6 files changed, 16 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/ExpressionAnalysisResultSet.java 
b/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/ExpressionAnalysisResultSet.java index 283a0c03a4..f35e27b537 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/ExpressionAnalysisResultSet.java +++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/ExpressionAnalysisResultSet.java @@ -35,7 +35,6 @@ */ public class ExpressionAnalysisResultSet extends FactorAssociatedAnalysisResultSet implements SecuredChild { - private static final long serialVersionUID = 7226901182513177574L; private Integer numberOfProbesTested; private Integer numberOfGenesTested; @Nullable diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/auditAndSecurity/UserGroup.java b/gemma-core/src/main/java/ubic/gemma/model/common/auditAndSecurity/UserGroup.java index 4b6997e123..6135dec5ad 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/auditAndSecurity/UserGroup.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/auditAndSecurity/UserGroup.java @@ -30,10 +30,6 @@ */ public class UserGroup extends AbstractAuditable implements gemma.gsec.model.UserGroup { - /** - * The serial version UID of this class. Needed for serialization. - */ - private static final long serialVersionUID = 5795744069086222179L; private Set groupMembers = new HashSet<>(); private Set authorities = new HashSet<>(); diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/arrayDesign/ArrayDesign.java b/gemma-core/src/main/java/ubic/gemma/model/expression/arrayDesign/ArrayDesign.java index 9cb02e2e5c..6c8df2f1e5 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/arrayDesign/ArrayDesign.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/arrayDesign/ArrayDesign.java @@ -55,10 +55,6 @@ public static ArrayDesign newInstance( String shortName, Taxon taxon ) { } } - /** - * The serial version UID of this class. Needed for serialization. 
- */ - private static final long serialVersionUID = -7566439134502613470L; private Integer advertisedNumberOfDesignElements; private Set alternateNames = new HashSet<>(); private ArrayDesign alternativeTo; // for affymetrix diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java index 6137b69e4c..7ffefbcfb4 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java @@ -48,8 +48,6 @@ public static ExpressionExperiment newInstance() { } } - private static final long serialVersionUID = -1342753625018841735L; - public static final int MAX_NAME_LENGTH = 255; @Nullable diff --git a/gemma-core/src/main/java/ubic/gemma/model/genome/gene/GeneProduct.java b/gemma-core/src/main/java/ubic/gemma/model/genome/gene/GeneProduct.java index 9841a101cf..c191c73aa3 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/genome/gene/GeneProduct.java +++ b/gemma-core/src/main/java/ubic/gemma/model/genome/gene/GeneProduct.java @@ -31,10 +31,6 @@ @Indexed public class GeneProduct extends ChromosomeFeature { - /** - * The serial version UID of this class. Needed for serialization. 
- */ - private static final long serialVersionUID = 8414732389521430535L; private String ncbiGi; private Set accessions = new java.util.HashSet<>(); /** diff --git a/gemma-core/src/main/java/ubic/gemma/model/genome/gene/GeneSet.java b/gemma-core/src/main/java/ubic/gemma/model/genome/gene/GeneSet.java index aa3e6ccb2f..a35d0cedbf 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/genome/gene/GeneSet.java +++ b/gemma-core/src/main/java/ubic/gemma/model/genome/gene/GeneSet.java @@ -36,7 +36,6 @@ @Indexed public class GeneSet extends AbstractAuditable implements SecuredNotChild { - private static final long serialVersionUID = 4357218100681569138L; private Set characteristics = new HashSet<>(); private DatabaseEntry sourceAccession; private Set literatureSources = new HashSet<>(); From d5975b4a49c7d3b6996a91e3f3aa395b4b4bfa8e Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 23 Jun 2025 19:26:27 -0700 Subject: [PATCH 011/129] Add an accession to the ExpressionExperimentSet model (fix #1428) Make it clear that a EE set can only contain other EEs. With this change, it becomes impossible to query anything else than an ExpressionExperiment in ExpressionExperimentManipulatingCLI. Perform according simplifications, mainly in the CLI module. 
--- .../ubic/gemma/apps/AffyDataFromCelCli.java | 9 +- .../DifferentialExpressionAnalysisCli.java | 5 +- .../ExpressionExperimentManipulatingCLI.java | 103 +++++------------- .../ExpressionExperimentPrimaryPubCli.java | 5 +- .../java/ubic/gemma/apps/LinkAnalysisCli.java | 5 +- .../gemma/apps/MakeExperimentPrivateCli.java | 4 +- .../gemma/apps/MakeExperimentsPublicCli.java | 4 +- .../ubic/gemma/apps/RNASeqBatchInfoCli.java | 5 +- .../ubic/gemma/apps/RNASeqDataAddCli.java | 5 +- .../gemma/apps/SingleCellDataLoaderCli.java | 5 +- .../expression/ExpressionExperimentSet.java | 27 +++-- .../persister/RelationshipPersister.java | 8 +- ...ionExperimentSetValueObjectHelperImpl.java | 6 +- .../resources/sql/migrations/db.1.32.2.sql | 3 + .../ExpressionExperimentSet.hbm.xml | 8 +- .../ExpressionExperimentSetServiceTest.java | 5 +- ...ionExperimentSetValueObjectHelperTest.java | 2 +- .../persistence/SessionListManagerImpl.java | 10 +- 18 files changed, 88 insertions(+), 131 deletions(-) create mode 100644 gemma-core/src/main/resources/sql/migrations/db.1.32.2.sql diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/AffyDataFromCelCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/AffyDataFromCelCli.java index fc3a6b3365..ac9a202f27 100644 --- a/gemma-cli/src/main/java/ubic/gemma/apps/AffyDataFromCelCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/apps/AffyDataFromCelCli.java @@ -31,7 +31,6 @@ import ubic.gemma.model.common.auditAndSecurity.eventType.FailedDataReplacedEvent; import ubic.gemma.model.common.quantitationtype.QuantitationType; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; -import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; @@ -85,12 +84,12 @@ protected void processExperimentOptions( CommandLine commandLine ) throws ParseE } @Override - protected void processBioAssaySets( Collection 
expressionExperiments ) { + protected void processExpressionExperiments( Collection expressionExperiments ) { if ( StringUtils.isNotBlank( aptFile ) ) { throw new IllegalArgumentException( "Can't use " + AffyDataFromCelCli.APT_FILE_OPT + " unless you are doing just one experiment" ); } - super.processBioAssaySets( expressionExperiments ); + super.processExpressionExperiments( expressionExperiments ); } @Override @@ -179,8 +178,8 @@ protected void processExpressionExperiment( ExpressionExperiment ee ) { } } - private boolean checkForAlreadyDone( BioAssaySet ee ) { - for ( QuantitationType qt : eeService.getQuantitationTypes( ( ExpressionExperiment ) ee ) ) { + private boolean checkForAlreadyDone( ExpressionExperiment ee ) { + for ( QuantitationType qt : eeService.getQuantitationTypes( ee ) ) { if ( qt.getIsRecomputedFromRawData() ) { return true; } diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java index 100db10176..9a5803dffb 100644 --- a/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/apps/DifferentialExpressionAnalysisCli.java @@ -32,7 +32,6 @@ import ubic.gemma.core.analysis.service.ExpressionDataFileService; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis; import ubic.gemma.model.common.auditAndSecurity.eventType.DifferentialExpressionAnalysisEvent; -import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils; import ubic.gemma.model.expression.experiment.ExperimentalFactor; import ubic.gemma.model.expression.experiment.ExpressionExperiment; @@ -195,7 +194,7 @@ protected void processExperimentOptions( CommandLine commandLine ) throws ParseE } @Override - protected void processBioAssaySets( Collection expressionExperiments ) { + protected void processExpressionExperiments( 
Collection expressionExperiments ) { if ( type != null ) { throw new IllegalArgumentException( "You can only specify the analysis type when analyzing a single experiment" ); } @@ -205,7 +204,7 @@ protected void processBioAssaySets( Collection expressionExperiment if ( !factorIdentifiers.isEmpty() ) { throw new IllegalArgumentException( "You can only specify the factors when analyzing a single experiment" ); } - super.processBioAssaySets( expressionExperiments ); + super.processExpressionExperiments( expressionExperiments ); } @Override diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentManipulatingCLI.java b/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentManipulatingCLI.java index a76a2fe40d..f7dc8449c8 100644 --- a/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentManipulatingCLI.java +++ b/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentManipulatingCLI.java @@ -38,9 +38,7 @@ import ubic.gemma.model.common.auditAndSecurity.eventType.AuditEventType; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; -import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; -import ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet; import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; @@ -365,7 +363,7 @@ protected void processExperimentOptions( CommandLine commandLine ) throws ParseE @Override protected void doAuthenticatedWork() throws Exception { // intentionally a TreeSet over IDs, to prevent proxy initialization via hashCode() - Collection expressionExperiments = new TreeSet<>( Comparator.comparing( BioAssaySet::getId ) ); + Collection expressionExperiments = new TreeSet<>( Comparator.comparing( ExpressionExperiment::getId ) ); if ( 
all ) { if ( useReferencesIfPossible ) { @@ -426,46 +424,49 @@ protected void doAuthenticatedWork() throws Exception { this.removeTroubledExperiments( expressionExperiments ); } - expressionExperiments = preprocessBioAssaySets( expressionExperiments ); + expressionExperiments = preprocessExpressionExperiments( expressionExperiments ); if ( expressionExperiments.isEmpty() ) { throw new RuntimeException( "No expression experiments matched the given options." ); } else if ( expressionExperiments.size() == 1 ) { - BioAssaySet ee = expressionExperiments.iterator().next(); + ExpressionExperiment ee = expressionExperiments.iterator().next(); log.info( "Final dataset: " + formatExperiment( ee ) ); - processBioAssaySet( expressionExperiments.iterator().next() ); + ExpressionExperiment bas = expressionExperiments.iterator().next(); + Assert.notNull( bas, "Cannot process a null ExpressionExperiment." ); + processExpressionExperiment( bas ); } else { if ( !singleExperimentOptionsUsed.isEmpty() ) { throw new IllegalStateException( String.format( "There are single-experiment options used: %s, but more than one experiments was found.", singleExperimentOptionsUsed.stream().map( o -> "-" + o ).collect( Collectors.joining( ", " ) ) ) ); } log.info( String.format( "Final list: %d expression experiments", expressionExperiments.size() ) ); - processBioAssaySets( expressionExperiments ); + processExpressionExperiments( expressionExperiments ); } } /** - * Preprocess the set of {@link BioAssaySet} before invoking {@link #processBioAssaySets(Collection)} or - * {@link #processBioAssaySet(BioAssaySet)}. + * Preprocess the set of {@link ExpressionExperiment} before invoking {@link #processExpressionExperiments(Collection)} or + * {@link #processExpressionExperiment(ExpressionExperiment)}. *

* This can be an opportunity to filter or modify the set of experiments. */ - protected Collection preprocessBioAssaySets( Collection expressionExperiments ) { + protected Collection preprocessExpressionExperiments( Collection expressionExperiments ) { return expressionExperiments; } /** - * Process multiple {@link BioAssaySet}. + * Process multiple {@link ExpressionExperiment}. *

* This only called if more than one experiment was found. */ - protected void processBioAssaySets( Collection expressionExperiments ) { + protected void processExpressionExperiments( Collection expressionExperiments ) { setEstimatedMaxTasks( expressionExperiments.size() ); - for ( BioAssaySet bas : expressionExperiments ) { + for ( ExpressionExperiment ee : expressionExperiments ) { try { - processBioAssaySet( bas ); + Assert.notNull( ee, "Cannot process a null ExpressionExperiment." ); + processExpressionExperiment( ee ); } catch ( Exception e ) { - addErrorObject( toBatchObject( bas ), e ); + addErrorObject( toBatchObject( ee ), e ); if ( abortOnError ) { throw new RuntimeException( "Aborted processing due to error.", e ); } @@ -473,24 +474,6 @@ protected void processBioAssaySets( Collection expressionExperiment } } - /** - * Process a BioAssaySet. - *

- * This method delegates to one of {@link #processExpressionExperiment(ExpressionExperiment)}, - * {@link #processExpressionExperimentSubSet(ExpressionExperimentSubSet)} or {@link #processOtherBioAssaySet(BioAssaySet)}. - * @throws Exception if an error occurs, it will be collected via {@link #addErrorObject(Serializable, String, Throwable)} - */ - protected void processBioAssaySet( BioAssaySet bas ) throws Exception { - Assert.notNull( bas, "Cannot process a null BioAssaySet." ); - if ( bas instanceof ExpressionExperiment ) { - processExpressionExperiment( ( ExpressionExperiment ) bas ); - } else if ( bas instanceof ExpressionExperimentSubSet ) { - processExpressionExperimentSubSet( ( ExpressionExperimentSubSet ) bas ); - } else { - processOtherBioAssaySet( bas ); - } - } - /** * Process an {@link ExpressionExperiment}. */ @@ -498,43 +481,18 @@ protected void processExpressionExperiment( ExpressionExperiment expressionExper throw new UnsupportedOperationException( "This command line does support experiments." ); } - /** - * Process an {@link ExpressionExperimentSubSet}. - */ - protected void processExpressionExperimentSubSet( @SuppressWarnings("unused") ExpressionExperimentSubSet expressionExperimentSubSet ) throws Exception { - throw new UnsupportedOperationException( "This command line does support experiment subsets." ); - } - - /** - * Process other kinds of {@link BioAssaySet} that are neither experiment nor subset. - */ - protected void processOtherBioAssaySet( @SuppressWarnings("unused") BioAssaySet bas ) throws Exception { - throw new UnsupportedOperationException( "This command line does support other kinds of BioAssaySet." 
); - } - - @Override protected final Serializable toBatchObject( @Nullable ExpressionExperiment object ) { - return toBatchObject( ( BioAssaySet ) object ); - } - - protected final Serializable toBatchObject( @Nullable BioAssaySet object ) { if ( object == null ) { return null; } - if ( object instanceof ExpressionExperiment ) { - if ( Hibernate.isInitialized( object ) ) { - return ( ( ExpressionExperiment ) object ).getShortName(); - } else { - return "ExpressionExperiment Id=" + object.getId(); - } - } else if ( object instanceof ExpressionExperimentSubSet ) { - return "ExpressionExperimentSubSet Id=" + object.getId(); + if ( Hibernate.isInitialized( object ) ) { + return object.getShortName(); } else { - return "BioAssaySet Id=" + object.getId(); + return "ExpressionExperiment Id=" + object.getId(); } } - private void excludeFromFile( Collection expressionExperiments, Path excludeEeFileName ) throws IOException { + private void excludeFromFile( Collection expressionExperiments, Path excludeEeFileName ) throws IOException { assert !expressionExperiments.isEmpty(); Collection excludeExperiments; excludeExperiments = this.readExpressionExperimentListFile( excludeEeFileName ); @@ -553,7 +511,7 @@ private List experimentsFromCliList( String[] identifiers return ees; } - private Set experimentsFromEeSet( String optionValue ) { + private Set experimentsFromEeSet( String optionValue ) { Assert.isTrue( StringUtils.isNotBlank( optionValue ), "Please provide an eeset name" ); ExpressionExperimentSet eeSet; try { @@ -640,7 +598,7 @@ private Collection readExpressionExperimentListFile( Path /** * Obtain EEs that are troubled. 
*/ - private void removeTroubledExperiments( Collection expressionExperiments ) { + private void removeTroubledExperiments( Collection expressionExperiments ) { if ( expressionExperiments.isEmpty() ) { log.warn( "No experiments to remove troubled from" ); return; @@ -653,14 +611,7 @@ private void removeTroubledExperiments( Collection expressionExperi AtomicInteger removedTroubledExperiments = new AtomicInteger(); expressionExperiments.removeIf( ee -> { // for subsets, check source experiment troubled flag - if ( ee instanceof ExpressionExperimentSubSet ) { - if ( troubledIds.contains( ( ( ExpressionExperimentSubSet ) ee ).getSourceExperiment().getId() ) ) { - removedTroubledExperiments.incrementAndGet(); - return true; - } else { - return false; - } - } else if ( troubledIds.contains( ee.getId() ) ) { + if ( troubledIds.contains( ee.getId() ) ) { removedTroubledExperiments.incrementAndGet(); return true; } else { @@ -750,15 +701,11 @@ protected void setAbortOnError() { *

* Use this for printing datasets if {@link #useReferencesIfPossible} is set to prevent {@link org.hibernate.LazyInitializationException}. */ - protected String formatExperiment( BioAssaySet bas ) { + protected String formatExperiment( ExpressionExperiment bas ) { if ( Hibernate.isInitialized( bas ) ) { return bas + " " + entityUrlBuilder.fromHostUrl().entity( bas ).web().toUriString(); - } else if ( bas instanceof ExpressionExperiment ) { - return "ExpressionExperiment Id=" + bas.getId() + " " + entityUrlBuilder.fromHostUrl().entity( ( ExpressionExperiment ) bas ).web().toUriString(); - } else if ( bas instanceof ExpressionExperimentSubSet ) { - return "ExpressionExperimentSubSet Id=" + bas.getId() + entityUrlBuilder.fromHostUrl().entity( ( ExpressionExperimentSubSet ) bas ).web().toUriString(); } else { - return "BioAssaySet Id=" + bas.getId(); + return "ExpressionExperiment Id=" + bas.getId() + " " + entityUrlBuilder.fromHostUrl().entity( ( ExpressionExperiment ) bas ).web().toUriString(); } } diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentPrimaryPubCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentPrimaryPubCli.java index 0b30c8220d..c0a4d9aeb9 100644 --- a/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentPrimaryPubCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/apps/ExpressionExperimentPrimaryPubCli.java @@ -29,7 +29,6 @@ import ubic.gemma.core.loader.entrez.pubmed.ExpressionExperimentBibRefFinder; import ubic.gemma.core.loader.entrez.pubmed.PubMedSearch; import ubic.gemma.model.common.description.BibliographicReference; -import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.persister.PersisterHelper; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; @@ -111,14 +110,14 @@ protected void processExperimentOptions( CommandLine commandLine ) throws ParseE Collection 
failedEe; @Override - protected void processBioAssaySets( Collection expressionExperiments ) { + protected void processExpressionExperiments( Collection expressionExperiments ) { // collect some statistics nullPubCount = new ArrayList<>(); samePubCount = new ArrayList<>(); diffPubCount = new ArrayList<>(); failedEe = new ArrayList<>(); - super.processBioAssaySets( expressionExperiments ); + super.processExpressionExperiments( expressionExperiments ); // print statistics log.info( "\n\n========== Summary ==========" ); diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/LinkAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/LinkAnalysisCli.java index 9afe94a011..757ea1c788 100644 --- a/gemma-cli/src/main/java/ubic/gemma/apps/LinkAnalysisCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/apps/LinkAnalysisCli.java @@ -42,7 +42,6 @@ import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector; import ubic.gemma.model.expression.biomaterial.BioMaterial; import ubic.gemma.model.expression.designElement.CompositeSequence; -import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; import ubic.gemma.model.genome.Taxon; @@ -391,11 +390,11 @@ protected void doAuthenticatedWork() throws Exception { } @Override - protected Collection preprocessBioAssaySets( Collection expressionExperiments ) { + protected Collection preprocessExpressionExperiments( Collection expressionExperiments ) { /* * Do in decreasing order of size, to help capture more links earlier - reduces fragmentation. */ - List sees = new ArrayList<>( expressionExperiments ); + List sees = new ArrayList<>( expressionExperiments ); if ( expressionExperiments.size() > 1 ) { log.info( "Sorting data sets by number of samples, doing large data sets first." 
); diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentPrivateCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentPrivateCli.java index 427e397cf1..f4f97dc2a2 100644 --- a/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentPrivateCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentPrivateCli.java @@ -3,7 +3,7 @@ import gemma.gsec.SecurityService; import org.springframework.beans.factory.annotation.Autowired; import ubic.gemma.model.common.auditAndSecurity.eventType.MakePrivateEvent; -import ubic.gemma.model.expression.experiment.BioAssaySet; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; public class MakeExperimentPrivateCli extends ExpressionExperimentManipulatingCLI { @@ -21,7 +21,7 @@ public String getShortDesc() { } @Override - protected void processBioAssaySet( BioAssaySet ee ) { + protected void processExpressionExperiment( ExpressionExperiment ee ) { securityService.makePrivate( ee ); this.auditTrailService.addUpdateEvent( ee, MakePrivateEvent.class, "Made private from command line" ); } diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentsPublicCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentsPublicCli.java index 7a3d524f36..6beddf0990 100644 --- a/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentsPublicCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/apps/MakeExperimentsPublicCli.java @@ -17,7 +17,7 @@ import gemma.gsec.SecurityService; import org.springframework.beans.factory.annotation.Autowired; import ubic.gemma.model.common.auditAndSecurity.eventType.MakePublicEvent; -import ubic.gemma.model.expression.experiment.BioAssaySet; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; /** * Make data sets public. You must be the owner of the experiment to do this. 
@@ -40,7 +40,7 @@ public String getShortDesc() { } @Override - protected void processBioAssaySet( BioAssaySet ee ) { + protected void processExpressionExperiment( ExpressionExperiment ee ) { securityService.makePublic( ee ); this.auditTrailService.addUpdateEvent( ee, MakePublicEvent.class, "Made public from command line" ); } diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqBatchInfoCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqBatchInfoCli.java index fb8f93f71e..a4ab7847e5 100644 --- a/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqBatchInfoCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqBatchInfoCli.java @@ -18,7 +18,6 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import ubic.gemma.core.analysis.preprocess.batcheffects.BatchInfoPopulationService; -import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import java.util.Collection; @@ -53,9 +52,9 @@ protected void buildExperimentOptions( Options options ) { } @Override - protected Collection preprocessBioAssaySets( Collection expressionExperiments ) { + protected Collection preprocessExpressionExperiments( Collection expressionExperiments ) { log.info( "Checking folders for existing experiments in " + fastqRootDir ); - return super.preprocessBioAssaySets( expressionExperiments ); + return super.preprocessExpressionExperiments( expressionExperiments ); } @Override diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqDataAddCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqDataAddCli.java index 745d1a5ecf..9f4a921805 100644 --- a/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqDataAddCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/apps/RNASeqDataAddCli.java @@ -29,7 +29,6 @@ import ubic.gemma.model.common.quantitationtype.StandardQuantitationType; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import 
ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import java.io.IOException; @@ -161,11 +160,11 @@ protected void processExperimentOptions( CommandLine commandLine ) throws ParseE } @Override - protected void processBioAssaySets( Collection bas ) { + protected void processExpressionExperiments( Collection bas ) { if ( !justbackfillLog2cpm ) { throw new IllegalArgumentException( "Sorry, can only process one experiment with this tool, unless -log2cpm is used." ); } - super.processBioAssaySets( bas ); + super.processExpressionExperiments( bas ); } @Override diff --git a/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataLoaderCli.java b/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataLoaderCli.java index 8c7d6cbc51..bcc17a6fa1 100644 --- a/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataLoaderCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/apps/SingleCellDataLoaderCli.java @@ -18,7 +18,6 @@ import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics; import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; -import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import javax.annotation.Nullable; @@ -357,11 +356,11 @@ private void rejectInvalidOptionsForDataType( CommandLine commandLine, @Nullable } @Override - protected void processBioAssaySets( Collection expressionExperiments ) { + protected void processExpressionExperiments( Collection expressionExperiments ) { if ( dataPath != null || qtName != null || cellTypeAssignmentFile != null || otherCellLevelCharacteristicsFile != null || sequencingMetadataFile != null ) { throw new IllegalArgumentException( "Cannot specify a data path, quantitation type name, cell type assignment file, cell-level characteristics file or sequencing 
metadata file when processing more than one experiment." ); } - super.processBioAssaySets( expressionExperiments ); + super.processExpressionExperiments( expressionExperiments ); } @Override diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/ExpressionExperimentSet.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/ExpressionExperimentSet.java index ba0495b6e2..93446df5dc 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/ExpressionExperimentSet.java +++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/ExpressionExperimentSet.java @@ -19,15 +19,14 @@ package ubic.gemma.model.analysis.expression; -import org.hibernate.search.annotations.DocumentId; -import org.hibernate.search.annotations.Field; -import org.hibernate.search.annotations.Indexed; -import org.hibernate.search.annotations.Store; +import org.hibernate.search.annotations.*; import ubic.gemma.model.common.auditAndSecurity.AbstractAuditable; import ubic.gemma.model.common.auditAndSecurity.Securable; -import ubic.gemma.model.expression.experiment.BioAssaySet; +import ubic.gemma.model.common.description.DatabaseEntry; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.genome.Taxon; +import javax.annotation.Nullable; import java.util.HashSet; import java.util.Set; @@ -39,8 +38,10 @@ @Indexed public class ExpressionExperimentSet extends AbstractAuditable implements Securable { + @Nullable + private DatabaseEntry accession; private Taxon taxon; - private Set experiments = new HashSet<>(); + private Set experiments = new HashSet<>(); /** * No-arg constructor added to satisfy javabean contract @@ -82,11 +83,21 @@ public String getDescription() { return super.getDescription(); } - public Set getExperiments() { + @Nullable + @IndexedEmbedded + public DatabaseEntry getAccession() { + return accession; + } + + public void setAccession( @Nullable DatabaseEntry accession ) { + this.accession = 
accession; + } + + public Set getExperiments() { return this.experiments; } - public void setExperiments( Set experiments ) { + public void setExperiments( Set experiments ) { this.experiments = experiments; } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/persister/RelationshipPersister.java b/gemma-core/src/main/java/ubic/gemma/persistence/persister/RelationshipPersister.java index 3e49274f4e..f85814815e 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/persister/RelationshipPersister.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/persister/RelationshipPersister.java @@ -23,7 +23,7 @@ import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.analysis.expression.coexpression.CoexpressionAnalysis; import ubic.gemma.model.association.Gene2GOAssociation; -import ubic.gemma.model.expression.experiment.BioAssaySet; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.analysis.expression.ExpressionExperimentSetDao; import ubic.gemma.persistence.service.analysis.expression.coexpression.CoexpressionAnalysisDao; import ubic.gemma.persistence.service.association.Gene2GOAssociationDao; @@ -61,11 +61,11 @@ protected Object doPersist( Object entity, Caches caches ) { } private ExpressionExperimentSet persistExpressionExperimentSet( ExpressionExperimentSet entity, Caches caches ) { - Collection setMembers = new HashSet<>(); + Collection setMembers = new HashSet<>(); - for ( BioAssaySet baSet : entity.getExperiments() ) { + for ( ExpressionExperiment baSet : entity.getExperiments() ) { if ( baSet.getId() == null ) { - baSet = ( BioAssaySet ) this.doPersist( baSet, caches ); + baSet = ( ExpressionExperiment ) this.doPersist( baSet, caches ); } setMembers.add( baSet ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperImpl.java 
b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperImpl.java index 9225a59481..33005730d5 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperImpl.java @@ -96,7 +96,7 @@ public ExpressionExperimentSet create( ExpressionExperimentSetValueObject eesvo newSet.setName( eesvo.getName() ); newSet.setDescription( eesvo.getDescription() ); - Collection datasetsAnalyzed = expressionExperimentService.load( + Collection datasetsAnalyzed = expressionExperimentService.load( eesvo.getExpressionExperimentIds() ); newSet.getExperiments().addAll( datasetsAnalyzed ); @@ -205,7 +205,7 @@ public void updateMembers( Long groupId, Collection eeIds ) { } assert newExperiments.size() == eeIds.size(); - Collection basColl = new HashSet<>(); + Collection basColl = new HashSet<>(); for ( ExpressionExperiment experiment : newExperiments ) { Taxon eeTaxon = expressionExperimentService.getTaxon( experiment ); @@ -259,7 +259,7 @@ public ExpressionExperimentSet convertToEntity( ExpressionExperimentSetValueObje "The value object must have some experiments associated before it can be converted and persisted" ); } - Set bas = new HashSet( experiments ); + Set bas = new HashSet<>( experiments ); entity.setExperiments( bas ); entity.setName( setVO.getName() ); diff --git a/gemma-core/src/main/resources/sql/migrations/db.1.32.2.sql b/gemma-core/src/main/resources/sql/migrations/db.1.32.2.sql new file mode 100644 index 0000000000..8f1defa5b6 --- /dev/null +++ b/gemma-core/src/main/resources/sql/migrations/db.1.32.2.sql @@ -0,0 +1,3 @@ +alter table EXPRESSION_EXPERIMENT_SET add column ACCESSION_FK BIGINT; +alter table EXPRESSION_EXPERIMENT_SET add constraint ACCESSION_FK unique (ACCESSION_FK); +alter table 
EXPRESSION_EXPERIMENT_SET add index EXPRESSION_EXPERIMENT_SET_ACCESSION_FKC (ACCESSION_FK), add constraint EXPRESSION_EXPERIMENT_SET_ACCESSION_FKC foreign key (ACCESSION_FK) references DATABASE_ENTRY (ID); diff --git a/gemma-core/src/main/resources/ubic/gemma/model/analysis/expression/ExpressionExperimentSet.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/analysis/expression/ExpressionExperimentSet.hbm.xml index e5e783a668..a6ccaa404c 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/analysis/expression/ExpressionExperimentSet.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/analysis/expression/ExpressionExperimentSet.hbm.xml @@ -22,15 +22,19 @@ + + + - + - + diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetServiceTest.java index dea2bcb921..c4917b1d56 100755 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetServiceTest.java @@ -25,7 +25,6 @@ import org.springframework.beans.factory.annotation.Autowired; import ubic.gemma.core.util.test.BaseSpringContextTest; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; -import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentSetValueObject; import ubic.gemma.model.genome.Taxon; @@ -104,7 +103,7 @@ public void testUpdate() { String newName = "newName"; String newDesc = "newDesc"; - Set newMembers = new HashSet<>(); + Set newMembers = new HashSet<>(); newMembers.add( ee1 ); eeSet.setName( newName ); @@ -131,7 +130,7 @@ public void testUpdate() { @Test(expected = Exception.class) public 
void testAddingExperimentOfWrongTaxonUpdate() { - Set newMembers = new HashSet<>(); + Set newMembers = new HashSet<>(); newMembers.add( ee1 ); newMembers.add( eeMouse ); eeSet.setExperiments( newMembers ); diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperTest.java index b9bce4291b..c599a46d5d 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperTest.java @@ -64,7 +64,7 @@ public void setUp() throws Exception { Taxon tax1 = this.getTaxon( "human" ); ee = this.getTestPersistentExpressionExperiment( tax1 ); - Collection ees = new HashSet<>(); + Collection ees = new HashSet<>(); ees.add( ee ); eeSet = ExpressionExperimentSet.Factory.newInstance(); diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/persistence/SessionListManagerImpl.java b/gemma-web/src/main/java/ubic/gemma/web/controller/persistence/SessionListManagerImpl.java index ede40d84e9..8245585479 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/persistence/SessionListManagerImpl.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/persistence/SessionListManagerImpl.java @@ -22,16 +22,16 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import ubic.gemma.core.analysis.report.ExpressionExperimentReportService; -import ubic.gemma.model.genome.gene.SessionBoundGeneSetValueObject; -import ubic.gemma.persistence.service.genome.gene.GeneSetService; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; -import ubic.gemma.model.expression.experiment.BioAssaySet; +import 
ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentDetailsValueObject; import ubic.gemma.model.expression.experiment.SessionBoundExpressionExperimentSetValueObject; import ubic.gemma.model.genome.gene.GeneSet; import ubic.gemma.model.genome.gene.GeneValueObject; +import ubic.gemma.model.genome.gene.SessionBoundGeneSetValueObject; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetService; +import ubic.gemma.persistence.service.genome.gene.GeneSetService; import java.util.ArrayList; import java.util.Collection; @@ -126,9 +126,9 @@ public Collection getAllGeneSets( Long taxonId ) @Override public Collection getExperimentIdsInSet( Long id ) { ExpressionExperimentSet eeSet = expressionExperimentSetService.loadOrFail( id ); // secure - Collection datasets = eeSet.getExperiments(); // Not secure. + Collection datasets = eeSet.getExperiments(); // Not secure. 
Collection eeids = new HashSet<>(); - for ( BioAssaySet ee : datasets ) { + for ( ExpressionExperiment ee : datasets ) { eeids.add( ee.getId() ); } return eeids; From 75cd204dbbe6ceaa6d35de0924d431ab24c7dfae Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 24 Jun 2025 10:02:35 -0700 Subject: [PATCH 012/129] Treat Jersey ParamException as a warning --- .../java/ubic/gemma/rest/providers/RequestExceptionLogger.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/providers/RequestExceptionLogger.java b/gemma-rest/src/main/java/ubic/gemma/rest/providers/RequestExceptionLogger.java index 9309a2b462..8afca26138 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/providers/RequestExceptionLogger.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/providers/RequestExceptionLogger.java @@ -2,6 +2,7 @@ import lombok.extern.apachecommons.CommonsLog; import org.glassfish.jersey.server.ContainerRequest; +import org.glassfish.jersey.server.ParamException; import org.glassfish.jersey.server.monitoring.ApplicationEvent; import org.glassfish.jersey.server.monitoring.ApplicationEventListener; import org.glassfish.jersey.server.monitoring.RequestEvent; @@ -32,6 +33,8 @@ public RequestEventListener onRequest( RequestEvent requestEvent ) { m = "Exception was raised, but there is no current request."; } if ( event.getException() instanceof ClientErrorException + // these should be treated as 400 errors, but they do not inherit from BadRequestException + || event.getException() instanceof ParamException || event.getException() instanceof ServiceUnavailableException ) { log.warn( m, event.getException() ); } else { From fd4e7f0447579291e240331c9e61f1f49fa88e9a Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 24 Jun 2025 10:08:49 -0700 Subject: [PATCH 013/129] Add minimal tests for ExpressionExperimentSet accession --- .../ExpressionExperimentSetDao.java | 5 ++ .../ExpressionExperimentSetDaoImpl.java | 21 
++++++ .../ExpressionExperimentSetDaoTest.java | 71 +++++++++++++++++++ 3 files changed, 97 insertions(+) create mode 100644 gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoTest.java diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDao.java index 481de9638d..253a6f6d62 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDao.java @@ -19,6 +19,7 @@ package ubic.gemma.persistence.service.analysis.expression; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; +import ubic.gemma.model.common.description.ExternalDatabase; import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentDetailsValueObject; @@ -43,6 +44,10 @@ public interface ExpressionExperimentSetDao Collection findByName( String name ); + Collection findByAccession( String accession ); + + Collection findByAccession( String accession, ExternalDatabase externalDatabase ); + /** * @param id id * @return the security-filtered list of experiments in a set. 
It is possible for the return to be empty even if the set diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoImpl.java index 0d7a4d6f4c..e35576c1bf 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoImpl.java @@ -26,6 +26,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; +import ubic.gemma.model.common.description.ExternalDatabase; import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentDetailsValueObject; @@ -69,6 +70,26 @@ public Collection findByName( final String name ) { return this.findByProperty( "name", name ); } + @Override + public Collection findByAccession( String accession ) { + //noinspection unchecked + return getSessionFactory().getCurrentSession() + .createQuery( "select ees from ExpressionExperimentSet ees join ees.accession accession where accession.accession = :accession" ) + .setParameter( "accession", accession ) + .list(); + } + + @Override + public Collection findByAccession( String accession, ExternalDatabase externalDatabase ) { + //noinspection unchecked + return getSessionFactory().getCurrentSession() + .createQuery( "select ees from ExpressionExperimentSet ees join ees.accession accession " + + "where accession.accession = :accession and accession.externalDatabase = :ed" ) + .setParameter( "accession", accession ) + .setParameter( "ed", externalDatabase ) + .list(); + } + @Override public Collection 
getExperimentsInSet( Long id ) { //noinspection unchecked diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoTest.java new file mode 100644 index 0000000000..bf08ded51f --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoTest.java @@ -0,0 +1,71 @@ +package ubic.gemma.persistence.service.analysis.expression; + +import org.hibernate.SessionFactory; +import org.hibernate.exception.ConstraintViolationException; +import org.junit.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.test.context.ContextConfiguration; +import ubic.gemma.core.context.TestComponent; +import ubic.gemma.core.util.test.BaseDatabaseTest; +import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; +import ubic.gemma.model.common.description.DatabaseEntry; +import ubic.gemma.model.common.description.ExternalDatabase; +import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentDao; +import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentDaoImpl; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.Assert.assertNotNull; + +@ContextConfiguration +public class ExpressionExperimentSetDaoTest extends BaseDatabaseTest { + + @Configuration + @TestComponent + static class CC extends BaseDatabaseTestContextConfiguration { + + @Bean + public ExpressionExperimentSetDao expressionExperimentSetDao( SessionFactory sessionFactory, ExpressionExperimentDao expressionExperimentDao ) { + return new ExpressionExperimentSetDaoImpl( sessionFactory, 
expressionExperimentDao ); + } + + @Bean + public ExpressionExperimentDao expressionExperimentDao( SessionFactory sessionFactory ) { + return new ExpressionExperimentDaoImpl( sessionFactory ); + } + } + + @Autowired + private ExpressionExperimentSetDao expressionExperimentSetDao; + + /** + * This is an example of modeling a GEO super-series. + */ + @Test + public void testCreateSetWithAccession() { + ExternalDatabase geo = new ExternalDatabase(); + geo.setName( "GEO" ); + sessionFactory.getCurrentSession().persist( geo ); + ExpressionExperimentSet expressionExperimentSet = new ExpressionExperimentSet(); + expressionExperimentSet.setAccession( DatabaseEntry.Factory.newInstance( "GSE000123", geo ) ); + expressionExperimentSet = expressionExperimentSetDao.create( expressionExperimentSet ); + assertNotNull( expressionExperimentSet.getId() ); + assertNotNull( expressionExperimentSet.getAccession() ); + assertNotNull( expressionExperimentSet.getAccession().getId() ); + assertThat( expressionExperimentSetDao.findByAccession( "GSE000123" ) ) + .contains( expressionExperimentSet ); + assertThat( expressionExperimentSetDao.findByAccession( "GSE000123", geo ) ) + .contains( expressionExperimentSet ); + sessionFactory.getCurrentSession().flush(); + + // violates the unique key on the accession + ExpressionExperimentSet set2 = new ExpressionExperimentSet(); + set2.setAccession( expressionExperimentSet.getAccession() ); + assertThatThrownBy( () -> expressionExperimentSetDao.create( set2 ) ) + .isInstanceOf( ConstraintViolationException.class ); + sessionFactory.getCurrentSession().clear(); + } + +} \ No newline at end of file From 19afdaba6165ae0b952621e342670a90318806c7 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 24 Jun 2025 11:56:18 -0700 Subject: [PATCH 014/129] Allow gene-cs statistics to use dummy products Fix #1424. 
--- .../CompositeSequenceGeneMapperService.java | 9 +- .../persistence/service/genome/GeneDao.java | 35 +++++- .../service/genome/GeneDaoImpl.java | 104 +++++++++++------- .../service/genome/gene/GeneService.java | 29 +++-- .../service/genome/gene/GeneServiceImpl.java | 33 ++++-- .../diff/DiffExMetaAnalyzerServiceTest.java | 8 +- ...ompositeSequenceGeneMapperServiceTest.java | 3 +- .../service/genome/GeneDaoTest.java | 30 ++++- .../CompositeSequenceController.java | 2 +- 9 files changed, 172 insertions(+), 81 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/service/CompositeSequenceGeneMapperService.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/service/CompositeSequenceGeneMapperService.java index a08df19993..4da918288d 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/service/CompositeSequenceGeneMapperService.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/service/CompositeSequenceGeneMapperService.java @@ -22,10 +22,10 @@ import org.apache.commons.logging.LogFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; -import ubic.gemma.persistence.service.genome.gene.GeneService; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.genome.Gene; +import ubic.gemma.persistence.service.genome.gene.GeneService; import java.util.Collection; import java.util.HashSet; @@ -60,13 +60,10 @@ public LinkedHashMap> getGene2ProbeMapByOffi log.debug( "official symbol: " + officialSymbol ); Collection genes = genesMap.get( officialSymbol ); for ( Gene g : genes ) { - Collection compositeSequences = geneService.getCompositeSequencesById( g.getId() ); + Collection compositeSequences = geneService.getCompositeSequences( g, true ); for ( CompositeSequence sequence : compositeSequences ) { if ( arrayDesigns.contains( sequence.getArrayDesign() ) ) { - if ( 
compositeSequencesForGeneMap.get( g ) == null ) { - compositeSequencesForGeneMap.put( g, new HashSet() ); - } - compositeSequencesForGeneMap.get( g ).add( sequence ); + compositeSequencesForGeneMap.computeIfAbsent( g, k -> new HashSet<>() ).add( sequence ); } } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDao.java index a9a8e821fb..676083b15a 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDao.java @@ -93,14 +93,35 @@ public interface GeneDao extends FilteringVoEnabledDao { Collection findByPhysicalLocation( PhysicalLocation location ); /** - * @param id id - * @return how many platform elements (e.g. probes) represent this gene, totalled up over all platforms. + * Obtain the number of platform elements (e.g. probes) associated to this gene, totalled up over all platforms. + *

+ * Note that ACLs are applied to the platforms. + * @param includeDummyProducts if true, include platform elements related via dummy {@link ubic.gemma.model.genome.gene.GeneProduct}s + */ + long getCompositeSequenceCount( Gene gene, boolean includeDummyProducts ); + + /** + * @see #getCompositeSequences(Gene, boolean) */ - long getCompositeSequenceCountById( long id ); + long getCompositeSequenceCountById( long id, boolean includeDummyProducts ); - Collection getCompositeSequences( Gene gene, ArrayDesign arrayDesign ); + /** + * Get the composite sequences (e.g. probes) associated with this gene via a particular platform. + * @param arrayDesign platform to restrict composite sequences to + * @param includeDummyProducts if true, include platform elements related via dummy {@link ubic.gemma.model.genome.gene.GeneProduct}s + */ + Collection getCompositeSequences( Gene gene, ArrayDesign arrayDesign, boolean includeDummyProducts ); - Collection getCompositeSequencesById( long id ); + /** + * Get the composite sequences (e.g. probes) associated with this gene, for any platform. 
+ * @param includeDummyProducts if true, include platform elements related via dummy {@link ubic.gemma.model.genome.gene.GeneProduct}s + */ + Collection getCompositeSequences( Gene gene, boolean includeDummyProducts ); + + /** + * @see #getCompositeSequences(Gene, boolean) + */ + Collection getCompositeSequencesById( long id, boolean includeDummyProducts ); /** * @param taxon taxon @@ -110,9 +131,11 @@ public interface GeneDao extends FilteringVoEnabledDao { /** * @param id id + * @param includeDummyProducts include platforms related via dummy {@link ubic.gemma.model.genome.gene.GeneProduct}s + * in the count * @return how many platforms have a representation of this gene */ - int getPlatformCountById( Long id ); + long getPlatformCountById( Long id, boolean includeDummyProducts ); /** * @param taxon taxon diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDaoImpl.java index e132daea59..2bf68b2284 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDaoImpl.java @@ -219,63 +219,86 @@ public Collection findByPhysicalLocation( final PhysicalLocation location .setParameter( "location", location ).list(); } + @Override + public long getCompositeSequenceCount( Gene gene, boolean includeDummyProducts ) { + Query query = this.getSessionFactory().getCurrentSession() + .createQuery( "select count(distinct cs) from GeneProduct gp, BioSequence2GeneProduct as bs2gp, CompositeSequence as cs " + + AclQueryUtils.formAclRestrictionClause( "cs.arrayDesign.id" ) + " " + + "and gp = bs2gp.geneProduct " + + "and cs.biologicalCharacteristic=bs2gp.bioSequence " + + "and gp.gene = :gene" + + ( includeDummyProducts ? 
"" : " and gp.dummy = false" ) ) + .setParameter( "gene", gene ); + AclQueryUtils.addAclParameters( query, ArrayDesign.class ); + return ( Long ) query.uniqueResult(); + } + /** * Gets a count of the CompositeSequences related to the gene identified by the given id. * * @return Collection */ @Override - public long getCompositeSequenceCountById( long id ) { - //language=HQL - final String queryString = - "select count(distinct cs) from Gene as gene inner join gene.products gp, BioSequence2GeneProduct" - + " as bs2gp, CompositeSequence as cs where gp=bs2gp.geneProduct " - + " and cs.biologicalCharacteristic=bs2gp.bioSequence " + " and gene.id = :id "; - return ( Long ) this.getSessionFactory().getCurrentSession() - .createQuery( queryString ) - .setParameter( "id", id ) - .uniqueResult(); + public long getCompositeSequenceCountById( long id, boolean includeDummyProducts ) { + Query query = this.getSessionFactory().getCurrentSession() + .createQuery( "select count(distinct cs) from GeneProduct gp, BioSequence2GeneProduct as bs2gp, CompositeSequence as cs " + + AclQueryUtils.formAclRestrictionClause( "cs.arrayDesign.id" ) + " " + + "and gp = bs2gp.geneProduct " + + "and cs.biologicalCharacteristic=bs2gp.bioSequence " + + "and gp.gene.id = :id" + + ( includeDummyProducts ? 
"" : " and gp.dummy = false" ) ) + .setParameter( "id", id ); + AclQueryUtils.addAclParameters( query, ArrayDesign.class ); + return ( Long ) query.uniqueResult(); } @Override - public Collection getCompositeSequences( Gene gene, ArrayDesign arrayDesign ) { - //language=HQL - final String queryString = "select cs from Gene as gene " - + "inner join gene.products gp, " - + "BioSequence2GeneProduct as bs2gp, " - + "CompositeSequence as cs " - + "where gp=bs2gp.geneProduct " - + "and cs.biologicalCharacteristic=bs2gp.bioSequence " - + "and gene = :gene " - + "and cs.arrayDesign = :arrayDesign " - + "group by cs"; - + public Collection getCompositeSequences( Gene gene, ArrayDesign arrayDesign, boolean includeDummyProducts ) { //noinspection unchecked return this.getSessionFactory().getCurrentSession() - .createQuery( queryString ) + .createQuery( "select cs from GeneProduct as gp, BioSequence2GeneProduct as bs2gp, CompositeSequence as cs " + + "where gp = bs2gp.geneProduct " + + "and cs.biologicalCharacteristic=bs2gp.bioSequence " + + "and cs.arrayDesign = :arrayDesign " + + "and gp.gene = :gene " + + ( !includeDummyProducts ? " and gp.dummy = false " : "" ) + + "group by cs" ) .setParameter( "arrayDesign", arrayDesign ) .setParameter( "gene", gene ) .list(); } + @Override + public Collection getCompositeSequences( Gene gene, boolean includeDummyProducts ) { + //noinspection unchecked + return ( List ) this.getSessionFactory().getCurrentSession() + // important note: + .createQuery( "select cs from GeneProduct as gp, BioSequence2GeneProduct as bs2gp, CompositeSequence as cs " + + "where gp = bs2gp.geneProduct " + + "and cs.biologicalCharacteristic = bs2gp.bioSequence " + + "and gp.gene = :gene " + + ( !includeDummyProducts ? " and gp.dummy = false " : "" ) + + "group by cs" ) + .setParameter( "gene", gene ) + .list(); + } + /** * Gets all the CompositeSequences related to the gene identified by the given id. 
* * @return Collection */ @Override - public Collection getCompositeSequencesById( long id ) { - //language=HQL - final String queryString = - "select cs from Gene as gene inner join gene.products as gp, " - + "BioSequence2GeneProduct as bs2gp, " - + "CompositeSequence as cs " - + "where gp=bs2gp.geneProduct " - + "and cs.biologicalCharacteristic=bs2gp.bioSequence " - + "and gene.id = :id " - + "group by cs"; + public Collection getCompositeSequencesById( long id, boolean includeDummyProducts ) { //noinspection unchecked - return this.getSessionFactory().getCurrentSession().createQuery( queryString ) + return ( List ) this.getSessionFactory().getCurrentSession() + // important note: + .createQuery( "select cs from GeneProduct as gp, BioSequence2GeneProduct as bs2gp, CompositeSequence as cs " + + "where gp = bs2gp.geneProduct " + + "and cs.biologicalCharacteristic = bs2gp.bioSequence " + + "and gp.gene.id = :id " + + ( !includeDummyProducts ? " and gp.dummy = false " : "" ) + + "group by cs" ) .setParameter( "id", id ) .list(); } @@ -290,16 +313,17 @@ public Collection getMicroRnaByTaxon( Taxon taxon ) { } @Override - public int getPlatformCountById( Long id ) { + public long getPlatformCountById( Long id, boolean includeDummyProducts ) { //language=HQL final String queryString = - "select count(distinct cs.arrayDesign) from Gene as gene inner join gene.products gp, BioSequence2GeneProduct" - + " as bs2gp, CompositeSequence as cs where gp=bs2gp.geneProduct " - + " and cs.biologicalCharacteristic=bs2gp.bioSequence " + " and gene.id = :id "; - return ( ( Long ) this.getSessionFactory().getCurrentSession() + "select count(distinct cs.arrayDesign) from GeneProduct gp, BioSequence2GeneProduct as bs2gp, CompositeSequence as cs " + + "where gp=bs2gp.geneProduct " + + "and cs.biologicalCharacteristic=bs2gp.bioSequence " + + "and gp.gene.id = :id "; + return ( Long ) this.getSessionFactory().getCurrentSession() .createQuery( queryString ) .setParameter( "id", id ) - 
.uniqueResult() ).intValue(); + .uniqueResult(); } @Override diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneService.java index 7d75a9e1f4..edbe3b7f03 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneService.java @@ -112,24 +112,33 @@ public interface GeneService extends BaseService, FilteringVoEnabledServic Collection findGOTerms( Long geneId ); - long getCompositeSequenceCountById( Long id ); + /** + * @see ubic.gemma.persistence.service.genome.GeneDao#getCompositeSequenceCount(Gene, boolean) + */ + long getCompositeSequenceCount( Gene gene, boolean includeDummyProducts ); /** - * Returns a list of compositeSequences associated with the given gene and array design - * - * @param gene gene - * @param arrayDesign platform - * @return composite sequences + * @see ubic.gemma.persistence.service.genome.GeneDao#getCompositeSequenceCountById(long, boolean) + */ + long getCompositeSequenceCountById( Long id, boolean includeDummyProducts ); + + /** + * @see ubic.gemma.persistence.service.genome.GeneDao#getCompositeSequences(Gene, ArrayDesign, boolean) + */ + @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_ARRAYDESIGN_COLLECTION_READ" }) + Collection getCompositeSequences( Gene gene, ArrayDesign arrayDesign, boolean includeDummyProducts ); + + /** + * @see ubic.gemma.persistence.service.genome.GeneDao#getCompositeSequencesById(long, boolean) */ @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_ARRAYDESIGN_COLLECTION_READ" }) - Collection getCompositeSequences( Gene gene, ArrayDesign arrayDesign ); + Collection getCompositeSequences( Gene gene, boolean includeDummyProducts ); /** - * @param id Gemma gene id - * @return Return probes for a given gene id. 
+ * @see ubic.gemma.persistence.service.genome.GeneDao#getCompositeSequencesById(long, boolean) */ @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_ARRAYDESIGN_COLLECTION_READ" }) - Collection getCompositeSequencesById( Long id ); + Collection getCompositeSequencesById( Long geneId, boolean includeDummyProducts ); List getPhysicalLocationsValueObjects( Gene gene ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneServiceImpl.java index 3633b13130..3058287b4d 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneServiceImpl.java @@ -33,7 +33,6 @@ import ubic.gemma.model.association.Gene2GOAssociation; import ubic.gemma.model.association.coexpression.GeneCoexpressionNodeDegreeValueObject; import ubic.gemma.model.common.description.AnnotationValueObject; -import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.model.common.description.ExternalDatabase; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; @@ -207,20 +206,32 @@ public Collection findGOTerms( Long geneId ) { @Override @Transactional(readOnly = true) - public long getCompositeSequenceCountById( final Long id ) { - return this.geneDao.getCompositeSequenceCountById( id ); + public long getCompositeSequenceCount( Gene gene, boolean includeDummyProducts ) { + return this.geneDao.getCompositeSequenceCount( gene, includeDummyProducts ); } @Override @Transactional(readOnly = true) - public Collection getCompositeSequences( final Gene gene, final ArrayDesign arrayDesign ) { - return this.geneDao.getCompositeSequences( gene, arrayDesign ); + public long getCompositeSequenceCountById( final Long id, boolean includeDummyProducts ) { + return 
this.geneDao.getCompositeSequenceCountById( id, includeDummyProducts ); } @Override @Transactional(readOnly = true) - public Collection getCompositeSequencesById( final Long id ) { - return this.geneDao.getCompositeSequencesById( id ); + public Collection getCompositeSequences( final Gene gene, final ArrayDesign arrayDesign, boolean includeDummyProducts ) { + return this.geneDao.getCompositeSequences( gene, arrayDesign, includeDummyProducts ); + } + + @Override + @Transactional(readOnly = true) + public Collection getCompositeSequences( final Gene gene, boolean includeDummyProducts ) { + return this.geneDao.getCompositeSequences( gene, includeDummyProducts ); + } + + @Override + @Transactional(readOnly = true) + public Collection getCompositeSequencesById( Long geneId, boolean includeDummyProducts ) { + return this.geneDao.getCompositeSequencesById( geneId, includeDummyProducts ); } @Override @@ -306,11 +317,11 @@ public GeneValueObject loadFullyPopulatedValueObject( Long id ) { gvo.setMultifunctionalityRank( gene.getMultifunctionality().getRank() ); } - Long compositeSequenceCount = this.getCompositeSequenceCountById( id ); - gvo.setCompositeSequenceCount( compositeSequenceCount.intValue() ); + long compositeSequenceCount = this.getCompositeSequenceCountById( id, true ); + gvo.setCompositeSequenceCount( ( int ) compositeSequenceCount ); - Integer platformCount = this.geneDao.getPlatformCountById( id ); - gvo.setPlatformCount( platformCount ); + long platformCount = this.geneDao.getPlatformCountById( id, true ); + gvo.setPlatformCount( ( int ) platformCount ); Collection geneSets = this.geneSetSearch.findByGene( gene ); Collection gsVos = new ArrayList<>(); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/DiffExMetaAnalyzerServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/DiffExMetaAnalyzerServiceTest.java index e602e6db16..f5844f8b65 100644 --- 
a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/DiffExMetaAnalyzerServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/DiffExMetaAnalyzerServiceTest.java @@ -22,7 +22,6 @@ import org.junit.experimental.categories.Category; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.io.ClassPathResource; -import ubic.gemma.persistence.service.genome.gene.GeneService; import ubic.gemma.core.loader.expression.arrayDesign.ArrayDesignProbeMapperService; import ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest; import ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal; @@ -41,7 +40,6 @@ import ubic.gemma.model.expression.experiment.FactorValue; import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.Taxon; -import ubic.gemma.persistence.service.maintenance.TableMaintenanceUtil; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionResultService; import ubic.gemma.persistence.service.analysis.expression.diff.GeneDiffExMetaAnalysisService; @@ -51,6 +49,8 @@ import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService; import ubic.gemma.persistence.service.expression.experiment.ExperimentalFactorService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; +import ubic.gemma.persistence.service.genome.gene.GeneService; +import ubic.gemma.persistence.service.maintenance.TableMaintenanceUtil; import ubic.gemma.persistence.util.IdentifiableUtils; import java.io.File; @@ -448,10 +448,10 @@ private void extraTests1( ExpressionExperiment ds1 ) { assertTrue( !geneCollection.isEmpty() ); Gene g = geneCollection.iterator().next(); assertNotNull( g ); - long count = geneService.getCompositeSequenceCountById( g.getId() ); + long count = 
geneService.getCompositeSequenceCount( g, true ); assertTrue( count != 0 ); - Collection compSequences = geneService.getCompositeSequencesById( g.getId() ); + Collection compSequences = geneService.getCompositeSequences( g, true ); assertTrue( compSequences.size() != 0 ); Collection collection = compositeSequenceService.findByGene( g ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/service/CompositeSequenceGeneMapperServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/service/CompositeSequenceGeneMapperServiceTest.java index 32ab89f153..7373a52124 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/service/CompositeSequenceGeneMapperServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/service/CompositeSequenceGeneMapperServiceTest.java @@ -26,7 +26,6 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.core.io.ClassPathResource; import ubic.basecode.util.FileTools; -import ubic.gemma.core.analysis.sequence.Blat; import ubic.gemma.core.analysis.sequence.ShellDelegatingBlat; import ubic.gemma.core.loader.expression.arrayDesign.ArrayDesignProbeMapperService; import ubic.gemma.core.loader.expression.arrayDesign.ArrayDesignProbeMapperServiceImpl; @@ -150,7 +149,7 @@ public void testGetCompositeSequencesByGeneId() { Gene g = genes.iterator().next(); - Collection compositeSequences = geneService.getCompositeSequencesById( g.getId() ); + Collection compositeSequences = geneService.getCompositeSequences( g, true ); assertNotNull( compositeSequences ); assertEquals( compositeSequences.size(), 1 ); diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/genome/GeneDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/genome/GeneDaoTest.java index bb08c0ffac..80004d6b81 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/genome/GeneDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/genome/GeneDaoTest.java @@ 
-5,15 +5,21 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.security.test.context.support.WithMockUser; +import org.springframework.security.test.context.support.WithSecurityContextTestExecutionListener; import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.TestExecutionListeners; +import ubic.gemma.core.context.TestComponent; import ubic.gemma.core.util.test.BaseDatabaseTest; +import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.genome.Gene; +import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.gene.GeneProduct; -import ubic.gemma.core.context.TestComponent; import static org.assertj.core.api.Assertions.assertThat; @ContextConfiguration +@TestExecutionListeners(WithSecurityContextTestExecutionListener.class) public class GeneDaoTest extends BaseDatabaseTest { @Configuration @@ -29,6 +35,28 @@ public GeneDao geneDao( SessionFactory sessionFactory ) { @Autowired private GeneDao geneDao; + @Test + @WithMockUser // needed for in-query ACL checks + public void testGetCompositeSequences() { + Taxon taxon = Taxon.Factory.newInstance(); + sessionFactory.getCurrentSession().persist( taxon ); + ArrayDesign ad = ArrayDesign.Factory.newInstance( "test", taxon ); + sessionFactory.getCurrentSession().persist( ad ); + Gene g = geneDao.create( Gene.Factory.newInstance() ); + assertThat( geneDao.getCompositeSequences( g, true ) ).isEmpty(); + assertThat( geneDao.getCompositeSequences( g, false ) ).isEmpty(); + assertThat( geneDao.getCompositeSequences( g, ad, true ) ).isEmpty(); + assertThat( geneDao.getCompositeSequences( g, ad, false ) ).isEmpty(); + assertThat( geneDao.getCompositeSequencesById( g.getId(), true ) ).isEmpty(); + assertThat( geneDao.getCompositeSequencesById( g.getId(), false ) ).isEmpty(); + assertThat( 
geneDao.getCompositeSequenceCount( g, true ) ).isZero(); + assertThat( geneDao.getCompositeSequenceCount( g, false ) ).isZero(); + assertThat( geneDao.getCompositeSequenceCountById( g.getId(), true ) ).isZero(); + assertThat( geneDao.getCompositeSequenceCountById( g.getId(), false ) ).isZero(); + assertThat( geneDao.getCompositeSequenceCountById( g.getId(), true ) ).isZero(); + assertThat( geneDao.getCompositeSequenceCountById( g.getId(), false ) ).isZero(); + } + @Test public void testRemove() { Gene g = geneDao.create( Gene.Factory.newInstance() ); diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/designElement/CompositeSequenceController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/designElement/CompositeSequenceController.java index 5eff38c8fd..81fa010205 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/designElement/CompositeSequenceController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/designElement/CompositeSequenceController.java @@ -130,7 +130,7 @@ public Collection getGeneCsSummaries( Long gene throw new IllegalArgumentException( "Gene ID must not be null" ); } - Collection compositeSequences = geneService.getCompositeSequencesById( geneId ); + Collection compositeSequences = geneService.getCompositeSequencesById( geneId, true ); Collection rawSummaries = compositeSequenceService.getRawSummary( compositeSequences ); if ( rawSummaries == null || rawSummaries.isEmpty() ) { From d2df236282eda74127a3122a7205d68ff318c7cb Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 24 Jun 2025 12:18:24 -0700 Subject: [PATCH 015/129] Properly print commands that we are about to execute Relocate ShellUtils from gemma-cli in gemma-core so that we can use it for quoting and printing command lines. 
--- .../gemma/cli/completion/BashCompletionGenerator.java | 2 +- .../gemma/cli/completion/FishCompletionGenerator.java | 2 +- .../src/main/java/ubic/gemma/cli/main/GemmaCLI.java | 1 + .../java/ubic/gemma/cli/util/EntityLocatorImpl.java | 1 + .../test/java/ubic/gemma/cli/util/test/CliAssert.java | 2 +- .../ubic/gemma/core/analysis/sequence/RepeatScan.java | 8 ++++++-- .../core/analysis/sequence/ShellDelegatingBlat.java | 5 +++-- .../expression/AffyPowerToolsProbesetSummarize.java | 3 ++- .../ubic/gemma/core/loader/genome/SimpleFastaCmd.java | 5 +++-- .../src/main/java/ubic/gemma}/util/ShellUtils.java | 11 ++++++++++- .../java/ubic/gemma/core}/util/ShellUtilsTest.java | 6 ++++-- 11 files changed, 33 insertions(+), 13 deletions(-) rename {gemma-cli/src/main/java/ubic/gemma/cli => gemma-core/src/main/java/ubic/gemma}/util/ShellUtils.java (59%) rename {gemma-cli/src/test/java/ubic/gemma/cli => gemma-core/src/test/java/ubic/gemma/core}/util/ShellUtilsTest.java (57%) diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/completion/BashCompletionGenerator.java b/gemma-cli/src/main/java/ubic/gemma/cli/completion/BashCompletionGenerator.java index 8eac9c81ff..547134797e 100644 --- a/gemma-cli/src/main/java/ubic/gemma/cli/completion/BashCompletionGenerator.java +++ b/gemma-cli/src/main/java/ubic/gemma/cli/completion/BashCompletionGenerator.java @@ -8,7 +8,7 @@ import java.util.*; import java.util.stream.Collectors; -import static ubic.gemma.cli.util.ShellUtils.quoteIfNecessary; +import static ubic.gemma.util.ShellUtils.quoteIfNecessary; public class BashCompletionGenerator extends AbstractCompletionGenerator { diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/completion/FishCompletionGenerator.java b/gemma-cli/src/main/java/ubic/gemma/cli/completion/FishCompletionGenerator.java index 50c8f558cc..e0a2778a13 100644 --- a/gemma-cli/src/main/java/ubic/gemma/cli/completion/FishCompletionGenerator.java +++ 
b/gemma-cli/src/main/java/ubic/gemma/cli/completion/FishCompletionGenerator.java @@ -18,7 +18,7 @@ import java.util.Set; import java.util.stream.Collectors; -import static ubic.gemma.cli.util.ShellUtils.quoteIfNecessary; +import static ubic.gemma.util.ShellUtils.quoteIfNecessary; /** * Generates fish completion script. diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/main/GemmaCLI.java b/gemma-cli/src/main/java/ubic/gemma/cli/main/GemmaCLI.java index 7639d49117..af3c6c4376 100644 --- a/gemma-cli/src/main/java/ubic/gemma/cli/main/GemmaCLI.java +++ b/gemma-cli/src/main/java/ubic/gemma/cli/main/GemmaCLI.java @@ -34,6 +34,7 @@ import ubic.gemma.core.context.SpringContextUtils; import ubic.gemma.core.util.BuildInfo; import ubic.gemma.core.util.concurrent.ThreadUtils; +import ubic.gemma.util.ShellUtils; import javax.annotation.Nullable; import java.io.PrintWriter; diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocatorImpl.java b/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocatorImpl.java index 6add859d43..46a7037f5d 100644 --- a/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocatorImpl.java +++ b/gemma-cli/src/main/java/ubic/gemma/cli/util/EntityLocatorImpl.java @@ -24,6 +24,7 @@ import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentService; import ubic.gemma.persistence.service.genome.taxon.TaxonService; +import ubic.gemma.util.ShellUtils; import javax.annotation.Nullable; import java.util.*; diff --git a/gemma-cli/src/test/java/ubic/gemma/cli/util/test/CliAssert.java b/gemma-cli/src/test/java/ubic/gemma/cli/util/test/CliAssert.java index ffb0baa463..841ec004b5 100644 --- a/gemma-cli/src/test/java/ubic/gemma/cli/util/test/CliAssert.java +++ b/gemma-cli/src/test/java/ubic/gemma/cli/util/test/CliAssert.java @@ -6,7 +6,7 @@ import org.assertj.core.api.ByteArrayAssert; import org.assertj.core.description.Description; 
import ubic.gemma.cli.util.CLI; -import ubic.gemma.cli.util.ShellUtils; +import ubic.gemma.util.ShellUtils; import ubic.gemma.cli.util.TestCliContext; import java.io.*; diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/RepeatScan.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/RepeatScan.java index ca63c82697..c7a09777f7 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/RepeatScan.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/RepeatScan.java @@ -27,6 +27,7 @@ import ubic.gemma.core.profiling.StopWatchUtils; import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.biosequence.BioSequence; +import ubic.gemma.util.ShellUtils; import java.io.BufferedReader; import java.io.File; @@ -34,7 +35,10 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import java.util.*; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; import java.util.concurrent.TimeUnit; /** @@ -167,7 +171,7 @@ private Path execRepeatMasker( Path querySequenceFile, Taxon taxon ) throws IOEx "-species", taxon.getCommonName(), // FIXME use -dir option to put output where we want; see https://github.com/PavlidisLab/Gemma/issues/53; querySequenceFile.toString() }; - RepeatScan.log.info( "Running RepeatMasker like this: " + Arrays.toString( cmd ) ); + RepeatScan.log.info( "Running RepeatMasker like this: " + ShellUtils.join( cmd ) ); final Process run = new ProcessBuilder( cmd ) // to ensure that we aren't left waiting for these streams diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlat.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlat.java index 428c1a6ad2..75c042478a 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlat.java +++ 
b/gemma-core/src/main/java/ubic/gemma/core/analysis/sequence/ShellDelegatingBlat.java @@ -34,6 +34,7 @@ import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.biosequence.BioSequence; import ubic.gemma.model.genome.sequenceAnalysis.BlatResult; +import ubic.gemma.util.ShellUtils; import javax.annotation.Nullable; import java.io.BufferedWriter; @@ -277,7 +278,7 @@ public synchronized void startServer( BlattableGenome genome, boolean sensitive, } String[] cmd = ArrayUtils.addAll( new String[] { gfServerExe, "-stepSize=" + STEPSIZE, "start", this.host, String.valueOf( port ) }, this.getSeqFiles( genome ) ); - ShellDelegatingBlat.log.info( "Starting gfServer with command " + String.join( " ", cmd ) + "..." ); + ShellDelegatingBlat.log.info( "Starting gfServer with command: " + ShellUtils.join( cmd ) ); this.serverProcess = new ProcessBuilder( cmd ) .directory( seqDir.toFile() ) .redirectOutput( ProcessBuilder.Redirect.INHERIT ) @@ -431,7 +432,7 @@ private List execGfClient( Path querySequenceFile, Path outputPath, final String[] cmd = new String[] { gfClientExe, "-nohead", "-minScore=" + ShellDelegatingBlat.MIN_SCORE, host, String.valueOf( portToUse ), seqDir.toString(), querySequenceFile.toString(), outputPath.toString() }; - ShellDelegatingBlat.log.info( String.join( " ", cmd ) ); + ShellDelegatingBlat.log.info( ShellUtils.join( cmd ) ); final Process run = new ProcessBuilder( cmd ) // to ensure that we aren't left waiting for these streams // TODO: switch to Redirect.DISCARD for Java 9+ diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/AffyPowerToolsProbesetSummarize.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/AffyPowerToolsProbesetSummarize.java index c4ccd84d7e..17c6f68892 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/AffyPowerToolsProbesetSummarize.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/AffyPowerToolsProbesetSummarize.java @@ -37,6 +37,7 @@ import 
ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector; import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.util.ShellUtils; import javax.annotation.Nullable; import java.io.*; @@ -611,7 +612,7 @@ private Collection tryRun( ExpressionExperiment ee, Arr AffyPowerToolsProbesetSummarize.log.info( "Original platform: " + originalPlatform + "; Target platform (apt-probeset-summarize will be called with): " + targetPlatform ); - AffyPowerToolsProbesetSummarize.log.info( "Running: " + Arrays.toString( cmd ) ); + AffyPowerToolsProbesetSummarize.log.info( "Running: " + ShellUtils.join( cmd ) ); StopWatch overallWatch = new StopWatch(); overallWatch.start(); diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/genome/SimpleFastaCmd.java b/gemma-core/src/main/java/ubic/gemma/core/loader/genome/SimpleFastaCmd.java index 03e951a7ce..61914555a3 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/genome/SimpleFastaCmd.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/genome/SimpleFastaCmd.java @@ -23,6 +23,7 @@ import org.apache.commons.lang3.StringUtils; import ubic.gemma.core.config.Settings; import ubic.gemma.model.genome.biosequence.BioSequence; +import ubic.gemma.util.ShellUtils; import java.io.BufferedWriter; import java.io.IOException; @@ -130,7 +131,7 @@ private Collection getMultiple( Collection keys, String database } String[] command = new String[] { SimpleFastaCmd.FASTA_CMD_EXE, "-long_seqids", "-target_only", "-" + DB_OPTION, database, "-" + ENTRY_BATCH_OPTION, tmp.toString() }; - SimpleFastaCmd.log.info( String.join( " ", command ) ); + SimpleFastaCmd.log.info( ShellUtils.join( command ) ); ProcessBuilder pb = new ProcessBuilder( command ) .redirectOutput( ProcessBuilder.Redirect.PIPE ) .redirectError( ProcessBuilder.Redirect.PIPE ); @@ -152,7 +153,7 @@ private BioSequence getSingle( String key, String database ) throws 
IOException checkBlastConfig(); String[] command = new String[] { SimpleFastaCmd.FASTA_CMD_EXE, "-long_seqids", "-target_only", "-" + DB_OPTION, database, "-" + QUERY_OPTION, key }; - SimpleFastaCmd.log.info( String.join( " ", command ) ); + SimpleFastaCmd.log.info( ShellUtils.join( command ) ); ProcessBuilder pb = new ProcessBuilder( command ) .redirectOutput( ProcessBuilder.Redirect.PIPE ) .redirectError( ProcessBuilder.Redirect.PIPE ); diff --git a/gemma-cli/src/main/java/ubic/gemma/cli/util/ShellUtils.java b/gemma-core/src/main/java/ubic/gemma/util/ShellUtils.java similarity index 59% rename from gemma-cli/src/main/java/ubic/gemma/cli/util/ShellUtils.java rename to gemma-core/src/main/java/ubic/gemma/util/ShellUtils.java index e2b052fb63..444f20c0f0 100644 --- a/gemma-cli/src/main/java/ubic/gemma/cli/util/ShellUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/util/ShellUtils.java @@ -1,7 +1,16 @@ -package ubic.gemma.cli.util; +package ubic.gemma.util; + +import java.util.Arrays; +import java.util.stream.Collectors; public class ShellUtils { + public static String join( String... 
args ) { + return Arrays.stream( args ) + .map( ShellUtils::quoteIfNecessary ) + .collect( Collectors.joining( " " ) ); + } + public static String quoteIfNecessary( String s ) { if ( s.contains( "'" ) || s.contains( " " ) || s.contains( "\t" ) || s.contains( "\n" ) ) { return "'" + s.replaceAll( "'", "'\"'\"'" ) diff --git a/gemma-cli/src/test/java/ubic/gemma/cli/util/ShellUtilsTest.java b/gemma-core/src/test/java/ubic/gemma/core/util/ShellUtilsTest.java similarity index 57% rename from gemma-cli/src/test/java/ubic/gemma/cli/util/ShellUtilsTest.java rename to gemma-core/src/test/java/ubic/gemma/core/util/ShellUtilsTest.java index 44215b5aad..64d0c6e973 100644 --- a/gemma-cli/src/test/java/ubic/gemma/cli/util/ShellUtilsTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/ShellUtilsTest.java @@ -1,14 +1,16 @@ -package ubic.gemma.cli.util; +package ubic.gemma.core.util; import org.junit.Test; import static org.junit.Assert.assertEquals; -import static ubic.gemma.cli.util.ShellUtils.quoteIfNecessary; +import static ubic.gemma.util.ShellUtils.join; +import static ubic.gemma.util.ShellUtils.quoteIfNecessary; public class ShellUtilsTest { @Test public void test() { + assertEquals( "a b ' '", join( "a", "b", " " ) ); assertEquals( "' '", quoteIfNecessary( " " ) ); assertEquals( "' a '", quoteIfNecessary( " a " ) ); } From d3b277e9d6c1023205e413eb4bd7d3c8212ef070 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 24 Jun 2025 13:18:19 -0700 Subject: [PATCH 016/129] Add a watch npm script and an IntelliJ configuration for running it --- .../Build_static_assets_continuously.xml | 12 ++++++++++++ gemma-web/src/main/webapp/package.json | 1 + 2 files changed, 13 insertions(+) create mode 100644 .idea/runConfigurations/Build_static_assets_continuously.xml diff --git a/.idea/runConfigurations/Build_static_assets_continuously.xml b/.idea/runConfigurations/Build_static_assets_continuously.xml new file mode 100644 index 0000000000..92b0c5d835 --- /dev/null 
+++ b/.idea/runConfigurations/Build_static_assets_continuously.xml @@ -0,0 +1,12 @@ + + + + + +