From bbf17bca5c1355d3ab118cc89084f2609f7af116 Mon Sep 17 00:00:00 2001 From: dipayan1985 Date: Mon, 2 Feb 2026 12:00:17 +0000 Subject: [PATCH 1/9] removing not required pipelines, also pointing dev to prod schema store --- agents/pom.xml | 1 - agents/solr/pom.xml | 67 -- .../uk/ac/ebi/biosamples/Application.java | 181 ---- .../biosamples/solr/MessageHandlerSolr.java | 146 --- .../uk/ac/ebi/biosamples/solr/SolrRunner.java | 44 - .../solr/MessageHandlerSolrTest.java | 121 --- .../examples/samples/SAMEA5397449.json | 95 -- k8s/helm/values-primary_dev.yaml | 5 +- .../uk/ac/ebi/biosamples/Application.java | 8 +- .../SampleChecklistComplianceHandlerEVA.java | 2 +- pipelines/ena/pom.xml | 135 --- .../uk/ac/ebi/biosamples/Application.java | 40 - .../ebi/biosamples/ena/EnaImportCallable.java | 299 ------ .../ena/EnaImportCallableFactory.java | 69 -- .../ebi/biosamples/ena/EnaImportRunner.java | 550 ----------- .../ac/ebi/biosamples/ena/SpecialTypes.java | 17 - .../ena/EnaSampleXmlEnhancerTest.java | 224 ----- .../ac/ebi/biosamples/ena/ExampleSamples.java | 906 ------------------ .../biosamples/ena/MockBioSamplesClient.java | 91 -- .../ebi/biosamples/ena/TestApplication.java | 105 -- .../ac/ebi/biosamples/ena/TestConversion.java | 150 --- .../ena/src/test/resources/SRS000121.xml | 117 --- pipelines/export/pom.xml | 48 - .../uk/ac/ebi/biosamples/Application.java | 40 - .../ebi/biosamples/export/ExportRunner.java | 84 -- pipelines/ncbi-ena-link/pom.xml | 6 +- .../ebi/biosamples/ena/NcbiEnaLinkRunner.java | 1 - pipelines/neoexport/pom.xml | 53 - .../uk/ac/ebi/biosamples/Application.java | 146 --- .../biosamples/neoexport/NeoCsvExporter.java | 232 ----- .../neoexport/NeoExportCallable.java | 45 - .../biosamples/neoexport/NeoExportRunner.java | 168 ---- pipelines/pom.xml | 3 +- pipelines/sample-post-release-action/pom.xml | 6 +- 34 files changed, 20 insertions(+), 4185 deletions(-) delete mode 100644 agents/solr/pom.xml delete mode 100644 agents/solr/src/main/java/uk/ac/ebi/biosamples/Application.java delete mode 100644 agents/solr/src/main/java/uk/ac/ebi/biosamples/solr/MessageHandlerSolr.java delete mode 100644 agents/solr/src/main/java/uk/ac/ebi/biosamples/solr/SolrRunner.java delete mode 100644 agents/solr/src/test/java/uk/ac/ebi/biosamples/solr/MessageHandlerSolrTest.java delete mode 100644 agents/solr/src/test/resources/examples/samples/SAMEA5397449.json delete mode 100644 pipelines/ena/pom.xml delete mode 100644 pipelines/ena/src/main/java/uk/ac/ebi/biosamples/Application.java delete mode 100644 pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/EnaImportCallable.java delete mode 100644 pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/EnaImportCallableFactory.java delete mode 100644 pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/EnaImportRunner.java delete mode 100644 pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/SpecialTypes.java delete mode 100644 pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/EnaSampleXmlEnhancerTest.java delete mode 100644 pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/ExampleSamples.java delete mode 100644 pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/MockBioSamplesClient.java delete mode 100644 pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/TestApplication.java delete mode 100644 pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/TestConversion.java delete mode 100644 pipelines/ena/src/test/resources/SRS000121.xml delete mode 100644 pipelines/export/pom.xml delete mode 100644 pipelines/export/src/main/java/uk/ac/ebi/biosamples/Application.java delete mode 100644 pipelines/export/src/main/java/uk/ac/ebi/biosamples/export/ExportRunner.java delete mode 100644 pipelines/neoexport/pom.xml delete mode 100644 pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/Application.java delete mode 100644 pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/neoexport/NeoCsvExporter.java delete mode 100644 pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/neoexport/NeoExportCallable.java delete mode 100644 pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/neoexport/NeoExportRunner.java diff --git a/agents/pom.xml b/agents/pom.xml index e9d7201885..7e317b1fbd 100644 --- a/agents/pom.xml +++ b/agents/pom.xml @@ -13,7 +13,6 @@ - solr uploadworkers diff --git a/agents/solr/pom.xml b/agents/solr/pom.xml deleted file mode 100644 index 2c8345d293..0000000000 --- a/agents/solr/pom.xml +++ /dev/null @@ -1,67 +0,0 @@ - -4.0.0 - - agents-solr - jar - - - uk.ac.ebi.biosamples - biosamples - 5.3.15-SNAPSHOT - ../../ - - - - - uk.ac.ebi.biosamples - properties - 5.3.15-SNAPSHOT - - - uk.ac.ebi.biosamples - core - 5.3.15-SNAPSHOT - - - - org.springframework.hateoas - spring-hateoas - 1.3.4 - - - org.springframework.boot - spring-boot-starter-web - - - com.github.ben-manes.caffeine - caffeine - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpclient-cache - - - 4.5.3 - - - - - - - org.springframework.boot - spring-boot-maven-plugin - - - - build-info - - - - - - - diff --git a/agents/solr/src/main/java/uk/ac/ebi/biosamples/Application.java b/agents/solr/src/main/java/uk/ac/ebi/biosamples/Application.java deleted file mode 100644 index 27ba9fdfaa..0000000000 --- a/agents/solr/src/main/java/uk/ac/ebi/biosamples/Application.java +++ /dev/null @@ -1,181 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples; - -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.util.Collections; -import java.util.List; -import org.apache.http.HeaderElement; -import org.apache.http.HeaderElementIterator; -import org.apache.http.HttpResponse; -import org.apache.http.HttpStatus; -import org.apache.http.client.HttpClient; -import org.apache.http.client.ServiceUnavailableRetryStrategy; -import org.apache.http.client.config.RequestConfig; -import org.apache.http.conn.ConnectionKeepAliveStrategy; -import org.apache.http.impl.client.cache.CacheConfig; -import org.apache.http.impl.client.cache.CachingHttpClientBuilder; -import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; -import org.apache.http.message.BasicHeaderElementIterator; -import org.apache.http.protocol.HTTP; -import org.apache.http.protocol.HttpContext; -import org.springframework.amqp.rabbit.config.SimpleRabbitListenerContainerFactory; -import org.springframework.amqp.rabbit.connection.ConnectionFactory; -import org.springframework.boot.SpringApplication; -import org.springframework.boot.autoconfigure.SpringBootApplication; -import org.springframework.boot.autoconfigure.amqp.SimpleRabbitListenerContainerFactoryConfigurer; -import org.springframework.boot.web.client.RestTemplateCustomizer; -import org.springframework.context.annotation.Bean; -import org.springframework.hateoas.MediaTypes; -import org.springframework.hateoas.RepresentationModel; -import org.springframework.hateoas.mediatype.hal.Jackson2HalModule; -import org.springframework.hateoas.server.mvc.TypeConstrainedMappingJackson2HttpMessageConverter; -import org.springframework.http.client.HttpComponentsClientHttpRequestFactory; -import org.springframework.http.converter.HttpMessageConverter; -import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; -import org.springframework.scheduling.annotation.EnableAsync; -import org.springframework.scheduling.annotation.EnableScheduling; -import org.springframework.web.client.RestTemplate; - -@SpringBootApplication -@EnableAsync -@EnableScheduling -public class Application { - @Bean - public RestTemplate restTemplate() { - final RestTemplate restTemplate = new RestTemplate(); - getRestTemplateCustomizer().customize(restTemplate); - return restTemplate; - } - - public static void main(final String[] args) { - System.exit(SpringApplication.exit(SpringApplication.run(Application.class, args))); - } - - @Bean - public RestTemplateCustomizer getRestTemplateCustomizer() { - return restTemplate -> { - - // use a keep alive strategy to try to make it easier to maintain connections for - // reuse - final ConnectionKeepAliveStrategy keepAliveStrategy = - (response, context) -> { - - // check if there is a non-standard keep alive header present - final HeaderElementIterator it = - new BasicHeaderElementIterator(response.headerIterator(HTTP.CONN_KEEP_ALIVE)); - while (it.hasNext()) { - final HeaderElement he = it.nextElement(); - final String param = he.getName(); - final String value = he.getValue(); - if (value != null && param.equalsIgnoreCase("timeout")) { - return Long.parseLong(value) * 1000; - } - } - // default to 15s if no header - return 15 * 1000; - }; - - // set a number of connections to use at once for multiple threads - final PoolingHttpClientConnectionManager poolingHttpClientConnectionManager = - new PoolingHttpClientConnectionManager(); - poolingHttpClientConnectionManager.setMaxTotal(8); - poolingHttpClientConnectionManager.setDefaultMaxPerRoute(8); - - // set a local cache for cacheable responses - // TODO application.properties this - final CacheConfig cacheConfig = - CacheConfig.custom() - .setMaxCacheEntries(1024) - .setMaxObjectSize(1024 * 1024) // max size of 1Mb - // number of entries x size of entries = 1Gb total cache size - .setSharedCache(false) // act like a browser cache not a middle-hop cache - .build(); - - // set a timeout limit - final int timeout = 60000; - final RequestConfig config = - RequestConfig.custom() - .setConnectTimeout(timeout) // time to establish the connection with the remote - // host - .setConnectionRequestTimeout(timeout) // maximum time of inactivity between two data - // packets - .setSocketTimeout(timeout) - .build(); // time to wait for a connection from the connection - // manager/pool - - // set retry strategy to retry on any 5xx error - // ebi load balancers return a 500 error when a service is unavaliable not a 503 - final ServiceUnavailableRetryStrategy serviceUnavailStrategy = - new ServiceUnavailableRetryStrategy() { - - @Override - public boolean retryRequest( - final HttpResponse response, final int executionCount, final HttpContext context) { - final int maxRetries = 100; - return executionCount <= maxRetries - && (response.getStatusLine().getStatusCode() == HttpStatus.SC_SERVICE_UNAVAILABLE - || response.getStatusLine().getStatusCode() - == HttpStatus.SC_INTERNAL_SERVER_ERROR); - } - - @Override - public long getRetryInterval() { - // measured in milliseconds - return 1000; - } - }; - - // make the actual client - final HttpClient httpClient = - CachingHttpClientBuilder.create() - .setCacheConfig(cacheConfig) - .useSystemProperties() - .setConnectionManager(poolingHttpClientConnectionManager) - .setKeepAliveStrategy(keepAliveStrategy) - .setServiceUnavailableRetryStrategy(serviceUnavailStrategy) - .setDefaultRequestConfig(config) - .build(); - - // and wire it into the resttemplate - restTemplate.setRequestFactory(new HttpComponentsClientHttpRequestFactory(httpClient)); - - // make sure there is a application/hal+json converter - // traverson will make its own but not if we want to customize the resttemplate in - // any way - // (e.g. caching) - final List> converters = restTemplate.getMessageConverters(); - final ObjectMapper mapper = new ObjectMapper(); - mapper.registerModule(new Jackson2HalModule()); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - final MappingJackson2HttpMessageConverter halConverter = - new TypeConstrainedMappingJackson2HttpMessageConverter(RepresentationModel.class); - halConverter.setObjectMapper(mapper); - halConverter.setSupportedMediaTypes(Collections.singletonList(MediaTypes.HAL_JSON)); - // make sure this is inserted first - converters.add(0, halConverter); - restTemplate.setMessageConverters(converters); - }; - } - - @Bean("biosamplesAgentSolrContainerFactory") - public SimpleRabbitListenerContainerFactory containerFactory( - final SimpleRabbitListenerContainerFactoryConfigurer configurer, - final ConnectionFactory connectionFactory) { - final SimpleRabbitListenerContainerFactory factory = new SimpleRabbitListenerContainerFactory(); - factory.setConcurrentConsumers(32); - factory.setMaxConcurrentConsumers(64); - configurer.configure(factory, connectionFactory); - - return factory; - } -} diff --git a/agents/solr/src/main/java/uk/ac/ebi/biosamples/solr/MessageHandlerSolr.java b/agents/solr/src/main/java/uk/ac/ebi/biosamples/solr/MessageHandlerSolr.java deleted file mode 100644 index 444a54bd01..0000000000 --- a/agents/solr/src/main/java/uk/ac/ebi/biosamples/solr/MessageHandlerSolr.java +++ /dev/null @@ -1,146 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.solr; - -import java.time.ZoneOffset; -import java.time.ZonedDateTime; -import java.time.format.DateTimeFormatter; -import java.util.Arrays; -import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.amqp.rabbit.annotation.RabbitListener; -import org.springframework.stereotype.Service; -import uk.ac.ebi.biosamples.core.model.Attribute; -import uk.ac.ebi.biosamples.core.model.Sample; -import uk.ac.ebi.biosamples.messaging.MessagingConstants; -import uk.ac.ebi.biosamples.messaging.model.MessageContent; -import uk.ac.ebi.biosamples.solr.model.SolrSample; -import uk.ac.ebi.biosamples.solr.repo.SolrSampleRepository; -import uk.ac.ebi.biosamples.solr.service.SampleToSolrSampleConverter; - -@Service -public class MessageHandlerSolr { - private static final Logger LOGGER = LoggerFactory.getLogger(MessageHandlerSolr.class); - private static final List INDEXABLE_STATUSES = - Arrays.asList( - "private", - "public", - "live", - "suppressed", - "killed", - "temporary_suppressed", - "temporary_killed"); - - private final SolrSampleRepository repository; - private final SampleToSolrSampleConverter sampleToSolrSampleConverter; - - public MessageHandlerSolr( - final SolrSampleRepository repository, - final SampleToSolrSampleConverter sampleToSolrSampleConverter) { - this.repository = repository; - this.sampleToSolrSampleConverter = sampleToSolrSampleConverter; - } - - @RabbitListener( - queues = MessagingConstants.INDEXING_QUEUE, - containerFactory = "biosamplesAgentSolrContainerFactory") - public void handleIndexing(final MessageContent messageContent) { - handle(messageContent); - } - - @RabbitListener( - queues = MessagingConstants.REINDEXING_QUEUE, - containerFactory = "biosamplesAgentSolrContainerFactory") - public void handleReindexing(final MessageContent messageContent) { - handle(messageContent); - } - - private void handle(final MessageContent messageContent) { - if (messageContent.getSample() == null) { - LOGGER.warn("received message without sample"); - - return; - } - - final Sample sample = messageContent.getSample(); - - handleSample(sample, messageContent.getCreationTime()); - - for (final Sample related : messageContent.getRelated()) { - handleSample(related, messageContent.getCreationTime()); - } - } - - private void handleSample(final Sample sample, final String modifiedTime) { - final String accession = sample.getAccession(); - - if (isIndexingCandidate(sample)) { - try { - SolrSample solrSample = sampleToSolrSampleConverter.convert(sample); - // add the modified time to the solrSample - final String indexedTime = - ZonedDateTime.now(ZoneOffset.UTC).format(DateTimeFormatter.ISO_INSTANT); - - assert solrSample != null; - - solrSample = - SolrSample.build( - solrSample.getName(), - solrSample.getAccession(), - solrSample.getDomain(), - solrSample.getWebinSubmissionAcccountId(), - solrSample.getStatus(), - solrSample.getRelease(), - solrSample.getUpdate(), - modifiedTime, - indexedTime, - solrSample.getAttributeValues(), - solrSample.getAttributeIris(), - solrSample.getAttributeUnits(), - solrSample.getOutgoingRelationships(), - solrSample.getIncomingRelationships(), - solrSample.getExternalReferencesData(), - solrSample.getKeywords()); - - repository.saveWithoutCommit(solrSample); - - LOGGER.info(String.format("added %s to index", accession)); - } catch (final Exception e) { - LOGGER.error("failed to index " + accession, e); - - throw e; - } - } else { - if (repository.existsById(accession)) { - repository.deleteById(accession); - LOGGER.info(String.format("removed %s from index", accession)); - } - } - } - - static boolean isIndexingCandidate(final Sample sample) { - for (final Attribute attribute : sample.getAttributes()) { - if (attribute.getType().equals("INSDC status")) { - if (!INDEXABLE_STATUSES.contains(attribute.getValue())) { - LOGGER.debug( - String.format( - "not indexing %s as INSDC status is %s", - sample.getAccession(), attribute.getValue())); - - return false; - } - } - } - - return true; - } -} diff --git a/agents/solr/src/main/java/uk/ac/ebi/biosamples/solr/SolrRunner.java b/agents/solr/src/main/java/uk/ac/ebi/biosamples/solr/SolrRunner.java deleted file mode 100644 index 9e449c844e..0000000000 --- a/agents/solr/src/main/java/uk/ac/ebi/biosamples/solr/SolrRunner.java +++ /dev/null @@ -1,44 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.solr; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.ApplicationArguments; -import org.springframework.boot.ApplicationRunner; -import org.springframework.stereotype.Component; -import uk.ac.ebi.biosamples.BioSamplesProperties; -import uk.ac.ebi.biosamples.messaging.MessagingConstants; -import uk.ac.ebi.biosamples.messaging.service.MessageUtils; - -@Component -public class SolrRunner implements ApplicationRunner { - private final Logger log = LoggerFactory.getLogger(getClass()); - @Autowired private MessageUtils messageUtils; - @Autowired private BioSamplesProperties biosamplesProperties; - - @Override - public void run(final ApplicationArguments args) throws Exception { - // as long as there are messages to read, keep this thread alive - // that will also keep the async message client alive too? - Long messageCount = null; - - while (biosamplesProperties.getAgentSolrStayalive() - || messageCount == null - || messageCount > 0) { - Thread.sleep(1000); - messageCount = messageUtils.getQueueCount(MessagingConstants.INDEXING_QUEUE); - - log.trace("Messages remaining in " + MessagingConstants.INDEXING_QUEUE + " " + messageCount); - } - } -} diff --git a/agents/solr/src/test/java/uk/ac/ebi/biosamples/solr/MessageHandlerSolrTest.java b/agents/solr/src/test/java/uk/ac/ebi/biosamples/solr/MessageHandlerSolrTest.java deleted file mode 100644 index 943ad5081d..0000000000 --- a/agents/solr/src/test/java/uk/ac/ebi/biosamples/solr/MessageHandlerSolrTest.java +++ /dev/null @@ -1,121 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.solr; - -import static junit.framework.TestCase.assertTrue; -import static org.junit.Assert.assertFalse; - -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.time.Instant; -import java.util.*; -import org.junit.Test; -import org.springframework.hateoas.MediaTypes; -import org.springframework.hateoas.RepresentationModel; -import org.springframework.hateoas.mediatype.hal.Jackson2HalModule; -import org.springframework.hateoas.server.mvc.TypeConstrainedMappingJackson2HttpMessageConverter; -import org.springframework.http.converter.HttpMessageConverter; -import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; -import org.springframework.web.client.RestTemplate; -import uk.ac.ebi.biosamples.core.model.Attribute; -import uk.ac.ebi.biosamples.core.model.Sample; -import uk.ac.ebi.biosamples.core.model.SampleStatus; - -public class MessageHandlerSolrTest { - - @Test - public void should_index_public_sample() { - final Attribute attribute = Attribute.build("INSDC status", "public"); - assertTrue( - MessageHandlerSolr.isIndexingCandidate( - generateTestSample("public-example", Collections.singletonList(attribute)))); - } - - @Test - public void should_index_live_sample() { - final Attribute attribute = Attribute.build("INSDC status", "live"); - assertTrue( - MessageHandlerSolr.isIndexingCandidate( - (generateTestSample("live-example", Collections.singletonList(attribute))))); - } - - @Test - public void should_index_sample_with_no_INSDC_status() { - assertTrue( - MessageHandlerSolr.isIndexingCandidate( - (generateTestSample("no-example", Collections.EMPTY_LIST)))); - } - - @Test - public void should_index_SAMEA5397449_sample_with_no_INSDC_status() throws Exception { - final String filePath = "/examples/samples/SAMEA5397449.json"; - final ObjectMapper objectMapper = getObjectMapper(); - final Sample sample = - objectMapper.readValue( - MessageHandlerSolrTest.class.getResourceAsStream(filePath), Sample.class); - assertTrue(MessageHandlerSolr.isIndexingCandidate(sample)); - } - - private ObjectMapper getObjectMapper() { - final RestTemplate restTemplate = new RestTemplate(); - final List> converters = restTemplate.getMessageConverters(); - final ObjectMapper mapper = new ObjectMapper(); - mapper.registerModule(new Jackson2HalModule()); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - final MappingJackson2HttpMessageConverter halConverter = - new TypeConstrainedMappingJackson2HttpMessageConverter(RepresentationModel.class); - halConverter.setObjectMapper(mapper); - halConverter.setSupportedMediaTypes(Arrays.asList(MediaTypes.HAL_JSON)); - // make sure this is inserted first - converters.add(0, halConverter); - restTemplate.setMessageConverters(converters); - return mapper; - } - - @Test - public void should_not_index_suppressed_sample() { - final Attribute attribute = Attribute.build("INSDC status", "suppressed"); - assertTrue( - MessageHandlerSolr.isIndexingCandidate( - (generateTestSample("suppressed-example", Collections.singletonList(attribute))))); - } - - @Test - public void should_not_index_sample_with_unexpected_INSDC_status() { - final Attribute attribute = Attribute.build("INSDC status", "gertgerge"); - assertFalse( - MessageHandlerSolr.isIndexingCandidate( - (generateTestSample("unexpected-example", Collections.singletonList(attribute))))); - } - - private Sample generateTestSample(final String accession, final List attributes) { - final Set attributeSet = new HashSet<>(); - for (final Attribute attribute : attributes) { - attributeSet.add(attribute); - } - return Sample.build( - "", - accession, - "ERS01", - "", - "", - Long.valueOf(9606), - SampleStatus.PUBLIC, - Instant.now(), - Instant.now(), - Instant.now(), - Instant.now(), - Instant.now(), - attributeSet, - Collections.emptySet(), - Collections.emptySet()); - } -} diff --git a/agents/solr/src/test/resources/examples/samples/SAMEA5397449.json b/agents/solr/src/test/resources/examples/samples/SAMEA5397449.json deleted file mode 100644 index 648c922e22..0000000000 --- a/agents/solr/src/test/resources/examples/samples/SAMEA5397449.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "name" : "ExomeCapture-DAS5-003070", - "accession" : "SAMEA5397449", - "domain" : "self.77970ea195e14caef9030efc43314480b5682cebe9ba202834350a22177a89ee", - "release" : "2019-01-24T00:00:00Z", - "update" : "2019-01-24T10:08:05.111Z", - "taxId" : 4565, - "characteristics" : { - "Common Name" : [ { - "text" : "bread wheat" - } ], - "Material" : [ { - "text" : "173314" - } ], - "Organism" : [ { - "text" : "Triticum aestivum subsp. aestivum", - "ontologyTerms" : [ "4565" ] - } ], - "Scientific Name" : [ { - "text" : "Triticum aestivum subsp. aestivum" - } ], - "Submission description" : [ { - "text" : "The exome sequenced hexaploid wheat accessions were selected from a worldwide population of landraces and cultivars obtained from the USDA National Small Grains Collection and the Australian Grains Genebank (formally the Australian Winter Cereal Collection). The worldwide population comprised 3990 accessions from 106 countries that were genotyped using the Infinium iSelect 90K SNP wheat bead chip array. The MCG method was used to select accessions for exome sequencing. This method uses a genetic relationship matrix calculated from genotype data to iteratively chose and arbitrary number of accessions (in the case of this study 20% of the accessions) that collectively explain the largest proportion of variance in genetic relationships among the whole set. Genomic DNA was extracted from leaf tissue for each accession using the Agencourt DNAdvance Genomic DNA Isolation Kit (Beckman Coulter) and subjected to sequence capture using the NimbleGen SeqCap EZ wheat whole genome assay. In brief, 1 µg of DNA was fragmented with the Covaris S2 instrument to obtain an average fragment length of 300 bp. Shorter fragments were removed using Agencourt AMPure XP beads (Beckman Coulter). The KAPA Library Preparation Kit (Kapa Biosystems) and adapters supplied in the NimbleGen SeqCap EZ Reagent Kit Plus v2 (Roche) were used to prepare sample libraries for the capture assay, according to the KAPA protocol but excluding steps 8.1-8.22. The quality and yield of each sample library was assessed using an Agilent 2200 TapeStation (Agilent Technologies) before proceeding to the sequence capture assay following the Roche protocol. Each target capture sequence enriched sample library was sequenced on an Illumina HiSeq2000 instrument (Illumina) to generate about 30 million reads per accession. The project leaders are Matthew Hayden (Agriculture Victoria, AgriBio, Centre for AgriBioscience, Bundoora, Victoria, Australia; La Trobe University, Bundoora, Victoria, Australia) and Eduard Akhunov (Kansas State University, USA). More information can be found on our project website at http://wheatgenomics.plantpath.ksu.edu/1000EC" - } ], - "Submission identifier" : [ { - "text" : "GSB-804" - } ], - "Submission title" : [ { - "text" : "Exome sequencing of diverse collection of wheat landraces, cultivars and breeding lines" - } ], - "collection_date" : [ { - "text" : "Watkins 1920-30s" - } ], - "cultivar" : [ { - "text" : "SRM NOGAL" - } ], - "culture_collection" : [ { - "text" : "AWCC" - } ], - "description" : [ { - "text" : "exome sequences of ExomeCapture-DAS5-003070 - Accession Name = 'SRM NOGAL'; Remarks = '.'; Released = '.'; Lattitude,Longitude,Elevation = '.,.,.'; Ancestry = '.'" - } ], - "dev_stage" : [ { - "text" : "2 week old" - } ], - "geographic location (country and/or sea)" : [ { - "text" : "Argentina" - } ], - "specimen_voucher" : [ { - "text" : "AWCC:173314" - } ], - "sub_species" : [ { - "text" : "subsp. aestivum" - } ], - "tissue_type" : [ { - "text" : "leaf" - } ], - "variety" : [ { - "text" : "Aus38861" - } ] - }, - "relationships" : [ { - "source" : "SAMEG4750776", - "type" : "has member", - "target" : "SAMEA5397449" - } ], - "releaseDate" : "2019-01-24", - "updateDate" : "2019-01-24", - "organization" : [ { - "Name" : "Eduard Akhunov", - "Address" : "1712 Claflin Road, Department of Plant Pathology, Kansas State University, Manhattan, KS 66506, USA" - } ], - "contact" : [ { - "FirstName" : "Fei", - "LastName" : "He", - "E-mail" : "feihe@ksu.edu", - "Name" : "Fei He" - } ], - "_links" : { - "self" : { - "href" : "https://www.ebi.ac.uk/biosamples/samples/SAMEA5397449" - }, - "curationDomain" : { - "href" : "https://www.ebi.ac.uk/biosamples/samples/SAMEA5397449{?curationdomain}", - "templated" : true - }, - "curationLinks" : { - "href" : "https://www.ebi.ac.uk/biosamples/samples/SAMEA5397449/curationlinks" - }, - "curationLink" : { - "href" : "https://www.ebi.ac.uk/biosamples/samples/SAMEA5397449/curationlinks/{hash}", - "templated" : true - } - } -} \ No newline at end of file diff --git a/k8s/helm/values-primary_dev.yaml b/k8s/helm/values-primary_dev.yaml index fa65361164..1610868ff1 100644 --- a/k8s/helm/values-primary_dev.yaml +++ b/k8s/helm/values-primary_dev.yaml @@ -3,4 +3,7 @@ rabbitmq: biosamples: search: host: biosamples-search-helm - port: 9090 \ No newline at end of file + port: 9090 + schema: + store: + url: https://www.ebi.ac.uk/biosamples/schema-store diff --git a/pipelines/chain/src/main/java/uk/ac/ebi/biosamples/Application.java b/pipelines/chain/src/main/java/uk/ac/ebi/biosamples/Application.java index eec9c94d58..b8e05103c6 100644 --- a/pipelines/chain/src/main/java/uk/ac/ebi/biosamples/Application.java +++ b/pipelines/chain/src/main/java/uk/ac/ebi/biosamples/Application.java @@ -36,6 +36,7 @@ import uk.ac.ebi.biosamples.service.EnaConfig; import uk.ac.ebi.biosamples.service.EnaSampleToBioSampleConversionService; import uk.ac.ebi.biosamples.service.EraProDao; +import uk.ac.ebi.biosamples.service.PipelineHelperService; import uk.ac.ebi.biosamples.utils.PipelineUtils; @SpringBootApplication(exclude = DataSourceAutoConfiguration.class) @@ -43,7 +44,12 @@ excludeFilters = { @ComponentScan.Filter( type = FilterType.ASSIGNABLE_TYPE, - value = {EnaConfig.class, EraProDao.class, EnaSampleToBioSampleConversionService.class}) + value = { + EnaConfig.class, + EraProDao.class, + EnaSampleToBioSampleConversionService.class, + PipelineHelperService.class + }) }) @Import(ExclusionConfiguration.class) @EnableCaching diff --git a/pipelines/chain/src/main/java/uk/ac/ebi/biosamples/helpdesk/services/SampleChecklistComplianceHandlerEVA.java b/pipelines/chain/src/main/java/uk/ac/ebi/biosamples/helpdesk/services/SampleChecklistComplianceHandlerEVA.java index 8b0082ce29..e0f7c2accc 100644 --- a/pipelines/chain/src/main/java/uk/ac/ebi/biosamples/helpdesk/services/SampleChecklistComplianceHandlerEVA.java +++ b/pipelines/chain/src/main/java/uk/ac/ebi/biosamples/helpdesk/services/SampleChecklistComplianceHandlerEVA.java @@ -165,7 +165,7 @@ private void handleGeographicLocationAndCollectionDate(final Sample sample) { } public void updateSamnSampleGeographicLocationFromFile() { - final Pattern pattern = Pattern.compile("SAMN\\d+"); + final Pattern pattern = Pattern.compile("(SAMN|SAMD)\\d+"); final Set samnAccessions = new HashSet<>(); try (BufferedReader reader = diff --git a/pipelines/ena/pom.xml b/pipelines/ena/pom.xml deleted file mode 100644 index cbc018af45..0000000000 --- a/pipelines/ena/pom.xml +++ /dev/null @@ -1,135 +0,0 @@ - - 4.0.0 - - pipelines-ena - jar - - - uk.ac.ebi.biosamples - biosamples - 5.3.10-SNAPSHOT - ../../ - - - - - uk.ac.ebi.biosamples - pipelines-common - 5.3.10-SNAPSHOT - - - uk.ac.ebi.biosamples - core - 5.3.10-SNAPSHOT - - - org.springframework.hateoas - spring-hateoas - 1.3.4 - - - org.springframework.boot - spring-boot-starter-jdbc - - - org.apache.tomcat - tomcat-jdbc - - - - - - - com.zaxxer - HikariCP - 3.3.1 - - - - - dom4j - dom4j - 1.6.1 - - - jaxen - jaxen - - - xmlunit - xmlunit - 1.4 - - - - xerces - xercesImpl - 2.12.2 - - - - - - com.oracle.jdbc - ojdbc8 - 12.2.0.1 - - - com.oracle.jdbc - xdb6 - 12.2.0.1 - - - com.oracle.jdbc - xmlparserv2 - 19.3.0.0 - - - - - com.fasterxml.jackson.dataformat - jackson-dataformat-csv - 2.8.8 - - - org.apache.poi - poi - 5.1.0 - - - - org.apache.poi - poi-ooxml - 5.1.0 - - - - - - maven.oracle.com - oracle-maven-repo - https://maven.oracle.com/public - - - - - - - - - - - org.springframework.boot - spring-boot-maven-plugin - - - - build-info - - - - - - - diff --git a/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/Application.java b/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/Application.java deleted file mode 100644 index 8ed672cb84..0000000000 --- a/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/Application.java +++ /dev/null @@ -1,40 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples; - -import org.springframework.boot.SpringApplication; -import org.springframework.boot.autoconfigure.SpringBootApplication; -import org.springframework.cache.annotation.EnableCaching; -import org.springframework.context.ConfigurableApplicationContext; -import org.springframework.context.annotation.Bean; -import org.springframework.context.support.PropertySourcesPlaceholderConfigurer; -import org.springframework.scheduling.annotation.EnableAsync; -import org.springframework.scheduling.annotation.EnableScheduling; -import uk.ac.ebi.biosamples.utils.PipelineUtils; - -@SpringBootApplication -@EnableCaching(proxyTargetClass = true) -@EnableAsync -@EnableScheduling -public class Application { - - // this is needed to read non-strings from properties files - // must be static for lifecycle reasons - @Bean - public static PropertySourcesPlaceholderConfigurer getPropertySourcesPlaceholderConfigurer() { - return new PropertySourcesPlaceholderConfigurer(); - } - - public static void main(final String[] args) { - final ConfigurableApplicationContext ctx = SpringApplication.run(Application.class, args); - PipelineUtils.exitPipeline(ctx); - } -} diff --git a/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/EnaImportCallable.java b/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/EnaImportCallable.java deleted file mode 100644 index 0f6170ae93..0000000000 --- a/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/EnaImportCallable.java +++ /dev/null @@ -1,299 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.ena; - -import static uk.ac.ebi.biosamples.BioSamplesConstants.SRA_ACCESSION; - -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.Callable; -import org.dom4j.DocumentException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.hateoas.EntityModel; -import uk.ac.ebi.biosamples.BioSamplesConstants; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.model.Attribute; -import uk.ac.ebi.biosamples.model.Sample; -import uk.ac.ebi.biosamples.model.SampleStatus; -import uk.ac.ebi.biosamples.service.EnaSampleToBioSampleConversionService; -import uk.ac.ebi.biosamples.service.EraProDao; -import uk.ac.ebi.biosamples.service.EraproSample; - -public class EnaImportCallable implements Callable { - private final Logger log = LoggerFactory.getLogger(getClass()); - private final BioSamplesClient bioSamplesWebinClient; - private final BioSamplesClient bioSamplesAapClient; - private final EnaSampleToBioSampleConversionService enaSampleToBioSampleConversionService; - private final EraProDao eraProDao; - private final SpecialTypes specialTypes; - private final String accession; - - EnaImportCallable( - final String accession, - final BioSamplesClient bioSamplesWebinClient, - final BioSamplesClient bioSamplesAapClient, - final EnaSampleToBioSampleConversionService enaSampleToBioSampleConversionService, - final EraProDao eraProDao, - final SpecialTypes specialTypes) { - this.accession = accession; - this.bioSamplesWebinClient = bioSamplesWebinClient; - this.bioSamplesAapClient = bioSamplesAapClient; - this.enaSampleToBioSampleConversionService = enaSampleToBioSampleConversionService; - this.eraProDao = eraProDao; - this.specialTypes = specialTypes; - } - - @Override - public Void call() throws Exception { - log.info("Handling " + accession); - - if (specialTypes != null - && (specialTypes.equals(SpecialTypes.SUPPRESSED) - || specialTypes.equals(SpecialTypes.KILLED))) { - return handleSuppressedKilledSample(specialTypes); - } - - Sample enaSampleConvertedToBioSample = null; - - try { - SampleToUpdateRequiredPair sampleToUpdateRequiredPair = null; - - if (specialTypes == SpecialTypes.BSD_AUTHORITY) { - sampleToUpdateRequiredPair = buildBsdAuthoritySampleWithSraAccession(accession); - } else { - enaSampleConvertedToBioSample = - enaSampleToBioSampleConversionService.enrichSample(accession); - } - - boolean success = false; - int numRetry = 0; - - while (!success) { - try { - if (specialTypes == SpecialTypes.BSD_AUTHORITY) { - if (sampleToUpdateRequiredPair.updateRequired) { - final Sample bsdAuthoritySampleWithSraAccession = sampleToUpdateRequiredPair.sample; - - if (bsdAuthoritySampleWithSraAccession != null) { - if (bsdAuthoritySampleWithSraAccession.getDomain() != null) { - bioSamplesAapClient.persistSampleResource(bsdAuthoritySampleWithSraAccession); - } else if (bsdAuthoritySampleWithSraAccession.getWebinSubmissionAccountId() - != null) { - bioSamplesWebinClient.persistSampleResource(bsdAuthoritySampleWithSraAccession); - } else { - throw new RuntimeException( - "Couldn't determine authentication of sample: " + accession); - } - } else { - throw new RuntimeException( - "Failed to fetch BioSample authority sample from BioSamples: " + accession); - } - } - } else { - if (enaSampleConvertedToBioSample != null) { - bioSamplesWebinClient.persistSampleResource(enaSampleConvertedToBioSample); - } else { - throw new RuntimeException("ENA sample converted to BioSample is null: " + accession); - } - } - - success = true; - } catch (final Exception e) { - if (++numRetry == BioSamplesConstants.MAX_RETRIES) { - EnaImportRunner.failures.add(accession); - - throw new RuntimeException("Failed to handle the sample with accession: " + accession); - } - } - } - } catch (final Exception e) { - log.info("Failed to handle ENA sample with accession: " + accession, e); - - throw e; - } - - return null; - } - - private SampleToUpdateRequiredPair buildBsdAuthoritySampleWithSraAccession( - final String accession) { - final Optional> sampleOptionalInBioSamples = - bioSamplesWebinClient.fetchSampleResource(accession, false); - final Sample sampleInBioSamples = - sampleOptionalInBioSamples.map(EntityModel::getContent).orElse(null); - final EraproSample eraproSample = eraProDao.getSampleDetailsByBioSampleId(accession); - final String eraproSampleSampleId = eraproSample.getSampleId(); - boolean sampleSaveRequired = false; - - assert sampleInBioSamples != null; - - final Set attributesInBioSample = sampleInBioSamples.getAttributes(); - - if (attributesInBioSample.stream() - .noneMatch(attribute -> attribute.getType().equals(SRA_ACCESSION))) { - log.info( - "Sample " - + accession - + " doesn't have SRA accession, creating new SRA accession attribute with SAMPLE_ID from ENA"); - - sampleSaveRequired = true; - attributesInBioSample.add(Attribute.build(SRA_ACCESSION, eraproSampleSampleId)); - } else { - final Attribute sraAccessionAttribute = - attributesInBioSample.stream() - .filter(attribute -> attribute.getType().equals(SRA_ACCESSION)) - .findFirst() - .get(); - - if (!Objects.equals(sraAccessionAttribute.getValue(), eraproSampleSampleId)) { - log.info( - "Sample " - + accession - + " has SRA accession mismatch with ENA, this shouldn't happen - investigate"); - - /*sampleSaveRequired = true; - attributesInBioSample.removeIf(attribute -> attribute.getType().equals(SRA_ACCESSION)); - attributesInBioSample.add(Attribute.build(SRA_ACCESSION, eraproSampleSampleId));*/ - // August 12, 2024: dont do anything to these samples, ENA and BSD auth samples shouldn't - // have this mismatch from the end of 2023 - } else { - log.info("Sample " + accession + " has SRA accession match with ENA, no action required"); - } - } - - return new SampleToUpdateRequiredPair( - Sample.Builder.fromSample(sampleInBioSamples).withAttributes(attributesInBioSample).build(), - sampleSaveRequired); - } - - private static class SampleToUpdateRequiredPair { - private final Sample sample; - private final boolean updateRequired; - - private SampleToUpdateRequiredPair(final Sample sample, final boolean updateRequired) { - this.sample = sample; - this.updateRequired = updateRequired; - } - } - - private Void handleSuppressedKilledSample(final SpecialTypes specialTypes) - throws DocumentException { - final Optional> sampleOptionalInBioSamples = - bioSamplesWebinClient.fetchSampleResource(accession, false); - final Sample sampleInBioSamples = - sampleOptionalInBioSamples.map(EntityModel::getContent).orElse(null); - final String statusHandled = specialTypes.name().toLowerCase(); - - if (sampleInBioSamples != null) { - final Set sampleAttributes = sampleInBioSamples.getAttributes(); - final Attribute insdcStatusAttribute = - sampleAttributes.stream() - .filter(attribute -> attribute.getType().equals("INSDC Status")) - .findFirst() - .orElse(null); - - if (insdcStatusAttribute == null) { - log.info( - "Sample exists in BioSamples and INSDC status is not set, adding INSDC status as " - + statusHandled - + " for " - + accession); - - sampleAttributes.add(Attribute.build("INSDC Status", statusHandled)); - - bioSamplesWebinClient.persistSampleResource( - Sample.Builder.fromSample(sampleInBioSamples) - .withAttributes(sampleAttributes) - .withStatus(SampleStatus.valueOf(String.valueOf(specialTypes))) - .build()); - - addToList(specialTypes); - } else if (!insdcStatusAttribute.getValue().equalsIgnoreCase(statusHandled)) { - log.info( - "Sample exists in BioSamples and INSDC status is not " - + statusHandled - + ", adding INSDC status as " - + statusHandled - + " for " - + accession); - - sampleAttributes.remove(insdcStatusAttribute); - sampleAttributes.add(Attribute.build("INSDC Status", statusHandled)); - - bioSamplesWebinClient.persistSampleResource( - Sample.Builder.fromSample(sampleInBioSamples) - .withAttributes(sampleAttributes) - .withStatus(SampleStatus.valueOf(String.valueOf(specialTypes))) - .build()); - - addToList(specialTypes); - } else { - log.info( - "Sample exists in BioSamples and INSDC status is " - + statusHandled - + " ,no change required for " - + accession); - - bioSamplesWebinClient.persistSampleResource( - Sample.Builder.fromSample(sampleInBioSamples) - .withStatus(SampleStatus.valueOf(String.valueOf(specialTypes))) - .build()); - - addToList(specialTypes); - } - } else { - log.info( - "Sample doesn't exist in BioSamples, fetching " - + sampleInBioSamples - + " sample from ERAPRO " - + accession); - try { - final Sample sample = enaSampleToBioSampleConversionService.enrichSample(accession); - - boolean success = false; - int numRetry = 0; - - while (!success) { - try { - bioSamplesWebinClient.persistSampleResource(sample); - - addToList(specialTypes); - - success = true; - } catch (final Exception e) { - if (++numRetry == BioSamplesConstants.MAX_RETRIES) { - EnaImportRunner.failures.add(accession); - - throw new RuntimeException( - "Failed to handle the ENA suppressed/ killed sample with accession " + accession); - } - } - } - } catch (final Exception e) { - log.info("Failed to handle ENA suppressed/ killed sample with accession " + accession, e); - - throw e; - } - } - - return null; - } - - private void addToList(final SpecialTypes specialTypes) { - if (specialTypes.equals(SpecialTypes.SUPPRESSED)) { - EnaImportRunner.todaysSuppressedSamples.add(accession); - } else { - EnaImportRunner.todaysKilledSamples.add(accession); - } - } -} diff --git a/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/EnaImportCallableFactory.java b/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/EnaImportCallableFactory.java deleted file mode 100644 index 7142ba19b6..0000000000 --- a/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/EnaImportCallableFactory.java +++ /dev/null @@ -1,69 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.ena; - -import java.util.concurrent.Callable; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.stereotype.Service; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.service.EnaSampleToBioSampleConversionService; -import uk.ac.ebi.biosamples.service.EraProDao; - -@Service -public class EnaImportCallableFactory { - private final BioSamplesClient bioSamplesWebinClient; - private final BioSamplesClient bioSamplesAapClient; - private final EnaSampleToBioSampleConversionService enaSampleToBioSampleConversionService; - private final EraProDao eraProDao; - - public EnaImportCallableFactory( - @Qualifier("WEBINCLIENT") final BioSamplesClient bioSamplesWebinClient, - final BioSamplesClient bioSamplesAapClient, - final EnaSampleToBioSampleConversionService enaSampleToBioSampleConversionService, - final EraProDao eraProDao) { - this.bioSamplesWebinClient = bioSamplesWebinClient; - this.bioSamplesAapClient = bioSamplesAapClient; - this.enaSampleToBioSampleConversionService = enaSampleToBioSampleConversionService; - this.eraProDao = eraProDao; - } - - /** - * Builds callable for dealing most ENA samples - * - * @param accession The accession passed - * @return the callable, {@link EnaImportCallable} - */ - public Callable build(final String accession) { - return new EnaImportCallable( - accession, - bioSamplesWebinClient, - bioSamplesAapClient, - enaSampleToBioSampleConversionService, - eraProDao, - null); - } - - /** - * Builds callable for dealing most ENA samples - * - * @param accession The accession passed - * @return the callable, {@link EnaImportCallable} - */ - public Callable build(final String accession, final SpecialTypes specialTypes) { - return new EnaImportCallable( - accession, - bioSamplesWebinClient, - bioSamplesAapClient, - enaSampleToBioSampleConversionService, - eraProDao, - specialTypes); - } -} diff --git a/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/EnaImportRunner.java b/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/EnaImportRunner.java deleted file mode 100644 index 4d83c24e33..0000000000 --- a/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/EnaImportRunner.java +++ /dev/null @@ -1,550 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.ena; - -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.IOException; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.time.Instant; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.boot.ApplicationArguments; -import org.springframework.boot.ApplicationRunner; -import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; -import org.springframework.hateoas.EntityModel; -import org.springframework.jdbc.core.RowCallbackHandler; -import org.springframework.stereotype.Component; -import org.springframework.web.client.HttpClientErrorException; -import uk.ac.ebi.biosamples.PipelinesProperties; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.model.PipelineName; -import uk.ac.ebi.biosamples.model.Sample; -import uk.ac.ebi.biosamples.mongo.model.MongoPipeline; -import uk.ac.ebi.biosamples.mongo.repository.MongoPipelineRepository; -import uk.ac.ebi.biosamples.mongo.util.PipelineCompletionStatus; -import uk.ac.ebi.biosamples.service.EraProDao; -import uk.ac.ebi.biosamples.service.SampleCallbackResult; -import uk.ac.ebi.biosamples.utils.AdaptiveThreadPoolExecutor; -import uk.ac.ebi.biosamples.utils.PipelineUniqueIdentifierGenerator; -import uk.ac.ebi.biosamples.utils.PipelineUtils; -import uk.ac.ebi.biosamples.utils.ThreadUtils; - -@Component -@ConditionalOnProperty( - prefix = "job.autorun", - name = "enabled", - havingValue = "true", - matchIfMissing = true) -public class EnaImportRunner implements ApplicationRunner { - private static final Logger log = LoggerFactory.getLogger(EnaImportRunner.class); - @Autowired private PipelinesProperties pipelinesProperties; - @Autowired private EraProDao eraProDao; - @Autowired private EnaImportCallableFactory enaImportCallableFactory; - @Autowired private MongoPipelineRepository mongoPipelineRepository; - - @Autowired - @Qualifier("WEBINCLIENT") - private BioSamplesClient bioSamplesClient; - - private final Map> futures = new LinkedHashMap<>(); - static final Set failures = new HashSet<>(); - static final Set todaysSuppressedSamples = new HashSet<>(); - static final Set todaysKilledSamples = new HashSet<>(); - - @Override - public void run(final ApplicationArguments args) throws Exception { - log.info("Processing ENA pipeline..."); - - boolean isPassed = true; - boolean importSuppressedAndKilled = true; - - String pipelineFailureCause = null; - - try { - // date format is YYYY-mm-dd - final LocalDate fromDate; - final LocalDate toDate; - String filePath = null; - String fileType = null; - - if (args.getOptionNames().contains("from")) { - fromDate = - LocalDate.parse( - args.getOptionValues("from").iterator().next(), DateTimeFormatter.ISO_LOCAL_DATE); - } else { - fromDate = LocalDate.parse("1000-01-01", DateTimeFormatter.ISO_LOCAL_DATE); - } - - if (args.getOptionNames().contains("until")) { - toDate = - LocalDate.parse( - args.getOptionValues("until").iterator().next(), DateTimeFormatter.ISO_LOCAL_DATE); - } else { - toDate = LocalDate.parse("3000-01-01", DateTimeFormatter.ISO_LOCAL_DATE); - } - - if (args.getOptionNames().contains("filePath")) { - filePath = args.getOptionValues("filePath").iterator().next(); - } - - if (args.getOptionNames().contains("fileType")) { - fileType = args.getOptionValues("fileType").iterator().next(); - } - - if (args.getOptionNames().contains("importSuppressedAndKilled")) { - if (args.getOptionValues("importSuppressedAndKilled") - .iterator() - .next() - .equalsIgnoreCase("false")) { - importSuppressedAndKilled = false; - } - } - - log.info( - "Running from date range from " - + fromDate - + " until " - + toDate - + " for file " - + filePath - + " of type " - + fileType); - - // log.info("Suppression Runner and killed runner is to be executed: " + - // importSuppressedAndKilled); - - // Import ENA samples - importEraSamples(fromDate, toDate); - - // Import ERA and BSD authority samples from file - /*if (filePath != null) { - if (fileType != null && fileType.equals("BSD_AUTHORITY")) { - updateBSDAuthoritySamplesFromFile(filePath); - } else { - importEraSamplesFromFile(filePath); - } - }*/ - - /*if (importSuppressedAndKilled) { - try { - // handler for suppressed and killed ENA samples - // handleSuppressedAndKilledEnaSamples(); - } catch (final Exception e) { - log.info("Suppression Runner failed"); - } - }*/ - - // Sync BSD authority samples from ERAPRO - // importEraBsdAuthoritySamples(fromDate, toDate); - // syncBsdEnaSampleStatusFromFile_BsdAuthoritySamples(filePath); - } catch (final Exception e) { - log.error("Pipeline failed to finish successfully", e); - pipelineFailureCause = e.getMessage(); - isPassed = false; - - throw e; - } finally { - try { - final MongoPipeline mongoPipeline; - final String pipelineUniqueIdentifier = - PipelineUniqueIdentifierGenerator.getPipelineUniqueIdentifier(PipelineName.ENA); - - if (isPassed) { - mongoPipeline = - new MongoPipeline( - pipelineUniqueIdentifier, - new Date(), - PipelineName.ENA.name(), - PipelineCompletionStatus.COMPLETED, - String.join(",", failures), - null); - } else { - mongoPipeline = - new MongoPipeline( - pipelineUniqueIdentifier, - new Date(), - PipelineName.ENA.name(), - PipelineCompletionStatus.FAILED, - String.join(",", failures), - pipelineFailureCause); - } - - mongoPipelineRepository.insert(mongoPipeline); - - PipelineUtils.writeFailedSamplesToFile(failures, PipelineName.ENA); - PipelineUtils.writeToFile(todaysSuppressedSamples, PipelineName.ENA, "SUPPRESSED"); - } catch (final Exception e) { - log.info("Error in persisting pipeline status to database " + e.getMessage()); - } - } - } - - private void updateBSDAuthoritySamplesFromFile(String filePath) { - final List accessions = getAccessions(filePath); - - // todo handle this - } - - private static List getAccessions(String filePath) { - final List accessions = new ArrayList<>(); - - try (final BufferedReader bufferedReader = new BufferedReader(new FileReader(filePath))) { - String accession; - - while ((accession = bufferedReader.readLine()) != null) { - accessions.add(accession); - } - } catch (final IOException e) { - final String error = "Error reading file " + filePath; - - log.error(error); - throw new RuntimeException(error); - } - - return accessions; - } - - private void importEraSamplesFromFile(final String filePath) throws Exception { - final List accessions = getAccessions(filePath); - final List sampleCallbackResults = - eraProDao.doSampleCallbackForAccessions(accessions, false); - - handleSamples(sampleCallbackResults); - } - - private void syncBsdEnaSampleStatusFromFile_BsdAuthoritySamples(final String filePath) { - final List accessions = getAccessions(filePath); - final List sampleCallbackResults = - eraProDao.doSampleCallbackForAccessions(accessions, true); - - syncBsdEnaSampleStatus(sampleCallbackResults); - } - - private void syncBsdEnaSampleStatus(final List sampleCallbackResults) { - sampleCallbackResults.forEach( - sampleCallbackResult -> { - final String biosampleId = sampleCallbackResult.getBiosampleId(); - final int statusId = sampleCallbackResult.getStatusId(); - final Optional> biosampleOptional = - bioSamplesClient.fetchSampleResource(biosampleId, false); - - if (biosampleOptional.isPresent()) { - final Sample biosample = biosampleOptional.get().getContent(); - - if (biosample.getRelease().isBefore(Instant.now())) { - log.info("Sample " + biosampleId + " is public in BioSamples, check ENA status"); - - if (statusId == 2) { - log.info("Sample " + biosampleId + " is public in BioSamples, but private in ENA"); - eraProDao.updateSampleStatus(biosampleId); - log.info("Sample " + biosampleId + " status updated to public in ENA"); - } else { - log.info("Sample " + biosampleId + " is not private in ENA, no actions required"); - } - } else { - log.info("Sample " + biosampleId + " is private in BioSamples"); - } - } - }); - } - - private void handleSamples(List sampleCallbackResults) throws Exception { - final EraRowHandler eraRowHandler = new EraRowHandler(enaImportCallableFactory); - - if (pipelinesProperties.getThreadCount() == 0) { - sampleCallbackResults.forEach( - sampleCallbackResult -> eraRowHandler.processRow(sampleCallbackResult, false)); - } else { - try (final AdaptiveThreadPoolExecutor executorService = - AdaptiveThreadPoolExecutor.create( - 100, - 10000, - false, - pipelinesProperties.getThreadCount(), - pipelinesProperties.getThreadCountMax())) { - - sampleCallbackResults.forEach( - sampleCallbackResult -> - futures.put( - sampleCallbackResult.getBiosampleId(), - executorService.submit( - Objects.requireNonNull( - eraRowHandler.processRow(sampleCallbackResult, false))))); - - checkFutures(100); - } - } - } - - /** - * Handler for suppressed ENA samples. If status of sample is different in BioSamples, status will - * be updated so SUPPRESSED. If sample doesn't exist it will be created - * - * @throws Exception in case of failures - */ - private void handleSuppressedAndKilledEnaSamples() throws Exception { - log.info( - "Fetching all suppressed ENA samples. " - + "If they exist in BioSamples with different status, their status will be updated. "); - try (final AdaptiveThreadPoolExecutor executorService = - AdaptiveThreadPoolExecutor.create( - 100, - 10000, - false, - pipelinesProperties.getThreadCount(), - pipelinesProperties.getThreadCountMax())) { - - final EnaSuppressedAndKilledSamplesCallbackHandler - enaSuppressedAndKilledSamplesCallbackHandler = - new EnaSuppressedAndKilledSamplesCallbackHandler( - executorService, enaImportCallableFactory, futures, SpecialTypes.SUPPRESSED); - eraProDao.doGetSuppressedEnaSamples(enaSuppressedAndKilledSamplesCallbackHandler); - - log.info("waiting for futures"); // wait for anything to finish - - checkFutures(100); - } - } - - /** - * @author dgupta - *

{@link RowCallbackHandler} for suppressed ENA samples - */ - private static class EnaSuppressedAndKilledSamplesCallbackHandler implements RowCallbackHandler { - private final AdaptiveThreadPoolExecutor executorService; - private final EnaImportCallableFactory enaCallableFactory; - private final Map> futures; - private final SpecialTypes specialTypes; - - public EnaSuppressedAndKilledSamplesCallbackHandler( - final AdaptiveThreadPoolExecutor executorService, - final EnaImportCallableFactory enaCallableFactory, - final Map> futures, - final SpecialTypes specialTypes) { - this.executorService = executorService; - this.enaCallableFactory = enaCallableFactory; - this.futures = futures; - this.specialTypes = specialTypes; - } - - @Override - public void processRow(final ResultSet rs) throws SQLException { - final String sampleAccession = rs.getString("BIOSAMPLE_ID"); - final Callable callable = enaCallableFactory.build(sampleAccession, specialTypes); - - execute(sampleAccession, callable, executorService, futures); - } - - private static void execute( - final String sampleAccession, - final Callable callable, - final AdaptiveThreadPoolExecutor executorService, - final Map> futures) { - if (executorService == null) { - try { - callable.call(); - } catch (final RuntimeException e) { - throw e; - } catch (final Exception e) { - throw new RuntimeException(e); - } - } else { - futures.put(sampleAccession, executorService.submit(callable)); - } - } - } - - private void importEraSamples(final LocalDate fromDate, final LocalDate toDate) throws Exception { - log.info("Handling ENA Samples"); - - final List sampleCallbackResults = - getAllEnaSamplesToHandle(fromDate, toDate); - handleSamples(sampleCallbackResults); - } - - private void checkFutures(final int maxSize) { - try { - ThreadUtils.checkFutures(futures, maxSize); - } catch (final HttpClientErrorException e) { - log.error("HTTP Client error body : " + e.getResponseBodyAsString()); - throw e; - } catch (final RuntimeException e) { - throw e; - } catch (final ExecutionException e) { - throw new RuntimeException(e.getCause()); - } catch (final InterruptedException e) { - throw new RuntimeException(e); - } - } - - private void importEraBsdAuthoritySamples(final LocalDate fromDate, final LocalDate toDate) - throws Exception { - log.info("Handling ENA BioSample authority Samples"); - - final List sampleCallbackResults = - getAllEnaBsdAuthoritySamplesToHandle(fromDate, toDate); - final EraRowHandler eraRowHandler = new EraRowHandler(enaImportCallableFactory); - - log.info("Total number of samples to be handled is " + sampleCallbackResults.size()); - - if (pipelinesProperties.getThreadCount() == 0) { - sampleCallbackResults.forEach( - sampleCallbackResult -> eraRowHandler.processRow(sampleCallbackResult, true)); - } else { - try (final AdaptiveThreadPoolExecutor executorService = - AdaptiveThreadPoolExecutor.create( - 100, - 10000, - false, - pipelinesProperties.getThreadCount(), - pipelinesProperties.getThreadCountMax())) { - - sampleCallbackResults.forEach( - sampleCallbackResult -> - futures.put( - sampleCallbackResult.getBiosampleId(), - executorService.submit( - Objects.requireNonNull( - eraRowHandler.processRow(sampleCallbackResult, true))))); - - checkFutures(100); - } - } - } - - private List getAllEnaSamplesToHandle( - final LocalDate fromDate, final LocalDate toDate) { - final int MAX_RETRIES = 5; - List sampleCallbackResults = new ArrayList<>(); - boolean success = false; - int numRetry = 0; - - while (!success) { - try { - sampleCallbackResults = eraProDao.doSampleCallback(fromDate, toDate); - - log.info("Total number of samples to be handled is " + sampleCallbackResults.size()); - - success = true; - } catch (final Exception e) { - log.error("Fetching from ERAPRO failed with exception - retry ", e); - - if (++numRetry == MAX_RETRIES) { - throw new RuntimeException("Permanent failure in fetching samples from ERAPRO"); - } - } - } - - return sampleCallbackResults; - } - - private List getAllEnaBsdAuthoritySamplesToHandle( - final LocalDate fromDate, final LocalDate toDate) { - final int MAX_RETRIES = 5; - List sampleCallbackResults = new ArrayList<>(); - boolean success = false; - int numRetry = 0; - - while (!success) { - try { - sampleCallbackResults = eraProDao.doSampleCallbackForBsdAuthoritySamples(fromDate, toDate); - - success = true; - } catch (final Exception e) { - log.error("Fetching from ERAPRO failed with exception - retry ", e); - - if (++numRetry == MAX_RETRIES) { - throw new RuntimeException("Permanent failure in fetching samples from ERAPRO"); - } - } - } - - return sampleCallbackResults; - } - - private static class EraRowHandler { - private final EnaImportCallableFactory enaImportCallableFactory; - - EraRowHandler(final EnaImportCallableFactory enaImportCallableFactory) { - this.enaImportCallableFactory = enaImportCallableFactory; - } - - private enum ENAStatus { - PRIVATE(2), - CANCELLED(3), - PUBLIC(4), - SUPPRESSED(5), - KILLED(6), - TEMPORARY_SUPPRESSED(7), - TEMPORARY_KILLED(8); - - private final int value; - private static final Map enaSampleStatusIdToNameMap = new HashMap<>(); - - ENAStatus(final int value) { - this.value = value; - } - - static { - for (final ENAStatus enaStatus : ENAStatus.values()) { - enaSampleStatusIdToNameMap.put(enaStatus.value, enaStatus); - } - } - - public static ENAStatus valueOf(final int pageType) { - return enaSampleStatusIdToNameMap.get(pageType); - } - } - - public Callable processRow( - final SampleCallbackResult sampleCallbackResult, final boolean bsdAuthority) { - final String biosampleId = sampleCallbackResult.getBiosampleId(); - final int statusId = sampleCallbackResult.getStatusId(); - final java.sql.Date lastUpdated = sampleCallbackResult.getLastUpdated(); - final ENAStatus enaStatus = ENAStatus.valueOf(statusId); - - switch (enaStatus) { - case PUBLIC: - case PRIVATE: - case SUPPRESSED: - case TEMPORARY_SUPPRESSED: - case KILLED: - case TEMPORARY_KILLED: - case CANCELLED: - log.info( - String.format( - "%s is being handled as status is %s and last updated is %s (searched by first public and last updated)", - biosampleId, enaStatus.name(), lastUpdated)); - - if (bsdAuthority) { - return enaImportCallableFactory.build(biosampleId, SpecialTypes.BSD_AUTHORITY); - } else { - return enaImportCallableFactory.build(biosampleId); - } - default: - log.info( - String.format("%s would be ignored as status is %s", biosampleId, enaStatus.name())); - } - - return null; - } - } -} diff --git a/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/SpecialTypes.java b/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/SpecialTypes.java deleted file mode 100644 index 862b6226ef..0000000000 --- a/pipelines/ena/src/main/java/uk/ac/ebi/biosamples/ena/SpecialTypes.java +++ /dev/null @@ -1,17 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.ena; - -public enum SpecialTypes { - BSD_AUTHORITY, - SUPPRESSED, - KILLED -} diff --git a/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/EnaSampleXmlEnhancerTest.java b/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/EnaSampleXmlEnhancerTest.java deleted file mode 100644 index 1d18b465db..0000000000 --- a/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/EnaSampleXmlEnhancerTest.java +++ /dev/null @@ -1,224 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.ena; - -import static org.junit.Assert.assertEquals; -import static uk.ac.ebi.biosamples.ena.ExampleSamples.*; -import static uk.ac.ebi.biosamples.service.EnaXmlUtil.pretty; - -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.test.context.junit4.SpringRunner; -import uk.ac.ebi.biosamples.service.EnaSampleXmlEnhancer; -import uk.ac.ebi.biosamples.service.EraProDao; -import uk.ac.ebi.biosamples.service.EraproSample; - -@RunWith(SpringRunner.class) -@SpringBootTest( - classes = {TestApplication.class, EnaSampleXmlEnhancer.class, EraProDao.class}, - properties = {"job.autorun.enabled=false"}) -public class EnaSampleXmlEnhancerTest { - - @Autowired private EnaSampleXmlEnhancer enaSampleXmlEnhancer; - - private EraproSample eraproSample; - - @Before - public void setup() { - eraproSample = new EraproSample(); - eraproSample.lastUpdated = "2015-06-23"; - eraproSample.firstPublic = "2010-02-26"; - eraproSample.brokerName = null; - eraproSample.biosampleId = "SAMN00001603"; - eraproSample.centreName = "1000G"; - eraproSample.taxId = Long.valueOf(9606); - eraproSample.scientificName = "Homo sapiens"; - } - - @Test - public void test_xml_with_all_rules() { - assertEquals( - expectedFullSampleXml, enaSampleXmlEnhancer.applyAllRules(fullSampleXml, eraproSample)); - } - - @Test - public void test_center_name_rule_fixes_applicable_ebi_xml() { - eraproSample.centreName = "expanded center name"; - assertEquals( - pretty(expectedModifiedCenterNameSampleXml), - enaSampleXmlEnhancer.applyRules( - exampleSampleXml, eraproSample, EnaSampleXmlEnhancer.CenterNameRule.INSTANCE)); - } - - @Test - public void test_biosamples_rule_fixes_applicable_ebi_xml() { - assertEquals( - expectedModifiedEbiBiosamplesSampleXml, - enaSampleXmlEnhancer.applyRules( - exampleSampleXml, eraproSample, EnaSampleXmlEnhancer.BioSamplesIdRule.INSTANCE)); - } - - @Test - public void test_broker_rule_fixes_applicable_ebi_xml() { - eraproSample.brokerName = "broker"; - assertEquals( - pretty(expectedModifiedEbiBrokerSampleXml), - enaSampleXmlEnhancer.applyRules( - exampleSampleXml, eraproSample, EnaSampleXmlEnhancer.BrokerRule.INSTANCE)); - } - - @Test - public void test_broker_rule_fixes_applicable_ncbi_xml() { - assertEquals( - pretty(expectedModifiedNcbiBrokerSampleXml), - enaSampleXmlEnhancer.applyRules( - ncbiSampleXml, eraproSample, EnaSampleXmlEnhancer.BrokerRule.INSTANCE)); - } - - @Test - public void test_broker_rule_fixes_applicable_ddbj_xml() { - assertEquals( - pretty(expectedModifiedDdbjBrokerSampleXml), - enaSampleXmlEnhancer.applyRules( - ddbjSampleXml, eraproSample, EnaSampleXmlEnhancer.BrokerRule.INSTANCE)); - } - - @Test - public void test_broker_rule_does_not_change_non_applicable_xml() { - eraproSample = new EraproSample(); - eraproSample.lastUpdated = "2015-06-23"; - eraproSample.firstPublic = "2010-02-26"; - eraproSample.brokerName = null; - eraproSample.biosampleId = ""; - eraproSample.centreName = "1000G"; - eraproSample.taxId = Long.valueOf("9606"); - eraproSample.scientificName = "Homo sapiens"; - assertEquals( - pretty(exampleSampleXml), - enaSampleXmlEnhancer.applyRules( - exampleSampleXml, eraproSample, EnaSampleXmlEnhancer.BrokerRule.INSTANCE)); - } - - @Test - public void test_alias_rule_fixes_applicable_xml() { - assertEquals( - pretty(expectedModifiedMissingAliasSampleXml), - enaSampleXmlEnhancer.applyRules( - missingAliasSampleXml, eraproSample, EnaSampleXmlEnhancer.AliasRule.INSTANCE)); - } - - @Test - public void test_alias_rule_does_not_change_non_applicable_xml() { - assertEquals( - pretty(exampleSampleXml), - enaSampleXmlEnhancer.applyRules( - exampleSampleXml, eraproSample, EnaSampleXmlEnhancer.AliasRule.INSTANCE)); - } - - @Test - public void test_namespace_rule_fixes_applicable_xml() { - assertEquals( - pretty(exampleSampleXml), - enaSampleXmlEnhancer.applyRules( - missingNamespaceSampleXml, eraproSample, EnaSampleXmlEnhancer.NamespaceRule.INSTANCE)); - assertEquals( - pretty(exampleSampleXml), - enaSampleXmlEnhancer.applyRules( - emptyNamespaceSampleXml, eraproSample, EnaSampleXmlEnhancer.NamespaceRule.INSTANCE)); - } - - @Test - public void test_namespace_rule_does_not_change_non_applicable_xml() { - assertEquals( - pretty(exampleSampleXml), - enaSampleXmlEnhancer.applyRules( - exampleSampleXml, eraproSample, EnaSampleXmlEnhancer.NamespaceRule.INSTANCE)); - } - - @Test - public void test_link_removal_rule_fixes_applicable_xml() { - assertEquals( - expectedModifiedNcbiLinksRemoved, - enaSampleXmlEnhancer.applyRules( - ncbiSampleXml, eraproSample, EnaSampleXmlEnhancer.LinkRemovalRule.INSTANCE)); - } - - @Test - public void test_first_public_and_last_updated_for_applicable_xml() { - eraproSample.lastUpdated = "2018-02-01"; - eraproSample.firstPublic = "2018-01-01"; - assertEquals( - exampleSampleXmlWithDates, - enaSampleXmlEnhancer.applyRules( - exampleSampleXml, eraproSample, EnaSampleXmlEnhancer.DatesRule.INSTANCE)); - } - - @Test - public void test_title_rule_fixes_applicable_xml() { - eraproSample = new EraproSample(); - eraproSample.lastUpdated = "2018-03-09"; - eraproSample.firstPublic = "2010-02-26"; - eraproSample.brokerName = null; - eraproSample.biosampleId = "'SAMEA749880'"; - eraproSample.centreName = "Wellcome Sanger Institute"; - eraproSample.taxId = Long.valueOf("580240"); - eraproSample.scientificName = "Saccharomyces cerevisiae W303"; - assertEquals( - exampleSampleWithTitleAddedXml, - enaSampleXmlEnhancer.applyRules( - exampleSampleWithoutTitleXml, eraproSample, EnaSampleXmlEnhancer.TitleRule.INSTANCE)); - } - - @Test - public void test_taxon_fix_rule_fixes_applicable_xml() { - eraproSample = new EraproSample(); - eraproSample.lastUpdated = "2015-06-23"; - eraproSample.firstPublic = "2010-02-26"; - eraproSample.brokerName = null; - eraproSample.biosampleId = "'SAMN00014227'"; - eraproSample.centreName = "Baylor College of Medicine"; - eraproSample.taxId = Long.valueOf("7227"); - eraproSample.scientificName = null; - assertEquals( - exampleSampleThatHasBeenTaxonFixed, - enaSampleXmlEnhancer.applyRules( - exampleSampleThatCanBeTaxonFixed, - eraproSample, - EnaSampleXmlEnhancer.TaxonRule.INSTANCE)); - } - - @Test - public void test_taxon_fix_rule_fixes_applicable_xml_SAMN02356578() { - eraproSample.lastUpdated = "2015-06-23"; - eraproSample.firstPublic = "2013-09-25"; - eraproSample.brokerName = null; - eraproSample.biosampleId = "'SAMN02356578'"; - eraproSample.centreName = "Broad Institute"; - eraproSample.taxId = Long.valueOf("1400346"); - eraproSample.scientificName = "Acinetobacter lwoffii NIPH 512"; - assertEquals( - exampleSampleThatHasBeenTaxonFixedSAMN02356578, - enaSampleXmlEnhancer.applyRules( - exampleSampleThatCanBeTaxonFixedSAMN02356578, - eraproSample, - EnaSampleXmlEnhancer.TaxonRule.INSTANCE)); - } - - @Test - public void test_pretty() { - final String pretty1 = pretty(expectedModifiedNcbiLinksRemoved); - final String pretty2 = pretty(pretty1); - assertEquals(pretty1, pretty2); - } -} diff --git a/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/ExampleSamples.java b/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/ExampleSamples.java deleted file mode 100644 index 36be0a8c74..0000000000 --- a/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/ExampleSamples.java +++ /dev/null @@ -1,906 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.ena; - -public class ExampleSamples { - - public static String exampleSampleXml = - "\n" - + " \n" - + " \n" - + " ERS2295402\n" - + " K510\n" - + " \n" - + " unknown/stray cat\n" - + " \n" - + " 9685\n" - + " Felis catus\n" - + " domestic cat\n" - + " \n" - + " unknown/stray cat female\n" - + " \n" - + " \n" - + " ENA-CHECKLIST\n" - + " ERC000011\n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String exampleSampleWithoutTitleXml = - "\n" - + " \n" - + " \n" - + " ERS000003\n" - + " Solexa sequencing of Saccharomyces cerevisiae strain W303 random 200 bp library\n" - + " \n" - + " \n" - + " 580240\n" - + " Saccharomyces cerevisiae W303\n" - + " \n" - + " Solexa sequencing of Saccharomyces cerevisiae strain W303 random 200 bp library\n" - + " \n" - + " \n" - + " strain\n" - + " W303\n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String exampleSampleThatCanBeTaxonFixedSAMN02356578 = - "\n" - + " \n" - + " \n" - + " SRS485026\n" - + " SAMN02356578\n" - + " Acinetobacter lwoffii CIP64.10\n" - + " \n" - + " General Sample for Acinetobacter lwoffii NIPH 512\n" - + " \n" - + " 1400346\n" - + " Acinetobacter lwoffii NIPH 512\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " bioproject\n" - + " 219244\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " collection_date\n" - + " 2011\n" - + " \n" - + " \n" - + " geo_loc_name\n" - + " Unknown\n" - + " \n" - + " \n" - + " specific_host\n" - + " Homo sapiens\n" - + " \n" - + " \n" - + " isolation_source\n" - + " Unknown\n" - + " \n" - + " \n" - + " strain\n" - + " NIPH 512\n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String exampleSampleThatCanBeTaxonFixed = - "\n" - + " \n" - + " \n" - + " SRS003443\n" - + " SAMN00014227\n" - + " BCM-DGRP301\n" - + " \n" - + " Drosophila melanogaster DGRP-301\n" - + " \n" - + " 7227\n" - + " Drosophila melanogaster\n" - + " \n" - + " Drosophila melanogaster DGRP-301\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " https://www.hgsc.bcm.edu/content/drosophila-genetic-reference-panel\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " https://www.hgsc.bcm.edu/content/drosophila-genetic-reference-panel\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " Strain\n" - + " DGRP-301\n" - + " \n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String exampleSampleWithTitleAddedXml = - "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " ERS000003 \n" - + " Solexa sequencing of Saccharomyces cerevisiae strain W303 random 200 bp library \n" - + " \n" - + " \n" - + " 580240 \n" - + " Saccharomyces cerevisiae W303 \n" - + " \n" - + " Solexa sequencing of Saccharomyces cerevisiae strain W303 random 200 bp library \n" - + " \n" - + " \n" - + " strain \n" - + " W303 \n" - + " \n" - + " \n" - + " Saccharomyces cerevisiae W303\n" - + " \n" - + "\n"; - - public static String missingAliasSampleXml = - "\n" - + " \n" - + " \n" - + " ERS2295402\n" - + " K510\n" - + " \n" - + " unknown/stray cat\n" - + " \n" - + " 9685\n" - + " Felis catus\n" - + " domestic cat\n" - + " \n" - + " unknown/stray cat female\n" - + " \n" - + " \n" - + " ENA-CHECKLIST\n" - + " ERC000011\n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String expectedModifiedMissingAliasSampleXml = - "\n" - + " \n" - + " \n" - + " ERS2295402\n" - + " \n" - + " unknown/stray cat\n" - + " \n" - + " 9685\n" - + " Felis catus\n" - + " domestic cat\n" - + " \n" - + " unknown/stray cat female\n" - + " \n" - + " \n" - + " ENA-CHECKLIST\n" - + " ERC000011\n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String missingNamespaceSampleXml = - "\n" - + " \n" - + " \n" - + " ERS2295402\n" - + " K510\n" - + " \n" - + " unknown/stray cat\n" - + " \n" - + " 9685\n" - + " Felis catus\n" - + " domestic cat\n" - + " \n" - + " unknown/stray cat female\n" - + " \n" - + " \n" - + " ENA-CHECKLIST\n" - + " ERC000011\n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String emptyNamespaceSampleXml = - "\n" - + " \n" - + " \n" - + " ERS2295402\n" - + " K510\n" - + " \n" - + " unknown/stray cat\n" - + " \n" - + " 9685\n" - + " Felis catus\n" - + " domestic cat\n" - + " \n" - + " unknown/stray cat female\n" - + " \n" - + " \n" - + " ENA-CHECKLIST\n" - + " ERC000011\n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String expectedModifiedNcbiBrokerSampleXml = - "\n" - + " \n" - + " \n" - + " SRS000121\n" - + " SAMN00001603\n" - + " GM18561\n" - + " NA18561\n" - + " \n" - + " Coriell GM18561\n" - + " \n" - + " 9606\n" - + " Homo sapiens\n" - + " \n" - + " Human HapMap individual Coriell catalog ID NA18561\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " http://www.ncbi.nlm.nih.gov/SNP/snp_viewBatch.cgi?sbid=1061891\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ind.cgi?ind_id=5153\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " DNA-ID\n" - + " NA18561\n" - + " \n" - + " \n" - + " Super Population Code\n" - + " EAS\n" - + " \n" - + " \n" - + " population\n" - + " CHB\n" - + " \n" - + " \n" - + " Super Population Description\n" - + " East Asian\n" - + " \n" - + " \n" - + " Coriell plate\n" - + " HAPMAPPT02\n" - + " \n" - + " \n" - + " Coriell panel\n" - + " MGP00017\n" - + " \n" - + " \n" - + " Population Description\n" - + " Han Chinese in Beijing, China\n" - + " \n" - + " \n" - + " sex\n" - + " male\n" - + " \n" - + " \n" - + " HapMap sample ID\n" - + " NA18561\n" - + " \n" - + " \n" - + " culture_collection\n" - + " Coriell:GM18561\n" - + " \n" - + " \n" - + " BioSampleModel\n" - + " HapMap\n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String ncbiSampleXml = - "\n" - + " \n" - + " \n" - + " SRS000121\n" - + " SAMN00001603\n" - + " GM18561\n" - + " NA18561\n" - + " \n" - + " Coriell GM18561\n" - + " \n" - + " 9606\n" - + " Homo sapiens\n" - + " \n" - + " Human HapMap individual Coriell catalog ID NA18561\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " http://www.ncbi.nlm.nih.gov/SNP/snp_viewBatch.cgi?sbid=1061891\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ind.cgi?ind_id=5153\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " DNA-ID\n" - + " NA18561\n" - + " \n" - + " \n" - + " Super Population Code\n" - + " EAS\n" - + " \n" - + " \n" - + " population\n" - + " CHB\n" - + " \n" - + " \n" - + " Super Population Description\n" - + " East Asian\n" - + " \n" - + " \n" - + " Coriell plate\n" - + " HAPMAPPT02\n" - + " \n" - + " \n" - + " Coriell panel\n" - + " MGP00017\n" - + " \n" - + " \n" - + " Population Description\n" - + " Han Chinese in Beijing, China\n" - + " \n" - + " \n" - + " sex\n" - + " male\n" - + " \n" - + " \n" - + " HapMap sample ID\n" - + " NA18561\n" - + " \n" - + " \n" - + " culture_collection\n" - + " Coriell:GM18561\n" - + " \n" - + " \n" - + " BioSampleModel\n" - + " HapMap\n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String ddbjSampleXml = - "\n" - + " \n" - + " \n" - + " DRS000378\n" - + " SAMD00015737\n" - + " \n" - + " Ovarian piRNAs from a female that shows W chromosome mutation linked sex differentiation deficiency (Individual No. 4-1)\n" - + " \n" - + " 7091\n" - + " Bombyx mori\n" - + " \n" - + " \n" - + " \n" - + " sample_name\n" - + " DRS000378\n" - + " \n" - + " \n" - + " sex\n" - + " female\n" - + " \n" - + " \n" - + " sample comment\n" - + " piRNA library was constructed from say 4 pupal ovary from a female that shows sex differentiation deficiency\n" - + " \n" - + " \n" - + " dev_stage\n" - + " 4 day old pupa\n" - + " \n" - + " \n" - + " cell type\n" - + " ovary\n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String expectedModifiedDdbjBrokerSampleXml = - "\n" - + " \n" - + " \n" - + " DRS000378\n" - + " SAMD00015737\n" - + " \n" - + " Ovarian piRNAs from a female that shows W chromosome mutation linked sex differentiation deficiency (Individual No. 4-1)\n" - + " \n" - + " 7091\n" - + " Bombyx mori\n" - + " \n" - + " \n" - + " \n" - + " sample_name\n" - + " DRS000378\n" - + " \n" - + " \n" - + " sex\n" - + " female\n" - + " \n" - + " \n" - + " sample comment\n" - + " piRNA library was constructed from say 4 pupal ovary from a female that shows sex differentiation deficiency\n" - + " \n" - + " \n" - + " dev_stage\n" - + " 4 day old pupa\n" - + " \n" - + " \n" - + " cell type\n" - + " ovary\n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String expectedModifiedNcbiLinksRemoved = - "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " SRS000121 \n" - + " SAMN00001603 \n" - + " GM18561 \n" - + " NA18561 \n" - + " \n" - + " Coriell GM18561 \n" - + " \n" - + " 9606 \n" - + " Homo sapiens \n" - + " \n" - + " Human HapMap individual Coriell catalog ID NA18561 \n" - + " \n" - + " \n" - + " \n" - + " DNA-ID \n" - + " NA18561 \n" - + " \n" - + " \n" - + " Super Population Code \n" - + " EAS \n" - + " \n" - + " \n" - + " population \n" - + " CHB \n" - + " \n" - + " \n" - + " Super Population Description \n" - + " East Asian \n" - + " \n" - + " \n" - + " Coriell plate \n" - + " HAPMAPPT02 \n" - + " \n" - + " \n" - + " Coriell panel \n" - + " MGP00017 \n" - + " \n" - + " \n" - + " Population Description \n" - + " Han Chinese in Beijing, China \n" - + " \n" - + " \n" - + " sex \n" - + " male \n" - + " \n" - + " \n" - + " HapMap sample ID \n" - + " NA18561 \n" - + " \n" - + " \n" - + " culture_collection \n" - + " Coriell:GM18561 \n" - + " \n" - + " \n" - + " BioSampleModel \n" - + " HapMap \n" - + " \n" - + " \n" - + " \n" - + "\n"; - - public static String exampleSampleXmlWithDates = - "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " ERS2295402 \n" - + " K510 \n" - + " \n" - + " unknown/stray cat \n" - + " \n" - + " 9685 \n" - + " Felis catus \n" - + " domestic cat \n" - + " \n" - + " unknown/stray cat female \n" - + " \n" - + " \n" - + " ENA-CHECKLIST \n" - + " ERC000011 \n" - + " \n" - + " \n" - + " ENA-FIRST-PUBLIC\n" - + " 2018-01-01\n" - + " \n" - + " \n" - + " ENA-LAST-UPDATE\n" - + " 2018-02-01\n" - + " \n" - + " \n" - + " \n" - + "\n"; - - public static String expectedModifiedEbiBrokerSampleXml = - "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " ERS2295402 \n" - + " K510 \n" - + " \n" - + " unknown/stray cat \n" - + " \n" - + " 9685 \n" - + " Felis catus \n" - + " domestic cat \n" - + " \n" - + " unknown/stray cat female \n" - + " \n" - + " \n" - + " ENA-CHECKLIST \n" - + " ERC000011 \n" - + " \n" - + " \n" - + " \n" - + "\n"; - - public static String expectedModifiedEbiBiosamplesSampleXml = - "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " ERS2295402 \n" - + " K510 \n" - + " SAMN00001603\n" - + " \n" - + " unknown/stray cat \n" - + " \n" - + " 9685 \n" - + " Felis catus \n" - + " domestic cat \n" - + " \n" - + " unknown/stray cat female \n" - + " \n" - + " \n" - + " ENA-CHECKLIST \n" - + " ERC000011 \n" - + " \n" - + " \n" - + " \n" - + "\n"; - - public static String expectedModifiedCenterNameSampleXml = - "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " ERS2295402 \n" - + " K510 \n" - + " \n" - + " unknown/stray cat \n" - + " \n" - + " 9685 \n" - + " Felis catus \n" - + " domestic cat \n" - + " \n" - + " unknown/stray cat female \n" - + " \n" - + " \n" - + " ENA-CHECKLIST \n" - + " ERC000011 \n" - + " \n" - + " \n" - + " \n" - + "\n"; - - public static String fullSampleXml = - "\n" - + " \n" - + " \n" - + " SRS000121\n" - + " SAMN00001603\n" - + " GM18561\n" - + " NA18561\n" - + " \n" - + " Coriell GM18561\n" - + " \n" - + " 9606\n" - + " Homo sapiens\n" - + " \n" - + " Human HapMap individual Coriell catalog ID NA18561\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " http://www.ncbi.nlm.nih.gov/SNP/snp_viewBatch.cgi?sbid=1061891\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ind.cgi?ind_id=5153\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " DNA-ID\n" - + " NA18561\n" - + " \n" - + " \n" - + " Super Population Code\n" - + " EAS\n" - + " \n" - + " \n" - + " population\n" - + " CHB\n" - + " \n" - + " \n" - + " Super Population Description\n" - + " East Asian\n" - + " \n" - + " \n" - + " Coriell plate\n" - + " HAPMAPPT02\n" - + " \n" - + " \n" - + " Coriell panel\n" - + " MGP00017\n" - + " \n" - + " \n" - + " Population Description\n" - + " Han Chinese in Beijing, China\n" - + " \n" - + " \n" - + " sex\n" - + " male\n" - + " \n" - + " \n" - + " HapMap sample ID\n" - + " NA18561\n" - + " \n" - + " \n" - + " culture_collection\n" - + " Coriell:GM18561\n" - + " \n" - + " \n" - + " BioSampleModel\n" - + " HapMap\n" - + " \n" - + " \n" - + " \n" - + ""; - - public static String expectedFullSampleXml = - "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " SRS000121 \n" - + " SAMN00001603 \n" - + " GM18561 \n" - + " NA18561 \n" - + " \n" - + " Coriell GM18561 \n" - + " \n" - + " 9606 \n" - + " Homo sapiens \n" - + " \n" - + " Human HapMap individual Coriell catalog ID NA18561 \n" - + " \n" - + " \n" - + " \n" - + " DNA-ID \n" - + " NA18561 \n" - + " \n" - + " \n" - + " Super Population Code \n" - + " EAS \n" - + " \n" - + " \n" - + " population \n" - + " CHB \n" - + " \n" - + " \n" - + " Super Population Description \n" - + " East Asian \n" - + " \n" - + " \n" - + " Coriell plate \n" - + " HAPMAPPT02 \n" - + " \n" - + " \n" - + " Coriell panel \n" - + " MGP00017 \n" - + " \n" - + " \n" - + " Population Description \n" - + " Han Chinese in Beijing, China \n" - + " \n" - + " \n" - + " sex \n" - + " male \n" - + " \n" - + " \n" - + " HapMap sample ID \n" - + " NA18561 \n" - + " \n" - + " \n" - + " culture_collection \n" - + " Coriell:GM18561 \n" - + " \n" - + " \n" - + " BioSampleModel \n" - + " HapMap \n" - + " \n" - + " \n" - + " ENA-FIRST-PUBLIC\n" - + " 2010-02-26\n" - + " \n" - + " \n" - + " ENA-LAST-UPDATE\n" - + " 2015-06-23\n" - + " \n" - + " \n" - + " \n" - + "\n"; - - public static String exampleSampleThatHasBeenTaxonFixedSAMN02356578 = - "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " SRS485026 \n" - + " SAMN02356578 \n" - + " Acinetobacter lwoffii CIP64.10 \n" - + " \n" - + " General Sample for Acinetobacter lwoffii NIPH 512 \n" - + " \n" - + " 1400346 \n" - + " Acinetobacter lwoffii NIPH 512 \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " bioproject \n" - + " 219244 \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " collection_date \n" - + " 2011 \n" - + " \n" - + " \n" - + " geo_loc_name \n" - + " Unknown \n" - + " \n" - + " \n" - + " specific_host \n" - + " Homo sapiens \n" - + " \n" - + " \n" - + " isolation_source \n" - + " Unknown \n" - + " \n" - + " \n" - + " strain \n" - + " NIPH 512 \n" - + " \n" - + " \n" - + " \n" - + "\n"; - - public static String exampleSampleThatHasBeenTaxonFixed = - "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " SRS003443 \n" - + " SAMN00014227 \n" - + " BCM-DGRP301 \n" - + " \n" - + " Drosophila melanogaster DGRP-301 \n" - + " \n" - + " 7227 \n" - + " \n" - + " \n" - + " \n" - + " Drosophila melanogaster DGRP-301 \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " https://www.hgsc.bcm.edu/content/drosophila-genetic-reference-panel \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " https://www.hgsc.bcm.edu/content/drosophila-genetic-reference-panel \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " Strain \n" - + " DGRP-301 \n" - + " \n" - + " \n" - + " \n" - + " \n" - + "\n"; -} diff --git a/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/MockBioSamplesClient.java b/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/MockBioSamplesClient.java deleted file mode 100644 index e201a0b5a4..0000000000 --- a/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/MockBioSamplesClient.java +++ /dev/null @@ -1,91 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.ena; - -import static org.mockito.Mockito.mock; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; -import java.net.URI; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.boot.web.client.RestTemplateBuilder; -import org.springframework.hateoas.EntityModel; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.client.service.ClientService; -import uk.ac.ebi.biosamples.client.utils.ClientProperties; -import uk.ac.ebi.biosamples.model.Sample; -import uk.ac.ebi.biosamples.service.SampleValidator; - -public class MockBioSamplesClient extends BioSamplesClient { - private final Logger log = LoggerFactory.getLogger(getClass()); - - private static PrintWriter printWriter; - - private static FileWriter fileWriter; - - private final ObjectMapper objectMapper; - - private long count = 0; - - MockBioSamplesClient( - final URI uri, - final URI uriV2, - final RestTemplateBuilder restTemplateBuilder, - final SampleValidator sampleValidator, - final ClientService aapClientService, - final ClientProperties bioSamplesProperties, - final ObjectMapper objectMapper) { - super(uri, uriV2, restTemplateBuilder, sampleValidator, aapClientService, bioSamplesProperties); - this.objectMapper = objectMapper; - try { - fileWriter = new FileWriter("export.json"); - printWriter = new PrintWriter(fileWriter); - } catch (final IOException e) { - e.printStackTrace(); - } - } - - private void logSample(final Sample sample) { - count++; - String sampleJson = ""; - try { - // objectMapper.enable(SerializationFeature.INDENT_OUTPUT); - sampleJson = objectMapper.writeValueAsString(sample); - // System.out.println(sampleJson); - } catch (final JsonProcessingException ignored) { - - } - printWriter.printf("%s\n", sampleJson); - if (count % 500 == 0) { - log.info("Recorded " + count + " samples"); - } - } - - @Override - public EntityModel persistSampleResource(final Sample sample) { - logSample(sample); - return mock(EntityModel.class); - } - - @Override - public void finalize() { - try { - fileWriter.close(); - printWriter.close(); - } catch (final IOException e) { - e.printStackTrace(); - } - } -} diff --git a/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/TestApplication.java b/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/TestApplication.java deleted file mode 100644 index 02fb4915a6..0000000000 --- a/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/TestApplication.java +++ /dev/null @@ -1,105 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.ena; - -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.util.Collections; -import javax.sql.DataSource; -import org.springframework.boot.jdbc.DataSourceBuilder; -import org.springframework.boot.web.client.RestTemplateBuilder; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; -import org.springframework.hateoas.MediaTypes; -import org.springframework.hateoas.RepresentationModel; -import org.springframework.hateoas.mediatype.hal.Jackson2HalModule; -import org.springframework.hateoas.server.mvc.TypeConstrainedMappingJackson2HttpMessageConverter; -import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; -import org.springframework.jdbc.core.JdbcTemplate; -import org.springframework.web.client.RestTemplate; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.client.service.WebinAuthClientService; -import uk.ac.ebi.biosamples.client.utils.ClientProperties; -import uk.ac.ebi.biosamples.service.CurationApplicationService; -import uk.ac.ebi.biosamples.service.SampleValidator; - -@Configuration -public class TestApplication { - @Bean - public RestTemplate restTemplate() { - return new RestTemplate(); - } - - @Bean - public RestTemplateBuilder restTemplateBuilder() { - return new RestTemplateBuilder(); - } - - @Bean - public ObjectMapper objectMapper() { - final ObjectMapper mapper = new ObjectMapper(); - mapper.registerModule(new Jackson2HalModule()); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - final MappingJackson2HttpMessageConverter halConverter = - new TypeConstrainedMappingJackson2HttpMessageConverter(RepresentationModel.class); - halConverter.setObjectMapper(mapper); - halConverter.setSupportedMediaTypes(Collections.singletonList(MediaTypes.HAL_JSON)); - return mapper; - } - - @Bean - SampleValidator sampleValidator() { - return null; - } - - @Bean - WebinAuthClientService webinAuthClientService() { - return null; - } - - @Bean - public ClientProperties clientProperties() { - return new ClientProperties(); - } - - @Bean - public CurationApplicationService curationApplicationService() { - return new CurationApplicationService(); - } - - @Bean("MOCKCLIENT") - public BioSamplesClient bioSamplesClient() { - return new MockBioSamplesClient( - clientProperties().getBiosamplesClientUri(), - clientProperties().getBiosamplesClientUriV2(), - restTemplateBuilder(), - sampleValidator(), - webinAuthClientService(), - clientProperties(), - objectMapper()); - } - - @Bean - public DataSource getDataSource() { - final DataSourceBuilder dataSourceBuilder = DataSourceBuilder.create(); - dataSourceBuilder.driverClassName("oracle.jdbc.OracleDriver"); - dataSourceBuilder.url("jdbc:oracle:thin:@//ora-era-pro-hl.ebi.ac.uk:1531/ERAPRO"); - dataSourceBuilder.username("era_reader"); - dataSourceBuilder.password("reader"); - - return dataSourceBuilder.build(); - } - - @Bean("eraJdbcTemplate") - public JdbcTemplate jdbcTemplate() { - return new JdbcTemplate(getDataSource()); - } -} diff --git a/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/TestConversion.java b/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/TestConversion.java deleted file mode 100644 index 641c43abbf..0000000000 --- a/pipelines/ena/src/test/java/uk/ac/ebi/biosamples/ena/TestConversion.java +++ /dev/null @@ -1,150 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.ena; - -import static org.junit.Assert.fail; - -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; -import org.junit.Ignore; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.jdbc.core.RowCallbackHandler; -import org.springframework.test.context.junit4.SpringRunner; -import uk.ac.ebi.biosamples.PipelinesProperties; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.service.*; - -@Ignore -@RunWith(SpringRunner.class) -@SpringBootTest( - classes = { - TestApplication.class, - EraProDao.class, - EnaSampleToBioSampleConversionService.class, - EnaSampleXmlEnhancer.class, - BioSampleConverter.class, - TaxonomyService.class, - PipelinesProperties.class - }, - properties = {"job.autorun.enabled=false"}) -public class TestConversion { - @Qualifier("MOCKCLIENT") - @Autowired - public BioSamplesClient bioSamplesWebinClient; - - @Autowired private EraProDao eraProDao; - - @Autowired private EnaSampleToBioSampleConversionService enaSampleToBioSampleConversionService; - - @Test - @Ignore - public void test_over_all_samples() { - final LocalDate fromDate = LocalDate.parse("1000-01-01", DateTimeFormatter.ISO_LOCAL_DATE); - final LocalDate toDate = LocalDate.parse("3000-01-01", DateTimeFormatter.ISO_LOCAL_DATE); - eraProDao.doSampleCallback(fromDate, toDate); - } - - @Test - public void test_with_single() { - final RowCallbackHandler rowCallbackHandler = - resultSet -> { - final String sampleAccession = resultSet.getString("BIOSAMPLE_ID"); - final EnaImportCallable enaImportCallable = - new EnaImportCallable( - sampleAccession, - bioSamplesWebinClient, - null, - enaSampleToBioSampleConversionService, - eraProDao, - null); - try { - enaImportCallable.call(); - } catch (final Exception e) { - e.printStackTrace(); - fail(); - } - }; - eraProDao.getSingleSample("SAMEA100000168", rowCallbackHandler); - } - - @Test - public void test_with_suppressed() { - final RowCallbackHandler rowCallbackHandler = - resultSet -> { - final String sampleAccession = resultSet.getString("BIOSAMPLE_ID"); - final EnaImportCallable enaImportCallable = - new EnaImportCallable( - sampleAccession, - bioSamplesWebinClient, - null, - enaSampleToBioSampleConversionService, - eraProDao, - null); - try { - enaImportCallable.call(); - } catch (final Exception e) { - e.printStackTrace(); - fail(); - } - }; - eraProDao.getSingleSample("SAMEA1930638", rowCallbackHandler); - } - - @Test - public void test_with_killed() { - final RowCallbackHandler rowCallbackHandler = - resultSet -> { - final String sampleAccession = resultSet.getString("BIOSAMPLE_ID"); - final EnaImportCallable enaImportCallable = - new EnaImportCallable( - sampleAccession, - bioSamplesWebinClient, - null, - enaSampleToBioSampleConversionService, - eraProDao, - null); - try { - enaImportCallable.call(); - } catch (final Exception e) { - e.printStackTrace(); - fail(); - } - }; - eraProDao.getSingleSample("SAMEA1935107", rowCallbackHandler); - } - - @Test - public void test_with_failing() { - final RowCallbackHandler rowCallbackHandler = - resultSet -> { - final String sampleAccession = resultSet.getString("BIOSAMPLE_ID"); - final EnaImportCallable enaImportCallable = - new EnaImportCallable( - sampleAccession, - bioSamplesWebinClient, - null, - enaSampleToBioSampleConversionService, - eraProDao, - null); - try { - enaImportCallable.call(); - } catch (final Exception e) { - e.printStackTrace(); - fail(); - } - }; - eraProDao.getSingleSample("SAMEA104371999", rowCallbackHandler); - } -} diff --git a/pipelines/ena/src/test/resources/SRS000121.xml b/pipelines/ena/src/test/resources/SRS000121.xml deleted file mode 100644 index 09ac61d413..0000000000 --- a/pipelines/ena/src/test/resources/SRS000121.xml +++ /dev/null @@ -1,117 +0,0 @@ - - - - - SRS000121 - SAMN00001603 - GM18561 - NA18561 - - Coriell GM18561 - - 9606 - Homo sapiens - - Human HapMap individual Coriell catalog ID NA18561 - - - - ENA-STUDY - ERP012319,SRP000031,SRP000033,SRP000546,SRP003652,SRP004231,SRP004364,SRP027257,SRP048601,SRP078362 - - - - - ENA-EXPERIMENT - ERX000052,ERX000434,ERX002599,ERX012022,ERX226435-ERX226436,ERX1124530-ERX1124547,SRX027411,SRX031793,SRX031799,SRX320542,SRX724287,SRX726133,SRX1937792-SRX1937793 - - - - - ENA-RUN - ERR000300,ERR000334,ERR000394,ERR000519-ERR000522,ERR001124-ERR001125,ERR004071,ERR005156,ERR005180-ERR005183,ERR009002,ERR009042,ERR031858,ERR251901-ERR251902,ERR1044239-ERR1044256,SRR066864,SRR066868,SRR066890,SRR066905,SRR066921,SRR066942,SRR066956,SRR066962,SRR067007,SRR067015,SRR067031,SRR067036,SRR067055-SRR067056,SRR067068,SRR067086,SRR067113,SRR067122,SRR067141,SRR067147,SRR067163,SRR067202,SRR067204,SRR067219,SRR192547,SRR192549-SRR192552,SRR932313,SRR1601955,SRR1604790,SRR3881579-SRR3881580 - - - - - ENA-SUBMISSION - SRA000288 - - - - - ENA-FASTQ-FILES - - - - - - ENA-SUBMITTED-FILES - - - - - - - DNA-ID - NA18561 - - - Super Population Code - EAS - - - population - CHB - - - Super Population Description - East Asian - - - Coriell plate - HAPMAPPT02 - - - Coriell panel - MGP00017 - - - Population Description - Han Chinese in Beijing, China - - - sex - male - - - HapMap sample ID - NA18561 - - - culture_collection - Coriell:GM18561 - - - BioSampleModel - HapMap - - - ENA-SPOT-COUNT - 2012757925 - - - ENA-BASE-COUNT - 358938231877 - - - ENA-FIRST-PUBLIC - 2010-02-26 - - - ENA-LAST-UPDATE - 2015-06-23 - - - - \ No newline at end of file diff --git a/pipelines/export/pom.xml b/pipelines/export/pom.xml deleted file mode 100644 index 47b811e1a0..0000000000 --- a/pipelines/export/pom.xml +++ /dev/null @@ -1,48 +0,0 @@ - - 4.0.0 - - pipelines-export - jar - - - uk.ac.ebi.biosamples - biosamples - 5.3.13-SNAPSHOT - ../../ - - - - - uk.ac.ebi.biosamples - pipelines-common - 5.3.13-SNAPSHOT - - - uk.ac.ebi.biosamples - core - 5.3.13-SNAPSHOT - - - org.springframework.hateoas - spring-hateoas - 1.3.4 - - - - - - - org.springframework.boot - spring-boot-maven-plugin - - - - build-info - - - - - - - diff --git a/pipelines/export/src/main/java/uk/ac/ebi/biosamples/Application.java b/pipelines/export/src/main/java/uk/ac/ebi/biosamples/Application.java deleted file mode 100644 index 9a62af2f13..0000000000 --- a/pipelines/export/src/main/java/uk/ac/ebi/biosamples/Application.java +++ /dev/null @@ -1,40 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples; - -import org.springframework.boot.SpringApplication; -import org.springframework.boot.autoconfigure.SpringBootApplication; -import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; -import org.springframework.context.ConfigurableApplicationContext; -import org.springframework.context.annotation.ComponentScan; -import org.springframework.context.annotation.FilterType; -import org.springframework.context.annotation.Import; -import uk.ac.ebi.biosamples.configuration.ExclusionConfiguration; -import uk.ac.ebi.biosamples.service.EnaConfig; -import uk.ac.ebi.biosamples.service.EnaSampleToBioSampleConversionService; -import uk.ac.ebi.biosamples.service.EraProDao; -import uk.ac.ebi.biosamples.utils.PipelineUtils; - -@SpringBootApplication(exclude = DataSourceAutoConfiguration.class) -@ComponentScan( - excludeFilters = { - @ComponentScan.Filter( - type = FilterType.ASSIGNABLE_TYPE, - value = {EnaConfig.class, EraProDao.class, EnaSampleToBioSampleConversionService.class}) - }) -@Import(ExclusionConfiguration.class) -public class Application { - - public static void main(final String[] args) { - final ConfigurableApplicationContext ctx = SpringApplication.run(Application.class, args); - PipelineUtils.exitPipeline(ctx); - } -} diff --git a/pipelines/export/src/main/java/uk/ac/ebi/biosamples/export/ExportRunner.java b/pipelines/export/src/main/java/uk/ac/ebi/biosamples/export/ExportRunner.java deleted file mode 100644 index 219dd9dd69..0000000000 --- a/pipelines/export/src/main/java/uk/ac/ebi/biosamples/export/ExportRunner.java +++ /dev/null @@ -1,84 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.export; - -import com.fasterxml.jackson.databind.ObjectMapper; -import java.io.FileOutputStream; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.nio.charset.StandardCharsets; -import java.util.zip.GZIPOutputStream; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.boot.ApplicationArguments; -import org.springframework.boot.ApplicationRunner; -import org.springframework.hateoas.EntityModel; -import org.springframework.stereotype.Component; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.core.model.Sample; - -@Component -public class ExportRunner implements ApplicationRunner { - - private final Logger log = LoggerFactory.getLogger(getClass()); - private final BioSamplesClient bioSamplesClient; - private final ObjectMapper objectMapper; - - public ExportRunner(final BioSamplesClient bioSamplesClient, final ObjectMapper objectMapper) { - // ensure the client is public - if (bioSamplesClient.getPublicClient().isPresent()) { - this.bioSamplesClient = bioSamplesClient.getPublicClient().get(); - } else { - this.bioSamplesClient = bioSamplesClient; - } - this.objectMapper = objectMapper; - } - - @Override - public void run(final ApplicationArguments args) { - final String jsonSampleFilename = args.getNonOptionArgs().get(0); - final long oldTime = System.nanoTime(); - int sampleCount = 0; - - try { - boolean first = true; - try (final Writer jsonSampleWriter = - args.getOptionValues("gzip") == null - ? new OutputStreamWriter( - new FileOutputStream(jsonSampleFilename), StandardCharsets.UTF_8) - : new OutputStreamWriter( - new GZIPOutputStream(new FileOutputStream(jsonSampleFilename)), - StandardCharsets.UTF_8)) { - jsonSampleWriter.write("[\n"); - for (final EntityModel sampleResource : bioSamplesClient.fetchSampleResourceAll()) { - final Sample sample = sampleResource.getContent(); - - log.info("Handling " + sample.getAccession()); - - if (!first) { - jsonSampleWriter.write(",\n"); - } - - jsonSampleWriter.write(objectMapper.writeValueAsString(sample)); - first = false; - sampleCount += 1; - } - - jsonSampleWriter.write("\n]"); - } - } catch (final Exception e) { - e.printStackTrace(); - } finally { - final long elapsed = System.nanoTime() - oldTime; - log.info("Exported " + sampleCount + " samples in " + (elapsed / 1000000000L) + "s"); - } - } -} diff --git a/pipelines/ncbi-ena-link/pom.xml b/pipelines/ncbi-ena-link/pom.xml index de2fdc85ee..4db034c943 100644 --- a/pipelines/ncbi-ena-link/pom.xml +++ b/pipelines/ncbi-ena-link/pom.xml @@ -8,7 +8,7 @@ uk.ac.ebi.biosamples biosamples - 5.3.13-SNAPSHOT + 5.3.15-SNAPSHOT ../../ @@ -16,12 +16,12 @@ uk.ac.ebi.biosamples pipelines-common - 5.3.13-SNAPSHOT + 5.3.15-SNAPSHOT uk.ac.ebi.biosamples core - 5.3.13-SNAPSHOT + 5.3.15-SNAPSHOT diff --git a/pipelines/ncbi-ena-link/src/main/java/uk/ac/ebi/biosamples/ena/NcbiEnaLinkRunner.java b/pipelines/ncbi-ena-link/src/main/java/uk/ac/ebi/biosamples/ena/NcbiEnaLinkRunner.java index 2522e7555f..3e04258c66 100644 --- a/pipelines/ncbi-ena-link/src/main/java/uk/ac/ebi/biosamples/ena/NcbiEnaLinkRunner.java +++ b/pipelines/ncbi-ena-link/src/main/java/uk/ac/ebi/biosamples/ena/NcbiEnaLinkRunner.java @@ -30,7 +30,6 @@ import uk.ac.ebi.biosamples.mongo.util.PipelineCompletionStatus; import uk.ac.ebi.biosamples.service.EraProDao; import uk.ac.ebi.biosamples.service.SampleRetrievalResult; -import uk.ac.ebi.biosamples.service.SampleCallbackResult; import uk.ac.ebi.biosamples.utils.PipelineUniqueIdentifierGenerator; import uk.ac.ebi.biosamples.utils.PipelineUtils; import uk.ac.ebi.biosamples.utils.thread.AdaptiveThreadPoolExecutor; diff --git a/pipelines/neoexport/pom.xml b/pipelines/neoexport/pom.xml deleted file mode 100644 index 59bee2f72b..0000000000 --- a/pipelines/neoexport/pom.xml +++ /dev/null @@ -1,53 +0,0 @@ - - - - - 4.0.0 - pipelines-neoexport - pipelines-neoexport - jar - - - - - uk.ac.ebi.biosamples - biosamples - 5.3.13-SNAPSHOT - ../../ - - - - - uk.ac.ebi.biosamples - pipelines-common - 5.3.13-SNAPSHOT - - - com.fasterxml.jackson.dataformat - jackson-dataformat-csv - - - org.springframework.hateoas - spring-hateoas - 1.3.4 - - - - - - - - org.springframework.boot - spring-boot-maven-plugin - - - - build-info - - - - - - - diff --git a/pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/Application.java b/pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/Application.java deleted file mode 100644 index 781dd71b59..0000000000 --- a/pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/Application.java +++ /dev/null @@ -1,146 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples; - -import org.apache.http.HeaderElement; -import org.apache.http.HeaderElementIterator; -import org.apache.http.HttpHost; -import org.apache.http.HttpResponse; -import org.apache.http.client.HttpClient; -import org.apache.http.client.config.RequestConfig; -import org.apache.http.conn.ConnectionKeepAliveStrategy; -import org.apache.http.conn.routing.HttpRoute; -import org.apache.http.impl.client.cache.CacheConfig; -import org.apache.http.impl.client.cache.CachingHttpClientBuilder; -import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; -import org.apache.http.message.BasicHeaderElementIterator; -import org.apache.http.protocol.HTTP; -import org.apache.http.protocol.HttpContext; -import org.springframework.boot.SpringApplication; -import org.springframework.boot.autoconfigure.SpringBootApplication; -import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; -import org.springframework.boot.web.client.RestTemplateCustomizer; -import org.springframework.cache.annotation.EnableCaching; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.ComponentScan; -import org.springframework.context.annotation.FilterType; -import org.springframework.context.annotation.Import; -import org.springframework.http.client.HttpComponentsClientHttpRequestFactory; -import org.springframework.web.client.RestTemplate; -import uk.ac.ebi.biosamples.configuration.ExclusionConfiguration; -import uk.ac.ebi.biosamples.service.EnaConfig; -import uk.ac.ebi.biosamples.service.EnaSampleToBioSampleConversionService; -import uk.ac.ebi.biosamples.service.EraProDao; - -@SpringBootApplication(exclude = DataSourceAutoConfiguration.class) -@ComponentScan( - excludeFilters = { - @ComponentScan.Filter( - type = FilterType.ASSIGNABLE_TYPE, - value = {EnaConfig.class, EraProDao.class, EnaSampleToBioSampleConversionService.class}) - }) -@Import(ExclusionConfiguration.class) -@EnableCaching -public class Application { - - public static void main(final String[] args) { - SpringApplication.exit(SpringApplication.run(Application.class, args)); - } - - @Bean - public RestTemplate restTemplate(final RestTemplateCustomizer restTemplateCustomizer) { - final RestTemplate restTemplate = new RestTemplate(); - restTemplateCustomizer.customize(restTemplate); - return restTemplate; - } - - @Bean - public RestTemplateCustomizer restTemplateCustomizer( - final BioSamplesProperties bioSamplesProperties, - final PipelinesProperties piplinesProperties) { - return new RestTemplateCustomizer() { - public void customize(final RestTemplate restTemplate) { - - // use a keep alive strategy to try to make it easier to maintain connections for - // reuse - final ConnectionKeepAliveStrategy keepAliveStrategy = - new ConnectionKeepAliveStrategy() { - public long getKeepAliveDuration( - final HttpResponse response, final HttpContext context) { - - // check if there is a non-standard keep alive header present - final HeaderElementIterator it = - new BasicHeaderElementIterator(response.headerIterator(HTTP.CONN_KEEP_ALIVE)); - while (it.hasNext()) { - final HeaderElement he = it.nextElement(); - final String param = he.getName(); - final String value = he.getValue(); - if (value != null && param.equalsIgnoreCase("timeout")) { - return Long.parseLong(value) * 1000; - } - } - // default to 60s if no header - return 60 * 1000; - } - }; - - // set a number of connections to use at once for multiple threads - final PoolingHttpClientConnectionManager poolingHttpClientConnectionManager = - new PoolingHttpClientConnectionManager(); - poolingHttpClientConnectionManager.setMaxTotal(piplinesProperties.getConnectionCountMax()); - poolingHttpClientConnectionManager.setDefaultMaxPerRoute( - piplinesProperties.getConnectionCountDefault()); - poolingHttpClientConnectionManager.setMaxPerRoute( - new HttpRoute(HttpHost.create(piplinesProperties.getZooma())), - piplinesProperties.getConnectionCountZooma()); - poolingHttpClientConnectionManager.setMaxPerRoute( - new HttpRoute(HttpHost.create(bioSamplesProperties.getOls())), - piplinesProperties.getConnectionCountOls()); - - // set a local cache for cacheable responses - final CacheConfig cacheConfig = - CacheConfig.custom() - .setMaxCacheEntries(1024) - .setMaxObjectSize(1024 * 1024) // max size of 1Mb - // number of entries x size of entries = 1Gb total cache size - .setSharedCache(false) // act like a browser cache not a middle-hop cache - .build(); - - // set a timeout limit - // TODO put this in application.properties - final int timeout = 60; // in seconds - final RequestConfig config = - RequestConfig.custom() - .setConnectTimeout(timeout * 1000) // time to establish the connection with the - // remote host - .setConnectionRequestTimeout( - timeout * 1000) // maximum time of inactivity between two - // data packets - .setSocketTimeout(timeout * 1000) - .build(); // time to wait for a connection from the connection - // manager/pool - - // make the actual client - final HttpClient httpClient = - CachingHttpClientBuilder.create() - .setCacheConfig(cacheConfig) - .useSystemProperties() - .setConnectionManager(poolingHttpClientConnectionManager) - .setKeepAliveStrategy(keepAliveStrategy) - .setDefaultRequestConfig(config) - .build(); - - // and wire it into the resttemplate - restTemplate.setRequestFactory(new HttpComponentsClientHttpRequestFactory(httpClient)); - } - }; - } -} diff --git a/pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/neoexport/NeoCsvExporter.java b/pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/neoexport/NeoCsvExporter.java deleted file mode 100644 index 4cd77259a4..0000000000 --- a/pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/neoexport/NeoCsvExporter.java +++ /dev/null @@ -1,232 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.neoexport; - -import com.fasterxml.jackson.dataformat.csv.CsvMapper; -import com.fasterxml.jackson.dataformat.csv.CsvSchema; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.Writer; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.stereotype.Component; -import uk.ac.ebi.biosamples.core.model.Sample; -import uk.ac.ebi.biosamples.neo4j.model.NeoExternalEntity; -import uk.ac.ebi.biosamples.neo4j.model.NeoRelationship; -import uk.ac.ebi.biosamples.neo4j.model.NeoSample; - -@Component -class NeoCsvExporter { - private static final Logger LOG = LoggerFactory.getLogger(NeoCsvExporter.class); - - private static final String EXPORT_PATH = "./export/"; - private static final String REL_SOURCE_HEADER = "source"; - private static final String REL_TARGET_HEADER = "target"; - private static final int PERSIST_THRESHOLD = 1000000; - - private static final String[] SAMPLES_HEADER = { - "accession", - "name", - "cellType", - "sex", - "taxId", - "project", - "material", - "cellLine", - "organismPart", - "organism" - }; - private static final String[] EXTERNAL_ENTITY_HEADER = {"name", "archive", "ref", "url"}; - private static final String[] REL_HEADER = {REL_SOURCE_HEADER, REL_TARGET_HEADER}; - - private final List> samples = new ArrayList<>(); - private final List> externalEntity = new ArrayList<>(); - - private final List> relsDerivedFrom = new ArrayList<>(); - private final List> relsSameAs = new ArrayList<>(); - private final List> relsHasMember = new ArrayList<>(); - private final List> relsChildOf = new ArrayList<>(); - private final List> relsOther = new ArrayList<>(); - private final List> relsExternalRef = new ArrayList<>(); - - private int sampleIndex = 1; - private int externalEntityIndex = 1; - private int relsExternelRefIndex = 1; - private final int mockIndex = 1; - - void addToCSVFile(final Sample sample) { - final NeoSample neoSample = NeoSample.build(sample); - addSample(neoSample); - } - - void flush() { - writeCSV(samples, "samples-" + sampleIndex + ".csv", SAMPLES_HEADER, false); - writeCSV( - externalEntity, - "ex_reference-" + externalEntityIndex + ".csv", - EXTERNAL_ENTITY_HEADER, - false); - - writeCSV( - relsExternalRef, "external_reference-" + relsExternelRefIndex + ".csv", REL_HEADER, false); - writeCSV(relsDerivedFrom, "derived_from-" + mockIndex + ".csv", REL_HEADER, true); - writeCSV(relsSameAs, "same_as-" + mockIndex + ".csv", REL_HEADER, true); - writeCSV(relsHasMember, "has_member-" + mockIndex + ".csv", REL_HEADER, true); - writeCSV(relsChildOf, "child_of-" + mockIndex + ".csv", REL_HEADER, true); - writeCSV(relsOther, "other-" + mockIndex + ".csv", REL_HEADER, true); - } - - private void addSample(final NeoSample sample) { - final Map attributeMap = new HashMap<>(); - attributeMap.put("accession", sample.getAccession()); - attributeMap.put("name", sample.getName()); - attributeMap.put("organism", sample.getOrganism() == null ? "" : sample.getOrganism()); - attributeMap.put("taxId", sample.getTaxId() == null ? "" : sample.getTaxId()); - attributeMap.put("sex", sample.getSex() == null ? "" : sample.getSex()); - attributeMap.put("cellType", sample.getCellType() == null ? "" : sample.getCellType()); - attributeMap.put("material", sample.getMaterial() == null ? "" : sample.getMaterial()); - attributeMap.put("project", sample.getProject() == null ? "" : sample.getProject()); - attributeMap.put("cellLine", sample.getCellLine() == null ? "" : sample.getCellLine()); - attributeMap.put( - "organismPart", sample.getOrganismPart() == null ? "" : sample.getOrganismPart()); - samples.add(attributeMap); - - for (final NeoRelationship rel : sample.getRelationships()) { - if (rel.getSource().equals(sample.getAccession())) { - switch (rel.getType()) { - case DERIVED_FROM: - final Map e = new HashMap<>(); - e.put(REL_SOURCE_HEADER, rel.getSource()); - e.put(REL_TARGET_HEADER, rel.getTarget()); - relsDerivedFrom.add(e); - break; - case SAME_AS: - final Map e1 = new HashMap<>(); - e1.put(REL_SOURCE_HEADER, rel.getSource()); - e1.put(REL_TARGET_HEADER, rel.getTarget()); - relsSameAs.add(e1); - break; - case HAS_MEMBER: - final Map e2 = new HashMap<>(); - e2.put(REL_SOURCE_HEADER, rel.getSource()); - e2.put(REL_TARGET_HEADER, rel.getTarget()); - relsHasMember.add(e2); - break; - case CHILD_OF: - final Map e3 = new HashMap<>(); - e3.put(REL_SOURCE_HEADER, rel.getSource()); - e3.put(REL_TARGET_HEADER, rel.getTarget()); - relsChildOf.add(e3); - break; - default: - final Map e4 = new HashMap<>(); - e4.put(REL_SOURCE_HEADER, rel.getSource()); - e4.put(REL_TARGET_HEADER, rel.getTarget()); - relsOther.add(e4); - break; - } - } - } - - for (final NeoExternalEntity ref : sample.getExternalRefs()) { - final String refId = ref.getArchive() + "_" + ref.getRef(); - final Map e = new HashMap<>(); - e.put("name", refId); - e.put("archive", ref.getArchive()); - e.put("ref", ref.getRef()); - e.put("url", ref.getUrl()); - externalEntity.add(e); - final Map e1 = new HashMap<>(); - e1.put(REL_SOURCE_HEADER, sample.getAccession()); - e1.put(REL_TARGET_HEADER, refId); - relsExternalRef.add(e1); - } - - checkWriteStatus(); - } - - private void checkWriteStatus() { - if (samples.size() >= PERSIST_THRESHOLD) { - writeCSV(samples, "samples-" + sampleIndex + ".csv", SAMPLES_HEADER, false); - samples.clear(); - sampleIndex++; - } - if (externalEntity.size() >= PERSIST_THRESHOLD) { - writeCSV( - externalEntity, - "ex_reference-" + externalEntityIndex + ".csv", - EXTERNAL_ENTITY_HEADER, - false); - externalEntity.clear(); - externalEntityIndex++; - } - if (relsExternalRef.size() >= PERSIST_THRESHOLD) { - writeCSV( - relsExternalRef, - "external_reference-" + relsExternelRefIndex + ".csv", - REL_HEADER, - false); - relsExternalRef.clear(); - relsExternelRefIndex++; - } - - if (relsDerivedFrom.size() >= PERSIST_THRESHOLD) { - writeCSV(relsDerivedFrom, "derived_from-" + mockIndex + ".csv", REL_HEADER, true); - relsDerivedFrom.clear(); - } - if (relsSameAs.size() >= PERSIST_THRESHOLD) { - writeCSV(relsSameAs, "same_as-" + mockIndex + ".csv", REL_HEADER, true); - relsSameAs.clear(); - } - if (relsHasMember.size() >= PERSIST_THRESHOLD) { - writeCSV(relsHasMember, "has_member-" + mockIndex + ".csv", REL_HEADER, true); - relsHasMember.clear(); - } - if (relsChildOf.size() >= PERSIST_THRESHOLD) { - writeCSV(relsChildOf, "child_of-" + mockIndex + ".csv", REL_HEADER, true); - relsChildOf.clear(); - } - if (relsOther.size() >= PERSIST_THRESHOLD) { - writeCSV(relsOther, "other-" + mockIndex + ".csv", REL_HEADER, true); - relsOther.clear(); - } - } - - private void writeCSV( - final List> records, - final String fileName, - final String[] headerOrder, - final boolean append) { - CsvSchema schema = null; - final CsvSchema.Builder schemaBuilder = CsvSchema.builder(); - if (records != null && !records.isEmpty()) { - for (final String col : records.get(0).keySet()) { - schemaBuilder.addColumn(col); - } - schema = schemaBuilder.build(); - schema = schema.sortedBy(headerOrder); - } - - final CsvMapper mapper = new CsvMapper(); - final File file = new File(EXPORT_PATH + fileName); - try (final Writer writer = new FileWriter(file, append)) { - mapper.writer(schema).writeValues(writer).writeAll(records); - writer.flush(); - } catch (final IOException e) { - LOG.error("Failed writing to csv file: {}", fileName, e); - } - } -} diff --git a/pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/neoexport/NeoExportCallable.java b/pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/neoexport/NeoExportCallable.java deleted file mode 100644 index 40aa1d8000..0000000000 --- a/pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/neoexport/NeoExportCallable.java +++ /dev/null @@ -1,45 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.neoexport; - -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentLinkedQueue; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import uk.ac.ebi.biosamples.PipelineResult; -import uk.ac.ebi.biosamples.core.model.Sample; -import uk.ac.ebi.biosamples.neo4j.model.NeoSample; -import uk.ac.ebi.biosamples.neo4j.repo.NeoSampleRepository; - -public class NeoExportCallable implements Callable { - private static final Logger LOG = LoggerFactory.getLogger(NeoExportCallable.class); - static final ConcurrentLinkedQueue failedQueue = new ConcurrentLinkedQueue<>(); - - private final Sample sample; - private final NeoSampleRepository neoSampleRepository; - - NeoExportCallable(final NeoSampleRepository neoSampleRepository, final Sample sample) { - this.neoSampleRepository = neoSampleRepository; - this.sample = sample; - } - - @Override - public PipelineResult call() { - try { - final NeoSample neoSample = NeoSample.build(sample); - neoSampleRepository.loadSample(neoSample); - } catch (final Exception e) { - failedQueue.add(sample.getAccession()); - LOG.error("Failed to load sample: " + sample.getAccession(), e); - } - return new PipelineResult(sample.getAccession(), 0, true); - } -} diff --git a/pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/neoexport/NeoExportRunner.java b/pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/neoexport/NeoExportRunner.java deleted file mode 100644 index 6fe1c72c5b..0000000000 --- a/pipelines/neoexport/src/main/java/uk/ac/ebi/biosamples/neoexport/NeoExportRunner.java +++ /dev/null @@ -1,168 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.neoexport; - -import java.time.Duration; -import java.time.Instant; -import java.util.*; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.Future; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.boot.ApplicationArguments; -import org.springframework.boot.ApplicationRunner; -import org.springframework.hateoas.EntityModel; -import org.springframework.stereotype.Component; -import uk.ac.ebi.biosamples.PipelineFutureCallback; -import uk.ac.ebi.biosamples.PipelineResult; -import uk.ac.ebi.biosamples.PipelinesProperties; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.core.model.PipelineAnalytics; -import uk.ac.ebi.biosamples.core.model.Sample; -import uk.ac.ebi.biosamples.core.model.SampleAnalytics; -import uk.ac.ebi.biosamples.core.model.filter.Filter; -import uk.ac.ebi.biosamples.neo4j.repo.NeoSampleRepository; -import uk.ac.ebi.biosamples.utils.PipelineUtils; -import uk.ac.ebi.biosamples.utils.thread.AdaptiveThreadPoolExecutor; -import uk.ac.ebi.biosamples.utils.thread.ThreadUtils; - -@Component -public class NeoExportRunner implements ApplicationRunner { - private static final Logger LOG = LoggerFactory.getLogger(NeoExportRunner.class); - - private final BioSamplesClient bioSamplesClient; - private final PipelinesProperties pipelinesProperties; - private final NeoSampleRepository neoSampleRepository; - private final PipelineFutureCallback pipelineFutureCallback; - private final NeoCsvExporter neoCsvExporter; - - public NeoExportRunner( - final BioSamplesClient bioSamplesClient, - final PipelinesProperties pipelinesProperties, - final NeoSampleRepository neoSampleRepository, - final NeoCsvExporter neoCsvExporter) { - this.bioSamplesClient = bioSamplesClient; - this.pipelinesProperties = pipelinesProperties; - this.neoSampleRepository = neoSampleRepository; - this.neoCsvExporter = neoCsvExporter; - pipelineFutureCallback = new PipelineFutureCallback(); - } - - @Override - public void run(final ApplicationArguments args) throws Exception { - final Collection filters = PipelineUtils.getDateFilters(args, "update"); - // RelationFilter relationFilter = new RelationFilter.Builder("has member").build(); - // filters.add(relationFilter); - // ExternalReferenceDataFilter externalFilter = new ExternalReferenceDataFilter.Builder("EGA - // Dataset").build(); - // filters.add(externalFilter); - - final Instant startTime = Instant.now(); - LOG.info("Pipeline started at {}", startTime); - long sampleCount = 0; - boolean isPassed = true; - final SampleAnalytics sampleAnalytics = new SampleAnalytics(); - - String format = ""; - if (args.getOptionNames().contains("format")) { - format = args.getOptionValues("format").iterator().next(); - } - if ("CSV".equalsIgnoreCase(format)) { - LOG.info("Saving into CSV format for later consumption"); - } else { - LOG.info("Directly exporting to neo4j instance"); - } - - try (final AdaptiveThreadPoolExecutor executorService = - AdaptiveThreadPoolExecutor.create( - 100, - 10000, - true, - pipelinesProperties.getThreadCount(), - pipelinesProperties.getThreadCountMax())) { - - final Map> futures = new HashMap<>(); - for (final EntityModel sampleResource : - bioSamplesClient.fetchSampleResourceAll("", filters)) { - LOG.trace("Handling {}", sampleResource); - final Sample sample = sampleResource.getContent(); - Objects.requireNonNull(sample); - collectSampleTypes(sample, sampleAnalytics); - - // we will export only relationship containing entities - if (!sample.getRelationships().isEmpty() || !sample.getExternalReferences().isEmpty()) { - if ("CSV".equalsIgnoreCase(format)) { - neoCsvExporter.addToCSVFile(sample); - } else { - final Callable task = - new NeoExportCallable(neoSampleRepository, sample); - futures.put(sample.getAccession(), executorService.submit(task)); - } - } - - if (++sampleCount % 5000 == 0) { - LOG.info("Scheduled sample count {}", sampleCount); - } - } - - if ("CSV".equalsIgnoreCase(format)) { - neoCsvExporter.flush(); - } else { - LOG.info("Waiting for all scheduled tasks to finish"); - ThreadUtils.checkAndCallbackFutures(futures, 0, pipelineFutureCallback); - } - - } catch (final Exception e) { - LOG.error("Pipeline failed to finish successfully", e); - isPassed = false; - throw e; - } finally { - final Instant endTime = Instant.now(); - LOG.info("Total samples processed {}", sampleCount); - LOG.info("Total curation objects added {}", pipelineFutureCallback.getTotalCount()); - LOG.info("Pipeline finished at {}", endTime); - LOG.info( - "Pipeline total running time {} seconds", - Duration.between(startTime, endTime).getSeconds()); - - final PipelineAnalytics pipelineAnalytics = - new PipelineAnalytics( - "curami", startTime, endTime, sampleCount, pipelineFutureCallback.getTotalCount()); - pipelineAnalytics.setDateRange(filters); - sampleAnalytics.setDateRange(filters); - sampleAnalytics.setProcessedRecords(sampleCount); - } - } - - private String handleFailedSamples() { - final ConcurrentLinkedQueue failedQueue = NeoExportCallable.failedQueue; - String failures = null; - if (!failedQueue.isEmpty()) { - final List fails = new LinkedList<>(); - while (failedQueue.peek() != null) { - fails.add(failedQueue.poll()); - } - failures = "Failed files (" + fails.size() + ") " + String.join(" , ", fails); - LOG.warn(failures); - } else { - LOG.info("Pipeline completed without any failures"); - } - return failures; - } - - private void collectSampleTypes(final Sample sample, final SampleAnalytics sampleAnalytics) { - final String accessionPrefix = sample.getAccession().substring(0, 4); - final String submittedChannel = sample.getSubmittedVia().name(); - sampleAnalytics.addToCenter(accessionPrefix); - sampleAnalytics.addToChannel(submittedChannel); - } -} diff --git a/pipelines/pom.xml b/pipelines/pom.xml index 82a5d05f59..0658343c86 100644 --- a/pipelines/pom.xml +++ b/pipelines/pom.xml @@ -37,7 +37,8 @@ taxonimport reindex chain - + sample-post-release-action + ncbi-ena-link diff --git a/pipelines/sample-post-release-action/pom.xml b/pipelines/sample-post-release-action/pom.xml index d83bb9625e..92a293a386 100644 --- a/pipelines/sample-post-release-action/pom.xml +++ b/pipelines/sample-post-release-action/pom.xml @@ -8,7 +8,7 @@ uk.ac.ebi.biosamples biosamples - 5.3.13-SNAPSHOT + 5.3.15-SNAPSHOT ../../ @@ -20,12 +20,12 @@ uk.ac.ebi.biosamples pipelines-common - 5.3.13-SNAPSHOT + 5.3.15-SNAPSHOT uk.ac.ebi.biosamples core - 5.3.13-SNAPSHOT + 5.3.15-SNAPSHOT org.springframework.hateoas From 3bc037b40f0220c8bc503fedd58267480c3eedfd Mon Sep 17 00:00:00 2001 From: dipayan1985 Date: Mon, 2 Feb 2026 12:02:55 +0000 Subject: [PATCH 2/9] allow dev deployment from chore/working-es-search-k8-deployment-with-cleanup branch --- .gitlab-ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 106ef1954f..34f6c81852 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -86,6 +86,7 @@ clone-config: paths: - config only: + - chore/working-es-search-k8-deployment-with-cleanup - dev - main - biosamples-search @@ -98,6 +99,7 @@ deploy_k8s_primary_dev: name: primary_dev url: https://wwwdev.ebi.ac.uk/biosamples only: + - chore/working-es-search-k8-deployment-with-cleanup - dev - main - biosamples-search From ad7f9992b6f7f2e84bf9ab08cc138ff7f4fb2410 Mon Sep 17 00:00:00 2001 From: dipayan1985 Date: Mon, 2 Feb 2026 12:38:48 +0000 Subject: [PATCH 3/9] dont build ncbi-ena-link pipeline --- pipelines/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/pom.xml b/pipelines/pom.xml index 0658343c86..211330b37c 100644 --- a/pipelines/pom.xml +++ b/pipelines/pom.xml @@ -38,7 +38,7 @@ reindex chain sample-post-release-action - ncbi-ena-link + From 51954b53dfda65b1185155db179bb7b791c63c7a Mon Sep 17 00:00:00 2001 From: dipayan1985 Date: Wed, 4 Feb 2026 10:22:43 +0000 Subject: [PATCH 4/9] run reindex in fallback prod --- .gitlab-ci.yml | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 34f6c81852..e47dedbc96 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -114,6 +114,7 @@ deploy_k8s_primary_prod: name: primary_prod url: https://www.ebi.ac.uk/biosamples only: + - chore/working-es-search-k8-deployment-with-cleanup - dev - main - biosamples-search @@ -128,6 +129,7 @@ deploy_k8s_fallback_prod: name: fallback_prod url: https://www.ebi.ac.uk/biosamples only: + - chore/working-es-search-k8-deployment-with-cleanup - dev - main - biosamples-search @@ -140,8 +142,24 @@ deploy_pipeline_k8s_primary_prod: K8S_NAMESPACE: biosamples-prod environment: name: primary_prod - url: https://wwwdev.ebi.ac.uk/biosamples + url: https://www.ebi.ac.uk/biosamples + only: + - chore/working-es-search-k8-deployment-with-cleanup + - dev + - main + - biosamples-search + when: manual + extends: .kube_deploy_jobs_script + +deploy_pipeline_k8s_fallback_prod: + variables: + ENVIRONMENT_NAME: fallback_prod + K8S_NAMESPACE: biosamples-prod + environment: + name: fallback_prod + url: https://www.ebi.ac.uk/biosamples only: + - chore/working-es-search-k8-deployment-with-cleanup - dev - main - biosamples-search @@ -156,6 +174,7 @@ deploy_pipeline_k8s_primary_dev: name: primary_dev url: https://wwwdev.ebi.ac.uk/biosamples only: + - chore/working-es-search-k8-deployment-with-cleanup - dev - main - biosamples-search From 65fc5eecef2e5c87c042a5f2042ea0342e7b2f83 Mon Sep 17 00:00:00 2001 From: dipayan1985 Date: Wed, 4 Feb 2026 10:51:33 +0000 Subject: [PATCH 5/9] reindex pipeline to push messages to biosamples.reindexing.es --- .../src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pipelines/reindex/src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java b/pipelines/reindex/src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java index ebefaba09d..3d990c1adc 100644 --- a/pipelines/reindex/src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java +++ b/pipelines/reindex/src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java @@ -162,11 +162,7 @@ private boolean fetchSampleAndSendMessage(final boolean isRetry) { try { String json = objectMapper.writeValueAsString(sampleOptional.get()); amqpTemplate.convertAndSend( - MessagingConstants.INDEXING_EXCHANGE, MessagingConstants.INDEXING_QUEUE, json); - // amqpTemplate.convertAndSend( - // MessagingConstants.REINDEXING_EXCHANGE, - // MessagingConstants.REINDEXING_QUEUE, - // MessageContent.build(sampleOptional.get(), null, related, false)); + MessagingConstants.INDEXING_EXCHANGE, MessagingConstants.REINDEXING_QUEUE, json); return true; } catch (final Exception e) { From 95d4019018f37ebb2ae6b62b4cfe1abb34a52662 Mon Sep 17 00:00:00 2001 From: dgupta Date: Thu, 5 Feb 2026 11:46:00 +0000 Subject: [PATCH 6/9] Have separate reindexing exchange to avoid federation conflicts --- .../biosamples/messaging/MessagingConstants.java | 1 + .../messaging/config/MessageConfig.java | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/uk/ac/ebi/biosamples/messaging/MessagingConstants.java b/core/src/main/java/uk/ac/ebi/biosamples/messaging/MessagingConstants.java index abae07ab0a..953013bdd2 100644 --- a/core/src/main/java/uk/ac/ebi/biosamples/messaging/MessagingConstants.java +++ b/core/src/main/java/uk/ac/ebi/biosamples/messaging/MessagingConstants.java @@ -12,6 +12,7 @@ public class MessagingConstants { public static final String INDEXING_EXCHANGE = "biosamples.indexing"; + public static final String REINDEXING_EXCHANGE = "biosamples.reindexing"; public static final String INDEXING_QUEUE = "biosamples.indexing.es"; public static final String REINDEXING_QUEUE = "biosamples.reindexing.es"; diff --git a/core/src/main/java/uk/ac/ebi/biosamples/messaging/config/MessageConfig.java b/core/src/main/java/uk/ac/ebi/biosamples/messaging/config/MessageConfig.java index b65b8e3f3c..5234a11284 100644 --- a/core/src/main/java/uk/ac/ebi/biosamples/messaging/config/MessageConfig.java +++ b/core/src/main/java/uk/ac/ebi/biosamples/messaging/config/MessageConfig.java @@ -43,6 +43,13 @@ public Exchange indexingExchange() { .build(); } + @Bean(name = "reindexingExchange") + public Exchange reindexingExchange() { + return ExchangeBuilder.directExchange(MessagingConstants.REINDEXING_EXCHANGE) + .durable(true) + .build(); + } + @Bean(name = "uploadExchange") public Exchange uploadExchange() { return ExchangeBuilder.fanoutExchange(MessagingConstants.UPLOAD_EXCHANGE).durable(true).build(); @@ -60,7 +67,7 @@ public Binding indexBinding() { @Bean(name = "reindexingBinding") public Binding reindexBinding() { return BindingBuilder.bind(reindexingQueue()) - .to(indexingExchange()) + .to(reindexingExchange()) .with(MessagingConstants.REINDEXING_QUEUE) .noargs(); } @@ -72,11 +79,4 @@ public Binding uploadBinding() { .with(MessagingConstants.UPLOAD_QUEUE) .noargs(); } - - // enable messaging in json - // note that this class is not the same as the http MessageConverter class - // @Bean - // public MessageConverter getJackson2MessageConverter() { - // return new Jackson2JsonMessageConverter(); - // } } From 194572e65f188b29166c099749cbb5755d7aaa3f Mon Sep 17 00:00:00 2001 From: dgupta Date: Thu, 5 Feb 2026 14:33:51 +0000 Subject: [PATCH 7/9] Reindex should put to reindex exchange --- .../uk/ac/ebi/biosamples/messaging/config/MessageConfig.java | 4 ++-- .../src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/uk/ac/ebi/biosamples/messaging/config/MessageConfig.java b/core/src/main/java/uk/ac/ebi/biosamples/messaging/config/MessageConfig.java index 5234a11284..36d45c4cd4 100644 --- a/core/src/main/java/uk/ac/ebi/biosamples/messaging/config/MessageConfig.java +++ b/core/src/main/java/uk/ac/ebi/biosamples/messaging/config/MessageConfig.java @@ -46,8 +46,8 @@ public Exchange indexingExchange() { @Bean(name = "reindexingExchange") public Exchange reindexingExchange() { return ExchangeBuilder.directExchange(MessagingConstants.REINDEXING_EXCHANGE) - .durable(true) - .build(); + .durable(true) + .build(); } @Bean(name = "uploadExchange") diff --git a/pipelines/reindex/src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java b/pipelines/reindex/src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java index 3d990c1adc..808db642a5 100644 --- a/pipelines/reindex/src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java +++ b/pipelines/reindex/src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java @@ -162,7 +162,7 @@ private boolean fetchSampleAndSendMessage(final boolean isRetry) { try { String json = objectMapper.writeValueAsString(sampleOptional.get()); amqpTemplate.convertAndSend( - MessagingConstants.INDEXING_EXCHANGE, MessagingConstants.REINDEXING_QUEUE, json); + MessagingConstants.REINDEXING_EXCHANGE, MessagingConstants.REINDEXING_QUEUE, json); return true; } catch (final Exception e) { From 325ee6b3b0e6bd97b1c65c25c46009338cda8734 Mon Sep 17 00:00:00 2001 From: dgupta Date: Mon, 9 Feb 2026 12:16:33 +0000 Subject: [PATCH 8/9] structured data filter added --- .../core/model/filter/FilterType.java | 3 +- .../model/filter/StructuredDataFilter.java | 95 +++++++++++++++++++ .../core/service/FilterBuilder.java | 12 ++- .../service/facet/SearchFacetMapper.java | 2 +- .../service/search/SearchFilterMapper.java | 20 +++- 5 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 core/src/main/java/uk/ac/ebi/biosamples/core/model/filter/StructuredDataFilter.java diff --git a/core/src/main/java/uk/ac/ebi/biosamples/core/model/filter/FilterType.java b/core/src/main/java/uk/ac/ebi/biosamples/core/model/filter/FilterType.java index 4a5b0b942d..d709d4508c 100644 --- a/core/src/main/java/uk/ac/ebi/biosamples/core/model/filter/FilterType.java +++ b/core/src/main/java/uk/ac/ebi/biosamples/core/model/filter/FilterType.java @@ -25,7 +25,8 @@ public enum FilterType { WEBINID_FILTER("webinId", AuthenticationFilter.Builder.class), DATE_FILTER("dt", DateRangeFilter.DateRangeFilterBuilder.class), EXTERNAL_REFERENCE_DATA_FILTER("extd", ExternalReferenceDataFilter.Builder.class), - ACCESSION_FILTER("acc", AccessionFilter.Builder.class); + ACCESSION_FILTER("acc", AccessionFilter.Builder.class), + STRUCTURED_DATA_FILTER("strd", StructuredDataFilter.Builder.class); private static final List filterTypesByLength; diff --git a/core/src/main/java/uk/ac/ebi/biosamples/core/model/filter/StructuredDataFilter.java b/core/src/main/java/uk/ac/ebi/biosamples/core/model/filter/StructuredDataFilter.java new file mode 100644 index 0000000000..b960395160 --- /dev/null +++ b/core/src/main/java/uk/ac/ebi/biosamples/core/model/filter/StructuredDataFilter.java @@ -0,0 +1,95 @@ +/* +* Copyright 2021 EMBL - European Bioinformatics Institute +* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this +* file except in compliance with the License. You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 +* Unless required by applicable law or agreed to in writing, software distributed under the +* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +*/ +package uk.ac.ebi.biosamples.core.model.filter; + +import java.util.Objects; +import java.util.Optional; +import uk.ac.ebi.biosamples.core.model.facet.FacetType; + +public class StructuredDataFilter implements Filter { + + private final String dataType; + + private StructuredDataFilter(final String dataType) { + this.dataType = dataType; + } + + @Override + public FilterType getType() { + return FilterType.STRUCTURED_DATA_FILTER; + } + + @Override + public String getLabel() { + return "structured data"; + } + + @Override + public Optional getContent() { + return Optional.ofNullable(dataType); + } + + @Override + public FacetType getAssociatedFacetType() { + return FacetType.NO_TYPE; + } + + @Override + public String getSerialization() { + final StringBuilder serialization = + new StringBuilder(getType().getSerialization()).append(":").append(getLabel()); + getContent().ifPresent(value -> serialization.append(":").append(value)); + return serialization.toString(); + } + + @Override + public boolean equals(final Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof StructuredDataFilter)) { + return false; + } + final StructuredDataFilter other = (StructuredDataFilter) obj; + return Objects.equals(other.dataType, dataType); + } + + @Override + public int hashCode() { + return Objects.hash(dataType); + } + + public static class Builder implements Filter.Builder { + private String dataType; + + public Builder() {} + + public Builder(final String label) { + // Label parameter required by FilterType.getBuilderForLabel() but not used + // since label is always "structured data" + } + + public Builder withDataType(final String dataType) { + this.dataType = dataType; + return this; + } + + @Override + public StructuredDataFilter build() { + return new StructuredDataFilter(dataType); + } + + @Override + public Builder parseContent(final String filterValue) { + return withDataType(filterValue); + } + } +} diff --git a/core/src/main/java/uk/ac/ebi/biosamples/core/service/FilterBuilder.java b/core/src/main/java/uk/ac/ebi/biosamples/core/service/FilterBuilder.java index 33fe4df308..4653e84341 100644 --- a/core/src/main/java/uk/ac/ebi/biosamples/core/service/FilterBuilder.java +++ b/core/src/main/java/uk/ac/ebi/biosamples/core/service/FilterBuilder.java @@ -42,8 +42,8 @@ public DateRangeFilter.DateRangeFilterBuilder onUpdateDate() { return new DateRangeFilter.DateRangeFilterBuilder("update"); } - public AuthenticationFilter.Builder onAuthInfo(final String domain) { - return new AuthenticationFilter.Builder(domain); + public AuthenticationFilter.Builder onAuthInfo(final String authInfo) { + return new AuthenticationFilter.Builder(authInfo); } public NameFilter.Builder onName(final String name) { @@ -59,6 +59,14 @@ public ExternalReferenceDataFilter.Builder onDataFromExternalReference( return new ExternalReferenceDataFilter.Builder(extReference); } + public StructuredDataFilter.Builder onStructuredData() { + return new StructuredDataFilter.Builder(); + } + + public StructuredDataFilter.Builder onStructuredData(final String dataType) { + return new StructuredDataFilter.Builder().withDataType(dataType); + } + public Filter buildFromString(final String serializedFilter) { final FilterType filterType = FilterType.ofFilterString(serializedFilter); final List filterParts = filterParts(serializedFilter); diff --git a/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/facet/SearchFacetMapper.java b/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/facet/SearchFacetMapper.java index 6ae53efaf0..65cda197a6 100644 --- a/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/facet/SearchFacetMapper.java +++ b/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/facet/SearchFacetMapper.java @@ -98,7 +98,7 @@ private static void getSearchFilters( filterBuilder.setExternal(externalRefFilterBuilder); } - // todo SraAccessionFilter, Structured data filter + // todo SraAccessionFilter grpcFilters.add(filterBuilder.build()); } diff --git a/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/search/SearchFilterMapper.java b/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/search/SearchFilterMapper.java index 83f3f4508c..11fe4f887b 100644 --- a/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/search/SearchFilterMapper.java +++ b/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/search/SearchFilterMapper.java @@ -17,6 +17,7 @@ import uk.ac.ebi.biosamples.core.model.filter.ExternalReferenceDataFilter; import uk.ac.ebi.biosamples.core.model.filter.InverseRelationFilter; import uk.ac.ebi.biosamples.core.model.filter.RelationFilter; +import uk.ac.ebi.biosamples.core.model.filter.StructuredDataFilter; import uk.ac.ebi.biosamples.search.grpc.*; public class SearchFilterMapper { @@ -52,14 +53,31 @@ private static void getSearchFilters( } else if (filter instanceof uk.ac.ebi.biosamples.core.model.filter.ExternalReferenceDataFilter f) { getExternalReferenceSearchFilter(f, grpcFilters); + } else if (filter instanceof uk.ac.ebi.biosamples.core.model.filter.StructuredDataFilter f) { + getStructuredDataSearchFilter(f, grpcFilters); } else { - // todo SraAccessionFilter, Structured data filter + // todo SraAccessionFilter throw new RuntimeException("Unsupported filter type " + filter.getClass().getName()); } } filterMap.forEach((k, v) -> grpcFilters.add(v.build())); // allows OR filter for attributes } + private static void getStructuredDataSearchFilter( + StructuredDataFilter f, List grpcFilters) { + f.getContent() + .filter(StringUtils::hasText) + .ifPresent( + dataType -> { + grpcFilters.add( + Filter.newBuilder() + .setStructuredData( + uk.ac.ebi.biosamples.search.grpc.StructuredDataFilter.newBuilder() + .setType(dataType)) + .build()); + }); + } + private static void getAuthSearchFilter(AuthenticationFilter f, List grpcFilters) { f.getContent() .ifPresent( From 527c874ce0f671d35cd2fdcb14bc378e59fb6d46 Mon Sep 17 00:00:00 2001 From: dipayan1985 Date: Thu, 26 Feb 2026 11:27:36 +0000 Subject: [PATCH 9/9] fix the missing to/until for date filter queries --- .../service/facet/SearchFacetMapper.java | 8 ++++++-- .../service/search/SearchFilterMapper.java | 14 ++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/facet/SearchFacetMapper.java b/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/facet/SearchFacetMapper.java index 65cda197a6..4b9daaad1f 100644 --- a/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/facet/SearchFacetMapper.java +++ b/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/facet/SearchFacetMapper.java @@ -67,8 +67,12 @@ private static void getSearchFilters( filterBuilder.setDateRange( DateRangeFilter.newBuilder() .setField(dateField) - .setFrom(dateRange.getFrom().toString()) - .setTo(dateRange.getUntil().toString()))); + .setFrom( + dateRange.isFromMinDate() ? "" : dateRange.getFrom().toString()) + .setTo( + dateRange.isUntilMaxDate() + ? "" + : dateRange.getUntil().toString()))); } if (filter instanceof uk.ac.ebi.biosamples.core.model.filter.AttributeFilter f) { AttributeFilter.Builder attributeFilterBuilder = AttributeFilter.newBuilder(); diff --git a/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/search/SearchFilterMapper.java b/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/search/SearchFilterMapper.java index 11fe4f887b..bbe65c505d 100644 --- a/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/search/SearchFilterMapper.java +++ b/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/search/SearchFilterMapper.java @@ -151,14 +151,12 @@ private static void getDateRangeSearchFilter( default -> throw new IllegalArgumentException("Unknown date field " + f.getLabel()); }; - grpcFilters.add( - Filter.newBuilder() - .setDateRange( - DateRangeFilter.newBuilder() - .setField(dateField) - .setFrom(dateRange.getFrom().toString()) - .setTo(dateRange.getUntil().toString())) - .build()); + var dateRangeBuilder = + DateRangeFilter.newBuilder() + .setField(dateField) + .setFrom(dateRange.isFromMinDate() ? "" : dateRange.getFrom().toString()) + .setTo(dateRange.isUntilMaxDate() ? "" : dateRange.getUntil().toString()); + grpcFilters.add(Filter.newBuilder().setDateRange(dateRangeBuilder.build()).build()); }); }