From 1de4584f3f3ad5bcb7fe246d26d8c56aa86821a1 Mon Sep 17 00:00:00 2001 From: Mark Patton Date: Tue, 24 Feb 2026 11:16:12 -0500 Subject: [PATCH 1/2] Update journal loader to understand new format of the PMC journal list. --- .../loader/journal/nih/NihTypeAReader.java | 52 ++++++++++--------- .../src/test/resources/data.csv | 5 +- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/pass-journal-loader/pass-journal-loader-nih/src/main/java/org/eclipse/pass/loader/journal/nih/NihTypeAReader.java b/pass-journal-loader/pass-journal-loader-nih/src/main/java/org/eclipse/pass/loader/journal/nih/NihTypeAReader.java index 6a42f46f4..8a8621e9e 100644 --- a/pass-journal-loader/pass-journal-loader-nih/src/main/java/org/eclipse/pass/loader/journal/nih/NihTypeAReader.java +++ b/pass-journal-loader/pass-journal-loader-nih/src/main/java/org/eclipse/pass/loader/journal/nih/NihTypeAReader.java @@ -16,8 +16,6 @@ package org.eclipse.pass.loader.journal.nih; -import static java.util.stream.StreamSupport.stream; - import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -34,50 +32,55 @@ import org.slf4j.LoggerFactory; /** - * Reads the NIH type A participation .csv file + * Reads the NIH type A participation .csv file. + * *

- * See also: http://www.ncbi.nlm.nih.gov/pmc/front-page/NIH_PA_journal_list.csv + * See the spreadsheet definition at https://www.nlm.nih.gov/pubs/techbull/mj24/mj24_pmc_preview_journal_list.html + * and the spreadsheet at https://cdn.ncbi.nlm.nih.gov/pmc/home/jlist.csv. *

* * @author apb@jhu.edu */ public class NihTypeAReader implements JournalReader { + private static final String[] header = new String[] { + "Journal Title", + "NLM Title Abbreviation (TA)", + "Publisher", + "ISSN (print)", + "ISSN (online)", + "NLM Unique ID", + "Most Recent", + "Earliest", + "Release Delay (Embargo)", + "Agreement Status", + "Agreement to Deposit", + "Journal Note", + "PMC URL" + }; private static final Logger LOG = LoggerFactory.getLogger(NihTypeAReader.class); private Stream readJournals(Reader csv) throws IOException { + CSVFormat csvFormat = CSVFormat.RFC4180.builder().setHeader(header).setSkipHeaderRecord(true).build(); - return stream(CSVFormat.RFC4180.parse(csv).spliterator(), false) - .map(NihTypeAReader::toJournal) - .filter(Objects::nonNull); + return csvFormat.parse(csv).stream().map(NihTypeAReader::toJournal).filter(Objects::nonNull); } private static Journal toJournal(final CSVRecord record) { - LOG.debug("Parsing CSV record.."); - //final Journal j = new PMCSource(); final Journal j = new Journal(); try { - j.setJournalName(record.get(0)); j.setNlmta(record.get(1)); - // columns 2, 3 are issns. 
column 2 is type "Print" and 3 is type "Online" - // see https://publicaccess.nih.gov/testsite/field_definitions.htm - addIssnIfPresent(j, record.get(2), "Print"); - addIssnIfPresent(j, record.get(3), "Online"); + addIssnIfPresent(j, record.get(3), "Print"); + addIssnIfPresent(j, record.get(4), "Online"); - // 4 is start date (we don't care) - // 5 is end date (if ended, then it's not active) - String endDate = null; - if (record.size() > 5) { //csv file may lack trailing comma if this field is empty - endDate = record.get(5); - } - final boolean isActive = (endDate == null || endDate.trim().equals("")); + String status = record.get(9).strip(); - if (isActive) { + if (status.equals("Active")) { j.setPmcParticipation(PmcParticipation.A); } @@ -90,7 +93,7 @@ private static Journal toJournal(final CSVRecord record) { } private static void addIssnIfPresent(Journal journal, String issn, String type) { - if (issn != null && !issn.trim().equals("")) { + if (issn != null && !issn.strip().equals("")) { journal.getIssns().add(String.join(":", type, issn)); } } @@ -108,5 +111,4 @@ public Stream readJournals(InputStream source, Charset charset) { public boolean hasPmcParticipation() { return true; } - -} +} \ No newline at end of file diff --git a/pass-journal-loader/pass-journal-loader-nih/src/test/resources/data.csv b/pass-journal-loader/pass-journal-loader-nih/src/test/resources/data.csv index 2312895d7..3593cb7ee 100644 --- a/pass-journal-loader/pass-journal-loader-nih/src/test/resources/data.csv +++ b/pass-journal-loader/pass-journal-loader-nih/src/test/resources/data.csv @@ -1,2 +1,3 @@ -Journal 1,j1,2190-572X,2190-5738,Jul 2011, -Journal 2,j2,,1550-7416,Sep 2004,Feb 2016 +Journal Title,NLM Title Abbreviation (TA),Publisher,ISSN (print),ISSN (online),NLM Unique ID,Most Recent,Earliest,Release Delay (Embargo),Agreement Status,Agreement to Deposit,Journal Note,PMC URL +Journal 1,j1,publisher,2190-572X,2190-5738,111111111,v.12(3) 2026,v.1(1) 2011,12 months,Active,All 
articles,,"http://example.com/blah" +Journal 2,j2,publisher,,1550-7416,000000000,v.16(3) 2020,v.1(1) 20009,10 months,No longer participating,All articles,,"http://example.com/blah2" From 99a50646204d132a310cf875fa13ca341852c1f9 Mon Sep 17 00:00:00 2001 From: Mark Patton Date: Wed, 25 Feb 2026 09:36:43 -0500 Subject: [PATCH 2/2] Update journal loader IT for new NIH journal list format --- .../java/org/eclipse/pass/loader/journal/nih/DepositIT.java | 4 ++-- .../pass-journal-loader-nih/src/test/resources/pmc-1.csv | 5 +++-- .../pass-journal-loader-nih/src/test/resources/pmc-2.csv | 5 +++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pass-journal-loader/pass-journal-loader-nih/src/test/java/org/eclipse/pass/loader/journal/nih/DepositIT.java b/pass-journal-loader/pass-journal-loader-nih/src/test/java/org/eclipse/pass/loader/journal/nih/DepositIT.java index 5dd9a3f16..6f6f14e41 100644 --- a/pass-journal-loader/pass-journal-loader-nih/src/test/java/org/eclipse/pass/loader/journal/nih/DepositIT.java +++ b/pass-journal-loader/pass-journal-loader-nih/src/test/java/org/eclipse/pass/loader/journal/nih/DepositIT.java @@ -111,7 +111,7 @@ public void loadFromFileTest(WireMockRuntimeInfo wmRuntimeInfo) throws Exception System.setProperty("pmc", ""); Main.main(new String[] {}); - // We expect three journals, but no PMC A journals + // We expect 4 journals, but no PMC A journals assertEquals(4, listJournals().size()); assertEquals(0, typeA(listJournals()).size()); @@ -119,7 +119,7 @@ public void loadFromFileTest(WireMockRuntimeInfo wmRuntimeInfo) throws Exception System.setProperty("pmc", "http://localhost:" + wmPort + "/pmc/front-page/NIH_PA_journal_list-1.csv"); Main.main(new String[] {}); - // We still expect three journals in the repository, but now two are PMC A + // We still expect 4 journals in the repository, but now two are PMC A assertEquals(4, listJournals().size()); assertEquals(2, typeA(listJournals()).size()); diff --git
a/pass-journal-loader/pass-journal-loader-nih/src/test/resources/pmc-1.csv b/pass-journal-loader/pass-journal-loader-nih/src/test/resources/pmc-1.csv index d1c8d0ce8..6894d9057 100644 --- a/pass-journal-loader/pass-journal-loader-nih/src/test/resources/pmc-1.csv +++ b/pass-journal-loader/pass-journal-loader-nih/src/test/resources/pmc-1.csv @@ -1,2 +1,3 @@ -Journal 1,1jr,0000-0001,000-001X,Jul 2011, -Journal 2,2jr,,0000-0002,Sep 2004, \ No newline at end of file +Journal Title,NLM Title Abbreviation (TA),Publisher,ISSN (print),ISSN (online),NLM Unique ID,Most Recent,Earliest,Release Delay (Embargo),Agreement Status,Agreement to Deposit,Journal Note,PMC URL +Journal 1,1jr,publisher,0000-0001,000-001X,111111111,v.12(3) 2026,v.1(1) 2011,12 months,Active,All articles,,"http://example.com/blah" +Journal 2,2jr,publisher,,0000-0002,000000000,v.16(3) 2020,v.1(1) 20009,10 months,Active,All articles,,"http://example.com/blah2" \ No newline at end of file diff --git a/pass-journal-loader/pass-journal-loader-nih/src/test/resources/pmc-2.csv b/pass-journal-loader/pass-journal-loader-nih/src/test/resources/pmc-2.csv index 6fb8eba9d..03fdfbb77 100644 --- a/pass-journal-loader/pass-journal-loader-nih/src/test/resources/pmc-2.csv +++ b/pass-journal-loader/pass-journal-loader-nih/src/test/resources/pmc-2.csv @@ -1,2 +1,3 @@ -Journal 1,1jr,0000-0001,000-001X,Jul 2011, -Journal 2,2jr,,0000-0002,Sep 2004,Feb 2016 \ No newline at end of file +Journal Title,NLM Title Abbreviation (TA),Publisher,ISSN (print),ISSN (online),NLM Unique ID,Most Recent,Earliest,Release Delay (Embargo),Agreement Status,Agreement to Deposit,Journal Note,PMC URL +Journal 1,1jr,publisher,0000-0001,000-001X,111111111,v.12(3) 2026,v.1(1) 2011,12 months,Active,All articles,,"http://example.com/blah" +Journal 2,2jr,publisher,,0000-0002,000000000,v.16(3) 2020,v.1(1) 20009,10 months,No longer participating,All articles,,"http://example.com/blah2"