Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

package org.eclipse.pass.loader.journal.nih;

import static java.util.stream.StreamSupport.stream;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
Expand All @@ -34,50 +32,55 @@
import org.slf4j.LoggerFactory;

/**
* Reads the NIH type A participation .csv file
* Reads the NIH type A participation .csv file.
*
* <p>
* See also: http://www.ncbi.nlm.nih.gov/pmc/front-page/NIH_PA_journal_list.csv
* See the spreadsheet definition at https://www.nlm.nih.gov/pubs/techbull/mj24/mj24_pmc_preview_journal_list.html
* and the spreadsheet at https://cdn.ncbi.nlm.nih.gov/pmc/home/jlist.csv.
* </p>
*
* @author apb@jhu.edu
*/
public class NihTypeAReader implements JournalReader {
/**
 * Column names of the NIH journal list spreadsheet, in file order. The array is handed to the CSV
 * format as its explicit header; the trailing comments give each column's zero-based index, which is
 * how records are addressed when they are converted to journals.
 */
private static final String[] header = {
    "Journal Title",                // 0
    "NLM Title Abbreviation (TA)",  // 1
    "Publisher",                    // 2
    "ISSN (print)",                 // 3
    "ISSN (online)",                // 4
    "NLM Unique ID",                // 5
    "Most Recent",                  // 6
    "Earliest",                     // 7
    "Release Delay (Embargo)",      // 8
    "Agreement Status",             // 9
    "Agreement to Deposit",         // 10
    "Journal Note",                 // 11
    "PMC URL"                       // 12
};

/** Logger shared by all instances of this reader. */
private static final Logger LOG = LoggerFactory.getLogger(NihTypeAReader.class);

/**
 * Parses the NIH journal list CSV into a stream of journals.
 *
 * <p>The input is read as RFC 4180 CSV with the expected column names supplied explicitly and the
 * header record skipped, so only data rows are converted. Rows for which {@code toJournal} produces
 * {@code null} are dropped from the result.
 *
 * @param csv reader supplying the CSV content
 * @return stream of the journals parsed from the data rows
 * @throws IOException if the CSV parser cannot be set up on the reader
 */
private Stream<Journal> readJournals(Reader csv) throws IOException {
    CSVFormat csvFormat = CSVFormat.RFC4180.builder().setHeader(header).setSkipHeaderRecord(true).build();

    // Parse with the configured format (not the bare RFC4180 constant); otherwise the header row
    // would be handed to toJournal as if it were a journal record.
    return csvFormat.parse(csv).stream()
        .map(NihTypeAReader::toJournal)
        .filter(Objects::nonNull);
}

private static Journal toJournal(final CSVRecord record) {

LOG.debug("Parsing CSV record..");

//final Journal j = new PMCSource();
final Journal j = new Journal();

try {

j.setJournalName(record.get(0));
j.setNlmta(record.get(1));

// columns 3, 4 are issns. column 3 is type "Print" and 4 is type "Online"
// see https://publicaccess.nih.gov/testsite/field_definitions.htm
addIssnIfPresent(j, record.get(2), "Print");
addIssnIfPresent(j, record.get(3), "Online");
addIssnIfPresent(j, record.get(3), "Print");
addIssnIfPresent(j, record.get(4), "Online");

// 4 is start date (we don't care)
// 5 is end date (if ended, then it's not active)
String endDate = null;
if (record.size() > 5) { //csv file may lack trailing comma if this field is empty
endDate = record.get(5);
}
final boolean isActive = (endDate == null || endDate.trim().equals(""));
String status = record.get(9).strip();

if (isActive) {
if (status.equals("Active")) {
j.setPmcParticipation(PmcParticipation.A);
}

Expand All @@ -90,7 +93,7 @@ private static Journal toJournal(final CSVRecord record) {
}

private static void addIssnIfPresent(Journal journal, String issn, String type) {
if (issn != null && !issn.trim().equals("")) {
if (issn != null && !issn.strip().equals("")) {
journal.getIssns().add(String.join(":", type, issn));
}
}
Expand All @@ -108,5 +111,4 @@ public Stream<Journal> readJournals(InputStream source, Charset charset) {
public boolean hasPmcParticipation() {
// Unconditionally true. NOTE(review): presumably tells callers that journals produced by this
// reader carry PMC participation information - confirm against the JournalReader contract.
return true;
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,15 @@ public void loadFromFileTest(WireMockRuntimeInfo wmRuntimeInfo) throws Exception
System.setProperty("pmc", "");
Main.main(new String[] {});

// We expect three journals, but no PMC A journals
// We expect 4 journals, but no PMC A journals
assertEquals(4, listJournals().size());
assertEquals(0, typeA(listJournals()).size());

System.setProperty("medline", "");
System.setProperty("pmc", "http://localhost:" + wmPort + "/pmc/front-page/NIH_PA_journal_list-1.csv");
Main.main(new String[] {});

// We still expect three journals in the repository, but now two are PMC A
// We still expect 4 journals in the repository, but now two are PMC A
assertEquals(4, listJournals().size());
assertEquals(2, typeA(listJournals()).size());

Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Journal 1,j1,2190-572X,2190-5738,Jul 2011,
Journal 2,j2,,1550-7416,Sep 2004,Feb 2016
Journal Title,NLM Title Abbreviation (TA),Publisher,ISSN (print),ISSN (online),NLM Unique ID,Most Recent,Earliest,Release Delay (Embargo),Agreement Status,Agreement to Deposit,Journal Note,PMC URL
Journal 1,j1,publisher,2190-572X,2190-5738,111111111,v.12(3) 2026,v.1(1) 2011,12 months,Active,All articles,,"http://example.com/blah"
Journal 2,j2,publisher,,1550-7416,000000000,v.16(3) 2020,v.1(1) 2009,10 months,No longer participating,All articles,,"http://example.com/blah2"
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Journal 1,1jr,0000-0001,000-001X,Jul 2011,
Journal 2,2jr,,0000-0002,Sep 2004,
Journal Title,NLM Title Abbreviation (TA),Publisher,ISSN (print),ISSN (online),NLM Unique ID,Most Recent,Earliest,Release Delay (Embargo),Agreement Status,Agreement to Deposit,Journal Note,PMC URL
Journal 1,1jr,publisher,0000-0001,000-001X,111111111,v.12(3) 2026,v.1(1) 2011,12 months,Active,All articles,,"http://example.com/blah"
Journal 2,2jr,publisher,,0000-0002,000000000,v.16(3) 2020,v.1(1) 2009,10 months,Active,All articles,,"http://example.com/blah2"
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Journal 1,1jr,0000-0001,000-001X,Jul 2011,
Journal 2,2jr,,0000-0002,Sep 2004,Feb 2016
Journal Title,NLM Title Abbreviation (TA),Publisher,ISSN (print),ISSN (online),NLM Unique ID,Most Recent,Earliest,Release Delay (Embargo),Agreement Status,Agreement to Deposit,Journal Note,PMC URL
Journal 1,1jr,publisher,0000-0001,000-001X,111111111,v.12(3) 2026,v.1(1) 2011,12 months,Active,All articles,,"http://example.com/blah"
Journal 2,2jr,publisher,,0000-0002,000000000,v.16(3) 2020,v.1(1) 2009,10 months,No longer participating,All articles,,"http://example.com/blah2"