-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcleanBib.py
More file actions
78 lines (63 loc) · 1.93 KB
/
cleanBib.py
File metadata and controls
78 lines (63 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import numpy as np
import re
from collections import Counter
import argparse
def main(infile, outfile, verbose=True):
    """Copy a BibTeX file to *outfile*, dropping entries with a repeated eprint.

    Every line of *infile* that contains the text ``eprint`` and the
    separator ``= {`` contributes one eprint value (the text between
    ``= {`` and the next ``}``).  For each value seen ``n > 1`` times,
    ``delete`` is called ``n - 1`` times so that a single entry with that
    value is left in the output.

    Parameters
    ----------
    infile : str
        Path of the BibTeX file to read.
    outfile : str
        Path the de-duplicated lines are written to.
    verbose : bool, optional
        When true, progress messages are printed to stdout.
    """
    if verbose:
        print('Using file:', infile)
    # Context managers guarantee the handles are closed even on error.
    with open(infile, mode='r') as source:
        lines = source.readlines()
    if verbose:
        print('Reading...')

    eprints = []
    for line in lines:
        if 'eprint' not in line:
            continue
        pieces = line.split('= {')
        # Lines that merely mention "eprint" (no "= {") carry no value;
        # skip them instead of failing with an IndexError.
        if len(pieces) > 1:
            eprints.append(pieces[1].split('}')[0])
    if verbose:
        print('Read!')

    delcount = 0
    for eprint, occurrences in Counter(eprints).items():
        # Keep one entry per eprint value: remove the surplus copies.
        for _ in range(occurrences - 1):
            lines = delete(eprint, lines)
            delcount += 1
    if verbose:
        print('Deleted ' + str(delcount)+' duplicates.')

    if verbose:
        print('writing output file to:', outfile)
    with open(outfile, mode='w') as out:
        out.writelines(lines)
def _is_entry_start(line):
    """Return True if *line* opens a BibTeX entry, e.g. ``@ARTICLE{key,``."""
    text = line.lstrip()
    if not text.startswith('@'):
        return False
    entry_type, brace, _ = text[1:].partition('{')
    return bool(brace) and entry_type.strip().isalpha()


def delete(item, lines):
    """Return a copy of *lines* without the first entry whose eprint is *item*.

    The entry is located through the first line that contains ``eprint``
    and whose value (the text between ``= {`` and the next ``}``) equals
    *item*.  The removed span runs from the nearest preceding entry
    header (``@TYPE{`` of any entry type, not only ``@ARTICLE``) through
    the next line consisting solely of a closing ``}``.

    Parameters
    ----------
    item : str
        Eprint value identifying the entry to remove.
    lines : list of str
        Lines of the BibTeX file; the list itself is not modified.

    Returns
    -------
    list of str
        A new list with the matching entry removed, or an unchanged copy
        when no line carries *item*.

    Raises
    ------
    ValueError
        If the matching eprint line is not enclosed by an entry header
        above it and a closing ``}`` line at or below it.
    """
    # Locate the first line carrying the requested eprint value.
    target = -1
    for idx, text in enumerate(lines):
        if 'eprint' not in text:
            continue
        pieces = text.split('= {')
        # A line mentioning "eprint" without "= {" has no value to compare.
        if len(pieces) > 1 and pieces[1].split('}')[0] == item:
            target = idx
            break
    if target == -1:
        return list(lines)

    # Walk up to the header of the entry that owns this line.  Stopping at
    # index 0 prevents negative indices from wrapping to the file's end.
    start = target
    while start >= 0 and not _is_entry_start(lines[start]):
        start -= 1
    if start < 0:
        raise ValueError('no entry header found above eprint %r' % (item,))

    # Walk down to the closing brace; rstrip() also accepts a final line
    # without a trailing newline and CRLF line endings.
    end = target
    while end < len(lines) and lines[end].rstrip() != '}':
        end += 1
    if end == len(lines):
        raise ValueError('no closing brace found below eprint %r' % (item,))

    return lines[:start] + lines[end + 1:]
def parse_verbose(value):
    """Convert the command-line ``verbose`` argument to a bool.

    argparse hands positional values over as strings, and every non-empty
    string is truthy, so ``False`` or ``0`` on the command line would
    otherwise still enable verbose output.  The spellings listed below
    (case-insensitive) and the empty string mean False; anything else
    means True.
    """
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


def build_parser():
    """Build the command-line parser: optional infile, outfile and verbose."""
    parser = argparse.ArgumentParser(description='Delete duplicates from the bib file')
    parser.add_argument('infile', nargs='?', default='./in.bib',
                        help='BibTeX file to read (default: ./in.bib)')
    parser.add_argument('outfile', nargs='?', default='./out.bib',
                        help='file to write (default: ./out.bib)')
    parser.add_argument('verbose', nargs='?', default=True, type=parse_verbose,
                        help='print progress messages; pass False/0/no to silence')
    return parser


# Only parse arguments and run when executed as a script, so importing this
# module does not read sys.argv or touch any files.
if __name__ == '__main__':
    parser = build_parser()
    args = parser.parse_args()
    main(infile=args.infile, outfile=args.outfile, verbose=args.verbose)