-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest.py
More file actions
42 lines (29 loc) · 1.17 KB
/
test.py
File metadata and controls
42 lines (29 loc) · 1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from bs4 import BeautifulSoup
import urllib
from datetime import datetime
import re
def convertDate(date):
    """Normalize a scraped publication date string.

    Accepted input shapes and their output:

    * ``YYYY``        -> returned as-is (after stripping whitespace)
    * ``YYYY/M/D``    -> ``'%Y %b %d'``, e.g. ``'2015 Mar 04'``
    * ``YYYY/M``      -> ``'%Y %b'``, e.g. ``'2015 Mar'``

    Anything else yields an empty string, including ``None``, text with
    trailing garbage, and out-of-range values such as month 13.

    The month abbreviation comes from ``strftime('%b')`` and therefore
    follows the process's current locale.

    :param date: date text, or ``None`` when the source element had no
        single string child.
    :return: the normalized date string, or ``''`` when it cannot be parsed.
    """
    # A missing value is treated like any other unparseable date.
    if date is None:
        return ''

    text = date.strip()

    if re.match(r'^\d{4}$', text):
        return text

    # Patterns are anchored at both ends so that a valid-looking prefix
    # followed by extra characters is rejected here instead of blowing up
    # inside strptime.
    layouts = (
        (r'^\d{4}/\d{1,2}/\d{1,2}$', '%Y/%m/%d', '%Y %b %d'),
        (r'^\d{4}/\d{1,2}$', '%Y/%m', '%Y %b'),
    )
    for pattern, in_format, out_format in layouts:
        if re.match(pattern, text):
            try:
                return datetime.strptime(text, in_format).strftime(out_format)
            except ValueError:
                # Right shape but impossible value (e.g. 2015/13/40).
                return ''
    return ''
# Base URL of the Google Scholar site; in the visible source it is only
# referenced by the disabled URL list further down.
root = 'https://scholar.google.ca'

# Parse the locally saved page. The with-block closes the file handle
# as soon as its contents have been read.
with open("one.html") as page:
    soup2 = BeautifulSoup(page.read(), "lxml")

# The date is read from the second 'gsc_vcd_value' div. If the page has
# fewer than two such divs, fall back to the same empty string that
# convertDate uses for an unparseable date instead of raising IndexError.
values = soup2.findAll('div', {'class': 'gsc_vcd_value'}, limit=2)
date = convertDate(values[1].string) if len(values) > 1 else ''

# Call form prints the same text under both Python 2 and Python 3.
print(date)
# NOTE(review): the block below is a bare string literal, so none of it is
# executed. It builds four citation-page URLs from `root` and passes each to
# printDate, which is not defined in the visible part of this file -- confirm
# that it exists before re-enabling this code.
'''
url1= root + '/citations?view_op=view_citation&hl=en&user=WMhS0lAAAAAJ&citation_for_view=WMhS0lAAAAAJ:_kc_bZDykSQC'
url2= root + '/citations?view_op=view_citation&hl=en&user=WMhS0lAAAAAJ&citation_for_view=WMhS0lAAAAAJ:TFP_iSt0sucC'
url3= root + '/citations?view_op=view_citation&hl=en&user=WMhS0lAAAAAJ&cstart=20&pagesize=80&citation_for_view=WMhS0lAAAAAJ:kNdYIx-mwKoC'
url4= root + '/citations?view_op=view_citation&hl=en&user=WMhS0lAAAAAJ&cstart=20&pagesize=80&citation_for_view=WMhS0lAAAAAJ:3fE2CSJIrl8C'
printDate(url1)
printDate(url2)
printDate(url3)
printDate(url4)
'''