-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathstartCrawling.py
More file actions
57 lines (51 loc) · 1.64 KB
/
startCrawling.py
File metadata and controls
57 lines (51 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
"""
Author: TH
Date: 19/05/2016
Login to Seeking Alpha, use simplespider_session to collect urls on the page, use collectArticle to collect
the articles associated with the urls and use insertDB to insert into database.
"""
from login import loginSA
from simplespider_session import collectFromTicker
from collectArticle import collectArticle
from insertDB import insertDB
import tickers
import tickers_all_NASDAQ
def collectFromOnePage(ticker, page):
    """Collect and store the articles from one Seeking Alpha ticker page.

    Logs in, scrapes the article links for *ticker* on listing page *page*,
    inserts each article into the database via insertDB, and appends the
    insertDB result string to NotCollected.txt for every non-"success"
    outcome.

    Parameters:
        ticker: stock ticker symbol, e.g. 'AAPL'.
        page:   page number of the ticker's article listing (converted to str).
    """
    # loginSA() returns a tuple; element 1 is the authenticated session
    # (same usage as in collectFromSnP500 below).
    session = loginSA()[1]
    res = collectFromTicker(session, ticker, str(page))
    print(ticker, ' ', str(page))
    # 'with' guarantees the log file is closed even if something escapes
    # the per-article handler (the original leaked the handle in that case).
    with open("NotCollected.txt", "a") as file:
        for a in res:
            try:
                resDB = insertDB(session, a["linkAdr"])
                if resDB != "success":
                    file.write(resDB + '\n')
            except Exception as e:
                # Log and continue: one failed article must not abort the page.
                # (Fixed the original's misspelled "isertDB" message.)
                print("insertDB error, ", e)
def collectFromSnP500(tickers):
    """Collect and store articles for every ticker in *tickers*.

    For each ticker, scrapes listing pages 1 through 10, inserts each
    article into the database via insertDB, and appends the insertDB
    result string to NotCollected.txt for every non-"success" outcome.

    Parameters:
        tickers: iterable of ticker symbol strings.
    """
    for ticker in tickers:
        # NOTE(review): a fresh login is taken per ticker — presumably to
        # keep the session valid over a long run; confirm with loginSA.
        session = loginSA()[1]
        for page in range(1, 11):  # listing pages 1..10
            res = collectFromTicker(session, ticker, str(page))
            print(ticker, ' ', str(page))
            # Opened in append mode once per page; 'with' guarantees the
            # handle is closed even if something escapes the per-article
            # handler (the original leaked it in that case).
            with open("NotCollected.txt", "a") as file:
                for a in res:
                    try:
                        resDB = insertDB(session, a["linkAdr"])
                        if resDB != "success":
                            file.write(resDB + '\n')
                    except Exception as e:
                        # Log and continue: one failed article must not
                        # abort the crawl. (Fixed the misspelled "isertDB".)
                        print("insertDB error, ", e)
if __name__ == "__main__":
    # Script entry point: crawl every ticker in the tickers module.
    #collectFromOnePage('AAPl',3)
    # When we finish the snp500 tickers, we move on to all NASDAQ tickers
    #tickers = tickers.tickers
    # Finish from tickers_1, tickers_2, tickers_3
    # Rebinds the name 'tickers' from the imported module to its ticker
    # list; the module object is no longer reachable under that name after
    # this line.
    tickers = tickers.tickers
    collectFromSnP500(tickers)