-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataManager.py
More file actions
92 lines (76 loc) · 3.21 KB
/
DataManager.py
File metadata and controls
92 lines (76 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from lib.collector import stock_list_down, candlestick_chart_down
from setting import retry_count, stock_list_path, log_path, data_path_root
class DataManager:
    """Download per-stock data files and load them into memory.

    Attributes are documented where they are assigned in ``__init__``.
    """

    def __init__(self, stock_list_path=stock_list_path):
        """Read the stock list CSV, downloading it first if the file is missing.

        Args:
            stock_list_path: Path of the stock list CSV to read. Defaults to
                the ``stock_list_path`` value imported from ``setting``.
        """
        if not os.path.exists(stock_list_path):
            # NOTE(review): stock_list_down() takes no path argument, so it
            # presumably writes to the path configured in ``setting`` -- confirm
            # that a custom ``stock_list_path`` is actually produced by it.
            stock_list_down()
        # Stock list table; every column is read as ``str``.
        self.info = pd.read_csv(stock_list_path, dtype=str)
        # All loaded data concatenated into one DataFrame (set by load_data).
        self.data_df = None
        # Mapping of ts_code -> DataFrame for that stock (set by load_data).
        self.data_dict = None

    def load_data(self, start_date=20050101):
        """Load every data file under ``data_path_root`` into memory.

        Rows whose ``trade_date`` is earlier than ``start_date`` are dropped.
        On success ``self.data_df`` holds all rows in a single DataFrame and
        ``self.data_dict`` maps each ts_code to the DataFrame of its file.
        When no data file exists, a hint is printed and nothing is loaded.

        Args:
            start_date: First date to keep, as a ``yyyymmdd`` int or string.

        Raises:
            AssertionError: If ``start_date`` is not 8 characters long.
        """
        assert len(str(start_date)) == 8, "please enter yyyymmdd format date"
        if not os.path.exists(data_path_root):
            print("data path not detected, creating")
            # makedirs also creates missing parent directories, which a plain
            # mkdir would fail on.
            os.makedirs(data_path_root, exist_ok=True)

        # The stock list may live in the same directory; it is not stock data.
        data_list = [
            name for name in os.listdir(data_path_root)
            if name != "stock_list.csv"
        ]
        if not data_list:
            print("no data found! please download with downloader_start method!")
            return

        data_paths = [os.path.join(data_path_root, name) for name in data_list]
        print("loading data...")
        frames = []
        self.data_dict = {}
        cutoff = int(start_date)
        for path in tqdm(data_paths):
            frame = pd.read_csv(path)
            # Builtin ``int`` replaces the ``np.int`` alias, which no longer
            # exists in current NumPy releases.
            too_old = frame["trade_date"].astype(int) < cutoff
            frame.drop(frame.loc[too_old].index, inplace=True)
            frames.append(frame)
            # The key is the 9 characters preceding the 4-character extension.
            # NOTE(review): presumably files are named "<ts_code>.csv" with a
            # 9-character ts_code -- confirm against the collector.
            self.data_dict[path[-13:-4]] = frame
        self.data_df = pd.concat(frames, axis=0)
        print("{} data loaded!".format(len(data_paths)))

    def downloader_start(self):
        """Download data for every stock in the list, updating existing files.

        The stock list is refreshed first. Codes that fail are retried in one
        additional pass; codes that still fail are written to
        ``download_error.csv`` inside ``log_path``.
        """
        stock_list_down()
        self.info = pd.read_csv(stock_list_path, dtype=str)
        log = self._down(self.info, self.info.ts_code)
        if not log:
            print("download success!")
        else:
            print("there are {} failures, as follows: {}".format(len(log), log))
            print("retrying download")
            log = self._down(self.info, log)
            if not log:
                print("download success!")
            else:
                print("there are still {} failures, as follows: {}, saving log to {}".format(len(log), log, log_path))
                err = pd.DataFrame(log, columns=["Failed file"])
                # Writing the CSV fails if the log directory does not exist.
                os.makedirs(log_path, exist_ok=True)
                err.to_csv(os.path.join(log_path, "download_error.csv"))
        print("download finished!")

    @staticmethod
    def _down(info, iterator):
        """Download each code in ``iterator`` and return the codes that failed.

        A code is attempted until ``candlestick_chart_down`` returns something
        other than ``"err"``; it is given up on (and recorded) once more than
        ``retry_count`` consecutive attempts have returned ``"err"``.

        Args:
            info: Stock list table passed through to ``candlestick_chart_down``.
            iterator: Iterable of codes to download.

        Returns:
            list: Codes whose download kept failing.
        """
        failed = []
        for code in tqdm(iterator):
            attempts = 0
            while candlestick_chart_down(code, info) == "err":
                attempts += 1
                if attempts > retry_count:
                    failed.append(code)
                    break
        return failed
if __name__ == "__main__":
    # Script entry point: build the manager (reads the stock list, fetching it
    # if the file is missing) and download/update the data for every stock.
    manager = DataManager()
    manager.downloader_start()
    # To load the downloaded data into memory afterwards:
    # manager.load_data()