-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path1_GetData.py
More file actions
68 lines (50 loc) · 2.01 KB
/
1_GetData.py
File metadata and controls
68 lines (50 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import subprocess
import sys
import matplotlib.pyplot as plt
import pandas as pd
import pathlib
import os
def checkForPackages(package):
    """Install ``package`` with pip unless it is already installed.

    The check is a metadata lookup in the running interpreter's environment,
    so no ``pip freeze`` subprocess has to be spawned and parsed.  This also
    avoids two false "missing" results of the freeze-based check: a
    differently-cased spelling of the name, and distributions that
    ``pip freeze`` leaves out of its default output.

    Args:
        package: Distribution name as published on the package index
            (e.g. ``"kaggle"``).  This is the name pip installs, which is
            not always the name used in ``import`` statements.

    Returns:
        None.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits with a
            non-zero status.
    """
    # Imported here so this function does not rely on changes to the
    # file-level import section.
    from importlib import metadata

    try:
        metadata.distribution(package)
    except metadata.PackageNotFoundError:
        print("Installing", package, "...")
        # Use the current interpreter's pip so the package ends up in the
        # same environment this script is running in.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
# Make sure the third-party ``kaggle`` package is available, installing it on
# first use.  Only ImportError is handled: any other failure raised while
# importing must propagate instead of being mistaken for a missing package.
try:
    import kaggle
except ImportError:
    checkForPackages('kaggle')
    # A package installed while the interpreter is already running can be
    # hidden by the import system's cached directory listings; clear them so
    # the import below sees the fresh install.
    import importlib
    importlib.invalidate_caches()

from kaggle.api.kaggle_api_extended import KaggleApi

# Downloads go to a "data" folder under the current working directory
# (the directory the script is launched from, not the script's own location).
dirPath = str(pathlib.Path().resolve())
dataPath = dirPath + "/data/"

# NOTE(review): authenticate() presumably needs Kaggle credentials to be
# configured on this machine -- confirm against the Kaggle API docs.
api = KaggleApi()
api.authenticate()

# Equivalent CLI call:
#   kaggle competitions download -c demand-forecasting-kernels-only -f train.csv
api.competition_download_file('demand-forecasting-kernels-only', 'train.csv', path=dataPath)
# def getDataPerZip(zip):
# countyData = ql.get_table("ZILLOW/REGIONS", region_type='zip', paginate=True)
# regions = countyData[countyData.region.str.contains(zip)].region_id.unique()
# if len(regions) == 1:
# priceData = ql.get_table("ZILLOW/DATA",
# indicator_id='ZCON',
# region_id=regions[0],
# paginate=True)
# else:
# print("Multiple regions selected")
# priceData = ql.get_table("ZILLOW/DATA",
# indicator_id='ZCON',
# paginate=True)
# priceData = priceData.filter(priceData.region_id.str.isin(regions))
# return priceData
# def filterDates(df):
# df.date = pd.to_datetime(df.date, format='%Y-%m-%d')
# df2 = df[(df.date >= '2016-01-01') & (df.date < '2020-01-01')]
# return df2
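# NOTE(review): a literal API key was committed on this line and has been
# redacted. Revoke the exposed key and load its replacement from the
# environment (the variable name below is a placeholder).
# ql.ApiConfig.api_key = os.environ["QL_API_KEY"]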
# df = getDataPerZip('10023')
# df = filterDates(df)
# dirpath = str(pathlib.Path().resolve()) + "/data/"
# if not os.path.exists(dirpath):
# os.mkdir(dirpath)
# path = dirpath + 'raw.csv'
# df.to_csv(path)