Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions q01_load_data/build.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
# %load q01_load_data/build.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Location of the monthly champagne sales CSV, relative to the project root.
path = 'data/perrin-freres-monthly-champagne.csv'

"write your solution here"
# 'write your solution here'

def q01_load_data(path):
    """Read the CSV at ``path`` and return it with a fresh positional index.

    Parameters
    ----------
    path : str
        Location of the CSV file; handed directly to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed file, re-indexed from zero with the previous index dropped.
    """
    return pd.read_csv(path).reset_index(drop=True)

# q01_load_data(path)




16 changes: 15 additions & 1 deletion q02_data_splitter/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q02_data_splitter/build.py
import pandas as pd
import numpy as np
import datetime
Expand All @@ -6,4 +7,17 @@
#sys.path.append('./')
from greyatomlib.time_series_101_project.q01_load_data.build import q01_load_data

'''Write your solution here'''
path = 'data/perrin-freres-monthly-champagne.csv'

# '''Write your solution here'''

def q02_data_splitter(path, split_date='1971-10-01'):
    """Load the sales data and split it chronologically into two frames.

    Parameters
    ----------
    path : str
        CSV location, forwarded to ``q01_load_data``.
    split_date : str or datetime-like, optional
        First date that belongs to the validation set. Anything accepted by
        ``pandas.Timestamp`` works. Defaults to 1 October 1971, the cutoff
        that was previously hard-coded.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(X_train, X_valid)``: rows whose ``Month`` is strictly before the
        cutoff, and rows whose ``Month`` is on or after it. ``Month`` is
        converted to datetime in both.
    """
    df = q01_load_data(path)
    df['Month'] = pd.to_datetime(df['Month'])

    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0;
    # ``pd.Timestamp`` is the supported equivalent.
    cutoff = pd.Timestamp(split_date)

    # Return independent copies so callers can add columns without writing
    # into a slice of ``df`` (avoids SettingWithCopyWarning downstream).
    X_train = df[df['Month'] < cutoff].copy()
    X_valid = df[df['Month'] >= cutoff].copy()
    return X_train, X_valid



Binary file added q02_data_splitter/tests/test_sol.pkl
Binary file not shown.
Binary file added q02_data_splitter/tests/user_sol.pkl
Binary file not shown.
25 changes: 24 additions & 1 deletion q03_stacked_point_plot/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q03_stacked_point_plot/build.py
import pandas as pd
import numpy as np
#import sys
Expand All @@ -8,5 +9,27 @@
plt.switch_backend('agg')
import seaborn as sns

#"write your solution here"
#'write your solution here'

# Module-level values matching the keyword defaults of q03_stacked_point_plot,
# kept so the function can be invoked by hand with explicit arguments.
path = 'data/perrin-freres-monthly-champagne.csv'

x_column_name = 'month'
y_column_name = 'Sales'
hue = 'year'
order_of_the_axis = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]

def q03_stacked_point_plot(path,x_column_name='month',y_column_name='Sales',hue='year',order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug','Sep', 'Oct', 'Nov', 'Dec']):
    """Draw one sales-per-month line per year for the training split.

    Parameters
    ----------
    path : str
        CSV location, forwarded to ``q02_data_splitter``.
    x_column_name : str
        Column plotted on the x axis. The function derives a ``'month'``
        column (abbreviated month name) for this purpose.
    y_column_name : str
        Column plotted on the y axis.
    hue : str
        Column used to colour the lines. The function derives a ``'year'``
        column for this purpose.
    order_of_the_axis : list of str
        Order in which the x-axis categories are drawn.

    Returns
    -------
    None
        The plot is drawn on a new 16x7 figure and ``plt.show()`` is called.
    """
    X_train, _ = q02_data_splitter(path)

    # Work on a copy so the derived columns are not written into a slice of
    # the splitter's frame.
    X_train = X_train.copy()
    X_train['year'] = X_train['Month'].dt.year
    X_train['month'] = X_train['Month'].dt.strftime('%b')

    plt.figure(figsize=(16, 7))
    # The category-order keyword of pointplot is ``order``; ``x_order`` is
    # not accepted by current seaborn releases.
    sns.pointplot(
        x=x_column_name,
        y=y_column_name,
        hue=hue,
        data=X_train,
        order=order_of_the_axis,
    )
    plt.xlabel('month')
    plt.ylabel('sales')
    plt.legend(title='year', loc='upper left')
    plt.show()

# q03_stacked_point_plot(path,x_column_name,y_column_name,hue,order_of_the_axis)



Binary file added q03_stacked_point_plot/tests/test_sol.pkl
Binary file not shown.
Binary file added q03_stacked_point_plot/tests/user_sol.pkl
Binary file not shown.
27 changes: 26 additions & 1 deletion q04_boxplot/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q04_boxplot/build.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
Expand All @@ -9,6 +10,30 @@
# sys.path.append('./')
from greyatomlib.time_series_101_project.q02_data_splitter.build import q02_data_splitter

"write your solution here"
'write your solution here'

# Module-level values matching the keyword defaults of q04_boxplot, kept so
# the function can be invoked by hand with explicit arguments.
path = 'data/perrin-freres-monthly-champagne.csv'

x = 'month'
y = 'Sales'
kind = 'box'
order = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
size = 8

def q04_boxplot(path,x='month',y='Sales',kind='box',
                order=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug','Sep', 'Oct', 'Nov', 'Dec'],size=8):
    """Draw a per-month categorical plot of sales for the training split.

    Parameters
    ----------
    path : str
        CSV location, forwarded to ``q02_data_splitter``.
    x : str
        Column plotted on the x axis. The function derives a ``'month'``
        column (abbreviated month name) for this purpose.
    y : str
        Column plotted on the y axis.
    kind : str
        Plot kind passed to seaborn (``'box'`` by default).
    order : list of str
        Order in which the x-axis categories are drawn.
    size : int or float
        Facet height passed to seaborn.

    Returns
    -------
    None
        The plot is drawn and ``plt.show()`` is called.
    """
    X_train, _ = q02_data_splitter(path)

    # Work on a copy so the derived columns are not written into a slice of
    # the splitter's frame.
    X_train = X_train.copy()
    X_train['month'] = X_train['Month'].dt.strftime('%b')
    X_train['year'] = X_train['Month'].dt.year

    # NOTE(review): seaborn's figure-level functions create their own figure,
    # so this one presumably stays empty; kept to preserve existing behavior.
    plt.figure(figsize=(16, 7))

    if hasattr(sns, 'catplot'):
        # seaborn >= 0.9: factorplot was renamed catplot and size -> height.
        sns.catplot(x=x, y=y, data=X_train, kind=kind, order=order, height=size)
    else:
        # Older seaborn releases only provide the original spelling.
        sns.factorplot(x=x, y=y, data=X_train, kind=kind, order=order, size=size)

    plt.xlabel('Month')
    plt.ylabel('Sales')
    plt.title('Monthly Sales')
    plt.show()

# q04_boxplot(path,x,y,kind,order,size)




Binary file added q04_boxplot/tests/test_sol.pkl
Binary file not shown.
Binary file added q04_boxplot/tests/user_sol.pkl
Binary file not shown.
28 changes: 26 additions & 2 deletions q05_sarima_model/build.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,38 @@
# %load q05_sarima_model/build.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
# import sys
# sys.path.append('./')

path = 'data/perrin-freres-monthly-champagne.csv'

from greyatomlib.time_series_101_project.q02_data_splitter.build import q02_data_splitter
train, validation = q02_data_splitter(path)

#train, validation = q02_data_splitter(path)

'write your solution here'

def _sales_indexed_by_month(frame):
    """Return a one-column ``Sales`` frame indexed by ``frame``'s Month values."""
    series_frame = frame[['Sales']].copy()
    # Assign raw values (not the Series) so the new index carries no name.
    series_frame.index = frame['Month'].values
    return series_frame


def q05_sarima_model(path):
    """Build the time-indexed sales series used for SARIMA modelling.

    Parameters
    ----------
    path : str
        CSV location, forwarded to ``q02_data_splitter``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(tss, tss_valid)``: the training and validation splits, each
        reduced to a single ``Sales`` column and indexed by the values of
        the corresponding ``Month`` column.
    """
    train, validation = q02_data_splitter(path)

    # The previous code set ``.column`` (a typo for ``.columns``), which only
    # attached a stray attribute; selecting ``[['Sales']]`` already yields the
    # intended column name.
    tss = _sales_indexed_by_month(train)
    tss_valid = _sales_indexed_by_month(validation)

    return tss, tss_valid

# q05_sarima_model(path)




Binary file added q05_sarima_model/tests/test_sol.pkl
Binary file not shown.
Binary file added q05_sarima_model/tests/user_sol.pkl
Binary file not shown.
25 changes: 22 additions & 3 deletions q06_sarima_predictor/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q06_sarima_predictor/build.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
Expand All @@ -7,10 +8,28 @@
from statsmodels.tsa.statespace import sarimax
import math
from sklearn.metrics import mean_squared_error
#import sys
#sys.path.append('./')
from greyatomlib.time_series_101_project.q05_sarima_model.build import q05_sarima_model
# Default dataset location for this module.
path = 'data/perrin-freres-monthly-champagne.csv'
# Executed at import time: binds the two values returned by q05_sarima_model
# to the module-level names tss and tss_valid.
tss, tss_valid = q05_sarima_model(path)

"write your solution here"
def q06_sarima_predictor(path):
    """Fit a SARIMA model on the training sales and score it on validation.

    The model uses ``order=(5, 1, 1)`` and ``seasonal_order=(1, 1, 0, 12)``.
    The training and validation series are also drawn on a new 16x7 figure.

    Parameters
    ----------
    path : str
        CSV location, forwarded to ``q05_sarima_model``.

    Returns
    -------
    tuple
        ``(pred, measure)``: ``pred`` is a DataFrame with a single ``yhat``
        column holding one forecast per validation row, indexed like the
        validation series. ``measure`` is the root mean squared error
        between the validation values and the forecast.
    """
    # Load the series from ``path`` instead of relying on module globals, so
    # the argument is actually honoured.
    tss, tss_valid = q05_sarima_model(path)

    mod = sarimax.SARIMAX(
        tss['Sales'],
        order=(5, 1, 1),
        seasonal_order=(1, 1, 0, 12),
    ).fit()

    plt.figure(figsize=(16, 7))
    plt.plot(tss.index, tss['Sales'], color='lightblue')
    plt.plot(tss_valid.index, tss_valid['Sales'].values, color='green')

    # Forecast exactly as many steps as there are validation rows.
    pred = pd.DataFrame(mod.forecast(len(tss_valid)))
    pred.columns = ['yhat']
    pred.index = tss_valid.index

    # RMSE = square root of the mean squared error.
    measure = math.pow(mean_squared_error(tss_valid.values, pred.values), 0.5)

    return pred, measure

# q06_sarima_predictor(path)




Binary file added q06_sarima_predictor/tests/test_sol.pkl
Binary file not shown.
Binary file added q06_sarima_predictor/tests/user_sol.pkl
Binary file not shown.
Binary file added test_sol.pkl
Binary file not shown.
Binary file added user_sol.pkl
Binary file not shown.