# Solutions for the time_series_101_project exercises (q01 - q06).
#
# This text was recovered from a whitespace-collapsed git patch that touched
# q01_load_data/build.py ... q06_sarima_predictor/build.py.  The same patch
# also added binary fixtures that cannot be represented here:
#   q02..q06 */tests/test_sol.pkl and */tests/user_sol.pkl,
#   plus top-level test_sol.pkl and user_sol.pkl.
#
# Plotting / modelling libraries (matplotlib, seaborn, statsmodels,
# scikit-learn) are imported inside the functions that need them so that the
# data-loading helpers stay importable when only pandas is installed.
import datetime
import math

import numpy as np
import pandas as pd

path = 'data/perrin-freres-monthly-champagne.csv'

# Month abbreviations in calendar order, as produced by strftime('%b').
# NOTE(review): '%b' is locale dependent - presumably an English/C locale is
# expected here; confirm before running under another locale.
_MONTH_ABBREVIATIONS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']


# --------------------------------------------------------------------------
# q01_load_data/build.py
# --------------------------------------------------------------------------
def q01_load_data(path):
    """Read the CSV file at *path* and return it as a DataFrame.

    The index is reset (old index dropped) before the frame is returned.
    """
    return pd.read_csv(path).reset_index(drop=True)


# --------------------------------------------------------------------------
# q02_data_splitter/build.py
# --------------------------------------------------------------------------
def q02_data_splitter(path, split_date='1971-10-01'):
    """Load the data and split it chronologically on the 'Month' column.

    Parameters
    ----------
    path : str
        CSV file handed to ``q01_load_data``; it must contain a 'Month'
        column that ``pd.to_datetime`` can parse.
    split_date : anything accepted by ``pd.Timestamp``, optional
        First timestamp that belongs to the validation part.  Defaults to
        1 October 1971, the value that used to be hard-coded.

    Returns
    -------
    (X_train, X_valid) : tuple of DataFrame
        Rows with ``Month < split_date`` and rows with
        ``Month >= split_date``; both keep their original index labels.
    """
    df = q01_load_data(path)
    df['Month'] = pd.to_datetime(df['Month'])
    # pd.Timestamp replaces pd.datetime, which was deprecated in pandas 1.0
    # and removed in pandas 2.0.
    cutoff = pd.Timestamp(split_date)
    is_validation = df['Month'] >= cutoff
    return df[~is_validation], df[is_validation]


def _with_calendar_columns(frame):
    """Return a copy of *frame* with 'year' and 'month' derived from 'Month'.

    Working on a copy avoids assigning new columns to a filtered slice of
    the loaded frame.
    """
    enriched = frame.copy()
    enriched['year'] = enriched['Month'].dt.year
    enriched['month'] = enriched['Month'].dt.strftime('%b')
    return enriched


# --------------------------------------------------------------------------
# q03_stacked_point_plot/build.py
# --------------------------------------------------------------------------
x_column_name = 'month'
y_column_name = 'Sales'
hue = 'year'
order_of_the_axis = list(_MONTH_ABBREVIATIONS)


def q03_stacked_point_plot(path, x_column_name='month', y_column_name='Sales',
                           hue='year', order_of_the_axis=None):
    """Draw a seaborn point plot of the training split, one line per *hue*.

    Parameters
    ----------
    path : str
        CSV file passed on to ``q02_data_splitter``.
    x_column_name, y_column_name, hue : str
        Column names forwarded to ``sns.pointplot`` as ``x``, ``y``, ``hue``.
        'year' and 'month' columns are derived from 'Month' before plotting.
    order_of_the_axis : list of str, optional
        Category order for the x axis.  ``None`` (the default) means the
        twelve month abbreviations 'Jan' ... 'Dec'.

    Returns
    -------
    None
    """
    import matplotlib.pyplot as plt
    plt.switch_backend('agg')
    import seaborn as sns

    if order_of_the_axis is None:
        order_of_the_axis = list(_MONTH_ABBREVIATIONS)

    X_train, _ = q02_data_splitter(path)
    X_train = _with_calendar_columns(X_train)

    plt.figure(figsize=(16, 7))
    # pointplot's ordering keyword is `order`; the previous `x_order=` is not
    # a pointplot parameter, so the requested month order was never applied.
    sns.pointplot(x=x_column_name, y=y_column_name, hue=hue, data=X_train,
                  order=order_of_the_axis)
    plt.xlabel('month')
    plt.ylabel('sales')
    plt.legend(title='year', loc='upper left')
    plt.show()


# --------------------------------------------------------------------------
# q04_boxplot/build.py
# --------------------------------------------------------------------------
x = 'month'
y = 'Sales'
kind = 'box'
order = list(_MONTH_ABBREVIATIONS)
size = 8


def q04_boxplot(path, x='month', y='Sales', kind='box', order=None, size=8):
    """Draw a categorical plot (box plot by default) of the training split.

    Parameters
    ----------
    path : str
        CSV file passed on to ``q02_data_splitter``.
    x, y : str
        Column names for the categorical and the value axis.
    kind : str
        Plot kind forwarded to seaborn.
    order : list of str, optional
        Category order; ``None`` (the default) means 'Jan' ... 'Dec'.
    size : number
        Facet height; forwarded as ``height`` to ``catplot`` or as ``size``
        to the legacy ``factorplot``.

    Returns
    -------
    None
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    if order is None:
        order = list(_MONTH_ABBREVIATIONS)

    X_train, _ = q02_data_splitter(path)
    X_train = _with_calendar_columns(X_train)

    # NOTE(review): the figure-level seaborn call below creates its own
    # figure, so this one probably stays empty - kept to preserve the
    # existing behaviour; confirm before removing.
    plt.figure(figsize=(16, 7))

    # `factorplot(size=...)` is the deprecated spelling of
    # `catplot(height=...)`; prefer the current API when it exists.
    catplot = getattr(sns, 'catplot', None)
    if catplot is not None:
        catplot(x=x, y=y, data=X_train, kind=kind, order=order, height=size)
    else:
        sns.factorplot(x=x, y=y, data=X_train, kind=kind, order=order,
                       size=size)

    plt.xlabel('Month')
    plt.ylabel('Sales')
    plt.title('Monthly Sales')
    plt.show()


# --------------------------------------------------------------------------
# q05_sarima_model/build.py
# --------------------------------------------------------------------------
def _sales_indexed_by_month(frame):
    """Return a one-column ('Sales') DataFrame indexed by *frame*'s 'Month'."""
    sales = frame[['Sales']].copy()
    sales.index = frame['Month'].values
    return sales


def q05_sarima_model(path):
    """Prepare the train / validation 'Sales' series for SARIMA modelling.

    Parameters
    ----------
    path : str
        CSV file passed on to ``q02_data_splitter``.

    Returns
    -------
    (tss, tss_valid) : tuple of DataFrame
        Each has a single 'Sales' column and is indexed by the values of the
        corresponding split's 'Month' column.
    """
    train, validation = q02_data_splitter(path)
    # The earlier version assigned to `.column` (a typo for `.columns`),
    # which only attached a stray attribute; the selection above already
    # yields the 'Sales' column name, so no renaming is needed.
    return _sales_indexed_by_month(train), _sales_indexed_by_month(validation)


# --------------------------------------------------------------------------
# q06_sarima_predictor/build.py
# --------------------------------------------------------------------------
def q06_sarima_predictor(path):
    """Fit a SARIMA model on the training sales and forecast the validation span.

    The series are built from *path* on every call.  Previously they were
    computed once at import time from the module-level ``path`` and the
    argument was ignored.

    Parameters
    ----------
    path : str
        CSV file passed on to ``q05_sarima_model``.

    Returns
    -------
    (pred, measure) : tuple
        ``pred`` is a DataFrame with one column 'yhat' indexed like the
        validation series; ``measure`` is the square root of
        ``mean_squared_error`` between validation values and forecast.
    """
    import matplotlib.pyplot as plt
    from sklearn.metrics import mean_squared_error
    from statsmodels.tsa.statespace import sarimax

    tss, tss_valid = q05_sarima_model(path)

    fitted = sarimax.SARIMAX(tss['Sales'], order=(5, 1, 1),
                             seasonal_order=(1, 1, 0, 12)).fit()

    plt.figure(figsize=(16, 7))
    plt.plot(tss.index, tss.Sales, color='lightblue')
    plt.plot(tss_valid.index, tss_valid.Sales.values, color='green')

    # Forecast as many steps as there are validation observations.
    pred = pd.DataFrame(fitted.forecast(len(tss_valid)))
    pred.columns = ['yhat']
    pred.index = tss_valid.index

    measure = math.sqrt(mean_squared_error(tss_valid.values, pred.values))

    return pred, measure