From 54d6b2d654bb65e8724b274a59370d7052ef5709 Mon Sep 17 00:00:00 2001 From: rajeshbrid Date: Wed, 12 Dec 2018 14:48:49 +0000 Subject: [PATCH 1/6] Done --- q01_load_data/build.py | 14 ++++++++++++-- test_sol.pkl | Bin 0 -> 76 bytes user_sol.pkl | Bin 0 -> 64 bytes 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 test_sol.pkl create mode 100644 user_sol.pkl diff --git a/q01_load_data/build.py b/q01_load_data/build.py index 48147b4..ba849b6 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,9 +1,19 @@ +# %load q01_load_data/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split -path = 'data/perrin-freres-monthly-champagne.csv +path = 'data/perrin-freres-monthly-champagne.csv' -"write your solution here" +# 'write your solution here' +def q01_load_data(path): + df = pd.read_csv(path) + df.reset_index(drop=True, inplace=True) + return df + +# q01_load_data(path) + + + diff --git a/test_sol.pkl b/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..359b3b3c0b12fc6f2775e8e153d0fdf41d512889 GIT binary patch literal 76 zcmZo*PEIdMtxPP*&&|n9(ksc#O^q*3Ey_$SjyE(gj4vq4&q_@$(JKV9bMg~Y;!_e! S67`ZwGjme7P{g Date: Wed, 12 Dec 2018 14:51:29 +0000 Subject: [PATCH 2/6] Done --- q02_data_splitter/build.py | 16 +++++++++++++++- q02_data_splitter/tests/test_sol.pkl | Bin 0 -> 84 bytes q02_data_splitter/tests/user_sol.pkl | Bin 0 -> 72 bytes 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 q02_data_splitter/tests/test_sol.pkl create mode 100644 q02_data_splitter/tests/user_sol.pkl diff --git a/q02_data_splitter/build.py b/q02_data_splitter/build.py index 16db3ca..8996873 100644 --- a/q02_data_splitter/build.py +++ b/q02_data_splitter/build.py @@ -1,3 +1,4 @@ +# %load q02_data_splitter/build.py import pandas as pd import numpy as np import datetime @@ -6,4 +7,17 @@ #sys.path.append('./') from greyatomlib.time_series_101_project.q01_load_data.build import q01_load_data -'''Write your solution here''' +path = 'data/perrin-freres-monthly-champagne.csv' + +# '''Write your solution here''' + +def q02_data_splitter(path): + df = q01_load_data(path) + df['Month'] = pd.to_datetime(df['Month']) + split_date = pd.datetime(1971,10,1) + X_train = df[df['Month'] < split_date] + X_valid = df[df['Month'] >=split_date] + return X_train,X_valid + + + diff --git a/q02_data_splitter/tests/test_sol.pkl b/q02_data_splitter/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..35a0129d156b7ac2824788e473c0badf7dcc8f3e GIT binary patch literal 84 zcmZo*PEIdMtxPP*&&|n9(ksc#O^q*3Ey_$SjyE(gj4vq4&q_@$(JM4Cicd)_NsKQp Y$jK}zNiEV#D$UGE;ldE-DrC?D07DNR_W%F@ literal 0 HcmV?d00001 diff --git a/q02_data_splitter/tests/user_sol.pkl b/q02_data_splitter/tests/user_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8db3730faed284ef42198e0745a363330324ab6e GIT binary patch literal 72 zcmZo*PA Date: Wed, 12 Dec 2018 14:53:34 +0000 Subject: [PATCH 3/6] Done --- q03_stacked_point_plot/build.py | 25 +++++++++++++++++++++- q03_stacked_point_plot/tests/test_sol.pkl | Bin 0 -> 94 bytes q03_stacked_point_plot/tests/user_sol.pkl | Bin 0 -> 82 bytes 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 q03_stacked_point_plot/tests/test_sol.pkl create mode 100644 q03_stacked_point_plot/tests/user_sol.pkl diff --git a/q03_stacked_point_plot/build.py b/q03_stacked_point_plot/build.py index b77138c..85fa290 100644 --- a/q03_stacked_point_plot/build.py +++ b/q03_stacked_point_plot/build.py @@ -1,3 +1,4 @@ +# %load q03_stacked_point_plot/build.py import pandas as pd import numpy as np #import sys @@ -8,5 +9,27 @@ plt.switch_backend('agg') import seaborn as sns -#"write your solution here" +#'write your solution here' +path = 'data/perrin-freres-monthly-champagne.csv' + +x_column_name='month' +y_column_name='Sales' +hue='year' +order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug','Sep', 'Oct', 'Nov', 'Dec'] + +def q03_stacked_point_plot(path,x_column_name='month',y_column_name='Sales',hue='year',order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug','Sep', 'Oct', 'Nov', 'Dec']): + X_train,X_valid = q02_data_splitter(path) + X_train['year'] = X_train['Month'].dt.year + X_train['month'] = X_train['Month'].dt.strftime('%b') + plt.figure(figsize=(16, 7)) + sns.pointplot(x=x_column_name, y=y_column_name, hue=hue, data=X_train, x_order=order_of_the_axis) + plt.xlabel('month') + plt.ylabel('sales') + plt.legend(title='year',loc='upper left') + plt.show() + +# q03_stacked_point_plot(path,x_column_name,y_column_name,hue,order_of_the_axis) + + + diff --git a/q03_stacked_point_plot/tests/test_sol.pkl b/q03_stacked_point_plot/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..22e5e2f6db0aea2381f9196a464445c2a88a3563 GIT binary patch literal 94 zcmZw6xeb6Y3;;kRx(s6_VjwGzg$xePV+uM(X92Fgcg_2tCz@8_1jdzc(qNhxe84vJ cpls0nhSOp`sbEvS44bGyx><_*PuCat0H3iVB>(^b literal 0 HcmV?d00001 diff --git a/q03_stacked_point_plot/tests/user_sol.pkl b/q03_stacked_point_plot/tests/user_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..abdc4543a0eee924d4cd8a079049845cc6a8cb39 GIT binary patch literal 82 zcmZo*PA Date: Wed, 12 Dec 2018 14:55:58 +0000 Subject: [PATCH 4/6] Done --- q04_boxplot/build.py | 27 ++++++++++++++++++++++++++- q04_boxplot/tests/test_sol.pkl | Bin 0 -> 72 bytes q04_boxplot/tests/user_sol.pkl | Bin 0 -> 60 bytes 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 q04_boxplot/tests/test_sol.pkl create mode 100644 q04_boxplot/tests/user_sol.pkl diff --git a/q04_boxplot/build.py b/q04_boxplot/build.py index 6d90712..74644f5 100644 --- a/q04_boxplot/build.py +++ b/q04_boxplot/build.py @@ -1,3 +1,4 @@ +# %load q04_boxplot/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split @@ -9,6 +10,30 @@ # sys.path.append('./') from greyatomlib.time_series_101_project.q02_data_splitter.build import q02_data_splitter -"write your solution here" +'write your solution here' +path = 'data/perrin-freres-monthly-champagne.csv' +x ='month' +y ='Sales' +kind = 'box' +order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug','Sep', 'Oct', 'Nov', 'Dec'] +size = 8 + +def q04_boxplot(path,x='month',y='Sales',kind='box', + order=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug','Sep', 'Oct', 'Nov', 'Dec'],size=8): + + X_train,X_valid = q02_data_splitter(path) + X_train['month'] = X_train['Month'].dt.strftime('%b') + X_train['year'] = X_train['Month'].dt.year + plt.figure(figsize=(16,7)) + sns.factorplot(x=x,y=y,data=X_train,kind=kind,order=order,size=size) + plt.xlabel('Month') + plt.ylabel('Sales') + plt.title('Monthly Sales') + plt.show() + +# q04_boxplot(path,x,y,kind,order,size) + + + diff --git a/q04_boxplot/tests/test_sol.pkl b/q04_boxplot/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..22ea27580916292f965db26ea33c178d731efa75 GIT binary patch literal 72 zcmZo*PEIdMtxPP*&&|n9(ksc#O^q*3Ey_$SjyE(gj4vq4&q_@$(JM4CiBHO}D9Fh# S(Mu}L%t_%w65%Rj&;tP6^%^Sx literal 0 HcmV?d00001 diff --git a/q04_boxplot/tests/user_sol.pkl b/q04_boxplot/tests/user_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a292707e4c7a2b3cf40f7da03247e27cc864e4d3 GIT binary patch literal 60 zcmZo*PA Date: Wed, 12 Dec 2018 14:58:44 +0000 Subject: [PATCH 5/6] Done --- q05_sarima_model/build.py | 28 ++++++++++++++++++++++++++-- q05_sarima_model/tests/test_sol.pkl | Bin 0 -> 82 bytes q05_sarima_model/tests/user_sol.pkl | Bin 0 -> 70 bytes 3 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 q05_sarima_model/tests/test_sol.pkl create mode 100644 q05_sarima_model/tests/user_sol.pkl diff --git a/q05_sarima_model/build.py b/q05_sarima_model/build.py index 0ac51b7..017a5dc 100644 --- a/q05_sarima_model/build.py +++ b/q05_sarima_model/build.py @@ -1,3 +1,4 @@ +# %load q05_sarima_model/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split @@ -5,10 +6,33 @@ import matplotlib.pyplot as plt # import sys # sys.path.append('./') + +path = 'data/perrin-freres-monthly-champagne.csv' + from greyatomlib.time_series_101_project.q02_data_splitter.build import q02_data_splitter -train, validation = q02_data_splitter(path) +#train, validation = q02_data_splitter(path) + +'write your solution here' + +def q05_sarima_model(path): + + train, validation = q02_data_splitter(path) + train = pd.DataFrame(train) + validation = pd.DataFrame(validation) -"write your solution here" + tss = pd.DataFrame(train['Sales']) + tss.column = ['Sales'] + tss.index = train['Month'].values + tss_valid = pd.DataFrame(validation['Sales']) + tss_valid.column = ['Sales'] + tss_valid.index = validation['Month'].values + + return tss, tss_valid + +# q05_sarima_model(path) + + + diff --git a/q05_sarima_model/tests/test_sol.pkl b/q05_sarima_model/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..edaa79cb373baa3ceca76a88783bfd07b4d7f546 GIT binary patch literal 82 zcmZo*PEIdMtxPP*&&|n9(ksc#O^q*3Ey_$SjyE(gj4vq4&q_@$(JM4CjW13t%FIoS X&&^Ls&CyFL&CE&RLKo&LWY7Zu0L2~m literal 0 HcmV?d00001 diff --git a/q05_sarima_model/tests/user_sol.pkl b/q05_sarima_model/tests/user_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..db5d7025263a23e284ead2d45cbd5d987d0c31cf GIT binary patch literal 70 zcmZo*PA Date: Fri, 11 Jan 2019 03:25:39 +0000 Subject: [PATCH 6/6] Done --- q06_sarima_predictor/build.py | 25 +++++++++++++++++++++--- q06_sarima_predictor/tests/test_sol.pkl | Bin 0 -> 90 bytes q06_sarima_predictor/tests/user_sol.pkl | Bin 0 -> 78 bytes 3 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 q06_sarima_predictor/tests/test_sol.pkl create mode 100644 q06_sarima_predictor/tests/user_sol.pkl diff --git a/q06_sarima_predictor/build.py b/q06_sarima_predictor/build.py index 6e09848..5fe40c1 100644 --- a/q06_sarima_predictor/build.py +++ b/q06_sarima_predictor/build.py @@ -1,3 +1,4 @@ +# %load q06_sarima_predictor/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split @@ -7,10 +8,28 @@ from statsmodels.tsa.statespace import sarimax import math from sklearn.metrics import mean_squared_error -#import sys -#sys.path.append('./') from greyatomlib.time_series_101_project.q05_sarima_model.build import q05_sarima_model +path = 'data/perrin-freres-monthly-champagne.csv' tss, tss_valid = q05_sarima_model(path) - "write your solution here" +def q06_sarima_predictor(path): + mod = sarimax.SARIMAX(tss['Sales'], order=(5, 1, 1), seasonal_order=(1, 1, 0, 12)).fit() + plt.figure(figsize=(16, 7)) + plt.plot(tss.index, tss.Sales, color='lightblue') + plt.plot(tss_valid.index, tss_valid.Sales.values, color='green') + + ## Forecasting + + pred = pd.DataFrame(mod.forecast(len(tss_valid))) + pred.columns = ['yhat'] + pred.index = tss_valid.index + + measure = math.pow(mean_squared_error(tss_valid.values, pred.values), 0.5) + + return pred, measure + +# q06_sarima_predictor(path) + + + diff --git a/q06_sarima_predictor/tests/test_sol.pkl b/q06_sarima_predictor/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..26f0309db0088b3123b2c03c58d58e8744e321e7 GIT binary patch literal 90 zcmZo*PEIdMtxPP*&&|n9(ksc#O^q*3Ey_$SjyE(gj4vq4&q_@$(JM4Ci!V+r%FIm! a3Z|8lj165&;tMuE+KCK literal 0 HcmV?d00001 diff --git a/q06_sarima_predictor/tests/user_sol.pkl b/q06_sarima_predictor/tests/user_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..354e6f5a373d8d79b4ccf3e44f2393dda5df6aa2 GIT binary patch literal 78 zcmZo*PA|8lj165&;tN_bR41p literal 0 HcmV?d00001