"""Champagne-sales time-series exercises (q01-q05), one section per module.

The sections below correspond to the ``build.py`` files of the
``q01_load_data`` ... ``q05_sarima_model`` packages.  The same change set
also adds binary pickle fixtures (``tests/test_sol.pkl`` and
``tests/user_sol.pkl`` for q02-q05, plus ``test_sol.pkl`` / ``user_sol.pkl``
at the repository root); they carry no source and are not reproduced here.

NOTE(review): the original modules import each other through
``greyatomlib.time_series_101_project.<package>.build``.  Here the functions
call the definitions in this module directly, so those imports are omitted.
"""
import datetime

import numpy as np
import pandas as pd

try:
    # Imported by the q01/q04/q05 modules; none of the functions below use it.
    from sklearn.model_selection import train_test_split
except ImportError:
    train_test_split = None


# Default CSV location shared by every exercise.
path = 'data/perrin-freres-monthly-champagne.csv'


# --- q01_load_data/build.py -------------------------------------------------

def q01_load_data(path):
    """Read the CSV file at *path* and return it as a DataFrame.

    The index is reset (old index dropped), so rows are labelled 0..n-1.
    """
    return pd.read_csv(path).reset_index(drop=True)


# --- q02_data_splitter/build.py ---------------------------------------------

def q02_data_splitter(path):
    """Load the CSV at *path* and split it on 1971-10-01.

    The ``Month`` column is converted with ``pd.to_datetime``.  Rows with
    ``Month`` strictly before 1 October 1971 form the training frame; rows on
    or after that date form the validation frame.

    Returns:
        ``(X_train, X_valid)`` as two independent DataFrames.
    """
    df = q01_load_data(path)
    df['Month'] = pd.to_datetime(df['Month'])
    # pd.datetime was removed from pandas; pd.Timestamp is the supported type.
    split_date = pd.Timestamp(1971, 10, 1)
    # Copies let callers add columns without writing into a slice of ``df``.
    X_train = df[df['Month'] < split_date].copy()
    X_valid = df[df['Month'] >= split_date].copy()
    return X_train, X_valid


# --- q03_stacked_point_plot/build.py ----------------------------------------

x_column_name = 'month'
y_column_name = 'Sales'
hue = 'year'
order_of_the_axis = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']


def q03_stacked_point_plot(path, x_column_name='month', y_column_name='Sales',
                           hue='year',
                           order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr',
                                              'May', 'Jun', 'Jul', 'Aug',
                                              'Sep', 'Oct', 'Nov', 'Dec']):
    """Draw one sales-per-month point line per year for the training split.

    Args:
        path: CSV file passed to ``q02_data_splitter``.
        x_column_name: column plotted on the x axis.
        y_column_name: column plotted on the y axis.
        hue: column used to colour/group the lines.
        order_of_the_axis: order of the x-axis categories.  The default list
            is only read, never mutated.

    Returns:
        None.  The figure is drawn on the current pyplot state and shown.
    """
    # Imported here so the data functions above work without a plotting stack.
    import matplotlib.pyplot as plt
    import seaborn as sns

    plt.switch_backend('agg')

    X_train, _ = q02_data_splitter(path)
    # ``year`` and ``month`` (abbreviated name) are derived from ``Month``.
    X_train = X_train.assign(
        year=X_train['Month'].dt.year,
        month=X_train['Month'].dt.strftime('%b'),
    )

    plt.figure(figsize=(16, 7))
    # The category-order keyword of pointplot is ``order`` (not ``x_order``).
    sns.pointplot(x=x_column_name, y=y_column_name, hue=hue, data=X_train,
                  order=order_of_the_axis)
    plt.xlabel('month')
    plt.ylabel('sales')
    plt.legend(title='year', loc='upper left')
    plt.show()


# --- q04_boxplot/build.py ---------------------------------------------------

x = 'month'
y = 'Sales'
kind = 'box'
order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
size = 8


def q04_boxplot(path, x='month', y='Sales', kind='box',
                order=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], size=8):
    """Draw a per-month categorical plot (box plot by default) of the training split.

    Args:
        path: CSV file passed to ``q02_data_splitter``.
        x: column plotted on the x axis.
        y: column plotted on the y axis.
        kind: plot kind forwarded to seaborn.
        order: order of the x-axis categories.  The default list is only
            read, never mutated.
        size: facet height forwarded to seaborn.

    Returns:
        None.  The figure is drawn and shown.
    """
    # Imported here so the data functions above work without a plotting stack.
    import matplotlib.pyplot as plt
    import seaborn as sns

    X_train, _ = q02_data_splitter(path)
    X_train = X_train.assign(month=X_train['Month'].dt.strftime('%b'))

    # The plot below is figure-level and creates its own figure, so no
    # plt.figure() call is made first (it would only add an empty figure).
    catplot = getattr(sns, 'catplot', None)
    if catplot is not None:
        # seaborn >= 0.9: factorplot -> catplot, size -> height.
        catplot(x=x, y=y, data=X_train, kind=kind, order=order, height=size)
    else:
        sns.factorplot(x=x, y=y, data=X_train, kind=kind, order=order,
                       size=size)
    plt.xlabel('Month')
    plt.ylabel('Sales')
    plt.title('Monthly Sales')
    plt.show()


# --- q05_sarima_model/build.py ----------------------------------------------

def _sales_by_month(frame):
    """Return a one-column ``Sales`` frame indexed by the ``Month`` values of *frame*."""
    sales = frame[['Sales']].copy()
    sales.index = frame['Month'].values
    return sales


def q05_sarima_model(path):
    """Build month-indexed ``Sales`` frames for the train and validation splits.

    Returns:
        ``(tss, tss_valid)``: each a DataFrame with the single column
        ``Sales`` whose index holds the ``Month`` values of the respective
        split produced by ``q02_data_splitter``.
    """
    train, validation = q02_data_splitter(path)
    return _sales_by_month(train), _sales_by_month(validation)