diff --git a/q01_load_data_and_add_column_names/__pycache__/__init__.cpython-36.pyc b/q01_load_data_and_add_column_names/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..e7e5716 Binary files /dev/null and b/q01_load_data_and_add_column_names/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data_and_add_column_names/__pycache__/build.cpython-36.pyc b/q01_load_data_and_add_column_names/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..dda1740 Binary files /dev/null and b/q01_load_data_and_add_column_names/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data_and_add_column_names/build.py b/q01_load_data_and_add_column_names/build.py index 3c7624e..d52d0e9 100644 --- a/q01_load_data_and_add_column_names/build.py +++ b/q01_load_data_and_add_column_names/build.py @@ -1,6 +1,21 @@ +# %load q01_load_data_and_add_column_names/build.py import pandas as pd path = 'data/GermanData.csv' -def q01_load_data_and_add_column_names(): +def q01_load_data_and_add_column_names(path): + data = pd.read_csv(path,names=['account_status','month','credit_history','purpose','credit_amount','savings_account/bonds','employment','installment_rate','personal_status/sex','guarantors','residence_since','property','age','other_installment_plans','housing','number_of_existing_credits','jobs','liable','telephone','foreign_worker','good/bad']) + data['good/bad'] = data['good/bad'].replace(1,0) + data['good/bad'] = data['good/bad'].replace(2,1) + return data + + + + + + + + + + diff --git a/q01_load_data_and_add_column_names/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data_and_add_column_names/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..849da34 Binary files /dev/null and b/q01_load_data_and_add_column_names/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data_and_add_column_names/tests/__pycache__/test.cpython-36.pyc b/q01_load_data_and_add_column_names/tests/__pycache__/test.cpython-36.pyc new file mode 100644 index 0000000..9939db3 Binary files /dev/null and b/q01_load_data_and_add_column_names/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q04_correlation_plot/__pycache__/__init__.cpython-36.pyc b/q04_correlation_plot/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..a3d139f Binary files /dev/null and b/q04_correlation_plot/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_correlation_plot/__pycache__/build.cpython-36.pyc b/q04_correlation_plot/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..c3bb9b1 Binary files /dev/null and b/q04_correlation_plot/__pycache__/build.cpython-36.pyc differ diff --git a/q04_correlation_plot/build.py b/q04_correlation_plot/build.py index 5e0d2ae..96d9e2f 100644 --- a/q04_correlation_plot/build.py +++ b/q04_correlation_plot/build.py @@ -1,9 +1,29 @@ import matplotlib.pyplot as plt import seaborn as sns -plt.switch_backend('agg') +from sklearn import preprocessing +#plt.switch_backend('agg') import sys, os from greyatomlib.statlog_german_credit_data_project.q03_encode_features.build import q03_encode_features +from greyatomlib.statlog_german_credit_data_project.q01_load_data_and_add_column_names.build import q01_load_data_and_add_column_names + path = 'data/GermanData.csv' -def q04_correlation_plot(): - +def q04_correlation_plot(path): + a = q01_load_data_and_add_column_names(path) + df_corr = a.corr() + df_corr.plot(kind='hist') + + + + + + + + + + + + + + + diff --git a/q04_correlation_plot/tests/__pycache__/__init__.cpython-36.pyc b/q04_correlation_plot/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..a85ef4f Binary files /dev/null and b/q04_correlation_plot/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_correlation_plot/tests/__pycache__/test.cpython-36.pyc b/q04_correlation_plot/tests/__pycache__/test.cpython-36.pyc new file mode 100644 index 0000000..4388a06 Binary files /dev/null and b/q04_correlation_plot/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q05_split/__pycache__/__init__.cpython-36.pyc b/q05_split/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..8505c5b Binary files /dev/null and b/q05_split/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_split/__pycache__/build.cpython-36.pyc b/q05_split/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..3658f94 Binary files /dev/null and b/q05_split/__pycache__/build.cpython-36.pyc differ diff --git a/q05_split/build.py b/q05_split/build.py index f2a25f3..ea20f09 100644 --- a/q05_split/build.py +++ b/q05_split/build.py @@ -1,7 +1,18 @@ +# %load q05_split/build.py import sys, os from sklearn.model_selection import train_test_split from greyatomlib.statlog_german_credit_data_project.q03_encode_features.build import q03_encode_features path = 'data/GermanData.csv' -def q05_split(): - +def q05_split(path,test_size = 0.2,random_state=9): + data,data_dict = q03_encode_features(path) + Y = data['good/bad'] + X = data.iloc[:,:-1] + X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=random_state) + return X_train[1:], X_test, y_train[1:], y_test + + + + + + diff --git a/q05_split/tests/__pycache__/__init__.cpython-36.pyc b/q05_split/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..53f5b88 Binary files /dev/null and b/q05_split/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_split/tests/__pycache__/test.cpython-36.pyc b/q05_split/tests/__pycache__/test.cpython-36.pyc new file mode 100644 index 0000000..cc6dc2f Binary files /dev/null and b/q05_split/tests/__pycache__/test.cpython-36.pyc differ