From 4e545821c0a34fa8d38a4e50d8d7e8b4d377f569 Mon Sep 17 00:00:00 2001 From: mudassirkhan19 Date: Mon, 3 Jul 2017 04:13:29 +0000 Subject: [PATCH] Done --- build.py | 45 ++++++++++++++++++----- build.pyc | Bin 0 -> 2797 bytes tests/__init__.pyc | Bin 0 -> 172 bytes tests/test_get_categorical_variables.pyc | Bin 0 -> 2765 bytes 4 files changed, 35 insertions(+), 10 deletions(-) create mode 100644 build.pyc create mode 100644 tests/__init__.pyc create mode 100644 tests/test_get_categorical_variables.pyc diff --git a/build.py b/build.py index 35cdd2a..f1d1944 100644 --- a/build.py +++ b/build.py @@ -1,26 +1,51 @@ +import matplotlib.pyplot as plt +import seaborn as sns +from scipy.stats import norm +import pandas as pd + def get_categorical_variables(df): - return [] + df['new_user'] = df['new_user'].astype('category') + df['converted'] = df['converted'].astype('category') + cat_cols = df.select_dtypes(include=['object','category']).columns + return cat_cols def get_numerical_variables(df): - return [] - + df['new_user'] = df['new_user'].astype('category') + df['converted'] = df['converted'].astype('category') + num_cols = df.select_dtypes(exclude=['object','category']).columns + return num_cols def get_numerical_variables_percentile(df): - pass + df['new_user'] = df['new_user'].astype('category') + df['converted'] = df['converted'].astype('category') + num_cols = df.select_dtypes(exclude=['object','category']).columns + return df[num_cols].describe() def get_categorical_variables_modes(df): - pass + df['new_user'] = df['new_user'].astype('category') + df['converted'] = df['converted'].astype('category') + cat_cols = df.select_dtypes(include=['object','category']).columns + return df[cat_cols].mode() def get_missing_values_count(df): - pass - + return pd.isnull(df).sum().reset_index() def plot_histogram_with_numerical_values(df): - pass - + df['new_user'] = df['new_user'].astype('category') + df['converted'] = df['converted'].astype('category') + num_cols = df.select_dtypes(exclude=['object','category']).columns + plt.subplot(121) + plt.title(num_cols[0]) + sns.distplot(df[num_cols[0]], color='yellow', fit=norm, kde=False) + plt.subplot(122) + plt.title(num_cols[1]) + sns.distplot(df[num_cols[1]], color='yellow', fit=norm, kde=False) def plot_facet_box(df): - pass + plt.subplot(121) + sns.boxplot('converted','age',data=df) + plt.subplot(122) + sns.boxplot('converted','total_pages_visited',data=df) diff --git a/build.pyc b/build.pyc new file mode 100644 index 0000000000000000000000000000000000000000..07984a59801753274ee62a2b0478264db33ceb42 GIT binary patch literal 2797 zcmcgu+ioL85bg2AcAVII0i-2>gt$ofC2M#@S|KDPp1=={#0tNR#?u|Aot_zMx@Qw3 zcv}92PvS%P063>?E1M{Rmw25_O?CCH`gGN)N&g-l{rTn3zZ7cw>)`z@p71M#!GEH> zQp-fmI=g17W(T5ol<%str)E8sY=`#M>`<9h{h`!spv-|NN6K_XIaa18%21iUC?jPK zMR}ymK$OSa@~80;ws-&FHytNX;{ilfH)X*VouuJAJmE)(-^9_AC0-r%I@z{g(44B9 z=2X#)0~s8m%kc|LQOUBSe#b=a-NTC0cnDe9%d8Eykw%e6yQrIW9H6JDt5>jF6Z>en zs_?C`ku&T0(iU-?LX~zN;(BG{2<>1!x-vsUkcma@+p-E{o+wh7v)x`9`ob6{B_jL) zZE{|hc2c$`55YAT=Xv$j*OQlZa}ic~VJAgx?8KV<^sH`9Hy#F8SEmg;KQ(#GgN>7U z>%4isTE`PCxUezXp34=v&tBz?%jez(o+Jrlpk-Qyfc|IIuky)6UQB&3YU@+qOc=^6Fdr1L`dxdi$kYl9!{ug zWrW{cs9NtMb2YSOJjT1RAezio#$E|~OJ?r#^zUO+22$z3i`4=&=35)U0c{mOp<|>I zb1djtlS5&20%BB~Ozt5=tqC1cQ$R60<^3r)9c{b*g(0ecm$h%+Bxy z5VLuGHGyRt@Z%@Y<5P$-7u3C*qx8%5u!3$>{Sim8m!#9V6UpRIxYhc3hJ3(u7O1G-`6Y{tV(e??rp~D#B!b0fuY6{f3Z-_aGi8!{kXa P>~Y~t_Q>uDez!kh4y+uQH!#J_{Ig!2 z+RMY6pJp|W)v~Av#$21JIX4yN4|1y=bess%7*HgZzQerT!JSJl19}3_fq(+qmW%bA nUsNP=wgr`JKt&-1B~bzAVvg$EW_ateF7FeE@kxj2U#iU))wn7r literal 0 HcmV?d00001 diff --git a/tests/test_get_categorical_variables.pyc b/tests/test_get_categorical_variables.pyc new file mode 100644 index 0000000000000000000000000000000000000000..14817c875bdbff74aeb517fd5fb986d0f9c70697 GIT binary patch literal 2765 zcmc&$O>fgc5FI-oZAwuKRCthCDlyber%TQSVe~mIRz9Aa1t=6InjH zT~?;6V+^(?8)%s0P-$!z7g1w}==JuBk?D=91P`a>a4*WAKJR@h%Ap-cv4O&g>6s*2 z9~9;KiQMZYlwGIh*0`R--mq%JnUFWeU)_DYkFm3(2-Ysch_~vkcx(E1R^ayt_%8yh zbR>IPx|{HcBdQ&j>gRo}S8 zF|;|!aTjl%$Z-zXE4Xh7<5-M|5ZqXsy!!&?pM-FlY1p~@NH!#4eO_(pexBz35hk3$ zeK17&t8mO{#W>A4WB-M5jhi#${6v^Aejd1oe^RZPt_3m- zb0lvVYKn){rOMz2EE&Nn&2<{)NWcnwa=L2oqAOm&PZOS?mWO`Z>nwKKomQvSrK^Cd zJawcZz<_AkA4fTAv1SVnO_2f4I}I&Ol!W}k;wVe6CR43B-!o^dy2cl^A}_ff#k}e* K`)yS675_J6&7Nrh literal 0 HcmV?d00001