From 8dd49c1e0d69bb6ba06fb86ea654a450b4db3fe8 Mon Sep 17 00:00:00 2001 From: nikhilmborkar Date: Wed, 26 Jul 2017 04:50:26 +0000 Subject: [PATCH] Done --- build.py | 44 +++++++++++++++++++---- build.pyc | Bin 0 -> 3180 bytes tests/__init__.pyc | Bin 0 -> 171 bytes tests/test_get_categorical_variables.pyc | Bin 0 -> 2758 bytes 4 files changed, 38 insertions(+), 6 deletions(-) create mode 100644 build.pyc create mode 100644 tests/__init__.pyc create mode 100644 tests/test_get_categorical_variables.pyc diff --git a/build.py b/build.py index 35cdd2a..d6c9b06 100644 --- a/build.py +++ b/build.py @@ -1,26 +1,58 @@ +import numpy as np +import pandas as pd +from pandas import Series, DataFrame +import operator +import matplotlib.pyplot as plt + +df = pd.read_csv('data/conversion_data.csv') + def get_categorical_variables(df): - return [] + return df[['country','source','new_user','converted']] def get_numerical_variables(df): - return [] + return df._get_numeric_data() def get_numerical_variables_percentile(df): - pass + df_temp = get_numerical_variables(df) + return df_temp.describe().T def get_categorical_variables_modes(df): - pass - + dic = {'converted':0, 'country':'', 'new_user':0, 'source':''} + for col in df.mode().columns: + dic[col] = df.mode()[col][0] + return pd.DataFrame(dic.items(), columns=['var_name', 'mode']) def get_missing_values_count(df): - pass + my_missing_value = pd.DataFrame(df.isnull().sum(), columns=['missing_value_count']) + my_missing_value.index.name = 'var_name' + return my_missing_value def plot_histogram_with_numerical_values(df): + fig, axes = plt.subplots(2, 2) + df1 = get_numerical_variables(df) + list_of_cols = df1.columns + axes[0,0].hist(df[list_of_cols[0]]) + axes[0,0].set_title(list_of_cols[0]) + axes[0,1].hist(df[list_of_cols[1]]) + axes[0,1].set_title(list_of_cols[1]) + axes[1,0].hist(df[list_of_cols[2]]) + axes[1,0].set_title(list_of_cols[2]) + axes[1,1].hist(df[list_of_cols[3]]) + axes[1,1].set_title(list_of_cols[3]) + plt.tight_layout() + plt.show() pass def plot_facet_box(df): + def plot_facet_box(df): + list_of_cols = df.columns + for col in list_of_cols: + plt.boxplot(df[col], 1) + plt.title(col) + plt.show() pass diff --git a/build.pyc b/build.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29a60c2f185313458835c49f069338c385325b63 GIT binary patch literal 3180 zcmcImU27aS6xGar*t@agBu&yRqzO2IE)-`;TLOhp3O0pO*oSe!bZHo7N7}WV`B;y% zjt$OJ@-O-``&`;{?sz{Enm)z0C0|K*q9z>hlOsGxemN;H>)<-Qc*)&7iQa zk8PHZ%j$Jf4UX{7&63o>WNijUCwr5!+FRMbS0&!;(Fte#U_7^(em1-CJ?u9%K2EVZ zEi0QQS$vjMHW_Ee5fn!gRIv1#W{KBb2=Mwusdqq8%pKN7-^NPJO^bOBbW?#&mLej4 zjoy5T?bi-o_hlPP>%Pz}Ht~fhcyR}0dmE_BK%E5i>m*#d)__h#L?m+BxU{lk=>al`FHgT!M_}zqLojR-L!>7X6Dc_jk;}nh zpUfFrbSk1P03H%!5z*F&HeMx7HqQ$uc!QI1fzrc_MZ-F>LIO>`>Q4ekdI>)_A@y0?$!zDEaSfH;#3 z$e~9qT$Gt9=othE55h*aHdWlBaMAm-LNTTGGE+1$iZhoW1U?-P~i4m-h& zL+grpmI*{|o_m3@(B@n~C;DqV=N|b*{K59u2r4TwLH*!7SNro2YmWA-;OT)J@3Ug+tL%QtmRSf=~LPGu=M#|2Mk_2jvHoMF$noPa%< zyGG7F#gTL=rVr&5FN^q%^~Z~AQEn$BrQ3y`BZ*b;0Q~I)z2M#lts8V#6@uvP^9#ll zWPY#6WQj84J4~rSZ;9R^n)?MEc#NBx<@^VhiGx?qh5V=-!hi6ZRUC;C;6`KhX05IV zQpAJPTk2?ah!CiTo7FdZgidPqxICxJQadDC(Gx7K>TvWm)$ef*t?oYiqvj@d7T_m% z5cwE3v6?#GI|}e_2Y1ox^9h+;+|f6XuFodf{yQ9Tbsu-yeO70|3C|?F#Arz|lNaJF zDRkoM;*8m=3cU%b#O0p&vhuw6^2F22%#QJr6VyrlTC5|&Pb!n>81L5FOFl;H3?qFa wUPRGX?CAPO7zi4lJvg-KC2qlaW_~19<*~g5-rq;t33tLy6Thua8^5>z1*|uqumAu6 literal 0 HcmV?d00001 diff --git a/tests/__init__.pyc b/tests/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..821f0068118c927ea8c44cc13c2c80bb61e99bdc GIT binary patch literal 171 zcmZ9Fu?hk)5JV$dh+t>u4_p)X2Sn}*Y!t*Q#AM~dU9v}Z75#31z&)^XVBWwKGxN`S zwcWg~!TdDSY}LF_L}O0P)Epay`2(M|gN`ynNqv`we52a?3KtC?eLx5l5iojtr)mS0 nCss~WZ1sXRdhv_`f-DTqrIN(C-T0QJ4(~I?$AE|Fp4H|HkU}ZW literal 0 HcmV?d00001 diff --git a/tests/test_get_categorical_variables.pyc b/tests/test_get_categorical_variables.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86e2e9d9420b042f2d0543fbcf33d51d7da38ac5 GIT binary patch literal 2758 zcmc&$O>fgc5FI-oZTJx7`x^v8B9}HNZU`ZYKwLPuNQhjlt-UF39Xs-_3rOWu{xLrQ z-W#`RXdA= zD8rVdEg81Owq&~_wk`XJ=&HLUwyW;4qzDLP6iC{UA0qoApN2h7vC;2h`s^2e{Qy0` z4D-P{ziym;@9w}`8>GgYK~fgGwsKin#Prt^x9jg81i!UDPK>u(WtAl+k9SR#na$j~ zDH_iG3e7!(u|g2QfTavWB4Y0BHNg}EqkHtC?mnaQ0L@O_vFyh%up(zj2yk6;p{g2t$7q+8Cc=S zFX57ub>XXTN^)Vp#I>`PrvR?3tHk=A#*k-k(|#MCC+f{Q%hQPSM8xgS!5GJ9KUB5t z>llNb%|{yMIaFEK$3@h*ZS)48%ds64+0N%IA8(e`j;RJ8ad8uqSO}cjflbZosH|3x z)LyM1>?$R9)(<>V!>NtuB3_$(^AGSk#?Fr-R(AnLf|cM>a8>_K>iZ4>|3zO_j$F?w zcN0Ew6!q91OWlTQ5@`@|6<1$+bK^Y6&=#Y{ zJ-m3T#wl1YEN}_khj|?HI!hpTWx4I&r<&w6V9PNo>){v z;j`Q-qkYy+!!lQT#IjZblqj4x1?q<-GD)?JUL#@ORA$_iN27#3yjUfkVugQGVg@>M z(=lNiQO-b;hj`cAs*cClAKY<0Zs3t4J)$dgSu4>sn3*EbxW!Wc1S^_Sf9h$~KxDC; zE$_GRM)E$+9CCRJ$7fz+kI~cOIN^-_7s54ev4r#MU`qID*dG1Kw3fM^jd5I{a^qO* zJEkmk4maX)5v|Z%r(uOeJbaIKR_&efVHfeEguAEXe%}px%e`)|)9du