diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index b4b7209..df848ac 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/data/scrapeddata.csv b/data/scrapeddata.csv new file mode 100644 index 0000000..eaa25bd --- /dev/null +++ b/data/scrapeddata.csv @@ -0,0 +1,78 @@ +,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 +11,United States of America,Federal state,".mw-parser-output .monospaced{font-family:monospace,monospace}USUSA840",US,00,,,U.S.,U.S.,U.S.A.,,,,, +12,Alabama,State,US-AL,AL,01,AL,AL,Ala.,Ala.,,,,,, +13,Alaska,State,US-AK,AK,02,AK,AK,Alaska,Alaska,Alas.,,,,, +14,Arizona,State,US-AZ,AZ,04,AZ,AZ,Ariz.,Ariz.,Az.,,,,, +15,Arkansas,State,US-AR,AR,05,AR,AR,Ark.,Ark.,,,,,, +16,California,State,US-CA,CA,06,CA,CF,Calif.,Calif.,"Ca., Cal.",,,,, +17,Colorado,State,US-CO,CO,08,CO,CL,Colo.,Colo.,Col.,,,,, +18,Connecticut,State,US-CT,CT,09,CT,CT,Conn.,Conn.,Ct.,,,,, +19,Delaware,State,US-DE,DE,10,DE,DL,Del.,Del.,De.,,,,, +20,District of Columbia,Federal district,US-DC,DC,11,DC,DC,D.C.,D.C.,Wash. D.C.,,,,, +21,Florida,State,US-FL,FL,12,FL,FL,Fla.,Fla.,"Fl., Flor.",,,,, +22,Georgia,State,US-GA,GA,13,GA,GA,Ga.,Ga.,Geo.,,,,, +23,Hawaii,State,US-HI,HI,15,HI,HA,Hawaii,Hawaii,H.I.,,,,, +24,Idaho,State,US-ID,ID,16,ID,ID,Idaho,Idaho,"Id., Ida.",,,,, +25,Illinois,State,US-IL,IL,17,IL,IL,Ill.,Ill.,"Il., Ills., Ill's",,,,, +26,Indiana,State,US-IN,IN,18,IN,IN,Ind.,Ind.,In.,,,,, +27,Iowa,State,US-IA,IA,19,IA,IA,Iowa,Iowa,"Ia., Ioa.[1]",,,,, +28,Kansas,State,US-KS,KS,20,KS,KA,Kans.,Kan.,"Ks., Ka.",,,,, +29,Kentucky,State (Commonwealth),US-KY,KY,21,KY,KY,Ky.,Ky.,"Ken., Kent.",,,,, +30,Louisiana,State,US-LA,LA,22,LA,LA,La.,La.,,,,,, +31,Maine,State,US-ME,ME,23,ME,ME,Maine,Maine,Me.,,,,, +32,Maryland,State,US-MD,MD,24,MD,MD,Md.,Md.,,,,,, +33,Massachusetts,State (Commonwealth),US-MA,MA,25,MA,MS,Mass.,Mass.,,,,,, +34,Michigan,State,US-MI,MI,26,MI,MC,Mich.,Mich.,,,,,, +35,Minnesota,State,US-MN,MN,27,MN,MN,Minn.,Minn.,Mn.,,,,, +36,Mississippi,State,US-MS,MS,28,MS,MI,Miss.,Miss.,,,,,, +37,Missouri,State,US-MO,MO,29,MO,MO,Mo.,Mo.,,,,,, +38,Montana,State,US-MT,MT,30,MT,MT,Mont.,Mont.,,,,,, +39,Nebraska,State,US-NE,NE,31,NE,NB,Nebr.,Neb.,,,,,, +40,Nevada,State,US-NV,NV,32,NV,NV,Nev.,Nev.,Nv.,,,,, +41,New Hampshire,State,US-NH,NH,33,NH,NH,N.H.,N.H.,,,,,, +42,New Jersey,State,US-NJ,NJ,34,NJ,NJ,N.J.,N.J.,N.Jersey,,,,, +43,New Mexico,State,US-NM,NM,35,NM,NM,N. Mex.,N.M.,New M.,,,,, +44,New York,State,US-NY,NY,36,NY,NY,N.Y.,N.Y.,N. York,,,,, +45,North Carolina,State,US-NC,NC,37,NC,NC,N.C.,N.C.,N. Car.,,,,, +46,North Dakota,State,US-ND,ND,38,ND,ND,N. Dak.,N.D.,NoDak,,,,, +47,Ohio,State,US-OH,OH,39,OH,OH,Ohio,Ohio,"O., Oh.",,,,, +48,Oklahoma,State,US-OK,OK,40,OK,OK,Okla.,Okla.,Ok.,,,,, +49,Oregon,State,US-OR,OR,41,OR,OR,Oreg.,Ore.,Or.,,,,, +50,Pennsylvania,State (Commonwealth),US-PA,PA,42,PA,PA,Pa.,Pa.,"Penn., Penna.",,,,, +51,Rhode Island,State,US-RI,RI,44,RI,RI,R.I.,R.I.,"R.I. & P.P., R. Isl.",,,,, +52,South Carolina,State,US-SC,SC,45,SC,SC,S.C.,S.C.,S. Car.,,,,, +53,South Dakota,State,US-SD,SD,46,SD,SD,S. Dak.,S.D.,SoDak,,,,, +54,Tennessee,State,US-TN,TN,47,TN,TN,Tenn.,Tenn.,,,,,, +55,Texas,State,US-TX,TX,48,TX,TX,Tex.,Texas,Tx.,,,,, +56,Utah,State,US-UT,UT,49,UT,UT,Utah,Utah,Ut.,,,,, +57,Vermont,State,US-VT,VT,50,VT,VT,Vt.,Vt.,,,,,, +58,Virginia,State (Commonwealth),US-VA,VA,51,VA,VA,Va.,Va.,Virg.,,,,, +59,Washington,State,US-WA,WA,53,WA,WN,Wash.,Wash.,"Wa., Wn.[2]",,,,, +60,West Virginia,State,US-WV,WV,54,WV,WV,W. Va.,W.Va.,"W.V., W. Virg.",,,,, +61,Wisconsin,State,US-WI,WI,55,WI,WS,Wis.,Wis.,"Wi., Wisc.",,,,, +62,Wyoming,State,US-WY,WY,56,WY,WY,Wyo.,Wyo.,Wy.,,,,, +63,American Samoa,Insular area (Territory),ASASM016US-AS,AS,60,AS,AS,A.S.,,,,,,, +64,Guam,Insular area (Territory),GUGUM316US-GU,GU,66,GU,GU,Guam,,,,,,, +65,Northern Mariana Islands,Insular area (Commonwealth),MPMNP580US-MP,MP,69,MP,CM,M.P.,,CNMI[3],,,,, +66,Puerto Rico,Insular area (Territory),PRPRI630US-PR,PR,72,PR,PR,P.R.,,,,,,, +67,U.S. Virgin Islands,Insular area (Territory),VIVIR850US-VI,VI,78,VI,VI,V.I.,,U.S.V.I.,,,,, +68,U.S. Minor Outlying Islands,Insular areas,UMUMI581US-UM,UM,74,,,,,,,,,, +69,Baker Island,island,UM-81,,81,,,,,XB[4],,,,, +70,Howland Island,island,UM-84,,84,,,,,XH[4],,,,, +71,Jarvis Island,island,UM-86,,86,,,,,XQ[4],,,,, +72,Johnston Atoll,atoll,UM-67,,67,,,,,XU[4],,,,, +73,Kingman Reef,atoll,UM-89,,89,,,,,XM[4],,,,, +74,Midway Islands,atoll,UM-71,,71,,,,,QM[4],,,,, +75,Navassa Island,island,UM-76,,76,,,,,XV[4],,,,, +76,Palmyra Atoll[5],atoll[5],UM-95,,95,,,,,XL[4],,,,, +77,Wake Island,atoll,UM-79,,79,,,,,QW[4],,,,, +78,Micronesia,Freely associated state,FMFSM583,FM,64,FM,,,,,,,,, +79,Marshall Islands,Freely associated state,MHMHL584,MH,68,MH,,,,,,,,, +80,Palau,Freely associated state,PWPLW585,PW,70,PW,,,,,,,,, +81,U.S. Armed Forces – Americas[6],US military mail code,,,,AA,,,,,,,,, +82,U.S. Armed Forces – Europe[7],US military mail code,,,,AE,,,,,,,,, +83,U.S. Armed Forces – Pacific[8],US military mail code,,,,AP,,,,,,,,, +84,Northern Mariana Islands,Obsolete postal code[9],,,,CM,,,,,,,,, +85,Panama Canal Zone,Obsolete postal code,PZPCZ594,,,CZ,,,,,,,,, +86,Nebraska,Obsolete postal code[10],,,,NB,,,,,,,,, +87,Philippine Islands,Obsolete postal code,PHPHL608[11],,,PI,,,,,,,,, diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 92b3ac2..c5ee832 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index e27baf6..0771661 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index 69d7209..ecc4355 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,5 +1,18 @@ +# %load q01_load_data/build.py import pandas as pd +from pandas import ExcelWriter +from pandas import ExcelFile +path = 'data/excel-comp-data.xlsx' def q01_load_data(path): - "write your solution here" + # 'write your solution here' + df = pd.read_excel(path) + df['state'] = df['state'].str.lower() + df['total'] = df['Jan']+df['Feb']+df['Mar'] + return df + +q01_load_data(path) + + + diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index 2a2dfc7..7deb3c7 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/tests.cpython-36.pyc b/q01_load_data/tests/__pycache__/tests.cpython-36.pyc index 76e04c8..8d288ef 100644 Binary files a/q01_load_data/tests/__pycache__/tests.cpython-36.pyc and b/q01_load_data/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q02_append_row/__pycache__/__init__.cpython-36.pyc b/q02_append_row/__pycache__/__init__.cpython-36.pyc index de0cf61..3f4e6f6 100644 Binary files a/q02_append_row/__pycache__/__init__.cpython-36.pyc and b/q02_append_row/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_append_row/__pycache__/build.cpython-36.pyc b/q02_append_row/__pycache__/build.cpython-36.pyc index 5088267..8043792 100644 Binary files a/q02_append_row/__pycache__/build.cpython-36.pyc and b/q02_append_row/__pycache__/build.cpython-36.pyc differ diff --git a/q02_append_row/build.py b/q02_append_row/build.py index af3701d..a7f3154 100644 --- a/q02_append_row/build.py +++ b/q02_append_row/build.py @@ -1,11 +1,38 @@ +# %load q02_append_row/build.py import pandas as pd import sys, os +from pandas import ExcelWriter +from pandas import ExcelFile + +path = 'data/excel-comp-data.xlsx' + #sys.path.append(os.path.join(os.path.dirname(os.curdir))) from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data - def q02_append_row(path): - "write your solution here" + # 'write your solution here' + df = pd.read_excel(path) + df['total'] = df['Jan']+df['Feb']+df['Mar'] + # print (df) + + df2 = pd.DataFrame(columns = df.columns) + + t1 = df['Jan'].sum() + t2 = df['Feb'].sum() + t3 = df['Mar'].sum() + t4 = df['total'].sum() + + df2.loc[0,'Jan'] = t1 + df2.loc[0,'Feb'] = t2 + df2.loc[0,'Mar'] = t3 + df2.loc[0,'total'] = t4 + + df=df.append(df2) + return df + +#q02_append_row(path) + + diff --git a/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc b/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc index dab3eca..2bce6e6 100644 Binary files a/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc and b/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_append_row/tests/__pycache__/tests.cpython-36.pyc b/q02_append_row/tests/__pycache__/tests.cpython-36.pyc index 742ee79..845d630 100644 Binary files a/q02_append_row/tests/__pycache__/tests.cpython-36.pyc and b/q02_append_row/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc b/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc index e99e173..4928620 100644 Binary files a/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc and b/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_scrape_clean/__pycache__/build.cpython-36.pyc b/q03_scrape_clean/__pycache__/build.cpython-36.pyc index cdec2c4..735f3c3 100644 Binary files a/q03_scrape_clean/__pycache__/build.cpython-36.pyc and b/q03_scrape_clean/__pycache__/build.cpython-36.pyc differ diff --git a/q03_scrape_clean/build.py b/q03_scrape_clean/build.py index a88e3e2..49a32a8 100644 --- a/q03_scrape_clean/build.py +++ b/q03_scrape_clean/build.py @@ -1,9 +1,17 @@ +# %load q03_scrape_clean/build.py import pandas as pd import sys, os import requests sys.path.append(os.path.join(os.path.dirname(os.curdir))) - def q03_scrape_clean(url): - "write your solution here" + data = requests.get(url) + mlist = pd.read_html(data.text) + df = mlist[0] + df = df[11:-1] + df.to_csv('./data/scrapeddata.csv') + return df + + + diff --git a/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc b/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc index bee36fb..e20b139 100644 Binary files a/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc and b/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc b/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc index 8529c87..407e89a 100644 Binary files a/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc and b/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q04_mapping/__pycache__/__init__.cpython-36.pyc b/q04_mapping/__pycache__/__init__.cpython-36.pyc index ee0618f..f501da6 100644 Binary files a/q04_mapping/__pycache__/__init__.cpython-36.pyc and b/q04_mapping/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_mapping/__pycache__/build.cpython-36.pyc b/q04_mapping/__pycache__/build.cpython-36.pyc index 8283165..28111bf 100644 Binary files a/q04_mapping/__pycache__/build.cpython-36.pyc and b/q04_mapping/__pycache__/build.cpython-36.pyc differ diff --git a/q04_mapping/build.py b/q04_mapping/build.py index 914cfa8..ce82a48 100644 --- a/q04_mapping/build.py +++ b/q04_mapping/build.py @@ -1,10 +1,26 @@ +# %load q04_mapping/build.py import pandas as pd import sys, os import numpy as np #sys.path.append(os.path.join(os.path.dirname(os.curdir))) from greyatomlib.pandas_guided_project.q02_append_row.build import q02_append_row + def q04_mapping(path1,path2): - "write your solution here" + 'write your solution here' + df1 = pd.read_excel(path1) + df1['total']=df1['Jan']+df1['Feb']+df1['Mar'] + df1.loc[len(df1),:]=df1.sum() + + df2 = pd.read_csv(path2) + + abbr_dict = dict(zip(df2.iloc[:,[1,6]]['United States of America'],df2.iloc[:,[1,6]]['Unnamed: 6'])) + + #df1['abbr']=np.where(df1.iloc[4]) in abbr_dict,str(abbr_dict[df1.iloc[4]]),'nan') + df1.iloc[:,6]=df1['state'].map(abbr_dict) + + return df1 + +# q04_mapping(path1,path2) diff --git a/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc b/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc index eef3d6b..b6aa2b5 100644 Binary files a/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc and b/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_mapping/tests/__pycache__/test.cpython-36.pyc b/q04_mapping/tests/__pycache__/test.cpython-36.pyc index 7f7c96e..c7e87a1 100644 Binary files a/q04_mapping/tests/__pycache__/test.cpython-36.pyc and b/q04_mapping/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc b/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc index f50c1d5..1bf239d 100644 Binary files a/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc and b/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_replace_missing_values/__pycache__/build.cpython-36.pyc b/q05_replace_missing_values/__pycache__/build.cpython-36.pyc index 6a32964..611ca52 100644 Binary files a/q05_replace_missing_values/__pycache__/build.cpython-36.pyc and b/q05_replace_missing_values/__pycache__/build.cpython-36.pyc differ diff --git a/q05_replace_missing_values/build.py b/q05_replace_missing_values/build.py index 97d9755..9a6af3e 100644 --- a/q05_replace_missing_values/build.py +++ b/q05_replace_missing_values/build.py @@ -1,3 +1,4 @@ +# %load q05_replace_missing_values/build.py import pandas as pd import numpy as np import sys @@ -7,7 +8,23 @@ path1 = 'data/excel-comp-data.xlsx' path2 = 'data/scraped.csv' + def q05_replace_missing_values(path1,path2): + df1 = pd.read_excel(path1) + df1['total']=df1['Jan']+df1['Feb']+df1['Mar'] + df1.loc[len(df1),:]=df1.sum() + df2 = pd.read_csv(path2) + + abbr_dict = dict(zip(df2.iloc[:,[1,6]]['United States of America'],df2.iloc[:,[1,6]]['Unnamed: 6'])) + + df1.iloc[:,6] = df1['state'].map(abbr_dict) + df1.iloc[6,6] = 'MS' + df1.iloc[10,6] = 'TN' + + return df1 + +#print(q05_replace_missing_values(path1,path2).shape) +# q05_replace_missing_values(path1,path2) + -#print(q05_replace_missing_values(path1,path2).shape) \ No newline at end of file diff --git a/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc b/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc index 03391a7..466b32b 100644 Binary files a/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc and b/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc b/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc index 3b9d62a..22c7f86 100644 Binary files a/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc and b/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q06_sub_total/__pycache__/__init__.cpython-36.pyc b/q06_sub_total/__pycache__/__init__.cpython-36.pyc index f70134c..5e664ed 100644 Binary files a/q06_sub_total/__pycache__/__init__.cpython-36.pyc and b/q06_sub_total/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_sub_total/__pycache__/build.cpython-36.pyc b/q06_sub_total/__pycache__/build.cpython-36.pyc index adaf0ce..a2507ff 100644 Binary files a/q06_sub_total/__pycache__/build.cpython-36.pyc and b/q06_sub_total/__pycache__/build.cpython-36.pyc differ diff --git a/q06_sub_total/build.py b/q06_sub_total/build.py index c420838..7f477bd 100644 --- a/q06_sub_total/build.py +++ b/q06_sub_total/build.py @@ -1,3 +1,4 @@ +# %load q06_sub_total/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split @@ -10,7 +11,22 @@ path2 = 'data/scraped.csv' def q06_sub_total(path1,path2): - "write your solution here" + # 'write your solution here' + df1 = pd.read_excel(path1) + df1['total']=df1['Jan']+df1['Feb']+df1['Mar'] + df1.loc[len(df1),:]=df1.sum() + + df2 = pd.read_csv(path2) + + abbr_dict = dict(zip(df2.iloc[:,[1,6]]['United States of America'],df2.iloc[:,[1,6]]['Unnamed: 6'])) + + df1.iloc[:,6]=df1['state'].map(abbr_dict) + df1.iloc[6,6] = 'MS' + df1.iloc[10,6] = 'TN' + + df1.rename(index=str,columns={'Jan':'abbr'},inplace=True) + + return df1.groupby(['abbr']).sum().iloc[:,:4] diff --git a/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc b/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc index 93ecd56..da93904 100644 Binary files a/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc and b/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_sub_total/tests/__pycache__/test.cpython-36.pyc b/q06_sub_total/tests/__pycache__/test.cpython-36.pyc index 691280a..e248013 100644 Binary files a/q06_sub_total/tests/__pycache__/test.cpython-36.pyc and b/q06_sub_total/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q07_symbols/__pycache__/__init__.cpython-36.pyc b/q07_symbols/__pycache__/__init__.cpython-36.pyc index 60b0cca..018ebf3 100644 Binary files a/q07_symbols/__pycache__/__init__.cpython-36.pyc and b/q07_symbols/__pycache__/__init__.cpython-36.pyc differ diff --git a/q07_symbols/__pycache__/build.cpython-36.pyc b/q07_symbols/__pycache__/build.cpython-36.pyc index d28eaa9..f7760a1 100644 Binary files a/q07_symbols/__pycache__/build.cpython-36.pyc and b/q07_symbols/__pycache__/build.cpython-36.pyc differ diff --git a/q07_symbols/build.py b/q07_symbols/build.py index b8cbb92..93fff47 100644 --- a/q07_symbols/build.py +++ b/q07_symbols/build.py @@ -1,3 +1,4 @@ +# %load q07_symbols/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split @@ -10,9 +11,26 @@ path2 = 'data/scraped.csv' def q07_symbols(path1,path2): - "write your solution here" + # 'write your solution here' + df1 = pd.read_excel(path1) + df1['total']=df1['Jan']+df1['Feb']+df1['Mar'] + df1.loc[len(df1),:]=df1.sum() + + df2 = pd.read_csv(path2) + + abbr_dict = dict(zip(df2.iloc[:,[1,6]]['United States of America'],df2.iloc[:,[1,6]]['Unnamed: 6'])) + + df1.iloc[:,6]=df1['state'].map(abbr_dict) + df1.iloc[6,6] = 'MS' + df1.iloc[10,6] = 'TN' + + df1.rename(index=str,columns={'Jan':'abbr'},inplace=True) + + return df1.groupby(['abbr']).sum().applymap(lambda x:'$'+str(int(x))).applymap(lambda x:x[:4]+','+x[4:]) + +#q07_symbols(path1,path2) +#print(q07_symbols(path1,path2)) -#print(q07_symbols(path1,path2)) diff --git a/q07_symbols/tests/__pycache__/__init__.cpython-36.pyc b/q07_symbols/tests/__pycache__/__init__.cpython-36.pyc index f854b4a..35fef29 100644 Binary files a/q07_symbols/tests/__pycache__/__init__.cpython-36.pyc and b/q07_symbols/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q07_symbols/tests/__pycache__/test.cpython-36.pyc b/q07_symbols/tests/__pycache__/test.cpython-36.pyc index 1a8a9c3..9536383 100644 Binary files a/q07_symbols/tests/__pycache__/test.cpython-36.pyc and b/q07_symbols/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q09_pie_chart_jan/__pycache__/__init__.cpython-36.pyc b/q09_pie_chart_jan/__pycache__/__init__.cpython-36.pyc index a0e3add..a928daa 100644 Binary files a/q09_pie_chart_jan/__pycache__/__init__.cpython-36.pyc and b/q09_pie_chart_jan/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_pie_chart_jan/__pycache__/build.cpython-36.pyc b/q09_pie_chart_jan/__pycache__/build.cpython-36.pyc index 25a6c03..4670a0f 100644 Binary files a/q09_pie_chart_jan/__pycache__/build.cpython-36.pyc and b/q09_pie_chart_jan/__pycache__/build.cpython-36.pyc differ diff --git a/q09_pie_chart_jan/build.py b/q09_pie_chart_jan/build.py index 6483bc6..58e84ae 100644 --- a/q09_pie_chart_jan/build.py +++ b/q09_pie_chart_jan/build.py @@ -1,15 +1,36 @@ +# %load q09_pie_chart_jan/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split import sys,os sys.path.append(os.path.join(os.path.dirname(os.curdir))) -from greyatomlib.pandas_guided_project.q06_sub_total.build import q06_sub_total +#from greyatomlib.pandas_guided_project.q06_sub_total.build import q06_sub_total import matplotlib.pyplot as plt plt.switch_backend('agg') +path1='data/excel-comp-data.xlsx' +path2='data/scraped.csv' + def q09_pie_chart_jan(path1,path2): + df1 = pd.read_excel(path1) + df1['total']=df1['Jan']+df1['Feb']+df1['Mar'] + df1.loc[len(df1),:]=df1.sum() + + df2 = pd.read_csv(path2) + + abbr_dict = dict(zip(df2.iloc[:,[1,6]]['United States of America'],df2.iloc[:,[1,6]]['Unnamed: 6'])) + + dftemp = df1['state'].map(abbr_dict) + + df1.insert(loc=6,column='abbr',value=dftemp) + df1.iloc[6,6] = 'MS' + df1.iloc[10,6] = 'TN' + + df3 = df1.groupby(['abbr']).sum() + + return df3.plot.pie(y='Jan',figsize=(7,7)) - "write your solution here" +q09_pie_chart_jan(path1,path2) diff --git a/q09_pie_chart_jan/tests/__pycache__/__init__.cpython-36.pyc b/q09_pie_chart_jan/tests/__pycache__/__init__.cpython-36.pyc index 07ab367..740b426 100644 Binary files a/q09_pie_chart_jan/tests/__pycache__/__init__.cpython-36.pyc and b/q09_pie_chart_jan/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_pie_chart_jan/tests/__pycache__/tests.cpython-36.pyc b/q09_pie_chart_jan/tests/__pycache__/tests.cpython-36.pyc index b3b93c2..758df9d 100644 Binary files a/q09_pie_chart_jan/tests/__pycache__/tests.cpython-36.pyc and b/q09_pie_chart_jan/tests/__pycache__/tests.cpython-36.pyc differ