commit-live-students · sonal7930 · May 9, 2018 · May 9, 2018 · Jul 7, 2018 · Jul 29, 2018
diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc
diff --git a/q01_load_data/build.py b/q01_load_data/build.py
@@ -1,5 +1,8 @@
 import pandas as pd
-
-
+path = 'data/excel-comp-data.xlsx'
 def q01_load_data(path):
-    "write your solution here"
+    df = pd.read_excel(path)
+    df["state"] = df["state"].str.lower()
+    total = df["Jan"]+df["Feb"]+df["Mar"] #total amount in the first quarter of the financial year
+    df["total"] = total
+    return df
diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_load_data/tests/__pycache__/tests.cpython-36.pyc b/q01_load_data/tests/__pycache__/tests.cpython-36.pyc
diff --git a/q02_append_row/__pycache__/__init__.cpython-36.pyc b/q02_append_row/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_append_row/__pycache__/build.cpython-36.pyc b/q02_append_row/__pycache__/build.cpython-36.pyc
diff --git a/q02_append_row/build.py b/q02_append_row/build.py
@@ -1,12 +1,19 @@
+# %load q02_append_row/build.py
 import pandas as pd
 import sys, os
 #sys.path.append(os.path.join(os.path.dirname(os.curdir)))
 from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data
 
+path = 'data/excel-comp-data.xlsx'
 
 def q02_append_row(path):
-    "write your solution here"
-
+    'write your solution here'
+    df = q01_load_data(path)
+    sum_row=df[['Jan','Feb','Mar','total']].sum() #sum for the month and total columns.
+    df_sum=pd.DataFrame(data=sum_row).T #Converting the series into a dataframe and transposing it
+    df_sum=df_sum.reindex(columns=df.columns) #using reindex to add the missing values
+    df_final=df.append(df_sum,ignore_index=True)
+    return df_final
 
 
 
diff --git a/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc b/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_append_row/tests/__pycache__/tests.cpython-36.pyc b/q02_append_row/tests/__pycache__/tests.cpython-36.pyc
diff --git a/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc b/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_scrape_clean/__pycache__/build.cpython-36.pyc b/q03_scrape_clean/__pycache__/build.cpython-36.pyc
diff --git a/q03_scrape_clean/build.py b/q03_scrape_clean/build.py
@@ -3,7 +3,18 @@
 import requests
 sys.path.append(os.path.join(os.path.dirname(os.curdir)))
 
+url = 'https://en.wikipedia.org/wiki/List_of_U.S._state_abbreviations'
 
 def q03_scrape_clean(url):
-    "write your solution here"
-
+    r = requests.get(url)
+    tables = pd.read_html(url,header = 0) #but reading the url via requests and then parsing the text is good enough.
+    t = tables[0]
+    t.rename(columns= {"Codes:  ISO ISO 3166 codes (2-letter, 3-letter, and 3-digit codes from ISO 3166-1; 2+2-letter codes from ISO 3166-2)  ANSI 2-letter and 2-digit codes from the ANSI standard INCITS 38:2009  USPS 2-letter codes used by the United States Postal Service  USCG 2-letter codes used by the United States Coast Guard (red text shows differences between ANSI and USCG) Abbreviations:  GPO Older variable-length official US Government Printing Office abbreviations  AP Abbreviations from the AP Stylebook (red text shows differences between GPO and AP)" : "United States of America",
+                        "ISO 3166 codes (2-letter, 3-letter, and 3-digit codes from ISO 3166-1; 2+2-letter codes from ISO 3166-2)": "C4",
+                        "2-letter and 2-digit codes from the ANSI standard INCITS 38:2009":"C6",
+                        "2-letter codes used by the United States Postal Service":"Codes used by US' Postal Service",
+                        "2-letter codes used by the United States Coast Guard (red text shows differences between ANSI and USCG)":"Codes used by US' Coast Guard",
+                        "Abbreviations from the AP Stylebook (red text shows differences between GPO and AP)":"Abbreviations from the AP Stylebook",
+                        "Codes:":"Codes"},inplace = True)
+    t = t[11:]
+    return t
diff --git a/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc b/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc b/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc
diff --git a/q04_mapping/__pycache__/__init__.cpython-36.pyc b/q04_mapping/__pycache__/__init__.cpython-36.pyc
diff --git a/q04_mapping/__pycache__/build.cpython-36.pyc b/q04_mapping/__pycache__/build.cpython-36.pyc
diff --git a/q04_mapping/build.py b/q04_mapping/build.py
@@ -1,10 +1,34 @@
+# %load q04_mapping/build.py
 import pandas as pd
 import sys, os
 import numpy as np
 #sys.path.append(os.path.join(os.path.dirname(os.curdir)))
 from greyatomlib.pandas_guided_project.q02_append_row.build import q02_append_row
+path1 = 'data/excel-comp-data.xlsx'
+path2 = 'data/scraped.csv'
 def q04_mapping(path1,path2):
-    "write your solution here"
+
+    df1 = q02_append_row(path1)
+    df1['abbr'] = np.nan
+    df2 = pd.read_csv(path2)
+    ab = df2.iloc[:,7]
+    name = df2['United States of America']
+    d = {}
+    for i in range(0,ab.shape[0]):
+        d[name[i].lower()] = ab[i]
+
+    for i in range(0,df1.shape[0]):
+        if df1.iloc[i,:]['state'] in d.keys():
+            df1.iloc[i,-1] = d[df1.iloc[i,:]['state']]
+
+    df2 = df1.iloc[:,0:5]
+    df2['total'] = df1['total']
+    df2['abbr'] = df1['abbr']
+    return df2
+
+
+
+
 
 
 
diff --git a/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc b/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q04_mapping/tests/__pycache__/test.cpython-36.pyc b/q04_mapping/tests/__pycache__/test.cpython-36.pyc
diff --git a/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc b/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc
diff --git a/q05_replace_missing_values/__pycache__/build.cpython-36.pyc b/q05_replace_missing_values/__pycache__/build.cpython-36.pyc
diff --git a/q05_replace_missing_values/build.py b/q05_replace_missing_values/build.py
@@ -1,3 +1,4 @@
+# %load q05_replace_missing_values/build.py
 import pandas as pd
 import numpy as np
 import sys
@@ -8,6 +9,13 @@
 path1 = 'data/excel-comp-data.xlsx'
 path2 = 'data/scraped.csv'
 def q05_replace_missing_values(path1,path2):
+    df = q04_mapping(path1,path2)
+    df.iloc[6,6] = 'MS'
+    df.iloc[10,6] = 'TN'
+    return df
 
 
-#print(q05_replace_missing_values(path1,path2).shape)
+#print(q
+
+
+
diff --git a/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc b/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc b/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc
diff --git a/q06_sub_total/__pycache__/__init__.cpython-36.pyc b/q06_sub_total/__pycache__/__init__.cpython-36.pyc
diff --git a/q06_sub_total/__pycache__/build.cpython-36.pyc b/q06_sub_total/__pycache__/build.cpython-36.pyc
diff --git a/q06_sub_total/build.py b/q06_sub_total/build.py
@@ -1,3 +1,4 @@
+# %load q06_sub_total/build.py
 import pandas as pd
 import numpy as np
 from sklearn.model_selection import train_test_split
@@ -10,7 +11,13 @@
 path2 = 'data/scraped.csv'
 
 def q06_sub_total(path1,path2):
-    "write your solution here"
+    'write your solution here'
+    df = q05_replace_missing_values(path1,path2)
+    #print(df)
+    df1 = df.groupby('abbr').sum()
+    return df1
+
+
 
 
 
diff --git a/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc b/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q06_sub_total/tests/__pycache__/test.cpython-36.pyc b/q06_sub_total/tests/__pycache__/test.cpython-36.pyc