forked from cs573-22s/datavis-final
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess_data.py
More file actions
53 lines (40 loc) · 1.6 KB
/
process_data.py
File metadata and controls
53 lines (40 loc) · 1.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import math
import numpy as np
import pandas as pd
# Load the raw input tables. Paths are relative to the current working directory.
df = pd.read_csv('HousingData2019.csv')
df_income = pd.read_csv('IncomeStd.csv')
df_population = pd.read_csv("PopulationStd.csv", decimal=".")
df_michellin = pd.read_csv("michellin_id.csv")

# Collapse the restaurant table to one row per "id": the summed rating and
# the list of restaurant names that share that id.
grouped = df_michellin.groupby("id").agg({"rating": "sum", "name": list})
print(grouped)
grouped.columns = ["Stars", "Restaurants"]

# Take explicit copies of the column subsets. The frames are renamed and (for
# Pop) modified below; doing that on a bare slice of the source frame raises
# pandas' SettingWithCopyWarning and may or may not write through.
Housing = df[["Area_Code", "Average_Price"]].copy()
Housing.columns = ["Code", "Housing"]

Income = df_income[["AREACD", "2019", "AREANM"]].copy()
Income.columns = ["Code", "Income", "Name"]

Pop = df_population[["Area Code", "2019"]].copy()
Pop.columns = ["Code", "Population"]

# Remove every "," from the population values (presumably thousands
# separators -- confirm against the CSV) and convert the column to numbers.
Pop["Population"] = pd.to_numeric(
    Pop["Population"].replace(',', '', regex=True)
)
def calculate_constant(pop, income):
    """Return a weighted score combining population and income.

    The score is ``pop / 100 + income * 0.3``.

    Args:
        pop: Population figure (a number, or anything supporting ``/``).
        income: Income figure (a number, or anything supporting ``*``).

    Returns:
        The combined score.
    """
    # The leftover debug print of ``income`` was dropped: this is a pure
    # arithmetic helper and should not write to stdout on every call.
    return pop / 100 + income * 0.3
def rgb(value, minimum=71130, maximum=941023):
    """Map *value* onto a blue -> green -> red colour ramp.

    ``minimum`` maps to pure blue ``(0, 0, 255)``, the midpoint of the range
    to pure green ``(0, 255, 0)`` and ``maximum`` to pure red ``(255, 0, 0)``.
    Values outside ``[minimum, maximum]`` are clamped to the nearest end of
    the ramp, so every channel always lies in ``0..255``.

    Args:
        value: The number to colour.
        minimum: Value mapped to the blue end of the ramp.
        maximum: Value mapped to the red end of the ramp.
            NOTE(review): the defaults look like an observed min/max of the
            data being coloured -- confirm they still match.

    Returns:
        An ``(r, g, b)`` tuple of ints.

    Raises:
        ZeroDivisionError: If ``minimum == maximum``.
    """
    minimum, maximum = float(minimum), float(maximum)
    # ratio runs from 0 (at minimum) through 1 (midpoint) to 2 (at maximum).
    ratio = 2 * (value - minimum) / (maximum - minimum)
    # Clamp both channels to 0..255. Without the upper bound an out-of-range
    # value produced a channel above 255 and a negative green component.
    b = int(min(255, max(0, 255 * (1 - ratio))))
    r = int(min(255, max(0, 255 * (ratio - 1))))
    g = 255 - b - r
    return (r, g, b)
# Join the per-area tables on the area code. The restaurant table is joined
# with an outer merge so areas without any restaurant rows are kept; the
# population and income joins use the default inner merge.
rest = Housing.merge(grouped, left_on='Code', right_on='id', how='outer')
merge_2 = rest.merge(Pop, on='Code')
m = merge_2.merge(Income, on='Code')

# Weighted score per area, computed column-wise rather than with a row-wise
# apply: same arithmetic in the same order, without a Python call per row.
m["weighted"] = m["Population"] / 100 + m["Income"] * 0.7 + m["Housing"] * 0.7

# Colour each area by its score. rgb() uses its own default minimum/maximum;
# the max/min printed below let you check the data against those defaults.
m["Color"] = m["weighted"].apply(rgb)

print(m)
print(m["weighted"].max())
print(m["weighted"].min())

# Write the joined table and the cleaned population table.
m.to_csv("joint.csv", index=False)
Pop.to_csv("population.csv", index=False)