-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlogreg_predict.py
More file actions
101 lines (84 loc) · 2.91 KB
/
logreg_predict.py
File metadata and controls
101 lines (84 loc) · 2.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import sys
import csv
import numpy as np
import ml_functions as ml
import matplotlib.pyplot as plt
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), computed stably.

    The naive formula overflows inside ``np.exp`` for large negative ``z``
    (raising a RuntimeWarning). Here the exponential is only ever taken of
    a non-positive number, so it stays within (0, 1].

    Args:
        z: A scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray of the same
        shape as ``z``, with every value in [0, 1].
    """
    z = np.asarray(z)
    # exp(-|z|) can underflow to 0 but can never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    # Both are the same function, each written so the exponent is <= 0.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves real arrays untouched.
    return result[()]
def showDistribution(distribution):
    """Draw one overlaid histogram per entry of ``distribution`` and show it.

    Args:
        distribution: A sequence of sequences; each inner sequence is passed
            to ``plt.hist`` as one data set. The legend labels are fixed to
            Gryffindor, Ravenclaw, Slytherin, Hufflepuff, in that order.
    """
    histStyle = {
        'histtype': 'stepfilled',
        'ec': 'black',
        'alpha': 0.3,
        'bins': 2,
    }
    labelSize = 16
    tickSize = 14

    for houseStudents in distribution:
        plt.hist(houseStudents, **histStyle)

    plt.title('Distribution')
    plt.legend(['Gryffindor', 'Ravenclaw', 'Slytherin', 'Hufflepuff'])
    plt.xlabel('Houses', fontsize=labelSize)
    plt.ylabel('Students', fontsize=labelSize)
    plt.xticks(fontsize=tickSize)
    plt.yticks(fontsize=tickSize)
    # Blocks until the plot window is closed (interactive backends).
    plt.show()
def predict():
    """Predict a house for every row of ``X`` and write assets/houses.csv.

    Reads the module-level globals ``X`` (one row of features per student),
    ``housesWeights`` (one weight vector per house) and ``houses`` (house
    names, in the same order as ``housesWeights``). For each student the
    sigmoid of the dot product with every weight vector is computed, and
    the house with the highest score is chosen. If several houses tie for
    the highest score, the last one wins.

    Side effects:
        Overwrites ``assets/houses.csv`` with an ``Index,Hogwarts House``
        header followed by one ``<row index>,<house>`` line per student.

    Returns:
        A list of four lists holding the predicted house name once per
        student assigned to it, in the order Gryffindor, Ravenclaw,
        Slytherin, Hufflepuff (the order of showDistribution's legend).
    """
    # Fixed slot of each house inside the returned distribution.
    slotByHouse = {
        'Gryffindor': 0,
        'Ravenclaw': 1,
        'Slytherin': 2,
        'Hufflepuff': 3,
    }
    distribution = [[], [], [], []]

    # The context manager closes (and flushes) the file even when scoring
    # raises part-way through; previously the handle was never closed.
    with open('assets/houses.csv', 'w') as f:
        f.write('Index,Hogwarts House\n')
        for i, studMarks in enumerate(X):
            probs = [
                sigmoid(np.dot(studMarks, weights))
                for weights in housesWeights
            ]
            # NOTE(review): ml.getMinMax presumably returns (min, max);
            # only the second value is used here.
            _, maxV = ml.getMinMax(probs)

            # No early break on purpose: on a tie the last match wins.
            # house stays '' when nothing compares equal to maxV.
            house = ''
            for j, prob in enumerate(probs):
                if prob == maxV:
                    house = houses[j]

            if house in slotByHouse:
                distribution[slotByHouse[house]].append(house)
            f.write(str(i) + ',' + house + '\n')

    return distribution
def formatFeatures():
    """Build the per-feature columns from ``rawdata`` and normalize them.

    Reads the module-level globals ``rawdata`` (CSV rows as lists of
    strings), ``indexFeatures`` (index of the first feature column) and
    ``lenFeatures`` (number of feature columns). Empty cells become 0.0;
    every other cell is converted with ``float``.

    Returns:
        A list of ``lenFeatures`` columns, each the result of
        ``ml.normalizeData(column, minV, maxV)``.
    """
    columns = [[] for _ in range(lenFeatures)]

    # Walk the table row by row, filing each cell into its feature column.
    for record in rawdata:
        for offset, column in enumerate(columns):
            cell = record[offset + indexFeatures]
            column.append(float(cell) if cell != '' else 0.0)

    # Normalize
    # NOTE(review): presumably min-max scaling; confirm in ml_functions.
    normalized = []
    for column in columns:
        low, high = ml.getMinMax(column)
        normalized.append(ml.normalizeData(column, low, high))
    return normalized
if __name__ == '__main__':
    # NOTE: every name assigned below is a module-level global on purpose.
    # formatFeatures() and predict() read rawdata, indexFeatures,
    # lenFeatures, houses, X and housesWeights directly, so this block must
    # not be moved into a function without changing them as well.
    if len(sys.argv) < 2:
        print("Argument missing")
        # sys.exit is always available (the bare exit() helper is injected
        # by the site module), and a non-zero status reports the failure.
        sys.exit(1)

    # newline='' is what the csv module asks for; the with-block makes sure
    # the handle is closed once the rows are in memory.
    with open(sys.argv[1], newline='') as csvfile:
        rawdata = list(csv.reader(csvfile))
    if not rawdata:
        # Without at least one row there is no width to measure below.
        print("Dataset is empty")
        sys.exit(1)

    # Features: modify indexFeatures according to the dataset.
    # In this case the first two features are skipped: the second one has a
    # clone and the first has no impact.
    indexFeatures = 8
    lenFeatures = len(rawdata[0]) - indexFeatures
    # The first row is only used to count the columns (treated as a header).
    del rawdata[0]

    # Order matters: predict() maps weight row j to houses[j].
    houses = ['Gryffindor', 'Ravenclaw', 'Slytherin', 'Hufflepuff']

    data = formatFeatures()
    # formatFeatures returns one list per feature; transpose to get one row
    # per student, then prepend a column of ones (bias term).
    X = np.transpose(data)
    ones = np.ones((len(X), 1))
    X = np.concatenate((ones, X), axis=1)

    with open('assets/weights.csv', newline='') as csvWeights:
        rawWeights = list(csv.reader(csvWeights))
    # The first cell of every weights row is skipped (presumably a label);
    # the remaining cells are the numeric weights.
    housesWeights = [[float(value) for value in row[1:]] for row in rawWeights]

    distribution = predict()
    showDistribution(distribution)