forked from olin-toolboxes/Toolbox-MachineLearning
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlearning_curve.py
More file actions
56 lines (47 loc) · 1.85 KB
/
learning_curve.py
File metadata and controls
56 lines (47 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import numpy
from sklearn.datasets import *
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
def display_digits():
    """Print the digits dataset description and show its first ten images.

    Each of the first ten rows of the dataset is reshaped into an 8x8 grid
    and drawn in grayscale on a 5-row by 2-column grid of subplots.
    """
    dataset = load_digits()
    print(dataset.DESCR)

    figure = plt.figure()
    # Subplot positions are 1-based, so start the counter at 1.
    for position, pixels in enumerate(dataset.data[:10], start=1):
        axes = figure.add_subplot(5, 2, position)
        image = numpy.reshape(pixels, (8, 8))
        axes.matshow(image, cmap='gray')

    plt.show()
def train_model():
    """Plot a learning curve for logistic regression on the digits dataset.

    For every training percentage in 5, 10, ..., 90 the digit images are
    randomly split into a training and a test set ``num_trials`` times. A
    LogisticRegression model is fit on each training split and scored on the
    matching test split. The mean test accuracy per percentage is then
    plotted against the percentage of data used for training.

    NOTE(review): ``train_test_split`` comes from the file-level import of
    ``sklearn.cross_validation``; newer scikit-learn releases expose it from
    ``sklearn.model_selection`` instead -- confirm against the installed
    version.
    """
    data = load_digits()
    num_trials = 10
    train_percentages = range(5, 95, 5)
    test_accuracies = numpy.zeros(len(train_percentages))

    # enumerate() supplies the result slot directly, so the index no longer
    # depends on the start/step of train_percentages.
    for index, percent in enumerate(train_percentages):
        # Repeat each train_size num_trials times to smooth out the
        # variability introduced by the random splits.
        score_total = 0.0
        for _ in range(num_trials):
            # Divide by 100.0 so the fraction is a float even under
            # integer-division semantics.
            X_train, X_test, y_train, y_test = train_test_split(
                data.data, data.target, train_size=percent / 100.0)
            # NOTE(review): per the scikit-learn docs C is the inverse
            # regularization strength, so 10**-10 regularizes very heavily.
            model = LogisticRegression(C=10**-10)
            model.fit(X_train, y_train)
            score_total += model.score(X_test, y_test)
        # Average over the actual number of trials, not a hard-coded 10.
        test_accuracies[index] = score_total / num_trials

    plt.figure()
    plt.plot(train_percentages, test_accuracies)
    plt.xlabel('Percentage of Data Used for Training')
    plt.ylabel('Accuracy on Test Set')
    plt.show()
if __name__ == "__main__":
    # Script entry point: run the learning-curve experiment.
    # To preview sample digit images instead, enable the call below.
    # display_digits()
    train_model()