-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patherrorMetrics.py
More file actions
184 lines (157 loc) · 6.92 KB
/
errorMetrics.py
File metadata and controls
184 lines (157 loc) · 6.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 1 15:59:39 2019
@author: vector
"""
"""
Created on Fri Jun 21 12:39:02 2019
Error metric functions
@author: lhuang
"""
import gc
import os
import sys
#import cv2
import matplotlib.pyplot as plt
from sklearn import metrics
import pandas as pd
import glob
#import numba
import numpy as np
#import skimage.io as io
#from . import IO as IO
#import src.ImageObject as image
#import src.DataGeneratorFunctions as Gen
#%% error metric functions
#@numba.jit
def Thresh(npImage, fThreshVal = 0.5):
    """Binarise an array against a threshold.

    Every element greater than or equal to ``fThreshVal`` becomes 1 and
    every other element (including NaN, which fails the comparison) becomes
    0. The result is a new float array with the same shape as ``npImage``;
    the input is not modified.
    """
    at_or_above = npImage >= fThreshVal
    return np.where(at_or_above, 1.0, 0.0)
#@numba.jit # jit compiles and increases the speed
def ConfMatrix(npTruthImg, npPredImg):
    """Count the confusion-matrix entries of a binary prediction.

    Both inputs are converted to boolean arrays (any non-zero value counts
    as positive) and compared element-wise with vectorised NumPy logical
    operations. The original author reported this to be faster than
    sklearn's ``confusion_matrix`` for large images.

    Parameters
    ----------
    npTruthImg : array-like
        Ground-truth labels. May be an array or a (nested) list of images.
    npPredImg : array-like
        Predicted labels; must be broadcastable against ``npTruthImg``.

    Returns
    -------
    list
        ``[tn, fp, fn, tp]`` as NumPy integer scalars.
    """
    # The builtin ``bool`` is used because the ``np.bool`` alias was removed
    # in NumPy 1.24; ``np.asarray`` lets plain lists through as documented.
    pred = np.asarray(npPredImg).astype(bool)
    truth = np.asarray(npTruthImg).astype(bool)

    not_pred = ~pred
    not_truth = ~truth

    # np.sum keeps the counts as NumPy scalars, so downstream ratios keep
    # NumPy's division semantics (nan/inf with a warning rather than a
    # ZeroDivisionError).
    tn = np.sum(not_pred & not_truth)
    fp = np.sum(pred & not_truth)
    fn = np.sum(not_pred & truth)
    tp = np.sum(pred & truth)
    return [tn, fp, fn, tp]
#%% error metrics for classifications
def GenAUC(npPred, npTruth):
    """Compute the ROC curve and its area for a binary classifier.

    The curve is produced by ``sklearn.metrics.roc_curve`` with label 1
    treated as the positive class, and the area by ``sklearn.metrics.auc``.

    Returns
    -------
    tuple
        ``(auc, fpr, tpr, thresholds)`` where the last three are the arrays
        returned by ``roc_curve``.
    """
    roc = metrics.roc_curve(npTruth, npPred, pos_label=1)
    false_pos_rate, true_pos_rate, cutoffs = roc
    area = metrics.auc(false_pos_rate, true_pos_rate)
    return area, false_pos_rate, true_pos_rate, cutoffs
def PlotAUC(npPred, npTruth, sOutDir):
    """Plot the ROC curve, save it and display it.

    The curve comes from ``sklearn.metrics.roc_curve`` with label 1 as the
    positive class. ``sOutDir`` is handed straight to ``plt.savefig``, so it
    is used as the output file path. The plot is drawn on the current
    pyplot figure and ``plt.show()`` is called afterwards.
    """
    # The thresholds returned by roc_curve are not needed for the plot.
    false_pos_rate, true_pos_rate, _ = metrics.roc_curve(npTruth, npPred, pos_label=1)

    plt.plot(false_pos_rate, true_pos_rate)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')

    plt.savefig(sOutDir)
    plt.show()
def OptimalThreshAUC(fpr, tpr, thresholds):
    """Pick the ROC threshold closest to the perfect classifier.

    On an ROC plot the x-axis is the false positive rate and the y-axis is
    the true positive rate, so the ideal operating point is (0, 1). For each
    threshold the Euclidean distance from its (fpr, tpr) point to (0, 1) is
    ``sqrt(fpr**2 + (1 - tpr)**2)``; the threshold with the smallest
    distance is returned. When several thresholds tie, the first one wins.
    """
    horizontal_gap = fpr
    vertical_gap = 1 - tpr
    dist_to_ideal = np.sqrt(horizontal_gap ** 2 + vertical_gap ** 2)
    best = np.argmin(dist_to_ideal)
    return thresholds[best]
def GenErrorRates(npPred, npTruth):
    """Compute F1, recall, precision and error rate for binary labels.

    The prediction must already be thresholded: both inputs may only
    contain 0s and 1s and must have the same shape. The counts come from
    ``ConfMatrix``.

    Returns
    -------
    tuple
        ``(F1, recall, precision, ErrorRate)``.

    Raises
    ------
    ValueError
        If the shapes differ or either input holds a value other than 0 or 1.
    """
    if np.shape(npPred) != np.shape(npTruth):
        raise ValueError('prediction and truth labels must be the same size')

    # Truth is checked before the prediction, mirroring a short-circuit "or".
    for labels in (npTruth, npPred):
        neither_zero_nor_one = (labels != 0) & (labels != 1.0)
        if np.any(neither_zero_nor_one):
            raise ValueError('inputs must be 0s or 1s')

    tn, fp, fn, tp = ConfMatrix(npTruth, npPred)

    recall = tp / (tp + fn)
    precision = tp / (tp + fp)
    misclassified = fp + fn
    ErrorRate = misclassified / (tn + fp + fn + tp)
    F1 = 2 * precision * recall / (precision + recall)
    return F1, recall, precision, ErrorRate
def GenFNRFPR(npPred, npTruth):
    """Compute the false negative rate and false positive rate.

    Both inputs may only contain 0s and 1s and must have the same shape.
    The counts come from ``ConfMatrix``.

    Returns
    -------
    tuple
        ``(FNR, FPR)`` where ``FNR = fn / (fn + tp)`` and
        ``FPR = fp / (fp + tn)``.

    Raises
    ------
    ValueError
        If the shapes differ or either input holds a value other than 0 or 1.
    """
    if np.shape(npPred) != np.shape(npTruth):
        raise ValueError('prediction and truth labels must be the same size')

    # Truth is checked before the prediction, mirroring a short-circuit "or".
    for labels in (npTruth, npPred):
        neither_zero_nor_one = (labels != 0) & (labels != 1.0)
        if np.any(neither_zero_nor_one):
            raise ValueError('inputs must be 0s or 1s')

    tn, fp, fn, tp = ConfMatrix(npTruth, npPred)

    false_negative_rate = fn / (fn + tp)
    false_positive_rate = fp / (fp + tn)
    return false_negative_rate, false_positive_rate
def CalcErrorRates(npResults, npTruthValues, pdConf, sPatID, sSlideID, lClasses):
    """Compute the error metrics for one slide and add them to ``pdConf``.

    The AUC, ROC curve and distance-optimal threshold are computed from the
    raw scores via ``GenAUC`` and ``OptimalThreshAUC``. F1, recall,
    precision and error rate are computed by ``GenErrorRates`` on the first
    column of ``npResults`` thresholded with ``Thresh``'s default cut-off
    (not the optimal threshold, which is only recorded).

    Parameters
    ----------
    npResults : numpy array
        Prediction scores; indexed as ``npResults[:, 0]``, so it must be
        two-dimensional.
    npTruthValues : numpy array
        Ground-truth labels.
    pdConf : pandas.DataFrame
        Table of results collected so far. It is not modified in place.
    sPatID, sSlideID
        Identifiers stored in the 'PatID' and 'Slide ID' columns.
    lClasses
        Unused; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the rows of ``pdConf`` plus one row for this
        slide.
    """
    auc, fpr, tpr, thresholds = GenAUC(npResults, npTruthValues)
    optThresh = OptimalThreshAUC(fpr, tpr, thresholds)
    F1, recall, precision, ErrorRate = GenErrorRates(Thresh(npResults[:, 0]), npTruthValues)

    row = {'PatID': sPatID, 'Slide ID': sSlideID,
           'fpr': fpr, 'tpr': tpr, 'auc': auc, 'f1': F1,
           'recall': recall, 'precision': precision,
           'Error Rate': ErrorRate,
           'optimal threshold': optThresh}

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. Wrapping the dict in a list makes it a single record, so
    # the fpr/tpr arrays are stored as one cell each.
    # The result must be returned: rebinding the local name alone never
    # reaches the caller.
    return pd.concat([pdConf, pd.DataFrame([row])], ignore_index=True)
def SaveTrainingHistory(sOutDir, history):
    """Write the training-history plots into ``sOutDir``.

    The directory is created if it does not exist. The two figures returned
    by ``TrainingHistoryPlots`` are saved there at 300 dpi as
    'model_accuracy.png' and 'model_loss.png', then closed.
    """
    # The trained model itself is not saved here: the files are too large to
    # write on every cross-validation fold.
    os.makedirs(sOutDir, exist_ok=True)

    accuracy_fig, loss_fig = TrainingHistoryPlots(history)
    outputs = (
        (accuracy_fig, 'model_accuracy.png'),
        (loss_fig, 'model_loss.png'),
    )

    # Save both figures first, then release them.
    for fig, file_name in outputs:
        fig.savefig(os.path.join(sOutDir, file_name), dpi=300)
    for fig, _ in outputs:
        plt.close(fig)
def TrainingHistoryPlots(history):
    """Build the metric and loss plots for a training run.

    Based on
    https://machinelearningmastery.com/display-deep-learning-model-training-history-in-keras/

    Parameters
    ----------
    history
        Object whose ``history`` attribute is a dict mapping a metric name
        to its per-epoch values (presumably a Keras ``History`` - confirm
        against the caller).

    Returns
    -------
    tuple
        ``(Fig1, Fig2)``: Fig1 plots every entry whose name does not contain
        'loss'; Fig2 plots 'loss' and, when present, 'val_loss'. Epochs are
        numbered from 1 on the x-axis.

    Raises
    ------
    KeyError
        If the history has no 'loss' entry.
    """
    hist = history.history

    # Figure 1: everything that is not a loss (accuracy and similar metrics).
    Fig1, metric_ax = plt.subplots()
    # Keep the original keys for the lookup; stringify only for the legend.
    metric_keys = [key for key in hist if 'loss' not in str(key)]
    for key in metric_keys:
        values = hist[key]
        metric_ax.plot(np.arange(1, len(values) + 1), values)
    metric_ax.set_title('model accuracy')
    metric_ax.set_ylabel('accuracy')
    metric_ax.set_xlabel('epoch')
    if metric_keys:
        metric_ax.legend([str(key) for key in metric_keys], loc='upper left')

    # Figure 2: training loss and, if recorded, validation loss. Each series
    # gets an x-axis from its own length rather than from whichever metric
    # was plotted last above.
    Fig2, loss_ax = plt.subplots()
    train_loss = hist['loss']
    loss_ax.plot(np.arange(1, len(train_loss) + 1), train_loss)
    loss_labels = ['train']
    if 'val_loss' in hist:
        val_loss = hist['val_loss']
        loss_ax.plot(np.arange(1, len(val_loss) + 1), val_loss)
        loss_labels.append('val')
    loss_ax.set_title('model loss')
    loss_ax.set_ylabel('loss')
    loss_ax.set_xlabel('epoch')
    loss_ax.legend(loss_labels, loc='upper left')

    return Fig1, Fig2