-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCVforAllModels.R
More file actions
108 lines (101 loc) · 3.74 KB
/
CVforAllModels.R
File metadata and controls
108 lines (101 loc) · 3.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
library(caret)
library(pROC)
library(randomForest)
library(MASS)
library(klaR)
# Classifiers supported by CVBinaryClassifer ('svm' maps to caret's 'svmLinear2')
models <- c('logistic', 'rf', 'qda', 'nb', 'knn', 'svm')
# Predictor columns used to build the model formula (radiance angles + engineered features)
features <- c('NDAI','logSD','CORR','DF','CF','BF','AF','AN')
# Accuracy: proportion of predictions that equal the true labels.
accuracy_loss <- function(pred, actual){
  correct <- pred == actual
  mean(correct)
}
# Precision: true positives / predicted positives (positive class coded as 1).
# NaN when there are no predicted positives (0 / 0).
precision_loss <- function(pred, actual){
  true_pos <- sum(actual == 1 & pred == actual)
  predicted_pos <- sum(pred == 1)
  true_pos / predicted_pos
}
# Recall: true positives / actual positives (positive class coded as 1).
# NaN when there are no actual positives (0 / 0).
recall_loss <- function(pred, actual){
  true_pos <- sum(actual == 1 & pred == actual)
  actual_pos <- sum(actual == 1)
  true_pos / actual_pos
}
# F1 score: harmonic mean of precision and recall.
# Fix: the original returned NaN when precision was NaN (no predicted
# positives) or when precision + recall == 0; F1 is conventionally 0 in
# those degenerate cases.
f1_loss <- function(pred, actual){
  p <- precision_loss(pred, actual)
  r <- recall_loss(pred, actual)
  # Guard against 0/0: no positive predictions or no true positives at all.
  if (is.nan(p) || is.nan(r) || p + r == 0) {
    return(0)
  }
  (2 * p * r) / (p + r)
}
# AUC: builds a ROC curve from the probability of the positive class
# (pred is expected to carry a "1" probability column), plots it as a
# side effect, and returns the area under the curve.
# NOTE(review): relies on pROC::roc / pROC::auc from the top-of-file imports.
auc_loss <- function(pred, actual){
  # Named roc_obj so the local does not shadow the pROC::roc function.
  roc_obj <- roc(actual, pred[, "1"])
  plot(roc_obj)
  auc(roc_obj)
}
# K-fold cross-validation for a binary classifier.
#
# classifier : one of 'logistic', 'rf', 'qda', 'nb', 'knn', 'svm'
# features   : character vector of predictor column names
# labels     : name of the label column (classes coded 1 / -1 — TODO confirm
#              against the data; the logistic branch thresholds on that coding)
# K          : number of CV folds
# data       : data frame containing features and labels
# loss       : loss function taking (pred, actual), e.g. accuracy_loss
#
# Prints the per-fold score and returns the vector of K fold scores
# (the original returned NULL; returning loss_vec is backward-compatible
# since the return value was previously unused).
CVBinaryClassifer <- function(classifier, features, labels, K, data, loss){
  folds <- createFolds(data[, labels], k = K)
  loss_vec <- numeric(length(folds))
  formula <- paste(paste(labels, "~"), paste(features, collapse = "+"))

  # All caret-backed classifiers share the same fit/predict loop; only the
  # method string differs, so map classifier name -> caret method once.
  caret_methods <- c(rf = "rf", qda = "qda", nb = "nb",
                     knn = "knn", svm = "svmLinear2")

  if (classifier == "logistic"){
    for (f in seq_along(folds)){
      fit <- glm(formula, family = "binomial", data = data[-folds[[f]], ])
      # Bug fix: type = "response" yields probabilities in [0, 1]; the
      # previous default ("link") returned log-odds, so thresholding at
      # 0.5 was applied on the wrong scale.
      pred_prob <- predict(fit, data[folds[[f]], features], type = "response")
      pred <- ifelse(pred_prob > 0.5, 1, -1)
      loss_vec[f] <- loss(pred, data[folds[[f]], labels])
      print(paste("CV score for Fold", f, "is", loss_vec[f]))
    }
  }
  else if (classifier %in% names(caret_methods)){
    for (f in seq_along(folds)){
      model <- train(as.formula(formula),
                     data = data[-folds[[f]], ],
                     method = caret_methods[[classifier]])
      pred <- predict(model, data[folds[[f]], features])
      loss_vec[f] <- loss(pred, data[folds[[f]], labels])
      print(paste("CV score for Fold", f, "is", loss_vec[f]))
    }
  }
  else{
    print("Not a supported classifier")
    return(invisible(NULL))
  }
  loss_vec
}