-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathresampling_methods_ch5.R
More file actions
58 lines (44 loc) · 1.99 KB
/
resampling_methods_ch5.R
File metadata and controls
58 lines (44 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Load and prepare the prostate cancer data ----
# NOTE(review): the file name is spelled "prostate_caner.dat"; confirm it matches
# the file on disk before "correcting" it.
prostate_cancer <- read.table(
  "~/Documents/github/stat_learning/data/prostate_caner.dat",
  quote = "\""
)

# Name the first nine columns in a single assignment.
colnames(prostate_cancer)[1:9] <- c(
  "idCode", "tumor", "age", "race", "rectalExamResult",
  "capsularInvolvement", "antigenValue", "tumorVolume", "gleasonScore"
)

# Store the two coded categorical predictors as factors.
prostate_cancer[c("race", "rectalExamResult")] <- lapply(
  prostate_cancer[c("race", "rectalExamResult")],
  as.factor
)

# Put the columns on the search path so they can be referenced by bare name.
attach(prostate_cancer)
## Validation set approach ----
# Seed the RNG so the random train/validation split is reproducible.
set.seed(1)
# Draw 200 training rows from however many rows the data actually has, rather
# than assuming exactly 380.
train <- sample(nrow(prostate_cancer), 200)
# Logistic regression fitted on the training rows only.
glm.fit <- glm(
  tumor ~ age + race + rectalExamResult + capsularInvolvement +
    antigenValue + gleasonScore,
  data = prostate_cancer,
  family = binomial,
  subset = train
)
# Mean squared error of the predicted probabilities on the held-out rows.
# `tumor` is read from the data frame explicitly instead of through attach().
mean(
  (prostate_cancer$tumor -
    predict(glm.fit, prostate_cancer, type = "response"))[-train]^2
)
## Leave-one-out cross-validation (LOOCV) ----
library(boot)
# Logistic regression fitted on every observation.
glm.fit <- glm(
  tumor ~ age + race + rectalExamResult + capsularInvolvement +
    antigenValue + gleasonScore,
  data = prostate_cancer,
  family = binomial
)
coef(glm.fit)
# K is left at its default, which cv.glm() documents as leave-one-out.
kfCV <- cv.glm(data = prostate_cancer, glmfit = glm.fit)
# Per the boot documentation, delta holds the raw CV estimate of prediction
# error followed by a bias-adjusted estimate.
kfCV$delta
# LOOCV error of logistic models with polynomial terms of degree 1 to 5 ----
cv.error <- numeric(5)
for (i in seq_len(5)) {
  # antigenValue and gleasonScore both enter as degree-i polynomials.
  glm.fit <- glm(
    tumor ~ age + race + rectalExamResult + capsularInvolvement +
      poly(antigenValue, i) + poly(gleasonScore, i),
    data = prostate_cancer,
    family = binomial
  )
  # Keep only the first component of delta for this degree.
  cv.error[i] <- cv.glm(data = prostate_cancer, glmfit = glm.fit)$delta[1]
}
cv.error
# 5-fold CV error of logistic models with a degree 1 to 4 polynomial in age ----
# The fold assignment is random when K is smaller than the number of rows, so
# seed the RNG to make the estimates reproducible.
set.seed(1)
cv.error <- numeric(4)
for (i in seq_len(4)) {
  glm.fit <- glm(
    tumor ~ poly(age, i) + race + rectalExamResult + capsularInvolvement +
      antigenValue + gleasonScore,
    data = prostate_cancer,
    family = binomial
  )
  # Keep only the first component of delta for this degree.
  cv.error[i] <- cv.glm(
    data = prostate_cancer,
    glmfit = glm.fit,
    K = 5
  )$delta[1]
}
# Display the estimates, as the polynomial LOOCV section does.
cv.error
#Estimate model parameters via bootstrapping
library(boot)
# Statistic function for boot(): refit the logistic model on a resample and
# return its coefficients.
#   data  - data frame with columns tumor, age, capsularInvolvement,
#           antigenValue and gleasonScore.
#   index - row indices selecting the resample.
# Returns the named coefficient vector of the fitted model.
# The model is fitted to `data` itself rather than to the global
# prostate_cancer, so the function honours whatever data set it is handed.
boot.fn <- function(data, index) {
  coefficients(
    glm(
      tumor ~ age + capsularInvolvement + antigenValue + gleasonScore,
      data = data,
      family = binomial,
      subset = index
    )
  )
}
# Seed the RNG, then draw 1000 bootstrap replicates of the model coefficients.
set.seed(1)
boot(data = prostate_cancer, statistic = boot.fn, R = 1000)