From 590fc25ddc6392890ab84bf1158ec5902b9db96e Mon Sep 17 00:00:00 2001 From: zekiakyol <40212849+zekiakyol@users.noreply.github.com> Date: Sun, 26 Mar 2023 14:30:38 +0300 Subject: [PATCH] Fix `weighted.mean(w =)` argument's denominator Thank you for the great post! Even though R automatically re-weights if the sum of the weights is not equal to 1, the following fix makes it mathematically correct in my opinion: Since the `folds` object is a vector, `sum(folds)` is the numeric sum of the fold labels (1s, 2s, 3s, 4s, 5s), which is greater than the total sample size of 1000. ``` > sum(folds) [1] 2956 > table(folds)/sum(folds) folds 1 2 3 4 5 0.07104195 0.07171854 0.06326116 0.06562923 0.06664411 ``` I think the correct denominator and weights are ``` > sum(xtabs(~folds)) [1] 1000 > table(folds)/sum(xtabs(~folds)) folds 1 2 3 4 5 0.210 0.212 0.187 0.194 0.197 ``` --- Rmarkdown/Cross-ValidationInClassHO.Rmd | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Rmarkdown/Cross-ValidationInClassHO.Rmd b/Rmarkdown/Cross-ValidationInClassHO.Rmd index d607497..fbf7cf3 100644 --- a/Rmarkdown/Cross-ValidationInClassHO.Rmd +++ b/Rmarkdown/Cross-ValidationInClassHO.Rmd @@ -191,7 +191,7 @@ The left side of Figure 5.2 on page 178 of @james_introduction_2013 shows the va ```{r} library(ISLR) n <- nrow(Auto) -plot(1:10, type ="n", xlab = "Degree of Polynomial", ylim = c(15, 30), +plot(1:10, type = "n", xlab = "Degree of Polynomial", ylim = c(15, 30), ylab = "Mean Squared Prediction Error") IND <- sample(1:n, size = floor(n/2), replace = FALSE) train <- Auto[IND, ] @@ -277,11 +277,11 @@ table(folds) sum(xtabs(~folds)) for(j in 1:k){ modq <- lm(y ~ poly(x, 2, raw = TRUE), data = DF[folds != j, ]) - pred <- predict(modq, newdata = DF[folds ==j, ]) + pred <- predict(modq, newdata = DF[folds == j, ]) MSPE[j] <- mean((DF[folds == j, ]$y - pred)^2) } MSPE -weighted.mean(MSPE, table(folds)/sum(folds)) +weighted.mean(MSPE, w = table(folds)/sum(xtabs(~folds))) ``` ### Using `caret` @@ -382,7 +382,7 @@ 
The right side of Figure 5.4 on page 180 of @james_introduction_2013 shows the 1 ```{r} # Your code here -plot(1:10, type ="n", xlab = "Degree of Polynomial", ylim = c(16, 26), +plot(1:10, type = "n", xlab = "Degree of Polynomial", ylim = c(16, 26), ylab = "Mean Squared Prediction Error", main = "10-fold CV") k <- 10 # number of folds MSPE <- numeric(k) @@ -406,7 +406,7 @@ cv <- numeric(k) ```{r} # Your Code Here set.seed(123) -plot(1:10, type ="n", xlab = "Degree of Polynomial", ylim = c(16, 26), +plot(1:10, type = "n", xlab = "Degree of Polynomial", ylim = c(16, 26), ylab = "Mean Squared Prediction Error", main = "10-fold CV") k <- 10 # number of folds MSPE <- numeric(k) @@ -455,7 +455,7 @@ MSPE <- numeric(k) ```{r} # Your Code Here -plot(1:10, type ="n", xlab = "Degree of Polynomial", ylim = c(16, 26), +plot(1:10, type = "n", xlab = "Degree of Polynomial", ylim = c(16, 26), ylab = "Mean Squared Prediction Error", main = "10-fold CV") # # @@ -577,7 +577,7 @@ glm.fit <- glm(hwfat ~ abs + triceps, data = HSWRESTLER) ```{r} # Your Code Here -plot(1:10, type ="n", xlab = "Degree of Polynomial", ylim = c(16, 26), +plot(1:10, type = "n", xlab = "Degree of Polynomial", ylim = c(16, 26), ylab = "Mean Squared Prediction Error") k <- nrow(Auto) # number of folds MSPE <- numeric(k) @@ -600,7 +600,7 @@ Using the short cut formula: ```{r} # Your Code Here -plot(1:10, type ="n", xlab = "Degree of Polynomial", ylim = c(16, 26), +plot(1:10, type = "n", xlab = "Degree of Polynomial", ylim = c(16, 26), ylab = "Mean Squared Prediction Error") cv <- numeric(10) #