From 943135f13e0dead5cffb2ff7571accf1ab8760ad Mon Sep 17 00:00:00 2001 From: "Robert J. van der Boon" Date: Fri, 24 Mar 2023 17:22:32 +0100 Subject: [PATCH 1/2] Fix mean and var when mode is lower than trunc min --- 02-Truncated-Triangular-Distribution.Rmd | 12 ++--- Scripts/Generate-Truncated-Triangular.R | 58 ++++++++---------------- 2 files changed, 25 insertions(+), 45 deletions(-) diff --git a/02-Truncated-Triangular-Distribution.Rmd b/02-Truncated-Triangular-Distribution.Rmd index 19871fb..baad6a3 100644 --- a/02-Truncated-Triangular-Distribution.Rmd +++ b/02-Truncated-Triangular-Distribution.Rmd @@ -317,11 +317,11 @@ $$ Case 3 ($M \leq a \leq b \leq U$): $$ - E(X)=\frac{\int_{M}^{b} x \cdot \frac{2(U-x)}{(U-L)(U-M)} dx}{F(b)-F(a)} + E(X)=\frac{\int_{a}^{b} x \cdot \frac{2(U-x)}{(U-L)(U-M)} dx}{F(b)-F(a)} $$ $$ - = \frac{\frac{3b^2U-3M^2U-2b^3+2M^3}{3\left(U-L\right)\left(U-M\right)}}{F(b)-F(a)} + = \frac{\frac{-2a^3+b^2\left(2b-3U\right)+3a^2 U}{3\left(L-U\right)\left(U-M\right)}}{F(b)-F(a)} $$ ### Median @@ -378,11 +378,11 @@ $$ Case 3 ($M \leq a \leq b \leq U$): $$ - \text{Var}(X)=\frac{\int_{M}^{b} x^{2} \cdot \frac{2(U-x)}{(U-L)(U-M)} dx}{F(b)-F(a)}-E(X)^{2} + \text{Var}(X)=\frac{\int_{a}^{b} x^{2} \cdot \frac{2(U-x)}{(U-L)(U-M)} dx}{F(b)-F(a)}-E(X)^{2} $$ $$ - = \frac{\frac{4b^3U-4M^3U-3b^4+3M^4}{6\left(U-L\right)\left(U-M\right)}}{F(b)-F(a)}-E(X)^{2} + = \frac{\frac{-3a^4+4a^3U+b^3(3b-4U)}{6\left(L-U\right)\left(U-M\right)}}{F(b)-F(a)}-E(X)^{2} $$ ## An Object to Hold These Properties @@ -455,7 +455,7 @@ generate.truncated.triangular <- function(a, b, orig.tri.dist) { result.numerator / result.denominator } else if (M <= a & a <= b) { result.numerator <- - (3 * b ^ 2 * U - 3 * a ^ 2 * U - 2 * b ^ 3 + 2 * a ^ 3) / (3 * (U - L) * (U - M)) + (-2 * (a ^ 3) + (b ^ 2)* (2 * b - 3 * U) + 3 * (a ^ 2) * U) / (3* (L - U) * (U - M)) result.denominator <- orig.tri.dist$cdf(b) - orig.tri.dist$cdf(a) result.numerator / result.denominator @@ -497,7 +497,7 @@ generate.truncated.triangular <- function(a, b, orig.tri.dist) { (result.numerator / result.denominator) - trun.tri.mean ^ 2 } else if (M <= a & a <= b) { result.numerator <- - (4 * (b ^ 3) * U - 4 * (a ^ 3) * U - 2 * b ^ 4 + 3 * a ^ 4) / (6 * (U - L) * (U - M)) + (-3 * (a ^ 4) + 4 * (a ^ 3) * U + (b ^ 3) * (3 * b - 4 * U)) / (6 * (L - U) * (U - M)) result.denominator <- orig.tri.dist$cdf(b) - orig.tri.dist$cdf(a) (result.numerator / result.denominator) - trun.tri.mean ^ 2 diff --git a/Scripts/Generate-Truncated-Triangular.R b/Scripts/Generate-Truncated-Triangular.R index 2302968..741594c 100644 --- a/Scripts/Generate-Truncated-Triangular.R +++ b/Scripts/Generate-Truncated-Triangular.R @@ -8,7 +8,7 @@ generate.truncated.triangular <- function(a, b, orig.tri.dist) { L <- orig.tri.dist$tri.lower U <- orig.tri.dist$tri.upper - ## Throw an error if the new bounds do not fall witin the old bounds of the triangular pdf. + ## Throw an error if the new bounds do not fall within the old bounds of the triangular pdf. if (a < L | b > U) { stop("The new bounds of the pdf do not fall within the original range") } @@ -47,27 +47,17 @@ generate.truncated.triangular <- function(a, b, orig.tri.dist) { } ## Create a vector of length 1 that describes the mean of the distribution - trun.tri.mean <- if (a <= b & b < M) { - result.numerator <- - (2 * (-3 * L * ((b ^ 2) / 2 - (a ^ 2) / 2) + b ^ 3 - a ^ 3)) / (3 * (U - L) * (M - L)) - result.denominator <- - orig.tri.dist$cdf(b) - orig.tri.dist$cdf(a) - result.numerator / result.denominator + trun.tri.denominator <- (orig.tri.dist$cdf(b) - orig.tri.dist$cdf(a)) + trun.tri.meanNumerator <- if (a <= b & b < M) { + (2 * (-3 * L * ((b ^ 2) / 2 - (a ^ 2) / 2) + b ^ 3 - a ^ 3)) / (3 * (U - L) * (M - L)) } else if (a < M & b >= M) { - result.numerator <- - ( - -M ^ 3 * U - 2 * a ^ 3 * U + 3 * L * a ^ 2 * U + 3 * M * b ^ 2 * U - 3 * L * b ^ 2 * U + L * M ^ 3 + 2 * M * a ^ 3 - 3 * L * M * a ^ 2 - 2 * M * b ^ 3 + 2 * L * b ^ 3 - ) / (3 * (U - L) * (M - L) * (U - M)) - result.denominator <- - orig.tri.dist$cdf(b) - orig.tri.dist$cdf(a) - result.numerator / result.denominator + ( + -M ^ 3 * U - 2 * a ^ 3 * U + 3 * L * a ^ 2 * U + 3 * M * b ^ 2 * U - 3 * L * b ^ 2 * U + L * M ^ 3 + 2 * M * a ^ 3 - 3 * L * M * a ^ 2 - 2 * M * b ^ 3 + 2 * L * b ^ 3 + ) / (3 * (U - L) * (M - L) * (U - M)) } else if (M <= a & a <= b) { - result.numerator <- - (3 * b ^ 2 * U - 3 * a ^ 2 * U - 2 * b ^ 3 + 2 * a ^ 3) / (3 * (U - L) * (U - M)) - result.denominator <- - orig.tri.dist$cdf(b) - orig.tri.dist$cdf(a) - result.numerator / result.denominator + (-2 * (a ^ 3) + (b ^ 2)* (2 * b - 3 * U) + 3 * (a ^ 2) * U) / (3* (L - U) * (U - M)) } + trun.tri.mean <- (trun.tri.meanNumerator / trun.tri.denominator) ## Create a vector of length 1 that describes the median of the distribution @@ -89,28 +79,17 @@ generate.truncated.triangular <- function(a, b, orig.tri.dist) { trun.tri.lower <- a ## Create a vector of length 1 that describes the variance of the distribution - trun.tri.var <- if (a <= b & b < M) { - result.numerator <- - (-4 * L * ((b ^ 3) / 3 - (a ^ 3) / 3) + b ^ 4 - a ^ 4) / (2 * (U - L) * (M - L)) - result.denominator <- - orig.tri.dist$cdf(b) - orig.tri.dist$cdf(a) - (result.numerator / result.denominator) - trun.tri.mean ^ 2 + trun.tri.varNumerator <- if (a <= b & b < M) { + (-4 * L * ((b ^ 3) / 3 - (a ^ 3) / 3) + b ^ 4 - a ^ 4) / (2 * (U - L) * (M - L)) } else if (a < M & b >= M) { - result.numerator <- - ( - -M ^ 4 * U - 3 * a ^ 4 * U + 4 * L * a ^ 3 * U + 4 * M * b ^ 3 * U - 4 * L * b ^ 3 * U + L * M ^ 4 + 3 * M * a ^ 4 - 4 * L * M * a ^ 3 - 3 * M * b ^ 4 + 3 * L * b ^ 4 - ) / (6 * (U - L) * (M - L) * (U - M)) - result.denominator <- - orig.tri.dist$cdf(b) - orig.tri.dist$cdf(a) - (result.numerator / result.denominator) - trun.tri.mean ^ 2 + ( + -M ^ 4 * U - 3 * a ^ 4 * U + 4 * L * a ^ 3 * U + 4 * M * b ^ 3 * U - 4 * L * b ^ 3 * U + L * M ^ 4 + 3 * M * a ^ 4 - 4 * L * M * a ^ 3 - 3 * M * b ^ 4 + 3 * L * b ^ 4 + ) / (6 * (U - L) * (M - L) * (U - M)) } else if (M <= a & a <= b) { - result.numerator <- - (4 * (b ^ 3) * U - 4 * (a ^ 3) * U - 2 * b ^ 4 + 3 * a ^ 4) / (6 * (U - L) * (U - M)) - result.denominator <- - orig.tri.dist$cdf(b) - orig.tri.dist$cdf(a) - (result.numerator / result.denominator) - trun.tri.mean ^ 2 + (-3 * (a ^ 4) + 4 * (a ^ 3) * U + (b ^ 3) * (3 * b - 4 * U)) / (6 * (L - U) * (U - M)) } - + trun.tri.var <- (trun.tri.varNumerator / trun.tri.denominator) - (trun.tri.mean ^ 2) + ## Build the list and return it. This list contains all major properties of the truncated triangular distribution return( list( @@ -122,7 +101,8 @@ generate.truncated.triangular <- function(a, b, orig.tri.dist) { trun.tri.mode = trun.tri.mode, trun.tri.upper = trun.tri.upper, trun.tri.lower = trun.tri.lower, - trun.tri.var = trun.tri.var + trun.tri.var = trun.tri.var, + trun.tri.originalarguments = list(L=L,a=a,M=M,b=b,U=U) ) ) } From c86a6d05f11ec44bb6416a680c9a1bebb69d7dae Mon Sep 17 00:00:00 2001 From: "Robert J. van der Boon" Date: Fri, 24 Mar 2023 17:23:30 +0100 Subject: [PATCH 2/2] More flexible Simulation.R for easier debugging --- Scripts/Simulation.R | 323 +++++++++++++++++++++++++++++-------------- 1 file changed, 217 insertions(+), 106 deletions(-) diff --git a/Scripts/Simulation.R b/Scripts/Simulation.R index bc2fd69..24a8172 100644 --- a/Scripts/Simulation.R +++ b/Scripts/Simulation.R @@ -1,119 +1,230 @@ ## Import relevant functions to generate triangular and truncated triangular distributions with cached properties (created as a list) source(file.path("Scripts", "Generate-Triangular.R")) source(file.path("Scripts", "Generate-Truncated-Triangular.R")) +test <- function(testcaseparameters, plotPDFs=FALSE, plotCDFs=FALSE, plotSIM=FALSE) { + my.L <- testcaseparameters[1] + my.a <- testcaseparameters[2] + my.M <- testcaseparameters[3] + my.b <- testcaseparameters[4] + my.U <- testcaseparameters[5] + print(paste("Test Case: ", "L=",my.L,", a=", my.a, ", M=",my.M,", b=",my.b,", U=", my.U, sep="", collapse="")) + + my.plotPDFs <- plotPDFs + my.plotCDFs <- plotCDFs + + my.sim.n <- 20000 + my.sim.triFrom <- my.L + my.sim.triTo <- my.U + my.sim.trunFrom <- my.a - 0.0001 + my.sim.trunTo <- my.b + 0.0001 + my.sim.triFromCdf <- my.L + my.sim.triToCdf <- my.U + my.sim.trunFromCdf <- my.L + my.sim.trunToCdf <- my.U + my.sim.plot <- plotSIM + + ## Set a seed so results are replicable + set.seed(10) + + ## Create a list that contains the major properties of a triangular distribution with L, U, and M + my.tri.dist <- generate.triangular(L = my.L, U = my.U, M = my.M) + + ## Create a list that contains the major properties of a truncated triangular distribution between a and b, based on the previous triangular + my.trun.tri.dist <- + generate.truncated.triangular(a = my.a, + b = my.b, + orig.tri.dist = my.tri.dist) + + print("Triangular Distribution:") + print(paste(" Analytical Mean: ", my.tri.dist$tri.mean)) + print(paste(" Analytical Median: ", my.tri.dist$tri.median)) + print(paste(" Analytical Variance: ", my.tri.dist$tri.var)) + print("Truncated Triangular Distribution:") + print(paste(" Analytical Mean: ", my.trun.tri.dist$trun.tri.mean)) + print(paste(" Analytical Median: ", my.trun.tri.dist$trun.tri.median)) + print(paste(" Analytical Variance: ", my.trun.tri.dist$trun.tri.var)) -## Set a seed so results are replicable -set.seed(10) - -## Create a list that contains the major properties of a triangular distribution with L = 2, U = 9, and M = 7 -my.tri.dist <- generate.triangular(L = 2, U = 9, M = 7) - -## Create a list that contains the major properties of a truncated triangular distribution between a = 3 and b = 8, based on the previous triangular -my.trun.tri.dist <- - generate.truncated.triangular(a = 3, - b = 8, - orig.tri.dist = my.tri.dist) - -## Plot and describe the triangular distribution -plot.function( - function(x) { - sapply(x, FUN = my.tri.dist$pdf) - }, - from = 2, - to = 9, - n = 100000, - ylab = "f(x)", - xlab = "x", - main = "Triangular Distribution - PDF" -) -print("Triangular Distribution:") -print(paste("Analytical Mean: ", my.tri.dist$tri.mean)) -print(paste("Analytical Median: ", my.tri.dist$tri.median)) -print(paste("Analytical Variance: ", my.tri.dist$tri.var)) - -## Plot and describe the truncated triangular distribution -plot.function( - function(x) { - sapply(x, FUN = my.trun.tri.dist$pdf) - }, - from = 2.99999, - to = 8.0001, - n = 100000, - ylab = "f( x | a < x < b )", - xlab = "x", - main = "Truncated Triangular Distribution - PDF" -) -print("Truncated Triangular Distribution:") -print(paste("Analytical Mean: ", my.trun.tri.dist$trun.tri.mean)) -print(paste("Analytical Median: ", my.trun.tri.dist$trun.tri.median)) -print(paste("Analytical Variance: ", my.trun.tri.dist$trun.tri.var)) - -## Simulate 10000 instances of a random variable with f(x) = truncated triangular by trial and error. -vec <- rep(0, 10000) -success.count <- 0 -while (success.count < 10000) { - u.1 <- - runif( - n = 1, - min = my.trun.tri.dist$trun.tri.lower, - max = my.trun.tri.dist$trun.tri.upper + ## Plot and describe the PDFs + if (my.plotPDFs) { + plot.function( + function(x) { + sapply(x, FUN = my.trun.tri.dist$pdf) + }, + from = my.sim.triFrom, + to = my.sim.triTo, + n = my.sim.n, + ylab = "f( x | a < x < b )", + xlab = "x", + main = "Truncated Triangular Distribution - PDF" ) - u.2 <- - runif( - n = 1, - min = 0, - max = my.trun.tri.dist$pdf(my.trun.tri.dist$trun.tri.mode) + + plot.function( + function(x) { + sapply(x, FUN = my.tri.dist$pdf) + }, + from = my.sim.triFrom, + to = my.sim.triTo, + n = my.sim.n, + ylab = "f(x)", + xlab = "x", + main = "Triangular Distribution - PDF", + add = TRUE, + col = "red" ) - if (u.2 <= my.trun.tri.dist$pdf(u.1)) { - success.count <- success.count + 1 - vec[success.count] <- u.1 } -} + ## Plot and describe the CDFs + if (my.plotCDFs) { + plot.function( + function(x) { + sapply(x, FUN = my.trun.tri.dist$cdf) + }, + from = my.sim.trunFromCdf, + to = my.sim.trunToCdf, + n = my.sim.n, + ylab = "F( x | a < x < b )", + xlab = "x", + main = "Truncated Triangular Distribution - CDF" + ) + plot.function( + function(x) { + sapply(x, FUN = my.tri.dist$cdf) + }, + from = my.sim.triFromCdf, + to = my.sim.triToCdf, + n = my.sim.n, + ylab = "F(x)", + xlab = "x", + main = "Triangular Distribution - CDF", + add = TRUE, + col = "red" + ) + plot.function( + function(x) { sapply(x, FUN = function(y){0.5})}, + from = my.sim.triFromCdf, + to = my.sim.triToCdf, + n = my.sim.n, + add = TRUE, + col = "blue" + ) + } + + ## Simulate n instances of a random variable with f(x) = truncated triangular by trial and error. + vec <- rep(0, my.sim.n) + success.count <- 0 + success.truntrimax <- if (my.trun.tri.dist$trun.tri.mode <= my.trun.tri.dist$trun.tri.lower) { + # at lower it will be 0, go slightly above that + (my.trun.tri.dist$pdf(my.trun.tri.dist$trun.tri.lower+0.00000001)) + } else if (my.trun.tri.dist$trun.tri.mode > my.trun.tri.dist$trun.tri.upper) { + (my.trun.tri.dist$pdf(my.trun.tri.dist$trun.tri.upper)) + } else { + (my.trun.tri.dist$pdf(my.trun.tri.dist$trun.tri.mode)) + } + while (success.count < my.sim.n) { + u.1 <- + runif( + n = 1, + min = my.trun.tri.dist$trun.tri.lower, + max = my.trun.tri.dist$trun.tri.upper + ) + u.2 <- + runif( + n = 1, + min = 0, + max = success.truntrimax + ) + if (u.2 <= my.trun.tri.dist$pdf(u.1)) { + success.count <- success.count + 1 + vec[success.count] <- u.1 + } + } + + ## Simulate n instances of a random variable with f(x) = truncated triangular by inverse cdf method (faster than by trial and error) + rand.unif <- runif(my.sim.n, min = 0, max = 1) + vec2 <- sapply(rand.unif, my.trun.tri.dist$inverse.cdf) + + print(paste("Simulation 1 Mean: ", mean(vec))) + print(paste("Simulation 1 Variance: ", var(vec))) + print(paste("Simulation 2 Mean: ", mean(vec2))) + print(paste("Simulation 2 Variance: ", var(vec2))) -## Simulate 10000 instances of a random variable with f(x) = truncated triangular by inverse cdf method (faster than by trial and error) -rand.unif <- runif(10000, min = 0, max = 1) -vec2 <- sapply(rand.unif, my.trun.tri.dist$inverse.cdf) + if (my.trun.tri.dist$trun.tri.mean < my.a + || my.trun.tri.dist$trun.tri.mean > my.b) { + print(paste("FAILED test 'lower <= mean <= upper': ", + my.a, + "<=", my.trun.tri.dist$trun.tri.mean, + "<=", my.b)) + } + if (my.trun.tri.dist$trun.tri.var < 0) { + print(paste("FAILED test 'variance is positive': ", + my.trun.tri.dist$trun.tri.var)) + } + + ## following tests are not statistically significant, but are a quick check to verify that + ## it is at least in the right ballpark for our test cases + if (abs((mean(vec2) - my.trun.tri.dist$trun.tri.mean)/my.trun.tri.dist$trun.tri.mean) > 0.01) { + print(paste("FAILED test 'simulated mean should be close to analytical mean': ", + abs((mean(vec2) - my.trun.tri.dist$trun.tri.mean)/my.trun.tri.dist$trun.tri.mean))) + } + if (abs((var(vec2) - my.trun.tri.dist$trun.tri.var) / my.trun.tri.dist$trun.tri.mode) > 0.01) { + print(paste("FAILED test 'simulated variance should be close to analytical variance': ", + abs((var(vec2) - my.trun.tri.dist$trun.tri.var) / my.trun.tri.dist$trun.tri.mode))) + } -## Make the truncated PDF compatible with vectors for plotting -trun.vector.pdf <- function(x) { - sapply(x, FUN = my.trun.tri.dist$pdf) + if (my.sim.plot) { + ## Make the truncated PDF compatible with vectors for plotting + trun.vector.pdf <- function(x) { + sapply(x, FUN = my.trun.tri.dist$pdf) + } + + ## Plot and describe the first simulation + hist(vec, + xlab = "x", + breaks = 20, + freq = FALSE) + + curve( + trun.vector.pdf, + from = my.sim.trunFrom, + to = my.sim.trunTo, + n = my.sim.n, + col = "red", + add = TRUE + ) + + ## Plot and describe the second simulation + hist(vec2, + xlab = "x", + breaks = 20, + freq = FALSE) + curve( + trun.vector.pdf, + from = my.sim.trunFrom, + to = my.sim.trunTo, + n = my.sim.n, + col = "red", + add = TRUE + ) + } } -## Plot and describe the first simulation -hist(vec, - xlab = "x", - breaks = (my.trun.tri.dist$trun.tri.upper - my.trun.tri.dist$trun.tri.lower) * 10, - freq = FALSE) - -curve( - trun.vector.pdf, - from = 2.99999, - to = 8.0001, - n = 100000, - col = "red", - add = TRUE +testcases <- list( + #c(L, a, M, b, U) + #c(0.5, 0.75, 0.50, 1.25, 2), + #c(0.5, 0.75, 0.55, 1.25, 2), + #c(0.5, 0.75, 0.749, 1.25, 2), + #c(0.5, 0.75, 0.75, 1.25, 2), + #c(0.5, 0.75, 0.7500001, 1.25, 2), + #c(0.5, 0.75, 1.00, 1.25, 2), + #c(0.5, 0.75, 1.249999, 1.25, 2), + #c(0.5, 0.75, 1.25, 1.25, 2), + #c(0.5, 0.75, 1.251, 1.25, 2), + #c(0.5, 0.75, 1.50, 1.25, 2), + #c(0.5, 0.75, 2.00, 1.25, 2), + c(2, 3, 2.5, 8, 9), + c(2, 3, 8.5, 8, 9), + c(2, 3, 7, 8, 9) ) -#lines(y = trun.tri.dist.range, x = trun.tri.dist.domain, col = "red") -print(paste("Simulated Mean 1: ", mean(vec))) -print(paste("Simulated Median 1: ", median(vec))) -print(paste("Simulated Variance 1: ", var(vec))) - -## Plot and describe the second simulation - -hist(vec2, - xlab = "x", - breaks = (my.trun.tri.dist$trun.tri.upper - my.trun.tri.dist$trun.tri.lower) * 10, - freq = FALSE) -curve( - trun.vector.pdf, - from = 2.99999, - to = 8.0001, - n = 100000, - col = "red", - add = TRUE -) -#lines(y = trun.tri.dist.range, x = trun.tri.dist.domain, col = "red") -print(paste("Simulated Mean 2: ", mean(vec2))) -print(paste("Simulated Median 2: ", median(vec2))) -print(paste("Simulated Variance 2: ", var(vec2))) +for (testcase in testcases) { + test(unlist(testcase)) +}