Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ export(mtr_tpr)
export(mtr_true_negative_rate)
export(mtr_true_positive_rate)
export(mtr_youden_index)
export(mtr_mutual_info_score)
export(mtr_normalized_mutual_info_score)
export(mtr_adjusted_mutual_info_score)
importFrom(Rcpp,evalCpp)
importFrom(stats,complete.cases)
importFrom(stats,median)
Expand Down
59 changes: 59 additions & 0 deletions R/clustering.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
##' @title
##' Clustering Metrics Parameters
##'
##' @description
##' Documentation for shared parameters of functions that compute clustering
##' metrics.
##'
##' @param actual \code{[numeric]} The ground truth numeric vector.
##' @param predicted \code{[numeric]} The predicted numeric vector, where each
##' element in the vector is a prediction of the corresponding elements in
##' \code{actual}.
##' @name clustering_params
##' @include helper-functions.r
NULL


##' @title
##' Adjusted Mutual Information Score / Mututal Information Score
##'
##'
##' @description
##'
##' \code{mtr_mutual_info_score} measures the similarity, or mutual dependence
##' between two variable. The worst possible score is 0, higher values are
##' better.
##'
##'
##' @inheritParams clustering_params
##' @importFrom stats var
##' @seealso \code{\link{mtr_r2}}
##' @return A numeric scalar output
##' @author Phuc Nguyen
##' @examples
##'
##' act <- sample(1:10, 100, replace = T)
##' pred <- sample(1:10, 100, replace = T)
##' mtr_mutual_info_score(act, pred)
##'
##' act <- rep(c('a', 'b', 'c'), times = 4)
##' pred <- rep(c('a', 'b', 'c'), each = 4)
##' mtr_mutual_info_score(act, pred)
##'
##' @export
mtr_mutual_info_score <- function(actual, predicted) {
chec_empty_vec(actual)
check_equal_length(actual, predicted)
entropy(actual) + entropy(predicted) - joint_entropy(vec_1 = actual,
vec_2 = predicted)
}

mtr_normalized_mutual_info_score <- function(actual, predicted) {
mtr_mutual_info_score(actual = actual, predicted = predicted) /
mean(c(entropy(vec = actual), entropy(vec = predicted)))
}

mtr_adjusted_mutual_info_score <- function(actual, predicted) {
(mtr_mutual_info_score(actual, predicted) - expected_mutual_info(actual, predicted)) /
(mean(c(entropy(actual), entropy(predicted))) - expected_mutual_info(actual, predicted))
}
65 changes: 64 additions & 1 deletion R/helper-functions.r
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@


chec_empty_vec <- function(vec) {
if (length(vec) == 0) {
stop("vector must have positive length.", call. = FALSE)
}

invisible()
}

check_equal_length <- function(actual, predicted) {

Expand Down Expand Up @@ -60,3 +66,60 @@ trapezoid <- function(x, y) {

sum(dx * height)
}

class_prob <- function(vec, class) {
chec_empty_vec(vec)
length(which(vec == class)) / length(vec)
}

entropy <- function(vec) {
chec_empty_vec(vec)
li = c()
for (cl in unique(vec)) {
m = class_prob(vec = vec, class = cl)
li = c(li, -1 * m * log(m))
}
etp = sum(li, na.rm = TRUE)
etp
}

joint_class_prob <- function(vec_1, vec_2, class_1, class_2) {
chec_empty_vec(vec_1)
check_equal_length(vec_1, vec_2)
length(which(vec_1 == class_1 & vec_2 == class_2)) / length(vec_1)
}

joint_entropy <- function(vec_1, vec_2) {
check_equal_length(vec_1, vec_2)
li = c()
for(cl_1 in unique(vec_1)) {
for(cl_2 in unique(vec_2)) {
m = joint_class_prob(vec_1 = vec_1, vec_2 = vec_2,
class_1 = cl_1, class_2 = cl_2)
li = c(li, - 1 * m * log(m))
}
}
joint_etp = sum(li, na.rm = TRUE)
joint_etp
}

expected_mutual_info <- function(vec_1, vec_2) {
check_equal_length(vec_1, vec_2)
N = length(vec_1)
li = c()
for (i in unique(vec_1)) {
a = length(which(vec_1 == i))
for (j in unique(vec_2)) {
b = length(which(vec_2 == j))
for (nij in max(a + b - N, 0, na.rm = TRUE): min(a, b, na.rm = TRUE)) {
li = c(li, (nij / N) *
log((N * nij) / (a * b)) *
(factorial(a) * factorial(b) * factorial(N - a) * factorial(N - b)) /
(factorial(N) * factorial(nij) * factorial(a - nij) * factorial(b - nij) * factorial(N - a - b + nij)))
}
}
}
emi = sum(li, na.rm = TRUE)
emi
}

24 changes: 22 additions & 2 deletions TODO.org
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@




* List of performance metrics
Expand All @@ -23,6 +23,8 @@ Metrics that built around confusion matrix:

- [X] Balanced Accuracy

- [ ] Balanced Error Rate

- [X] Positive Predicted Value (PPV) / Precision

- [ ] Average Precision
Expand All @@ -31,12 +33,18 @@ Metrics that built around confusion matrix:

- [X] False Omission Rate (FOR)

- [ ] Positive Likelihood

- [ ] Negative Likelihood

- [X] Prevalence

- [X] F1 Score

- [X] Matthews Correlation Coefficient (MCC)

- [ ] Discriminant Power

- [X] Informedness (Bookmaker Informedness - BM) / Youden Index (Youden's J Statistic)

- [X] Markedness (MK)
Expand Down Expand Up @@ -77,17 +85,29 @@ Proper scoring rule:

- [X] Mean Squared Error

- [ ] Normalized Mean Squared Error

- [X] Root Mean Squared Error

- [X] Mean Squared Logarithmic Error

- [X] Median Absolute Error

- [ ] Mean Absolute Percentage Error

- [ ] Mean Absolute Scaled Error

- [ ] Median Squared Error

- [X] R2 Score

- [ ] Adjusted R2 Score

- [ ] M-Estimators

** Clustering tasks

- [ ] Adjusted Mututal Information Score / Mutual Information Score
- [X] Adjusted Mututal Information Score / Mutual Information Score

- [ ] Adjusted Rand Score

Expand Down
29 changes: 29 additions & 0 deletions inst/tinytest/test-clustering.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@

## test correctness ------------------------------------------------------------

vec_a = c(0, 1, 2, 0, 3, 4, 5, 1)
vec_b = c(1, 1, 0, 0, 2, 2, 2, 2)

tinytest::expect_equal(
mtr_mutual_info_score(vec_a, vec_b),
target = 0.693147180559945,
tol = 1e-7
)

tinytest::expect_equal(
mtr_normalized_mutual_info_score(vec_a, vec_b),
# target = 0.5163977794943221,
# changed test value due to respective example in sklearn.metrics is
# for version 0.21. Below value is compatible with version 0.22.
target = 0.5,
tol = 1e-7
)

tinytest::expect_equal(
mtr_adjusted_mutual_info_score(vec_a, vec_b),
# target = -0.10526315789473674,
# changed test value due to respective example in sklearn.metrics is
# for version 0.21. Below value is compatible with version 0.22.
target = -0.1666666667,
tol = 1e-7
)