From e402e3eb9cb8484c21811f0dd4e5b83aa7ff29ca Mon Sep 17 00:00:00 2001
From: minhsphuc12 <phucnm.ec@gmail.com>
Date: Wed, 24 Jul 2019 22:47:02 +0700
Subject: [PATCH 1/6] add few item to todo list

---
 TODO.org | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/TODO.org b/TODO.org
index 760e065..658d2c0 100644
--- a/TODO.org
+++ b/TODO.org
@@ -1,4 +1,4 @@
-
+
 
 
 * List of performance metrics
@@ -23,6 +23,8 @@ Metrics that built around confusion matrix:
 
 - [X] Balanced Accuracy
 
+- [ ] Balanced Error Rate
+
 - [X] Positive Predicted Value (PPV) / Precision
 
 - [ ] Average Precision
@@ -31,12 +33,20 @@ Metrics that built around confusion matrix:
 
 - [X] False Omission Rate (FOR)
 
+- [ ] Positive Likelihood Ratio (LR+)
+
+- [ ] Negative Likelihood Ratio (LR-)
+
 - [X] Prevalence
 
 - [X] F1 Score
 
+- [ ] F Measure (Weighted Harmonic Mean Between Precision And Recall) 
+
 - [X] Matthews Correlation Coefficient (MCC)
 
+- [ ] Discriminant Power
+
 - [X] Informedness (Bookmaker Informedness - BM) / Youden Index (Youden's J Statistic)
 
 - [X] Markedness (MK)
@@ -77,14 +87,26 @@ Proper scoring rule:
 
 - [X] Mean Squared Error
 
+- [ ] Normalized Mean Squared Error
+
 - [X] Root Mean Squared Error
 
 - [X] Mean Squared Logarithmic Error
 
 - [X] Median Absolute Error
 
+- [ ] Mean Absolute Percentage Error
+
+- [ ] Mean Absolute Scaled Error
+
+- [ ] Median Squared Error
+
 - [X] R2 Score
 
+- [ ] Adjusted R2 Score
+
+- [ ] M-Estimators
+
 ** Clustering tasks
 
 - [ ] Adjusted Mututal Information Score / Mutual Information Score

From e5eca2d7eeebf896606e44d35269b9052af21e92 Mon Sep 17 00:00:00 2001
From: minhsphuc12 <phucnm.ec@gmail.com>
Date: Sat, 27 Jul 2019 22:36:37 +0700
Subject: [PATCH 2/6] remove F-measure line from todo list as already
 implemented

---
 TODO.org | 2 --
 1 file changed, 2 deletions(-)

diff --git a/TODO.org b/TODO.org
index 658d2c0..c29993d 100644
--- a/TODO.org
+++ b/TODO.org
@@ -41,8 +41,6 @@ Metrics that built around confusion matrix:
 
 - [X] F1 Score
 
-- [ ] F Measure (Weighted Harmonic Mean Between Precision And Recall) 
-
 - [X] Matthews Correlation Coefficient (MCC)
 
 - [ ] Discriminant Power

From 249af3aa58a2340c58c357c356ac72034783de08 Mon Sep 17 00:00:00 2001
From: minhsphuc12 <phucnm.ec@gmail.com>
Date: Sat, 27 Jul 2019 23:08:15 +0700
Subject: [PATCH 3/6] add code, helper function, tests for mutual info score

---
 R/clustering.r                  | 59 ++++++++++++++++++++++++++++++
 R/helper-functions.r            | 65 ++++++++++++++++++++++++++++++++-
 inst/tinytest/test-clustering.r | 23 ++++++++++++
 3 files changed, 146 insertions(+), 1 deletion(-)
 create mode 100644 R/clustering.r
 create mode 100644 inst/tinytest/test-clustering.r

diff --git a/R/clustering.r b/R/clustering.r
new file mode 100644
index 0000000..4380947
--- /dev/null
+++ b/R/clustering.r
@@ -0,0 +1,59 @@
+##' @title
+##' Clustering Metrics Parameters
+##'
+##' @description
+##' Documentation for shared parameters of functions that compute clustering
+##' metrics.
+##'
+##' @param actual \code{[vector]} The ground truth cluster labels (numeric or
+##'     character).
+##' @param predicted \code{[vector]} The predicted cluster labels, where each
+##'     element is a prediction of the corresponding element in \code{actual}.
+##' @name clustering_params
+##' @include helper-functions.r
+NULL
+
+
+##' @title
+##' Mutual Information Score (Plain / Normalized / Adjusted)
+##' @description
+##' \code{mtr_mutual_info_score} measures the similarity, or mutual dependence,
+##' between two variables. The worst possible score is 0, higher values are
+##' better. \code{mtr_normalized_mutual_info_score} divides that score by the
+##' mean of the two entropies; \code{mtr_adjusted_mutual_info_score} also
+##' subtracts the expected mutual information from numerator and denominator.
+##'
+##' @inheritParams clustering_params
+##' @return A numeric scalar output
+##' @author Phuc Nguyen
+##' @examples
+##' act <- sample(1:10, 100, replace = TRUE)
+##' pred <- sample(1:10, 100, replace = TRUE)
+##' mtr_mutual_info_score(act, pred)
+##' act <- rep(c('a', 'b', 'c'), times = 4)
+##' pred <- rep(c('a', 'b', 'c'), each = 4)
+##' mtr_mutual_info_score(act, pred)
+##' mtr_normalized_mutual_info_score(act, pred)
+##' mtr_adjusted_mutual_info_score(act, pred)
+##' @export
+mtr_mutual_info_score <- function(actual, predicted) {
+    chec_empty_vec(actual)
+    check_equal_length(actual, predicted)
+    entropy(actual) + entropy(predicted) -
+        joint_entropy(vec_1 = actual, vec_2 = predicted)
+}
+
+##' @rdname mtr_mutual_info_score
+##' @export
+mtr_normalized_mutual_info_score <- function(actual, predicted) {
+    mtr_mutual_info_score(actual = actual, predicted = predicted) /
+        mean(c(entropy(vec = actual), entropy(vec = predicted)))
+}
+
+##' @rdname mtr_mutual_info_score
+##' @export
+mtr_adjusted_mutual_info_score <- function(actual, predicted) {
+    emi <- expected_mutual_info(actual, predicted)
+    (mtr_mutual_info_score(actual, predicted) - emi) /
+        (mean(c(entropy(actual), entropy(predicted))) - emi)
+}
diff --git a/R/helper-functions.r b/R/helper-functions.r
index 12d9585..67125ed 100644
--- a/R/helper-functions.r
+++ b/R/helper-functions.r
@@ -1,5 +1,11 @@
 
-
+## Abort with an error when `vec` has no elements; otherwise return NULL invisibly.
+chec_empty_vec <- function(vec) {
+    if (length(vec) < 1) {
+        stop("vector must have positive length.", call. = FALSE)
+    }
+    invisible()
+}
 
 check_equal_length <- function(actual, predicted) {
 
@@ -60,3 +66,60 @@ trapezoid <- function(x, y) {
 
     sum(dx * height)
 }
+
+class_prob <- function(vec, class) {
+    chec_empty_vec(vec)
+    sum(vec == class, na.rm = TRUE) / length(vec)  # share of `vec` equal to `class`
+}
+
+## Entropy (natural log) of the empirical distribution of the values in `vec`.
+entropy <- function(vec) {
+    chec_empty_vec(vec)
+    terms <- c()
+    for (label in unique(vec)) {
+        p <- class_prob(vec = vec, class = label)
+        terms <- c(terms, -p * log(p))
+    }
+    sum(terms, na.rm = TRUE)
+}
+
+joint_class_prob <- function(vec_1, vec_2, class_1, class_2) {
+    chec_empty_vec(vec_1)
+    check_equal_length(vec_1, vec_2)
+    sum(vec_1 == class_1 & vec_2 == class_2, na.rm = TRUE) / length(vec_1)
+}
+
+## Joint entropy (natural log) of the paired values of `vec_1` and `vec_2`.
+joint_entropy <- function(vec_1, vec_2) {
+    check_equal_length(vec_1, vec_2)
+    terms <- c()
+    for (label_1 in unique(vec_1)) {
+        for (label_2 in unique(vec_2)) {
+            p <- joint_class_prob(vec_1 = vec_1, vec_2 = vec_2,
+                                  class_1 = label_1, class_2 = label_2)
+            terms <- c(terms, -p * log(p))  # NaN when the pair never occurs
+        }
+    }
+    sum(terms, na.rm = TRUE)  # na.rm drops the NaN terms of empty pairs
+}
+
+## Expected mutual information under random permutation of the labels; the
+## hypergeometric weight uses stats::dhyper() since factorial(N > 170) is Inf.
+expected_mutual_info <- function(vec_1, vec_2) {
+    check_equal_length(vec_1, vec_2)
+    N <- length(vec_1)
+    emi <- 0
+    for (i in unique(vec_1)) {
+        a <- length(which(vec_1 == i))
+        for (j in unique(vec_2)) {
+            b <- length(which(vec_2 == j))
+            ## nij = 0 contributes nothing (0 * log(0) -> 0), so start at 1.
+            lo <- max(a + b - N, 1)
+            nij <- lo + seq_len(max(min(a, b) - lo + 1, 0)) - 1
+            emi <- emi + sum((nij / N) * log((N * nij) / (a * b)) *
+                             stats::dhyper(nij, a, N - a, b))
+        }
+    }
+    emi
+}
+
diff --git a/inst/tinytest/test-clustering.r b/inst/tinytest/test-clustering.r
new file mode 100644
index 0000000..998e093
--- /dev/null
+++ b/inst/tinytest/test-clustering.r
@@ -0,0 +1,23 @@
+
+## test correctness ------------------------------------------------------------
+
+vec_a = c(0, 1, 2, 0, 3, 4, 5, 1)  # passed as `actual` in every check below
+vec_b = c(1, 1, 0, 0, 2, 2, 2, 2)  # passed as `predicted`
+## mutual information is computed with natural logs; the target equals log(2)
+tinytest::expect_equal(
+    mtr_mutual_info_score(vec_a, vec_b),
+    target = 0.693147180559945,
+    tol = 1e-7
+)
+
+tinytest::expect_equal(
+    mtr_normalized_mutual_info_score(vec_a, vec_b),
+    target = 0.5163977794943221,
+    tol = 1e-7
+)
+
+tinytest::expect_equal(
+    mtr_adjusted_mutual_info_score(vec_a, vec_b),
+    target = -0.10526315789473674,
+    tol = 1e-7
+)

From c998ce2938c22f75d38cf27ef9b8a4c205247c53 Mon Sep 17 00:00:00 2001
From: minhsphuc12 <phucnm.ec@gmail.com>
Date: Mon, 29 Jul 2019 01:32:43 +0700
Subject: [PATCH 4/6] update test due to detecting error in sklearn
 implementation

---
 inst/tinytest/test-clustering.r | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/inst/tinytest/test-clustering.r b/inst/tinytest/test-clustering.r
index 998e093..97007c7 100644
--- a/inst/tinytest/test-clustering.r
+++ b/inst/tinytest/test-clustering.r
@@ -12,12 +12,18 @@ tinytest::expect_equal(
 
 tinytest::expect_equal(
     mtr_normalized_mutual_info_score(vec_a, vec_b),
-    target = 0.5163977794943221,
+    # target = 0.5163977794943221,
+    # changed test value due to respective example in sklearn.metrics is 
+    # wrongly implemented
+    target = 0.5,
     tol = 1e-7
 )
 
 tinytest::expect_equal(
     mtr_adjusted_mutual_info_score(vec_a, vec_b),
-    target = -0.10526315789473674,
+    # target = -0.10526315789473674,
+    # changed test value due to respective example in sklearn.metrics is 
+    # wrongly implemented
+    target = -0.1666666667,
     tol = 1e-7
 )

From 3c27f8ac090b4e682f5adbe068916e9bba88e289 Mon Sep 17 00:00:00 2001
From: minhsphuc12 <phucnm.ec@gmail.com>
Date: Mon, 29 Jul 2019 01:36:48 +0700
Subject: [PATCH 5/6] update namespace and TODO list

---
 NAMESPACE | 3 +++
 TODO.org  | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index 8442b94..77b6ef5 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -67,6 +67,9 @@ export(mtr_tpr)
 export(mtr_true_negative_rate)
 export(mtr_true_positive_rate)
 export(mtr_youden_index)
+export(mtr_mutual_info_score)
+export(mtr_normalized_mutual_info_score)
+export(mtr_adjusted_mutual_info_score)
 importFrom(Rcpp,evalCpp)
 importFrom(stats,complete.cases)
 importFrom(stats,median)
diff --git a/TODO.org b/TODO.org
index c29993d..fb2ff27 100644
--- a/TODO.org
+++ b/TODO.org
@@ -107,7 +107,7 @@ Proper scoring rule:
 
 ** Clustering tasks
 
-- [ ] Adjusted Mututal Information Score / Mutual Information Score
+- [X] Adjusted Mututal Information Score / Mutual Information Score
 
 - [ ] Adjusted Rand Score
 

From 679c0cdeaf52a876e37829b7a6693392ab913e95 Mon Sep 17 00:00:00 2001
From: minhsphuc12 <phucnm.ec@gmail.com>
Date: Mon, 29 Jul 2019 19:54:48 +0700
Subject: [PATCH 6/6] fix comment on why test value is not the same as python
 version

---
 inst/tinytest/test-clustering.r | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inst/tinytest/test-clustering.r b/inst/tinytest/test-clustering.r
index 97007c7..404d25b 100644
--- a/inst/tinytest/test-clustering.r
+++ b/inst/tinytest/test-clustering.r
@@ -14,7 +14,7 @@ tinytest::expect_equal(
     mtr_normalized_mutual_info_score(vec_a, vec_b),
     # target = 0.5163977794943221,
     # changed test value due to respective example in sklearn.metrics is 
-    # wrongly implemented
+    # for version 0.21. Below value is compatible with version 0.22.
     target = 0.5,
     tol = 1e-7
 )
@@ -23,7 +23,7 @@ tinytest::expect_equal(
     mtr_adjusted_mutual_info_score(vec_a, vec_b),
     # target = -0.10526315789473674,
     # changed test value due to respective example in sklearn.metrics is 
-    # wrongly implemented
+    # for version 0.21. Below value is compatible with version 0.22.
     target = -0.1666666667,
     tol = 1e-7
 )