From 19e09c79ca828380dedbc43ff0e40b29502efd80 Mon Sep 17 00:00:00 2001
From: ML Metrics Team <ml-metrics-dev@google.com>
Date: Tue, 17 Mar 2026 13:50:02 -0700
Subject: [PATCH] internal change

PiperOrigin-RevId: 885192332
---
 ml_metrics/_src/metrics/classification.py     | 1051 ++++++++++++++---
 .../_src/metrics/classification_test.py       |    6 -
 ml_metrics/_src/metrics/retrieval.py          |  390 +++++-
 ml_metrics/_src/metrics/retrieval_test.py     |    6 -
 ml_metrics/_src/metrics/text.py               |   16 +-
 ml_metrics/_src/metrics/utils.py              |    6 +-
 6 files changed, 1239 insertions(+), 236 deletions(-)

diff --git a/ml_metrics/_src/metrics/classification.py b/ml_metrics/_src/metrics/classification.py
index fbab66ba..1825ed81 100644
--- a/ml_metrics/_src/metrics/classification.py
+++ b/ml_metrics/_src/metrics/classification.py
@@ -22,38 +22,13 @@
 from ml_metrics._src.aggregates import classification
 from ml_metrics._src.aggregates import types
 from ml_metrics._src.metrics import utils
+from ml_metrics.google.tools.signal_registry import registry
 from ml_metrics._src.tools.telemetry import telemetry
 import numpy as np
 
 
 _StrOrMetric = classification.ConfusionMatrixMetric | str
 
-_METRIC_PYDOC_POSTFIX = """
-
-  Args:
-    y_true: array of sample's true labels
-    y_pred: array of sample's label predictions
-    pos_label: The class to report if average='binary' and the data is binary.
-      By default it is 1. Please set in case this default is not a valid label.
-      If the data are multiclass or multilabel, this will be ignored.
-    input_type: one input type from types.InputType
-    average: one average  type from types.AverageType
-    vocab: an external vocabulary that maps categorical value to integer class
-      id. This is required if computed distributed (when merge_accumulators is
-      called) and the average is macro where the class id mapping needs to be
-      stable.
-    dtype: dtype of the confusion matrix and all computations. Default to None
-      as it is inferred.
-    k_list: k_list is only applicable for average_type != Samples and
-      multiclass/multioutput input types. It is a list of topk each of which
-      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
-      descending order. Default 'None' means consider all outputs in the
-      prediction.
-
-  Returns:
-    Tuple with metric value(s)
-"""
-
 CalibrationHistogramResult = collections.namedtuple(
     'CalibrationHistogramResult',
     ('num_examples_hist', 'labels_hist', 'predictions_hist', 'bin_edges'),
@@ -61,7 +36,10 @@
 
 
 # TODO: b/368067018 - Inherit from ml_metrics._src.aggregates.stats.Histogram.
-@telemetry.class_monitor(category=telemetry.CATEGORY.METRIC)
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    usage_category=telemetry.CATEGORY.METRIC,
+)
 @dataclasses.dataclass
 class CalibrationHistogram(chainable.MergeableMetric):
   """Computes the Histogram of the inputs.
@@ -173,7 +151,10 @@ def result(self) -> CalibrationHistogramResult:
     )
 
 
-@telemetry.class_monitor(category=telemetry.CATEGORY.METRIC)
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    usage_category=telemetry.CATEGORY.METRIC,
+)
 class ClassificationAggFn(chainable.AggregateFn):
   """Wrapper over the Classification AggFn classes."""
 
@@ -238,6 +219,10 @@ def merge_states(self, states):
     return self.agg_fn.merge_states(states)
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def classification_metrics(
     metrics: Sequence[_StrOrMetric] | _StrOrMetric,
     *,
@@ -289,6 +274,10 @@ def classification_metrics(
   )(y_true, y_pred)
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def precision(
     y_true,
     y_pred,
@@ -299,7 +288,31 @@ def precision(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Precision classification metric."""
+  """Compute Precision classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.PRECISION,
@@ -312,9 +325,10 @@ def precision(
   )(y_true, y_pred)
 
 
-precision.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def ppv(
     y_true,
     y_pred,
@@ -325,7 +339,31 @@ def ppv(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute PPV classification metric."""
+  """Compute PPV classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.PPV,
@@ -338,9 +376,10 @@ def ppv(
   )(y_true, y_pred)
 
 
-ppv.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def recall(
     y_true,
     y_pred,
@@ -351,7 +390,31 @@ def recall(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Recall classification metric."""
+  """Compute Recall classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.RECALL,
@@ -364,9 +427,10 @@ def recall(
   )(y_true, y_pred)
 
 
-recall.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def f1_score(
     y_true,
     y_pred,
@@ -377,7 +441,31 @@ def f1_score(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute F1 Score classification metric."""
+  """Compute F1 Score classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.F1_SCORE,
@@ -390,9 +478,10 @@ def f1_score(
   )(y_true, y_pred)
 
 
-f1_score.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def accuracy(
     y_true,
     y_pred,
@@ -403,7 +492,31 @@ def accuracy(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Accuracy classification metric."""
+  """Compute Accuracy classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.ACCURACY,
@@ -416,9 +529,10 @@ def accuracy(
   )(y_true, y_pred)
 
 
-accuracy.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def binary_accuracy(
     y_true,
     y_pred,
@@ -429,7 +543,31 @@ def binary_accuracy(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Binary Accuracy classification metric."""
+  """Compute Binary Accuracy classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.BINARY_ACCURACY,
@@ -442,9 +580,10 @@ def binary_accuracy(
   )(y_true, y_pred)
 
 
-binary_accuracy.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def sensitivity(
     y_true,
     y_pred,
@@ -455,7 +594,31 @@ def sensitivity(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Sensitivity classification metric."""
+  """Compute Sensitivity classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.SENSITIVITY,
@@ -468,9 +631,10 @@ def sensitivity(
   )(y_true, y_pred)
 
 
-sensitivity.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def tpr(
     y_true,
     y_pred,
@@ -481,7 +645,31 @@ def tpr(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute TPR (True Positive rate/sensitivity) classification metric."""
+  """Compute TPR (True Positive rate/sensitivity) classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.TPR,
@@ -494,9 +682,10 @@ def tpr(
   )(y_true, y_pred)
 
 
-tpr.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def specificity(
     y_true,
     y_pred,
@@ -507,22 +696,47 @@ def specificity(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Specificity classification metric."""
-  utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
-  return ClassificationAggFn(
-      metrics=classification.ConfusionMatrixMetric.SPECIFICITY,
-      pos_label=pos_label,
-      input_type=input_type,
-      average=average,
-      vocab=vocab,
-      dtype=dtype,
-      k_list=k_list,
-  )(y_true, y_pred)
-
-
-specificity.__doc__ += _METRIC_PYDOC_POSTFIX
+  """Compute Specificity classification metric.
 
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
+  utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
+  return ClassificationAggFn(
+      metrics=classification.ConfusionMatrixMetric.SPECIFICITY,
+      pos_label=pos_label,
+      input_type=input_type,
+      average=average,
+      vocab=vocab,
+      dtype=dtype,
+      k_list=k_list,
+  )(y_true, y_pred)
 
+
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def tnr(
     y_true,
     y_pred,
@@ -533,7 +747,31 @@ def tnr(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute TNR (True negative rate) classification metric."""
+  """Compute TNR (True negative rate) classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.TNR,
@@ -546,9 +784,10 @@ def tnr(
   )(y_true, y_pred)
 
 
-tnr.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def fall_out(
     y_true,
     y_pred,
@@ -559,7 +798,31 @@ def fall_out(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Fall-out classification metric."""
+  """Compute Fall-out classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.FALL_OUT,
@@ -572,9 +835,10 @@ def fall_out(
   )(y_true, y_pred)
 
 
-fall_out.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def fpr(
     y_true,
     y_pred,
@@ -585,7 +849,31 @@ def fpr(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute FPR (False Positive rate) classification metric."""
+  """Compute FPR (False Positive rate) classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.FPR,
@@ -598,9 +886,10 @@ def fpr(
   )(y_true, y_pred)
 
 
-fpr.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def miss_rate(
     y_true,
     y_pred,
@@ -611,7 +900,31 @@ def miss_rate(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Miss Rate classification metric."""
+  """Compute Miss Rate classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.MISS_RATE,
@@ -624,9 +937,10 @@ def miss_rate(
   )(y_true, y_pred)
 
 
-miss_rate.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def fnr(
     y_true,
     y_pred,
@@ -637,7 +951,31 @@ def fnr(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute FNR (False Negative Rate) classification metric."""
+  """Compute FNR (False Negative Rate) classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.FNR,
@@ -650,9 +988,10 @@ def fnr(
   )(y_true, y_pred)
 
 
-fnr.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def negative_predictive_value(
     y_true,
     y_pred,
@@ -663,7 +1002,31 @@ def negative_predictive_value(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Negative Predictive Value classification metric."""
+  """Compute Negative Predictive Value classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Default to None
+      as it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.NEGATIVE_PREDICTIVE_VALUE,
@@ -676,9 +1039,10 @@ def negative_predictive_value(
   )(y_true, y_pred)
 
 
-negative_predictive_value.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def npv(
     y_true,
     y_pred,
@@ -689,7 +1053,31 @@ def npv(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute alias of Negative Predictive Value classification metric."""
+  """Compute NPV (alias of Negative Predictive Value) classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.NPV,
@@ -702,9 +1090,10 @@ def npv(
   )(y_true, y_pred)
 
 
-npv.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def false_discovery_rate(
     y_true,
     y_pred,
@@ -715,7 +1104,31 @@ def false_discovery_rate(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute False Discovery Rate classification metric."""
+  """Compute False Discovery Rate classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.FALSE_DISCOVERY_RATE,
@@ -728,9 +1141,10 @@ def false_discovery_rate(
   )(y_true, y_pred)
 
 
-false_discovery_rate.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def false_omission_rate(
     y_true,
     y_pred,
@@ -741,7 +1155,31 @@ def false_omission_rate(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute False Omission Rate classification metric."""
+  """Compute False Omission Rate classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.FALSE_OMISSION_RATE,
@@ -754,9 +1192,10 @@ def false_omission_rate(
   )(y_true, y_pred)
 
 
-false_omission_rate.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def threat_score(
     y_true,
     y_pred,
@@ -767,7 +1206,31 @@ def threat_score(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Threat Score classification metric."""
+  """Compute Threat Score classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.THREAT_SCORE,
@@ -780,9 +1243,10 @@ def threat_score(
   )(y_true, y_pred)
 
 
-threat_score.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def positive_likelihood_ratio(
     y_true,
     y_pred,
@@ -793,7 +1257,31 @@ def positive_likelihood_ratio(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Positive Likelihood Ratio classification metric."""
+  """Compute Positive Likelihood Ratio classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.POSITIVE_LIKELIHOOD_RATIO,
@@ -806,9 +1294,10 @@ def positive_likelihood_ratio(
   )(y_true, y_pred)
 
 
-positive_likelihood_ratio.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def negative_likelihood_ratio(
     y_true,
     y_pred,
@@ -819,7 +1308,31 @@ def negative_likelihood_ratio(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Negative Likelihood Ratio classification metric."""
+  """Compute Negative Likelihood Ratio classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.NEGATIVE_LIKELIHOOD_RATIO,
@@ -832,9 +1345,10 @@ def negative_likelihood_ratio(
   )(y_true, y_pred)
 
 
-negative_likelihood_ratio.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def diagnostic_odds_ratio(
     y_true,
     y_pred,
@@ -845,7 +1359,31 @@ def diagnostic_odds_ratio(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Diagnostic Odds Ratio classification metric."""
+  """Compute Diagnostic Odds Ratio classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.DIAGNOSTIC_ODDS_RATIO,
@@ -858,9 +1396,10 @@ def diagnostic_odds_ratio(
   )(y_true, y_pred)
 
 
-diagnostic_odds_ratio.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def positive_predictive_value(
     y_true,
     y_pred,
@@ -871,7 +1410,31 @@ def positive_predictive_value(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Positive Predictive Value classification metric."""
+  """Compute Positive Predictive Value classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.POSITIVE_PREDICTIVE_VALUE,
@@ -884,9 +1447,10 @@ def positive_predictive_value(
   )(y_true, y_pred)
 
 
-positive_predictive_value.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def intersection_over_union(
     y_true,
     y_pred,
@@ -897,7 +1461,31 @@ def intersection_over_union(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Intersection over Union classification metric."""
+  """Compute Intersection over Union classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.INTERSECTION_OVER_UNION,
@@ -910,9 +1498,10 @@ def intersection_over_union(
   )(y_true, y_pred)
 
 
-intersection_over_union.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def prevalence(
     y_true,
     y_pred,
@@ -923,7 +1512,31 @@ def prevalence(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Prevalence classification metric."""
+  """Compute Prevalence classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.PREVALENCE,
@@ -936,9 +1549,10 @@ def prevalence(
   )(y_true, y_pred)
 
 
-prevalence.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def prevalence_threshold(
     y_true,
     y_pred,
@@ -949,7 +1563,31 @@ def prevalence_threshold(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Prevalence Threshold classification metric."""
+  """Compute Prevalence Threshold classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.PREVALENCE_THRESHOLD,
@@ -962,9 +1600,10 @@ def prevalence_threshold(
   )(y_true, y_pred)
 
 
-prevalence_threshold.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def matthews_correlation_coefficient(
     y_true,
     y_pred,
@@ -975,7 +1614,31 @@ def matthews_correlation_coefficient(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Matthews Correlation Coefficient classification metric."""
+  """Compute Matthews Correlation Coefficient classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.MATTHEWS_CORRELATION_COEFFICIENT,
@@ -988,9 +1651,10 @@ def matthews_correlation_coefficient(
   )(y_true, y_pred)
 
 
-matthews_correlation_coefficient.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def informedness(
     y_true,
     y_pred,
@@ -1001,7 +1665,31 @@ def informedness(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Informedness classification metric."""
+  """Compute Informedness classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.INFORMEDNESS,
@@ -1014,9 +1702,10 @@ def informedness(
   )(y_true, y_pred)
 
 
-informedness.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def markedness(
     y_true,
     y_pred,
@@ -1027,7 +1716,31 @@ def markedness(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Markedness classification metric."""
+  """Compute Markedness classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.MARKEDNESS,
@@ -1040,9 +1753,10 @@ def markedness(
   )(y_true, y_pred)
 
 
-markedness.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def balanced_accuracy(
     y_true,
     y_pred,
@@ -1053,7 +1767,31 @@ def balanced_accuracy(
     dtype: type[Any] | None = None,
     k_list: Sequence[int] | None = None,
 ) -> tuple[float, ...]:
-  """Compute Balanced Accuracy classification metric."""
+  """Compute Balanced Accuracy classification metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    pos_label: The class to report if average='binary' and the data is binary.
+      By default it is 1. Please set in case this default is not a valid label.
+      If the data are multiclass or multilabel, this will be ignored.
+    input_type: one input type from types.InputType
+    average: one average type from types.AverageType
+    vocab: an external vocabulary that maps categorical value to integer class
+      id. This is required if computed distributed (when merge_accumulators is
+      called) and the average is macro where the class id mapping needs to be
+      stable.
+    dtype: dtype of the confusion matrix and all computations. Defaults to
+      None, in which case it is inferred.
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+
+  Returns:
+    Tuple with metric value(s)
+  """
   utils.verify_input(y_true, y_pred, average, input_type, vocab, pos_label)
   return ClassificationAggFn(
       metrics=classification.ConfusionMatrixMetric.BALANCED_ACCURACY,
@@ -1064,6 +1802,3 @@ def balanced_accuracy(
       dtype=dtype,
       k_list=k_list,
   )(y_true, y_pred)
-
-
-balanced_accuracy.__doc__ += _METRIC_PYDOC_POSTFIX
diff --git a/ml_metrics/_src/metrics/classification_test.py b/ml_metrics/_src/metrics/classification_test.py
index f6e8fbdb..2da290d3 100644
--- a/ml_metrics/_src/metrics/classification_test.py
+++ b/ml_metrics/_src/metrics/classification_test.py
@@ -421,12 +421,6 @@ def test_individual_metrics(
         input_type=types.InputType.MULTICLASS_MULTIOUTPUT,
         average=types.AverageType.MICRO,
     )
-    metric_doc_details = "\n".join(
-        metric_fn.__doc__.split("\n")[1:]
-    ).strip()  # ignore the description line for comparison
-    self.assertEqual(
-        metric_doc_details, classification._METRIC_PYDOC_POSTFIX.strip()
-    )
     np.testing.assert_allclose(expected_no_k_list, actual_no_k_list)
     k_list = [1, 2]
     # k=[1 2], tp=[5 6], tn=[12 12], fp=[3 3], fn=[4 3]
diff --git a/ml_metrics/_src/metrics/retrieval.py b/ml_metrics/_src/metrics/retrieval.py
index ce3abede..a65706e9 100644
--- a/ml_metrics/_src/metrics/retrieval.py
+++ b/ml_metrics/_src/metrics/retrieval.py
@@ -16,29 +16,18 @@
 from collections.abc import Sequence
 from ml_metrics._src.aggregates import retrieval
 from ml_metrics._src.aggregates import types
+from ml_metrics.google.tools.signal_registry import registry
 
 
 # TODO: b/368688941 - Remove this alias once all users are migrated to the new
 # module structure.
 TopKRetrievalAggFn = retrieval.TopKRetrievalAggFn
 
-_METRIC_PYDOC_POSTFIX = """
-
-  Args:
-    y_true: array of sample's true labels
-    y_pred: array of sample's label predictions
-    k_list: k_list is only applicable for average_type != Samples and
-      multiclass/multioutput input types. It is a list of topk each of which
-      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
-      descending order. Default 'None' means consider all outputs in the
-      prediction.
-        input_type: one input type from types.InputType
-
-  Returns:
-    Tuple with metric value(s)
-"""
-
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def topk_retrieval_metrics(
     metrics: Sequence[retrieval.RetrievalMetric],
     *,
@@ -69,13 +58,31 @@ def topk_retrieval_metrics(
   )(y_true, y_pred)
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def precision(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute Precision Retrieval metric."""
+  """Compute Precision Retrieval metric.
+
+  Args:
+    y_true: array of sample's true labels
+    y_pred: array of sample's label predictions
+    k_list: k_list is only applicable for average_type != Samples and
+      multiclass/multioutput input types. It is a list of topk each of which
+      slices y_pred by y_pred[:topk] assuming the predictions are sorted in
+      descending order. Default 'None' means consider all outputs in the
+      prediction.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrieval(
       metrics=retrieval.RetrievalMetric.PRECISION,
       k_list=k_list,
@@ -83,16 +90,31 @@ def precision(
   ).as_agg_fn()(y_true, y_pred)
 
 
-precision.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def ppv(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute PPV Retrieval metric."""
+  """Compute PPV Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.PPV,
       k_list=k_list,
@@ -100,16 +122,31 @@ def ppv(
   )(y_true, y_pred)
 
 
-ppv.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def recall(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute Recall Retrieval metric."""
+  """Compute Recall Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.RECALL,
       k_list=k_list,
@@ -117,31 +154,63 @@ def recall(
   )(y_true, y_pred)
 
 
-recall.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def sensitivity(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute Sensitivity Retrieval metric."""
+  """Compute Sensitivity Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.SENSITIVITY,
       k_list=k_list,
       input_type=input_type,
   )(y_true, y_pred)
-sensitivity.__doc__ += _METRIC_PYDOC_POSTFIX
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def tpr(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute TPR Retrieval metric."""
+  """Compute TPR Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.TPR,
       k_list=k_list,
@@ -149,46 +218,95 @@ def tpr(
   )(y_true, y_pred)
 
 
-tpr.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def intersection_over_union(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute Intersection Over Union Retrieval metric."""
+  """Compute Intersection Over Union Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.INTERSECTION_OVER_UNION,
       k_list=k_list,
       input_type=input_type,
   )(y_true, y_pred)
-intersection_over_union.__doc__ += _METRIC_PYDOC_POSTFIX
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def positive_predictive_value(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute Positive Predictive Value Retrieval metric."""
+  """Compute Positive Predictive Value Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.POSITIVE_PREDICTIVE_VALUE,
       k_list=k_list,
       input_type=input_type,
   )(y_true, y_pred)
-positive_predictive_value.__doc__ += _METRIC_PYDOC_POSTFIX
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def f1_score(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute F1 Score Retrieval metric."""
+  """Compute F1 Score Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.F1_SCORE,
       k_list=k_list,
@@ -196,16 +314,31 @@ def f1_score(
   )(y_true, y_pred)
 
 
-f1_score.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def miss_rate(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute Miss Rate Retrieval metric."""
+  """Compute Miss Rate Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.MISS_RATE,
       k_list=k_list,
@@ -213,124 +346,257 @@ def miss_rate(
   )(y_true, y_pred)
 
 
-miss_rate.__doc__ += _METRIC_PYDOC_POSTFIX
-
-
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def mean_average_precision(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute Mean Average Precision Retrieval metric."""
+  """Compute Mean Average Precision Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.MEAN_AVERAGE_PRECISION,
       k_list=k_list,
       input_type=input_type,
   )(y_true, y_pred)
-mean_average_precision.__doc__ += _METRIC_PYDOC_POSTFIX
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def mean_reciprocal_rank(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute Mean Reciprocal Rank Retrieval metric."""
+  """Compute Mean Reciprocal Rank Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.MEAN_RECIPROCAL_RANK,
       k_list=k_list,
       input_type=input_type,
   )(y_true, y_pred)
-mean_reciprocal_rank.__doc__ += _METRIC_PYDOC_POSTFIX
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def accuracy(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute Accuracy Retrieval metric."""
+  """Compute Accuracy Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.ACCURACY,
       k_list=k_list,
       input_type=input_type,
   )(y_true, y_pred)
-accuracy.__doc__ += _METRIC_PYDOC_POSTFIX
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def dcg_score(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute DCG Score Retrieval metric."""
+  """Compute DCG Score Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.DCG_SCORE,
       k_list=k_list,
       input_type=input_type,
   )(y_true, y_pred)
-dcg_score.__doc__ += _METRIC_PYDOC_POSTFIX
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def ndcg_score(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute NDCG Score Retrieval metric."""
+  """Compute NDCG Score Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.NDCG_SCORE,
       k_list=k_list,
       input_type=input_type,
   )(y_true, y_pred)
-ndcg_score.__doc__ += _METRIC_PYDOC_POSTFIX
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def fowlkes_mallows_index(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute Fowlkes Mallows Index Retrieval metric."""
+  """Compute Fowlkes Mallows Index Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.FOWLKES_MALLOWS_INDEX,
       k_list=k_list,
       input_type=input_type,
   )(y_true, y_pred)
-fowlkes_mallows_index.__doc__ += _METRIC_PYDOC_POSTFIX
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def false_discovery_rate(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute False Discovery Rate Retrieval metric."""
+  """Compute False Discovery Rate Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.FALSE_DISCOVERY_RATE,
       k_list=k_list,
       input_type=input_type,
   )(y_true, y_pred)
-false_discovery_rate.__doc__ += _METRIC_PYDOC_POSTFIX
 
 
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    enable_telemetry=False,
+)
 def threat_score(
     y_true,
     y_pred,
     k_list: list[int] | None = None,
     input_type: types.InputType = types.InputType.MULTICLASS_MULTIOUTPUT,
 ) -> tuple[float, ...]:
-  """Compute Threat Score Retrieval metric."""
+  """Compute Threat Score Retrieval metric.
+
+  Args:
+    y_true: array of the samples' true labels
+    y_pred: array of the samples' predicted labels
+    k_list: Only applicable for average_type != Samples and
+      multiclass/multioutput input types. A list of top-k cutoffs, each of
+      which slices y_pred as y_pred[:k], assuming the predictions are sorted
+      in descending order. The default None means that all outputs in the
+      prediction are considered.
+    input_type: one input type from types.InputType
+
+  Returns:
+    Tuple with metric value(s)
+  """
   return retrieval.TopKRetrievalAggFn(
       metrics=retrieval.RetrievalMetric.THREAT_SCORE,
       k_list=k_list,
       input_type=input_type,
   )(y_true, y_pred)
-threat_score.__doc__ += _METRIC_PYDOC_POSTFIX
diff --git a/ml_metrics/_src/metrics/retrieval_test.py b/ml_metrics/_src/metrics/retrieval_test.py
index 09ab5dbf..ef4d4041 100644
--- a/ml_metrics/_src/metrics/retrieval_test.py
+++ b/ml_metrics/_src/metrics/retrieval_test.py
@@ -157,12 +157,6 @@ def test_individual_metric(self, metric_fn, expected):
     k_list = [1, 2]
     y_pred = [["y"], ["n", "y"], ["y"], ["n"], ["y"], ["n"], ["n"], ["u"]]
     y_true = [["y"], ["y"], ["n"], ["n"], ["y", "n"], ["n"], ["y"], ["u"]]
-    metric_doc_details = "\n".join(
-        metric_fn.__doc__.split("\n")[1:]
-    ).strip()  # ignore the description line for comparison
-    self.assertEqual(
-        metric_doc_details, retrieval._METRIC_PYDOC_POSTFIX.strip()
-    )
     np.testing.assert_allclose(
         expected,
         metric_fn(
diff --git a/ml_metrics/_src/metrics/text.py b/ml_metrics/_src/metrics/text.py
index e7285afa..5a376709 100644
--- a/ml_metrics/_src/metrics/text.py
+++ b/ml_metrics/_src/metrics/text.py
@@ -19,10 +19,14 @@
 from ml_metrics._src.aggregates import stats
 from ml_metrics._src.aggregates import text
 from ml_metrics._src.signals import text as text_scores
+from ml_metrics.google.tools.signal_registry import registry
 from ml_metrics._src.tools.telemetry import telemetry
 
 
-@telemetry.function_monitor(category=telemetry.CATEGORY.METRIC)
+@registry.register_signal(
+    signal_modality=registry.SignalModality.TEXT,
+    usage_category=telemetry.CATEGORY.METRIC,
+)
 def topk_word_ngrams(
     texts: Sequence[str],
     k: int,
@@ -74,7 +78,10 @@ def topk_word_ngrams(
   ).as_agg_fn()(texts)
 
 
-@telemetry.function_monitor(category=telemetry.CATEGORY.METRIC)
+@registry.register_signal(
+    signal_modality=registry.SignalModality.TEXT,
+    usage_category=telemetry.CATEGORY.METRIC,
+)
 def pattern_frequency(
     texts: Sequence[str], patterns: Sequence[str], count_duplicate: bool = True
 ) -> list[tuple[str, float]]:
@@ -109,7 +116,10 @@ def pattern_frequency(
   ).as_agg_fn()(texts)
 
 
-@telemetry.function_monitor(category=telemetry.CATEGORY.METRIC)
+@registry.register_signal(
+    signal_modality=registry.SignalModality.TEXT,
+    usage_category=telemetry.CATEGORY.METRIC,
+)
 def avg_alphabetical_char_count(
     texts: Sequence[str],
 ) -> stats.MeanAndVariance:
diff --git a/ml_metrics/_src/metrics/utils.py b/ml_metrics/_src/metrics/utils.py
index 8af4cd55..a3486e5b 100644
--- a/ml_metrics/_src/metrics/utils.py
+++ b/ml_metrics/_src/metrics/utils.py
@@ -5,10 +5,14 @@
 
 from ml_metrics._src.aggregates import classification
 from ml_metrics._src.aggregates import types
+from ml_metrics.google.tools.signal_registry import registry
 from ml_metrics._src.tools.telemetry import telemetry
 
 
-@telemetry.function_monitor(category=telemetry.CATEGORY.METRIC)
+@registry.register_signal(
+    signal_modality=registry.SignalModality.OTHER,
+    usage_category=telemetry.CATEGORY.METRIC,
+)
 def verify_input(y_true, y_pred, average, input_type, vocab, pos_label):
   if (
       average == types.AverageType.BINARY