From d5396ec8fbbc4583f053b8e218d6eaeb11a6d0e1 Mon Sep 17 00:00:00 2001
From: Amine MAHIDDINE
Date: Wed, 3 Sep 2025 13:32:18 +0100
Subject: [PATCH 1/2] FEAT: add multiclass support

---
 fair_mango/metrics/superset.py | 302 ++++++++++++++++++++++-----------
 fair_mango/typing.py           |   4 +
 tests/metrics/test_superset.py |  54 ++++++
 3 files changed, 262 insertions(+), 98 deletions(-)

diff --git a/fair_mango/metrics/superset.py b/fair_mango/metrics/superset.py
index 00dd4b9..fa8682d 100644
--- a/fair_mango/metrics/superset.py
+++ b/fair_mango/metrics/superset.py
@@ -66,42 +66,26 @@ def __init__(
         self,
         data: Dataset,
     ) -> None:
-        sensitive = data.sensitive
-        real_target = data.real_target
-        predicted_target = data.predicted_target
-        positive_target = data.positive_target
-        df = data.df
-        if predicted_target == []:
-            predicted_target = None
-        if sensitive is None:
+        self.dataset = data
+        self.sensitive = data.sensitive
+        self.real_target = data.real_target
+        self.predicted_target = data.predicted_target
+        self.positive_target = data.positive_target
+        self.df = data.df
+
+        if self.predicted_target == []:
+            self.predicted_target = None
+        if self.sensitive is None:
             raise AttributeError(
                 "'sensitive_group' attribute is required when data is pandas dataframe"
             )
-        pairs = list(
+        self.pairs = list(
             chain.from_iterable(
-                combinations(sensitive, r) for r in range(1, len(sensitive) + 1)
+                combinations(self.sensitive, r) for r in range(1, len(self.sensitive) + 1)
             )
         )
-        self.df = df
-        self.sensitive = sensitive
-        self.real_target = real_target
-        self.predicted_target = predicted_target
-        self.positive_target = positive_target
-        self.pairs = pairs
-
-    def _create_dataset_for_pair(self, pair: tuple[str, ...]) -> Dataset:
-        """Create a Dataset instance for a given pair of sensitive attributes."""
-        return Dataset(
-            self.df,
-            pair,
-            self.real_target,
-            self.predicted_target,
-            self.positive_target,
-        )
-
-
 class SupersetFairnessMetrics(Superset):
     """Calculate fairness metrics score for all combinations of sensitive
     attributes and ranks them.

     This class computes all applicable fairness metrics across different
@@ -183,6 +167,34 @@ def __init__(
             "false_positive_rate_ratio": FalsePositiveRateRatio,
         }

+    def _rank_binary(self, dataset: Dataset, positive_outcome: str | None = None) -> list[SupersetFairnessRankingResult]:
+        """Helper to run rank on a binary dataset."""
+        results = []
+        for pair in self.pairs:
+            pair_dataset = Dataset(
+                dataset.df,
+                pair,
+                dataset.real_target,
+                dataset.predicted_target,
+                dataset.positive_target,
+            )
+            rankings = {}
+            for metric_name, metric_class in self._dataset_metrics.items():
+                metric = metric_class(pair_dataset)
+                rankings[metric_name] = metric.rank()
+            if self.predicted_target is not None:
+                for metric_name, metric_class in self._model_metrics.items():
+                    metric = metric_class(pair_dataset)
+                    rankings[metric_name] = metric.rank()
+            results.append(
+                SupersetFairnessRankingResult(
+                    sensitive_attributes=pair,
+                    rankings=rankings,
+                    positive_outcome=positive_outcome,
+                )
+            )
+        return results
+
     def rank(self) -> list[SupersetFairnessRankingResult]:
         """Calculate fairness metrics rankings for all combinations of
         sensitive attributes and all applicable fairness metrics.
@@ -194,26 +206,59 @@ def rank(self) -> list[SupersetFairnessRankingResult]:
             - sensitive_attributes: List of sensitive attribute names for this combination
             - rankings: Dictionary mapping metric names to their ranking results
         """
-        results = []
+        y_true = self.dataset.df[self.dataset.real_target]
+        is_multiclass = y_true.nunique() > 2

-        for pair in self.pairs:
-            dataset = self._create_dataset_for_pair(pair)
+        if not is_multiclass:
+            return self._rank_binary(self.dataset)

-            rankings = {}
+        results = []

-            for metric_name, metric_class in self._dataset_metrics.items():
-                metric = metric_class(dataset)
-                rankings[metric_name] = metric.rank()
+        classes = sorted(y_true.unique())
+        original_df = self.dataset.df.copy()
+
+        for positive_outcome in classes:
+            temp_df = original_df.copy()
+            real_target_col = self.dataset.real_target
+            pred_target_col = self.dataset.predicted_target
+            temp_df[real_target_col] = (temp_df[real_target_col] == positive_outcome).astype(int)
+            if pred_target_col and pred_target_col in temp_df.columns:
+                temp_df[pred_target_col] = (temp_df[pred_target_col] == positive_outcome).astype(int)
+            binary_dataset = Dataset(
+                df=temp_df,
+                sensitive=self.dataset.sensitive,
+                real_target=self.dataset.real_target,
+                predicted_target=self.dataset.predicted_target,
+                positive_target=1,
+            )
+            class_results = self._rank_binary(binary_dataset, str(positive_outcome))
+            results.extend(class_results)
+        return results
+    def _summary_binary(self, dataset: Dataset, positive_outcome: str | None = None) -> list[SupersetFairnessSummaryResult]:
+        """Helper to run summary on a binary dataset."""
+        results = []
+        for pair in self.pairs:
+            pair_dataset = Dataset(
+                dataset.df,
+                pair,
+                dataset.real_target,
+                dataset.predicted_target,
+                dataset.positive_target,
+            )
+            summaries: dict[str, FairnessSummaryDifferenceResult | FairnessSummaryDifferenceFairResult | FairnessSummaryRatioResult | FairnessSummaryRatioFairResult] = {}
+            for metric_name, metric_class in self._dataset_metrics.items():
+                metric = metric_class(pair_dataset)
+                summaries[metric_name] = metric.summary()
             if self.predicted_target is not None:
                 for metric_name, metric_class in self._model_metrics.items():
-                    metric = metric_class(dataset)
-                    rankings[metric_name] = metric.rank()
-
+                    metric = metric_class(pair_dataset)
+                    summaries[metric_name] = metric.summary()
             results.append(
-                SupersetFairnessRankingResult(
-                    sensitive_attributes=list(pair),
-                    rankings=rankings,
+                SupersetFairnessSummaryResult(
+                    sensitive_attributes=pair,
+                    summaries=summaries,
+                    positive_outcome=positive_outcome,
                 )
             )
@@ -230,40 +275,68 @@ def summary(self) -> list[SupersetFairnessSummaryResult]:
             - sensitive_attributes: List of sensitive attribute names for this combination
             - summaries: Dictionary mapping metric names to their summary results
         """
-        results = []
+        y_true = self.dataset.df[self.dataset.real_target]
+        is_multiclass = y_true.nunique() > 2

-        for pair in self.pairs:
-            dataset = self._create_dataset_for_pair(pair)
+        if not is_multiclass:
+            return self._summary_binary(self.dataset)

-            summaries: dict[
-                str,
-                FairnessSummaryDifferenceResult
-                | FairnessSummaryDifferenceFairResult
-                | FairnessSummaryRatioResult
-                | FairnessSummaryRatioFairResult,
-            ] = {}
+        results = []

-            for metric_name, metric_class in self._dataset_metrics.items():
-                metric = metric_class(dataset)
-                summaries[metric_name] = metric.summary()
+        classes = sorted(y_true.unique())
+        original_df = self.dataset.df.copy()
+
+        for positive_outcome in classes:
+            temp_df = original_df.copy()
+            real_target_col = self.dataset.real_target
+            pred_target_col = self.dataset.predicted_target
+            temp_df[real_target_col] = (temp_df[real_target_col] == positive_outcome).astype(int)
+            if pred_target_col and pred_target_col in temp_df.columns:
+                temp_df[pred_target_col] = (temp_df[pred_target_col] == positive_outcome).astype(int)
+            binary_dataset = Dataset(
+                df=temp_df,
+                sensitive=self.dataset.sensitive,
+                real_target=self.dataset.real_target,
+                predicted_target=self.dataset.predicted_target,
+                positive_target=1,
+            )
+            class_results = self._summary_binary(binary_dataset, str(positive_outcome))
+            results.extend(class_results)
+        return results
+    def _is_biased_binary(self, dataset: Dataset, thresholds: dict[str, float], positive_outcome: str | None = None) -> list[SupersetBiasResult]:
+        """Helper to run is_biased on a binary dataset."""
+        effective_thresholds = DEFAULT_BIAS_THRESHOLDS | thresholds
+        results = []
+        for pair in self.pairs:
+            pair_dataset = Dataset(
+                dataset.df,
+                pair,
+                dataset.real_target,
+                dataset.predicted_target,
+                dataset.positive_target,
+            )
+            bias_results = {}
+            for metric_name, metric_class in self._dataset_metrics.items():
+                metric = metric_class(pair_dataset)
+                threshold = effective_thresholds[metric_name]
+                bias_results[metric_name] = metric.is_biased(threshold)
             if self.predicted_target is not None:
                 for metric_name, metric_class in self._model_metrics.items():
-                    metric = metric_class(dataset)
-                    summaries[metric_name] = metric.summary()
-
+                    metric = metric_class(pair_dataset)
+                    threshold = effective_thresholds[metric_name]
+                    bias_results[metric_name] = metric.is_biased(threshold)
             results.append(
-                SupersetFairnessSummaryResult(
-                    sensitive_attributes=list(pair),
-                    summaries=summaries,
+                SupersetBiasResult(
+                    sensitive_attributes=pair,
+                    bias_results=bias_results,
+                    positive_outcome=positive_outcome,
                 )
             )

         return results

-    def is_biased(
-        self, thresholds: dict[str, float] | None = None
-    ) -> list[SupersetBiasResult]:
+    def is_biased(self, thresholds: dict[str, float] | None = None) -> list[SupersetBiasResult]:
         """Determine bias for all combinations of sensitive attributes and
         all applicable fairness metrics.
@@ -282,34 +355,33 @@ def is_biased(
         """
         if thresholds is None:
             thresholds = {}
+
+        y_true = self.dataset.df[self.dataset.real_target]
+        is_multiclass = y_true.nunique() > 2

-        effective_thresholds = DEFAULT_BIAS_THRESHOLDS | thresholds
+        if not is_multiclass:
+            return self._is_biased_binary(self.dataset, thresholds)

         results = []
-
-        for pair in self.pairs:
-            dataset = self._create_dataset_for_pair(pair)
-
-            bias_results = {}
-
-            for metric_name, metric_class in self._dataset_metrics.items():
-                metric = metric_class(dataset)
-                threshold = effective_thresholds[metric_name]
-                bias_results[metric_name] = metric.is_biased(threshold)
-
-            if self.predicted_target is not None:
-                for metric_name, metric_class in self._model_metrics.items():
-                    metric = metric_class(dataset)
-                    threshold = effective_thresholds[metric_name]
-                    bias_results[metric_name] = metric.is_biased(threshold)
-
-            results.append(
-                SupersetBiasResult(
-                    sensitive_attributes=list(pair),
-                    bias_results=bias_results,
-                )
+        classes = sorted(y_true.unique())
+        original_df = self.dataset.df.copy()
+
+        for positive_outcome in classes:
+            temp_df = original_df.copy()
+            real_target_col = self.dataset.real_target
+            pred_target_col = self.dataset.predicted_target
+            temp_df[real_target_col] = (temp_df[real_target_col] == positive_outcome).astype(int)
+            if pred_target_col and pred_target_col in temp_df.columns:
+                temp_df[pred_target_col] = (temp_df[pred_target_col] == positive_outcome).astype(int)
+            binary_dataset = Dataset(
+                df=temp_df,
+                sensitive=self.dataset.sensitive,
+                real_target=self.dataset.real_target,
+                predicted_target=self.dataset.predicted_target,
+                positive_target=1,
             )
-
+            class_results = self._is_biased_binary(binary_dataset, thresholds, str(positive_outcome))
+            results.extend(class_results)
         return results
@@ -344,6 +416,28 @@ def __init__(
         super().__init__(data)
         self.metrics = [SelectionRate, PerformanceMetric, ConfusionMatrix]

+    def _evaluate_binary(self, dataset: Dataset, positive_outcome: str | None = None) -> list[SupersetPerformanceMetricsResult]:
+        """Helper to run evaluate on a binary dataset."""
+        results = []
+        for pair in self.pairs:
+            pair_dataset = Dataset(
+                dataset.df,
+                pair,
+                dataset.real_target,
+                dataset.predicted_target,
+                dataset.positive_target,
+            )
+            combined_results = self._initialize_base_results(pair_dataset)
+            self._process_metrics_for_dataset(pair_dataset, combined_results)
+            results.append(
+                SupersetPerformanceMetricsResult(
+                    sensitive_attributes=pair,
+                    data=combined_results,
+                    positive_outcome=positive_outcome,
+                )
+            )
+        return results
+
     def evaluate(self) -> list[SupersetPerformanceMetricsResult]:
         """Calculate performance evaluation metrics for different subsets of
         sensitive attributes.

         Ex:
@@ -403,20 +497,32 @@ def evaluate(self) -> list[SupersetPerformanceMetricsResult]:
             }
         ]
         """
-        results = []
+        y_true = self.dataset.df[self.dataset.real_target]
+        is_multiclass = y_true.nunique() > 2

-        for pair in self.pairs:
-            dataset = self._create_dataset_for_pair(pair)
-            combined_results = self._initialize_base_results(dataset)
-            self._process_metrics_for_dataset(dataset, combined_results)
+        if not is_multiclass:
+            return self._evaluate_binary(self.dataset)

-            results.append(
-                SupersetPerformanceMetricsResult(
-                    sensitive_attributes=pair,
-                    data=combined_results,
-                )
+        results = []
+        classes = sorted(y_true.unique())
+        original_df = self.dataset.df.copy()
+
+        for positive_outcome in classes:
+            temp_df = original_df.copy()
+            real_target_col = self.dataset.real_target
+            pred_target_col = self.dataset.predicted_target
+            temp_df[real_target_col] = (temp_df[real_target_col] == positive_outcome).astype(int)
+            if pred_target_col and pred_target_col in temp_df.columns:
+                temp_df[pred_target_col] = (temp_df[pred_target_col] == positive_outcome).astype(int)
+            binary_dataset = Dataset(
+                df=temp_df,
+                sensitive=self.dataset.sensitive,
+                real_target=self.dataset.real_target,
+                predicted_target=self.dataset.predicted_target,
+                positive_target=1,
             )
-
+            class_results = self._evaluate_binary(binary_dataset, str(positive_outcome))
+            results.extend(class_results)
         return results

     def _initialize_base_results(
diff --git a/fair_mango/typing.py b/fair_mango/typing.py
index 722b45a..8234479 100644
--- a/fair_mango/typing.py
+++ b/fair_mango/typing.py
@@ -168,6 +168,7 @@ class SupersetFairnessRankingResult:

     sensitive_attributes: SensitiveAttributeT
     rankings: dict[str, list[RankResult]]
+    positive_outcome: str | None = None


 @dataclass
@@ -182,6 +183,7 @@ class SupersetFairnessSummaryResult:
         | FairnessSummaryRatioResult
         | FairnessSummaryRatioFairResult,
     ]
+    positive_outcome: str | None = None


 @dataclass
@@ -190,6 +192,7 @@ class SupersetBiasResult:

     sensitive_attributes: SensitiveAttributeT
     bias_results: dict[str, bool]
+    positive_outcome: str | None = None


 @dataclass
@@ -198,4 +201,5 @@ class SupersetPerformanceMetricsResult:

     sensitive_attributes: SensitiveAttributeT
     data: list[CombinedPerformanceResult]
+    positive_outcome: str | None = None
\ No newline at end of file
diff --git a/tests/metrics/test_superset.py b/tests/metrics/test_superset.py
index 56f5ae6..a8adf53 100644
--- a/tests/metrics/test_superset.py
+++ b/tests/metrics/test_superset.py
@@ -16,6 +16,12 @@
 df = pd.read_csv("tests/data/heart_data.csv")

+df_multiclass = pd.DataFrame({
+    'sensitive': ['A', 'A', 'B', 'B', 'A', 'B', 'A', 'B'],
+    'real_target': ['cat', 'dog', 'cat', 'dog', 'cat', 'bird', 'dog', 'bird'],
+    'pred_target': ['cat', 'cat', 'dog', 'dog', 'cat', 'bird', 'dog', 'cat'],
+})
+
 dataset1 = Dataset(df, ["Sex"], "HeartDisease")
 dataset2 = Dataset(df, ["Sex"], "HeartDisease", "HeartDiseasePred")
@@ -194,6 +200,30 @@ def test_super_set_fairness_metrics(
             f"Score mismatch for {expected_key}: expected {expected_score}, got {actual_score}"
         )

+def test_super_set_fairness_metrics_multiclass():
+    """
+    Tests SupersetFairnessMetrics with a multi-class target.
+    """
+    dataset = Dataset(
+        df=df_multiclass,
+        sensitive=['sensitive'],
+        real_target='real_target',
+        predicted_target='pred_target'
+    )
+
+    super_set_fairness_metrics = SupersetFairnessMetrics(dataset)
+    results = super_set_fairness_metrics.rank()
+
+    assert isinstance(results, list)
+    assert len(results) == 3
+
+    classes_of_interest = {res.positive_outcome for res in results}
+    assert classes_of_interest == {'cat', 'dog', 'bird'}
+
+    for result in results:
+        assert isinstance(result, SupersetFairnessRankingResult)
+        assert result.sensitive_attributes == ('sensitive',)
+

 super_set_performance_metrics_expected_result_2 = [
     {
@@ -311,3 +341,27 @@ def test_super_set_performance_metrics(
         SupersetPerformanceMetrics(
             data,
         ).evaluate()
+
+def test_super_set_performance_metrics_multiclass():
+    """
+    Tests SupersetPerformanceMetrics with a multi-class target.
+    """
+    dataset = Dataset(
+        df=df_multiclass,
+        sensitive=['sensitive'],
+        real_target='real_target',
+        predicted_target='pred_target'
+    )
+
+    super_set_performance_metrics = SupersetPerformanceMetrics(dataset)
+    results = super_set_performance_metrics.evaluate()
+
+    assert isinstance(results, list)
+    assert len(results) == 3
+
+    classes_of_interest = {res.positive_outcome for res in results}
+    assert classes_of_interest == {'cat', 'dog', 'bird'}
+
+    for result in results:
+        assert isinstance(result, SupersetPerformanceMetricsResult)
+        assert result.sensitive_attributes == ('sensitive',)
\ No newline at end of file

From cc99d23e1114a39a5c0f28a8d991b3674870aef0 Mon Sep 17 00:00:00 2001
From: Amine MAHIDDINE
Date: Wed, 3 Sep 2025 14:40:08 +0100
Subject: [PATCH 2/2] FIX: Corrected sensitive_attributes comparison in test_superset.py

---
 tests/metrics/test_superset.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/metrics/test_superset.py b/tests/metrics/test_superset.py
index a8adf53..e8559fc 100644
--- a/tests/metrics/test_superset.py
+++ b/tests/metrics/test_superset.py
@@ -170,10 +170,10 @@ def test_super_set_fairness_metrics(

     for result, expected_result in zip(results, expected_results):
         assert isinstance(result, SupersetFairnessRankingResult)
-        assert isinstance(result.sensitive_attributes, list)
+        assert isinstance(result.sensitive_attributes, tuple)
         assert isinstance(result.rankings, dict)

-        assert result.sensitive_attributes == expected_result["sensitive_attributes"]
+        assert result.sensitive_attributes == tuple(expected_result["sensitive_attributes"])

         for metric_name, expected_metric_results in expected_result["rankings"].items():
             if metric_name in result.rankings:
@@ -296,7 +296,7 @@ def test_super_set_performance_metrics(
         assert isinstance(result.sensitive_attributes, tuple)
         assert isinstance(result.data, list)

-        assert result.sensitive_attributes == expected_result["sensitive_attributes"]
+        assert result.sensitive_attributes == tuple(expected_result["sensitive_attributes"])

         result_list = result.data
         expected_result_list = expected_result["data"]