Skip to content

Commit 92e8765

Browse files
committed
merge error
1 parent 9a9088e commit 92e8765

File tree

1 file changed

+0
-23
lines changed

1 file changed

+0
-23
lines changed

eval_protocol/pytest/evaluation_test.py

Lines changed: 0 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -299,13 +299,6 @@ def wrapper_body(**kwargs):
299299

300300
cohort_id = generate_id()
301301

302-
def _log_eval_error(
303-
status: Literal["finished", "error"], rows: Optional[List[EvaluationRow]] | None, passed: bool
304-
) -> None:
305-
log_eval_status_and_rows(eval_metadata, rows, status, passed, default_logger)
306-
307-
cohort_id = generate_id()
308-
309302
def _log_eval_error(
310303
status: Literal["finished", "error"], rows: Optional[List[EvaluationRow]] | None, passed: bool
311304
) -> None:
@@ -461,25 +454,9 @@ def _log_eval_error(
461454
sum([r.evaluation_result.score for r in result if r.evaluation_result]) / len(result)
462455
for result in all_results
463456
]
464-
print(f"SCORES: {scores}")
465457
agg_score = aggregate(scores, aggregation_method)
466458
score_std = statistics.stdev(scores) if len(scores) > 1 else 0.0
467459

468-
# Compute 95% confidence interval for the fixed-set mean μ (by-question, using repeats)
469-
ci_low: float | None = None
470-
ci_high: float | None = None
471-
if aggregation_method == "mean":
472-
try:
473-
result_ci = compute_fixed_set_mu_ci([item for sublist in all_results for item in sublist])
474-
mu_ci_low, mu_ci_high = result_ci[1], result_ci[2]
475-
if mu_ci_low is not None and mu_ci_high is not None:
476-
ci_low = float(mu_ci_low)
477-
ci_high = float(mu_ci_high)
478-
# Keep agg_score as-is (mean over scores). For equal repeats per question these match.
479-
except Exception:
480-
ci_low = None
481-
ci_high = None
482-
483460
# Compute 95% confidence interval for the fixed-set mean μ (by-question, using repeats)
484461
ci_low: float | None = None
485462
ci_high: float | None = None

0 commit comments

Comments
 (0)