From 7bfd66564a94f029147605fbcdef56616a54c5e3 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Tue, 31 Mar 2026 19:43:30 +0000 Subject: [PATCH] perf: optimize results filtering loops in fix_encoding.py Co-authored-by: daggerstuff <261005129+daggerstuff@users.noreply.github.com> --- training/scripts/fix_encoding.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/training/scripts/fix_encoding.py b/training/scripts/fix_encoding.py index 6c93f76d..12593dbb 100644 --- a/training/scripts/fix_encoding.py +++ b/training/scripts/fix_encoding.py @@ -30,9 +30,7 @@ S3DatasetLoader, # noqa: E402 ) -logging.getLogger("ai.training.utils.s3_dataset_loader").setLevel( - logging.ERROR -) +logging.getLogger("ai.training.utils.s3_dataset_loader").setLevel(logging.ERROR) DEFAULT_S3_BUCKET = "pixel-data" MAX_RETRIES = 3 @@ -887,10 +885,16 @@ def print_results(results: list[dict[str, Any]], output: OutputHandler) -> None: output.header("šŸ“Š ENCODING FIX RESULTS") output.separator() - successful = [r for r in results if r.get("success")] - failed = [r for r in results if not r.get("success")] - skipped = [r for r in successful if r.get("skipped")] - fixed = [r for r in successful if not r.get("skipped")] + successful, failed, skipped, fixed = [], [], [], [] + for r in results: + if r.get("success"): + successful.append(r) + if r.get("skipped"): + skipped.append(r) + else: + fixed.append(r) + else: + failed.append(r) output.info(f"\nāœ… Fixed: {len(fixed)} files") output.info(f"ā­ļø Skipped (already UTF-8): {len(skipped)} files") @@ -919,10 +923,16 @@ def save_results( results: list[dict[str, Any]], ) -> Path: """Save encoding fix results to JSON file""" - successful = [r for r in results if r.get("success")] - failed = [r for r in results if not r.get("success")] - skipped = [r for r in successful if r.get("skipped")] - fixed = [r for r in successful if not r.get("skipped")] + successful, failed, skipped, fixed = [], [], [], [] + for r in results: + if r.get("success"): + successful.append(r) + if r.get("skipped"): + skipped.append(r) + else: + fixed.append(r) + else: + failed.append(r) results_path = project_root / "ai/training_ready/data/encoding_fix_results.json" with open(results_path, "w") as f: