diff --git a/README.md b/README.md index a5ee994..8048c23 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,20 @@ saved to the folder profile, `Queue Folder Encode` is unlocked so the real folder job can enter the encode queue without letting a stale unsaved preview slip into production work. +For scene-aware engine research, generate a repeatable bakeoff plan from an +existing manifest instead of replacing the production engine path directly: + +```bash +uv run mediaforce bakeoff path/to/run-manifest.json --all \ + --output ~/Desktop/mediaforce-bakeoff.json +``` + +The bakeoff plan carries the same size-first defaults and per-item resolved +policy used by Folder Studio, then lays out candidate commands and tool +requirements for the current `ab-av1` path plus Av1an, Xav, and Auto-Boost. Use +the plan to collect output size, runtime, selected CRF or quantizer, metric +score, and review artifacts before choosing a production engine migration. + You can run Mediaforce either directly with `python3` or through `uv`: ```bash diff --git a/mediaforce/cli.py b/mediaforce/cli.py index 0b24d23..fd5ea0d 100644 --- a/mediaforce/cli.py +++ b/mediaforce/cli.py @@ -10,6 +10,7 @@ from mediaforce.core.db_tables import run_manifests as run_manifests_table from mediaforce.execution import describe_item_plan, encode_manifest_items, promote_manifest_items, \ validate_manifest_items +from mediaforce.encoding.bakeoff import DEFAULT_BAKEOFF_ENGINES, build_bakeoff_plan, write_bakeoff_plan from mediaforce.library.folder_profiles import inspect_prefix from mediaforce.library.planner import recommend_item from mediaforce.library.run_manifests import build_run_manifest as build_db_run_manifest, \ @@ -98,6 +99,32 @@ def build_parser() -> argparse.ArgumentParser: _add_manifest_selection_args(compare_parser, require_manifest=False) _add_compare_clip_args(compare_parser) + bakeoff_parser = subparsers.add_parser("bakeoff", help="Write a scene-aware engine bakeoff plan") + 
_add_manifest_selection_args(bakeoff_parser, require_manifest=False) + bakeoff_parser.add_argument( + "--engine", + action="append", + choices=DEFAULT_BAKEOFF_ENGINES, + default=[], + help="Candidate engine to include; defaults to all candidates", + ) + bakeoff_parser.add_argument( + "--output", + type=Path, + help="Write the bakeoff plan JSON to an explicit path", + ) + bakeoff_parser.add_argument( + "--artifact-dir", + type=Path, + help="Directory where bakeoff artifacts should be written by the candidate commands", + ) + bakeoff_parser.add_argument( + "--clip-duration", + type=float, + default=20.0, + help="Review clip duration expected from each candidate engine", + ) + return parser @@ -293,6 +320,29 @@ def main(argv: Sequence[str] | None = None) -> int: ) return 0 + if args.command == "bakeoff": + manifest_path = _resolve_manifest_path(connection, args.manifest) + manifest = _load_manifest(manifest_path) + indexes = _resolve_indexes(manifest, args) + artifact_dir = args.artifact_dir or config.paths.review_dir / "engine-bakeoff" + plan = build_bakeoff_plan( + config, + manifest, + indexes=indexes, + engines=args.engine or None, + output_dir=artifact_dir, + clip_duration_seconds=args.clip_duration, + ) + output_path = args.output or config.paths.run_manifest_dir / f"bakeoff-{manifest.get('run_id', 'latest')}.json" + write_bakeoff_plan(plan, output_path) + print(f"bakeoff plan {output_path}") + for item in plan["items"]: + print( + f" item {item['index']}: target={_format_optional_size(item['target_size_bytes'])} " + f"runtime={item['duration_seconds']:.0f}s engines={len(item['engines'])}" + ) + return 0 + return 1 @@ -586,6 +636,12 @@ def _format_size(size_bytes: int) -> str: return f"{size_bytes}B" +def _format_optional_size(size_bytes: int | None) -> str: + if size_bytes is None: + return "unset" + return _format_size(size_bytes) + + def _format_signed_bytes(size_bytes: int) -> str: prefix = "+" if size_bytes >= 0 else "-" return 
f"{prefix}{_format_size(abs(size_bytes))}" diff --git a/mediaforce/encoding/bakeoff.py b/mediaforce/encoding/bakeoff.py new file mode 100644 index 0000000..4516f0b --- /dev/null +++ b/mediaforce/encoding/bakeoff.py @@ -0,0 +1,365 @@ +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from mediaforce.core.config import MediaforceConfig +from mediaforce.core.type_defs import float_value, int_value, object_dict +from mediaforce.encoding.helpers import build_svt_params +from mediaforce.encoding.video_filters import build_video_filter + + +DEFAULT_BAKEOFF_ENGINES = ("ab-av1", "av1an", "xav", "auto-boost") + + +@dataclass(frozen=True, slots=True) +class BakeoffCandidate: + key: str + label: str + category: str + maturity: str + required_tools: tuple[str, ...] + metric_support: tuple[str, ...] + command: tuple[str, ...] + command_status: str + sources: tuple[str, ...] + notes: tuple[str, ...] + + +def build_bakeoff_plan( + config: MediaforceConfig, + manifest: dict[str, Any], + *, + indexes: list[int], + engines: list[str] | None = None, + output_dir: Path | None = None, + clip_duration_seconds: float = 20.0, +) -> dict[str, Any]: + requested_engines = _normalize_engines(engines) + items = [object_dict(item) for item in manifest.get("items", [])] + selected_items = [] + for index in indexes: + if index < 0 or index >= len(items): + raise IndexError(f"Manifest index out of range: {index}") + selected_items.append(_build_item_plan(config, items[index], index, requested_engines, output_dir, clip_duration_seconds)) + + return { + "schema_version": 1, + "purpose": "Compare current fast sampling against scene-aware candidate engines before production integration.", + "decision_model": str(config.video.get("decision_model") or "size_first_review"), + "default_targets": _default_targets(config), + "required_result_fields": [ + "engine", + "source_rel_path", + "runtime_seconds", + "output_size_bytes", + "output_size_percent", + 
"selected_crf_or_quantizer", + "metric_name", + "metric_score", + "encode_wall_seconds", + "review_artifacts", + "operator_verdict", + ], + "items": selected_items, + "recommendation_rule": ( + "Prefer engines that hit the configured size target while preserving acceptable review clips. " + "Metric scores are guardrails, not the final decision." + ), + } + + +def write_bakeoff_plan(plan: dict[str, Any], output_path: Path) -> Path: + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(json.dumps(plan, indent=2) + "\n") + return output_path + + +def _build_item_plan( + config: MediaforceConfig, + item: dict[str, Any], + index: int, + engines: list[str], + output_dir: Path | None, + clip_duration_seconds: float, +) -> dict[str, Any]: + policy = object_dict(item.get("resolved_policy")) + video_policy = object_dict(policy.get("video")) or config.video + width = int_value(item.get("width")) or None + height = int_value(item.get("height")) or None + source_path = str(item.get("source_path") or "") + rel_path = str(item.get("rel_path") or source_path) + item_output_dir = output_dir / f"item-{index:02d}" if output_dir is not None else None + size_target_bytes = _target_size_bytes(video_policy) + source_size_bytes = int_value(item.get("source_size_bytes")) or int_value(item.get("size_bytes")) + + return { + "index": index, + "rel_path": rel_path, + "source_path": source_path, + "source_size_bytes": source_size_bytes, + "duration_seconds": float_value(item.get("duration_seconds")), + "resolution": _resolution(width, height), + "target_size_bytes": size_target_bytes, + "target_size_percent": _target_size_percent(size_target_bytes, source_size_bytes), + "quality_floor": _quality_floor(video_policy), + "max_height": int_value(video_policy.get("max_height")), + "clip_duration_seconds": clip_duration_seconds, + "review_artifact_dir": str(item_output_dir) if item_output_dir is not None else None, + "engines": [_engine_candidate(engine, item, video_policy, 
item_output_dir) for engine in engines], + } + + +def _engine_candidate( + engine: str, + item: dict[str, Any], + video_policy: dict[str, Any], + output_dir: Path | None, +) -> dict[str, Any]: + source_path = str(item.get("source_path") or "SOURCE_PATH") + source_codec = str(item.get("video_codec") or "") + width = int_value(item.get("width")) or None + height = int_value(item.get("height")) or None + video_filter = build_video_filter(video_policy, width=width, height=height, detected_crop=None) + target_size_mb = int_value(video_policy.get("target_size_mb")) + max_encoded_percent = int_value(video_policy.get("max_encoded_percent")) + target_vmaf = float_value(video_policy.get("target_vmaf")) + min_target_vmaf = float_value(video_policy.get("min_target_vmaf")) + preset = int_value(video_policy.get("preset")) + pixel_format = str(video_policy.get("pixel_format") or "yuv420p10le") + svt_params = build_svt_params(video_policy) + + if engine == "ab-av1": + command = [ + "ab-av1", + "crf-search", + "-i", + source_path, + "--encoder", + "libsvtav1", + "--preset", + str(preset), + "--pix-format", + pixel_format, + "--min-vmaf", + _number(target_vmaf), + "--max-encoded-percent", + str(max_encoded_percent), + ] + command.extend(_sample_args(video_policy)) + command.extend(_filter_args(video_filter, "--vfilter")) + for param in svt_params: + command.extend(["--svt", param]) + candidate = BakeoffCandidate( + key="ab-av1", + label="ab-av1 fast sample", + category="current", + maturity="production-current", + required_tools=("ab-av1", "ffmpeg with libvmaf or xpsnr", "SVT-AV1 encoder"), + metric_support=("vmaf", "xpsnr"), + command=tuple(command), + command_status="production-current", + sources=("https://github.com/alexheretic/ab-av1",), + notes=( + "Current Mediaforce search path; fast and already host-orchestrated.", + "Samples across the file but does not split by scene before scoring.", + ), + ) + elif engine == "av1an": + command = [ + "av1an", + "-i", + source_path, + 
"-o", + str((output_dir or Path("bakeoff")) / "av1an.mkv"), + "--encoder", + "svt-av1", + "--target-quality", + _number(min_target_vmaf), + "--target-metric", + "ssimulacra2", + "--pix-format", + pixel_format, + "--video-params", + _svt_video_params(video_policy), + ] + command.extend(_filter_args(f"-vf {video_filter}" if video_filter else None, "--ffmpeg")) # Av1an takes ffmpeg filter args via --ffmpeg; --vfilter is an ab-av1 flag + candidate = BakeoffCandidate( + key="av1an", + label="Av1an target quality", + category="scene-aware-candidate", + maturity="candidate", + required_tools=("av1an", "ffmpeg", "mkvmerge", "SVT-AV1 encoder", "metric plugin/runtime"), + metric_support=("vmaf", "ssimulacra2"), + command=tuple(command), + command_status="template-needs-host-validation", + sources=( + "https://rust-av.github.io/Av1an/Features/TargetQuality", + "https://rust-av.github.io/Av1an/Cli/target_quality.html", + ), + notes=( + "Primary candidate for scene/chunk-aware target-quality workflow.", + "Command uses SSIMULACRA2 because Av1an target-quality supports it directly; verify installed Av1an and metric plugin names on host before production use.", + ), + ) + elif engine == "xav": + command = [ + "xav", + "--input", + source_path, + "--output", + str((output_dir or Path("bakeoff")) / "xav.mkv"), + "--encoder", + "svt-av1", + "--metric", + "ssimulacra2", + "--target", + _number(min_target_vmaf), + ] + candidate = BakeoffCandidate( + key="xav", + label="Xav target quality", + category="scene-aware-candidate", + maturity="experimental-candidate", + required_tools=("xav", "ffmpeg", "SVT-AV1 encoder", "GPU metric support recommended"), + metric_support=("ssimulacra2", "vmaf"), + command=tuple(command), + command_status="research-template-needs-cli-validation", + sources=("https://github.com/emrakyz/xav",), + notes=( + "Performance-oriented Av1an-style candidate.", + "Upstream README documents chunked target-quality goals but points to a work-in-progress PDF for detailed CLI usage.", + ), + ) + elif engine == "auto-boost": + command = [ + "auto-boost-essential", + source_path, +
str((output_dir or Path("bakeoff")) / "auto-boost.mkv"), + "--target-size-mb", + str(target_size_mb), + ] + candidate = BakeoffCandidate( + key="auto-boost", + label="Auto-Boost Essential", + category="scene-aware-candidate", + maturity="research-candidate", + required_tools=("Auto-Boost-Essential script", "SVT-AV1-Essential or compatible SVT-AV1"), + metric_support=("script-defined",), # 1-tuple needs the trailing comma; a bare str would be split per character by list() + command=tuple(command), + command_status="research-template-needs-script-validation", + sources=("https://github.com/nekotrix/auto-boost-algorithm/tree/main/Auto-Boost-Essential",), + notes=( + "Candidate from ecosystem feedback for faster consistent quality allocation.", + f"Source codec detected as {source_codec or 'unknown'}; validate script compatibility before full encode.", + ), + ) + else: + raise ValueError(f"Unsupported bakeoff engine: {engine}") + return _candidate_to_dict(candidate) + + +def _candidate_to_dict(candidate: BakeoffCandidate) -> dict[str, Any]: + return { + "key": candidate.key, + "label": candidate.label, + "category": candidate.category, + "maturity": candidate.maturity, + "required_tools": list(candidate.required_tools), + "metric_support": list(candidate.metric_support), + "command": list(candidate.command), + "command_status": candidate.command_status, + "sources": list(candidate.sources), + "notes": list(candidate.notes), + } + + +def _normalize_engines(engines: list[str] | None) -> list[str]: + if not engines: + return list(DEFAULT_BAKEOFF_ENGINES) + normalized = [] + aliases = {"auto_boost": "auto-boost", "autoboost": "auto-boost", "abav1": "ab-av1"} + for engine in engines: + key = aliases.get(engine.strip().lower(), engine.strip().lower()) + if key not in DEFAULT_BAKEOFF_ENGINES: + raise ValueError(f"Unsupported bakeoff engine: {engine}") + if key not in normalized: + normalized.append(key) + return normalized + + +def _default_targets(config: MediaforceConfig) -> dict[str, Any]: + video = config.video + target_size_mb =
int_value(video.get("target_size_mb")) + target_runtime_minutes = int_value(video.get("target_runtime_minutes")) + return { + "target_size_mb": target_size_mb, + "target_runtime_minutes": target_runtime_minutes, + "target_size_bytes": target_size_mb * 1024 * 1024 if target_size_mb > 0 else None, + "max_height": int_value(video.get("max_height")), + "quality_metric": str(video.get("quality_metric") or "auto"), + "target_vmaf": float_value(video.get("target_vmaf")), + "min_target_vmaf": float_value(video.get("min_target_vmaf")), + "target_xpsnr": float_value(video.get("target_xpsnr")), + "min_target_xpsnr": float_value(video.get("min_target_xpsnr")), + "max_encoded_percent": int_value(video.get("max_encoded_percent")), + "decision_model": str(video.get("decision_model") or "size_first_review"), + "quality_engine": str(video.get("quality_engine") or "ab_av1_fast_sample"), + } + + +def _target_size_bytes(video_policy: dict[str, Any]) -> int | None: + target_size_mb = int_value(video_policy.get("target_size_mb")) + if target_size_mb <= 0: + return None + return target_size_mb * 1024 * 1024 + + +def _target_size_percent(target_size_bytes: int | None, source_size_bytes: int) -> float | None: + if target_size_bytes is None or source_size_bytes <= 0: + return None + return round((target_size_bytes / source_size_bytes) * 100.0, 3) + + +def _quality_floor(video_policy: dict[str, Any]) -> dict[str, Any]: + metric = str(video_policy.get("quality_metric") or "auto").lower() + if metric == "xpsnr": + return {"metric": "xpsnr", "target": float_value(video_policy.get("target_xpsnr")), + "minimum": float_value(video_policy.get("min_target_xpsnr"))} + return {"metric": "vmaf", "target": float_value(video_policy.get("target_vmaf")), + "minimum": float_value(video_policy.get("min_target_vmaf"))} + + +def _resolution(width: int | None, height: int | None) -> str | None: + if width is None or height is None: + return None + return f"{width}x{height}" + + +def _sample_args(video_policy: 
dict[str, Any]) -> list[str]: + return [ + "--sample-every", + str(video_policy.get("sample_every") or "8m"), + "--sample-duration", + str(video_policy.get("sample_duration") or "20s"), + ] + + +def _filter_args(video_filter: str | None, flag: str) -> list[str]: + if not video_filter: + return [] + return [flag, video_filter] + + +def _svt_video_params(video_policy: dict[str, Any]) -> str: + params = [ + f"--preset {int_value(video_policy.get('preset'))}", + f"--crf {int_value(video_policy.get('min_crf'))}", + f"--film-grain {int_value(video_policy.get('default_grain'))}", + f"--film-grain-denoise {int_value(video_policy.get('grain_denoise'))}", + ] + return " ".join(params) + + +def _number(value: float) -> str: + return f"{value:.3f}".rstrip("0").rstrip(".") diff --git a/tests/test_bakeoff.py b/tests/test_bakeoff.py new file mode 100644 index 0000000..570af31 --- /dev/null +++ b/tests/test_bakeoff.py @@ -0,0 +1,96 @@ +import json +from pathlib import Path +import tempfile +import unittest + +from mediaforce.core.config import load_config +from mediaforce.encoding.bakeoff import build_bakeoff_plan, write_bakeoff_plan + + +def _manifest() -> dict[str, object]: + return { + "run_id": "test-run", + "items": [ + { + "source_path": "/media/tv/Show/Season 1/Episode.mkv", + "rel_path": "tv/Show/Season 1/Episode.mkv", + "source_size_bytes": 4_294_967_296, + "duration_seconds": 2700.0, + "width": 1920, + "height": 1080, + "video_codec": "hevc", + "resolved_policy": { + "video": { + "encoder": "libsvtav1", + "pixel_format": "yuv420p10le", + "preset": 4, + "quality_metric": "vmaf", + "target_vmaf": 85.0, + "min_target_vmaf": 80.0, + "target_xpsnr": 39.0, + "min_target_xpsnr": 35.0, + "sample_every": "8m", + "sample_duration": "20s", + "min_crf": 18, + "max_crf": 38, + "max_encoded_percent": 80, + "default_grain": 8, + "grain_denoise": 0, + "max_height": 1080, + "target_size_mb": 300, + "target_runtime_minutes": 45, + "decision_model": "size_first_review", + "quality_engine": 
"ab_av1_fast_sample", + }, + "audio": {}, + "subtitle": {}, + "planning": {}, + }, + } + ], + } + + +class BakeoffPlanTests(unittest.TestCase): + def test_build_bakeoff_plan_uses_size_first_defaults_and_candidates(self) -> None: + config = load_config(Path("config/defaults.toml")) + plan = build_bakeoff_plan( + config, + _manifest(), + indexes=[0], + output_dir=Path("/tmp/mediaforce-bakeoff"), + ) + + self.assertEqual(plan["decision_model"], "size_first_review") + self.assertEqual(plan["default_targets"]["target_size_mb"], 300) + self.assertEqual(plan["default_targets"]["min_target_vmaf"], 80.0) + item = plan["items"][0] + self.assertEqual(item["target_size_bytes"], 314_572_800) + self.assertEqual(item["duration_seconds"], 2700.0) + self.assertEqual(item["resolution"], "1920x1080") + self.assertEqual(item["quality_floor"], {"metric": "vmaf", "target": 85.0, "minimum": 80.0}) + engine_keys = [engine["key"] for engine in item["engines"]] + self.assertEqual(engine_keys, ["ab-av1", "av1an", "xav", "auto-boost"]) + av1an = item["engines"][1] + self.assertIn("scene-aware-candidate", av1an["category"]) + self.assertIn("ssimulacra2", av1an["metric_support"]) + self.assertIn("ssimulacra2", av1an["command"]) + self.assertEqual(av1an["command_status"], "template-needs-host-validation") + self.assertIn("https://rust-av.github.io/Av1an/Features/TargetQuality", av1an["sources"]) + + def test_build_bakeoff_plan_can_limit_engines(self) -> None: + config = load_config(Path("config/defaults.toml")) + plan = build_bakeoff_plan(config, _manifest(), indexes=[0], engines=["av1an"]) + + self.assertEqual([engine["key"] for engine in plan["items"][0]["engines"]], ["av1an"]) + + def test_write_bakeoff_plan_creates_parent_dirs(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + output_path = Path(tmp) / "nested" / "plan.json" + write_bakeoff_plan({"ok": True}, output_path) + + self.assertEqual(json.loads(output_path.read_text()), {"ok": True}) + + +if __name__ == "__main__": + 
unittest.main()