Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/cli/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ dependencies = [
"pyodide-cli",
"pyjson5>=1.6.0",
"pyodide-py",
"wheel-optimizer",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All other dependencies specify version bounds. Consider pinning wheel-optimizer to avoid unexpected breakage on a new major release.

Suggested change
"wheel-optimizer",
"wheel-optimizer>=0.1.0,<1.0.0",

(Adjust the range to match the version you're currently developing against.)

"workers-runtime-sdk>=0.1.0",
]

Expand Down
106 changes: 106 additions & 0 deletions packages/cli/src/pywrangler/optimize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import logging
from pathlib import Path
from typing import TypedDict

from wheel_optimizer import OptimizerConfig, OptimizerPipeline

from .utils import read_pyproject_toml

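# Note: When adding a new optimizer, make sure to update all of the following:
# - _ALL_OPTIMIZER_FIELDS (every optimizer name forwarded to OptimizerConfig)
# - DEFAULT_ON_OPTIMIZERS (only if the new optimizer should run by default)
# - OptimizeConfig (the key users may set under [tool.pywrangler.optimize])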


class OptimizeConfig(TypedDict, total=False):
    """Keys read from the ``[tool.pywrangler.optimize]`` table of pyproject.toml.

    Every key is optional (``total=False``). For the per-optimizer toggles, a
    key that is absent falls back to whether its name is listed in
    ``DEFAULT_ON_OPTIMIZERS``.
    """

    # Master switch: when truthy, get_optimize_config() returns
    # OptimizerConfig(disable_all=True) without consulting any other key.
    disable_all: bool
    # Per-optimizer toggles; each name is forwarded unchanged as a keyword
    # argument to wheel_optimizer's OptimizerConfig.
    remove_docstrings: bool
    remove_type_annotations: bool
    remove_assertions: bool
    remove_comments: bool
    remove_tests: bool
    remove_typestubs: bool
    remove_pycache: bool
    remove_c_source: bool
    remove_cython_source: bool
    minify_whitespace: bool
    compile_pyc: bool


# pywrangler reports the optimization step through its own logger, so raise
# wheel_optimizer's threshold to CRITICAL to suppress its lower-level records.
logging.getLogger("wheel_optimizer").setLevel(logging.CRITICAL)

# Module-level logger for pywrangler's own optimization messages.
logger = logging.getLogger(__name__)

# Optimizers that are enabled when the user's configuration does not mention
# them; every other optimizer is opt-in.
DEFAULT_ON_OPTIMIZERS: frozenset[str] = frozenset(
    (
        "minify_whitespace",
        "remove_comments",
        "remove_docstrings",
        "remove_pycache",
    )
)

_ALL_OPTIMIZER_FIELDS: frozenset[str] = frozenset(
{
"remove_docstrings",
"remove_type_annotations",
"remove_assertions",
"remove_comments",
"remove_tests",
"remove_typestubs",
"remove_pycache",
"remove_c_source",
"remove_cython_source",
"minify_whitespace",
"compile_pyc",
}
)


def _read_optimize_section() -> OptimizeConfig:
    """Return a copy of the ``[tool.pywrangler.optimize]`` table.

    Walks ``tool`` -> ``pywrangler`` -> ``optimize`` in the data returned by
    ``read_pyproject_toml()``. A missing level, or a level that is not a dict,
    yields an empty configuration instead of raising.

    Returns:
        A new dict holding whatever keys the optimize table contains; the
        keys and values are copied as-is, without validation.
    """
    node: object = read_pyproject_toml().get("tool", {})
    for key in ("pywrangler", "optimize"):
        # A non-dict at any level is treated as an absent table.
        node = node.get(key, {}) if isinstance(node, dict) else {}

    section: OptimizeConfig = {}
    if not isinstance(node, dict):
        return section
    section.update(node)  # type: ignore[typeddict-item]
    return section


def get_optimize_config() -> OptimizerConfig:
    """Build the ``OptimizerConfig`` for the current project.

    User settings come from ``_read_optimize_section()``. A truthy
    ``disable_all`` short-circuits everything else. Otherwise each optimizer
    in ``_ALL_OPTIMIZER_FIELDS`` takes the user's value when one is given and
    falls back to membership in ``DEFAULT_ON_OPTIMIZERS`` when it is not.

    Keys that are not recognised are ignored, and non-boolean values are
    still coerced with ``bool()``; both situations are logged as warnings so
    that typos and values such as the string ``"false"`` do not go unnoticed.

    Returns:
        The resolved ``OptimizerConfig``.
    """
    user_config = _read_optimize_section()

    # Surface likely configuration mistakes; resolution below is unchanged.
    known_keys = _ALL_OPTIMIZER_FIELDS | {"disable_all"}
    for key, value in user_config.items():
        if key not in known_keys:
            logger.warning(
                "Ignoring unknown option %r in [tool.pywrangler.optimize]",
                key,
            )
        elif not isinstance(value, bool):
            logger.warning(
                "Option %r in [tool.pywrangler.optimize] should be a boolean, "
                "got %r",
                key,
                value,
            )

    if user_config.get("disable_all", False):
        return OptimizerConfig(disable_all=True)

    kwargs: dict[str, bool] = {}
    for field in _ALL_OPTIMIZER_FIELDS:
        user_value = user_config.get(field)
        if user_value is not None:
            kwargs[field] = bool(user_value)
        else:
            # Not configured by the user: use the built-in default.
            kwargs[field] = field in DEFAULT_ON_OPTIMIZERS

    return OptimizerConfig(**kwargs)


def optimize_packages(vendor_path: Path) -> None:
    """Run the configured optimizer pipeline over ``vendor_path``.

    Does nothing beyond a debug log when the configuration has
    ``disable_all`` set or when the pipeline ends up with no optimizers.

    Args:
        vendor_path: Directory handed to ``OptimizerPipeline.run``.
    """
    optimizer_config = get_optimize_config()
    if optimizer_config.disable_all:
        logger.debug("Bundle optimization disabled via disable_all = true")
        return

    pipeline = OptimizerPipeline(optimizer_config)
    if not pipeline.optimizers:
        logger.debug("No optimizers enabled, skipping optimization")
        return

    # List the optimizers about to run so the user can see what is applied.
    enabled_names = ", ".join([opt.name for opt in pipeline.optimizers])
    logger.info(f"Optimizing vendor packages ({enabled_names})...")

    pipeline.run(vendor_path)
    logger.debug("Bundle optimization complete.")
4 changes: 3 additions & 1 deletion packages/cli/src/pywrangler/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import click

from .optimize import optimize_packages
from .utils import (
check_uv_version,
check_wrangler_config,
Expand Down Expand Up @@ -224,7 +225,8 @@ def _install_requirements_to_vendor(requirements: list[str]) -> str | None:
shutil.rmtree(vendor_path)
shutil.copytree(pyodide_site_packages, vendor_path)

# Optimize the vendored packages, then create a pyvenv.cfg file in python_modules to mark it as a virtual environment
optimize_packages(vendor_path)

(vendor_path / "pyvenv.cfg").touch()
get_vendor_token_path().touch()

Expand Down
3 changes: 2 additions & 1 deletion packages/cli/src/pywrangler/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,9 @@ class PyProjectProject(TypedDict):
dependencies: list[str]


class PyProject(TypedDict):
class PyProject(TypedDict, total=False):
project: PyProjectProject
tool: dict[str, object]


def read_pyproject_toml() -> PyProject:
Expand Down
241 changes: 241 additions & 0 deletions packages/cli/tests/test_optimize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
import re
import shutil
import subprocess
from dataclasses import fields
from pathlib import Path
from textwrap import dedent

import pytest
from wheel_optimizer import OptimizerConfig

from pywrangler.optimize import (
_ALL_OPTIMIZER_FIELDS,
DEFAULT_ON_OPTIMIZERS,
get_optimize_config,
optimize_packages,
)

# Sample module with a module docstring, a function docstring, a comment and
# a four-space-indented body, written into the fake vendored package.
SAMPLE_PY = dedent('''\
    """Module docstring."""


    def hello():
        """Function docstring."""
        # This is a comment
        x = 1
        return x
''')

# Sample module whose only notable feature is its type annotations.
SAMPLE_PY_WITH_TYPES = dedent("""\
    def add(a: int, b: int) -> int:
        return a + b
""")


@pytest.fixture()
def vendor_dir(tmp_path: Path) -> Path:
    """Create a fake vendor directory containing one package, ``mypkg``.

    The package holds ``__init__.py`` (SAMPLE_PY), ``typed.py``
    (SAMPLE_PY_WITH_TYPES) and a ``__pycache__`` directory with a dummy
    ``.pyc`` file. Returns ``tmp_path``, the parent of the package.
    """
    package_dir = tmp_path / "mypkg"
    package_dir.mkdir()

    sources = (
        ("__init__.py", SAMPLE_PY),
        ("typed.py", SAMPLE_PY_WITH_TYPES),
    )
    for filename, source in sources:
        (package_dir / filename).write_text(source)

    bytecode_dir = package_dir / "__pycache__"
    bytecode_dir.mkdir()
    (bytecode_dir / "foo.cpython-312.pyc").write_bytes(b"fake")

    return tmp_path


def _mock_pyproject(monkeypatch, optimize_section=None):
toml_data: dict = {"project": {"dependencies": []}}
if optimize_section is not None:
toml_data["tool"] = {"pywrangler": {"optimize": optimize_section}}
monkeypatch.setattr("pywrangler.optimize.read_pyproject_toml", lambda: toml_data)


class TestGetOptimizeConfig:
    """Unit tests for how get_optimize_config merges user settings and defaults."""

    def test_defaults_when_no_config_section(self, monkeypatch):
        """With no optimize table, each field follows DEFAULT_ON_OPTIMIZERS."""
        _mock_pyproject(monkeypatch)
        resolved = get_optimize_config()

        for name in _ALL_OPTIMIZER_FIELDS:
            should_be_on = name in DEFAULT_ON_OPTIMIZERS
            assert getattr(resolved, name) is should_be_on, (
                f"{name}: expected {should_be_on}, got {getattr(resolved, name)}"
            )
        assert resolved.disable_all is False

    def test_user_can_disable_default_on_optimizer(self, monkeypatch):
        """Turning one default-on optimizer off leaves the others enabled."""
        _mock_pyproject(monkeypatch, {"remove_docstrings": False})
        resolved = get_optimize_config()

        assert resolved.remove_docstrings is False
        for still_on in ("remove_pycache", "remove_comments", "minify_whitespace"):
            assert getattr(resolved, still_on) is True

    def test_user_can_enable_opt_in_optimizer(self, monkeypatch):
        """Enabling an opt-in optimizer does not disturb the defaults."""
        _mock_pyproject(monkeypatch, {"remove_type_annotations": True})
        resolved = get_optimize_config()

        assert resolved.remove_type_annotations is True
        for name in DEFAULT_ON_OPTIMIZERS:
            assert getattr(resolved, name) is True

    def test_disable_all_overrides_everything(self, monkeypatch):
        """disable_all wins even when another optimizer is explicitly enabled."""
        section = {"disable_all": True, "remove_docstrings": True}
        _mock_pyproject(monkeypatch, section)

        resolved = get_optimize_config()
        assert resolved.disable_all is True

    def test_all_fields_accounted_for(self):
        """_ALL_OPTIMIZER_FIELDS matches OptimizerConfig's fields minus disable_all."""
        upstream_fields = {f.name for f in fields(OptimizerConfig)} - {"disable_all"}
        assert _ALL_OPTIMIZER_FIELDS == upstream_fields


class TestOptimizeVendor:
    """Checks of optimize_packages against the files created by ``vendor_dir``."""

    def test_default_removes_docstrings_and_comments(self, monkeypatch, vendor_dir):
        """Default configuration strips both docstrings and the comment."""
        _mock_pyproject(monkeypatch)
        optimize_packages(vendor_dir)

        result = (vendor_dir / "mypkg" / "__init__.py").read_text()
        assert '"""Module docstring."""' not in result
        assert '"""Function docstring."""' not in result
        assert "# This is a comment" not in result

    def test_default_removes_pycache(self, monkeypatch, vendor_dir):
        """Default configuration deletes the dummy bytecode file."""
        _mock_pyproject(monkeypatch)
        pyc = vendor_dir / "mypkg" / "__pycache__" / "foo.cpython-312.pyc"
        assert pyc.exists()

        optimize_packages(vendor_dir)
        assert not pyc.exists()

    def test_default_minifies_whitespace(self, monkeypatch, vendor_dir):
        """Default configuration rewrites the four-space-indented body line."""
        # Exactly four leading spaces, as the variable name states. A
        # single-space literal would also match a one-space-indented line and
        # so could not distinguish minified from unminified output.
        # Assumes the sample module indents its function body by four spaces.
        four_space = "    x = 1\n"
        src = (vendor_dir / "mypkg" / "__init__.py").read_text()
        assert four_space in src

        _mock_pyproject(monkeypatch)
        optimize_packages(vendor_dir)

        result = (vendor_dir / "mypkg" / "__init__.py").read_text()
        assert four_space not in result

    def test_default_does_not_remove_type_annotations(self, monkeypatch, vendor_dir):
        """Type annotations survive because their optimizer is opt-in."""
        _mock_pyproject(monkeypatch)
        optimize_packages(vendor_dir)

        result = (vendor_dir / "mypkg" / "typed.py").read_text()
        assert "int" in result

    def test_opt_in_removes_type_annotations(self, monkeypatch, vendor_dir):
        """Enabling remove_type_annotations strips parameter and return hints."""
        _mock_pyproject(monkeypatch, {"remove_type_annotations": True})
        optimize_packages(vendor_dir)

        result = (vendor_dir / "mypkg" / "typed.py").read_text()
        assert ": int" not in result
        assert "-> int" not in result

    def test_disable_all_skips_everything(self, monkeypatch, vendor_dir):
        """disable_all leaves docstrings, comments and bytecode untouched."""
        _mock_pyproject(monkeypatch, {"disable_all": True})
        optimize_packages(vendor_dir)

        result = (vendor_dir / "mypkg" / "__init__.py").read_text()
        assert '"""Module docstring."""' in result
        assert "# This is a comment" in result
        pyc = vendor_dir / "mypkg" / "__pycache__" / "foo.cpython-312.pyc"
        assert pyc.exists()

    def test_all_defaults_off_skips_everything(self, monkeypatch, vendor_dir):
        """Switching every optimizer off individually leaves the source alone."""
        all_off = dict.fromkeys(_ALL_OPTIMIZER_FIELDS, False)
        _mock_pyproject(monkeypatch, all_off)
        optimize_packages(vendor_dir)

        result = (vendor_dir / "mypkg" / "__init__.py").read_text()
        assert '"""Module docstring."""' in result


@pytest.fixture()
def integration_dir():
    """Yield a scratch workspace next to this test file, removed afterwards.

    The workspace is ``test_workspace_optimize`` with an empty ``src``
    subdirectory; the yielded path is absolute.
    """
    workspace_root = Path(__file__).parent / "test_workspace_optimize"

    # Remove any existing workspace first so mkdir() below starts clean.
    shutil.rmtree(workspace_root, ignore_errors=True)
    workspace_root.joinpath("src").mkdir(parents=True)

    try:
        yield workspace_root.absolute()
    finally:
        shutil.rmtree(workspace_root, ignore_errors=True)


def _write_pyproject(
test_dir: Path,
dependencies: list[str],
optimize_section: dict[str, bool] | None = None,
) -> None:
deps_str = ", ".join(f'"{d}"' for d in dependencies)
content = dedent(f"""\
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "test-project"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = [{deps_str}]
""")
if optimize_section is not None:
content += "\n[tool.pywrangler.optimize]\n"
for key, val in optimize_section.items():
content += f"{key} = {str(val).lower()}\n"
(test_dir / "pyproject.toml").write_text(content)


def _write_wrangler_jsonc(test_dir: Path) -> None:
content = dedent("""\
{
"name": "test-worker",
"main": "src/worker.py",
"compatibility_date": "2026-03-20",
"compatibility_flags": ["python_workers"]
}
""")
(test_dir / "wrangler.jsonc").write_text(content)


def test_sync_applies_default_optimizations(integration_dir):
_write_pyproject(integration_dir, ["six"])
_write_wrangler_jsonc(integration_dir)

result = subprocess.run(
["uv", "run", "pywrangler", "sync"],
capture_output=True,
text=True,
cwd=integration_dir,
check=False,
)
assert result.returncode == 0, f"sync failed:\n{result.stdout}\n{result.stderr}"

vendor = integration_dir / "python_modules"
assert vendor.exists()

min_file_size = 100
py_files = [
f
for f in vendor.rglob("*.py")
if f.stat().st_size > min_file_size and f.name != "pyvenv.cfg"
]
content = py_files[0].read_text()
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If no .py file exceeds 100 bytes (unlikely with six, but possible if the package layout changes), this line raises an IndexError instead of a clear test failure. Adding a guard assertion gives a better diagnostic.

Suggested change
content = py_files[0].read_text()
assert py_files, "No .py files larger than 100 bytes found in vendor directory"
content = py_files[0].read_text()


# minify_whitespace: original 4-space indentation becomes 1-space.
# 1-space-indented lines are impossible in unminified source, so their
# presence proves the optimizer ran.
assert re.search(r"^ \S", content, re.MULTILINE), (
f"Expected 1-space indentation from minify_whitespace in {py_files[0].name}"
)

# remove_docstrings: file should not start with a triple-quoted string.
assert not content.lstrip().startswith(('"""', "'''")), (
f"Module docstring still present in {py_files[0].name}"
)
Loading
Loading