diff --git a/crates/deny.toml b/crates/deny.toml index 6f45bc63c8..1b126b7581 100644 --- a/crates/deny.toml +++ b/crates/deny.toml @@ -18,6 +18,16 @@ ignore = [ "RUSTSEC-2025-0090", # unic-emoji-char "RUSTSEC-2025-0098", # unic-ucd-version "RUSTSEC-2025-0100", # unic-ucd-ident + + # pyo3 vulnerabilities fixed in 0.29.0, but we are pinned to 0.27 because our + # transitive deps numpy (<=0.28) and pyo3-async-runtimes (<=0.28) only support + # pyo3 ^0.28 and have no pyo3 0.29-compatible release yet. Neither vulnerable + # code path is reachable in coglet: we never call nth/nth_back on PyList/PyTuple + # iterators, never use PyCFunction::new_closure, and run GIL-bound (abi3-py310, + # no free-threading). Remove these once numpy/pyo3-async-runtimes ship pyo3 0.29 + # support and we can upgrade. + "RUSTSEC-2026-0176", # Out-of-bounds read in nth/nth_back for PyList/PyTuple iterators + "RUSTSEC-2026-0177", # Missing Sync bound on PyCFunction::new_closure closures ] [licenses] diff --git a/examples/blur/README.md b/examples/blur/README.md new file mode 100644 index 0000000000..2a4ec3001e --- /dev/null +++ b/examples/blur/README.md @@ -0,0 +1,15 @@ +# Blur + +This model applies box blur to an input image. + +## Usage + +First, make sure you've got the [latest version of Cog](https://github.com/replicate/cog#install) installed. 
+ +Run predictions on the model: + +```sh +cog predict -i image=@examples/kodim24.png -i blur=4 + +cog predict -i image=@examples/kodim24.png -i blur=6 +``` diff --git a/examples/blur/cog.yaml b/examples/blur/cog.yaml new file mode 100644 index 0000000000..82c2b4349c --- /dev/null +++ b/examples/blur/cog.yaml @@ -0,0 +1,4 @@ +build: + python_version: "3.12" + python_requirements: requirements.txt +run: "run.py:Runner" diff --git a/examples/blur/examples/kodim24-blur.png b/examples/blur/examples/kodim24-blur.png new file mode 100755 index 0000000000..b73b22f0c8 Binary files /dev/null and b/examples/blur/examples/kodim24-blur.png differ diff --git a/examples/blur/examples/kodim24.png b/examples/blur/examples/kodim24.png new file mode 100644 index 0000000000..85bd64fb8a Binary files /dev/null and b/examples/blur/examples/kodim24.png differ diff --git a/examples/blur/requirements.txt b/examples/blur/requirements.txt new file mode 100644 index 0000000000..2564f9b628 --- /dev/null +++ b/examples/blur/requirements.txt @@ -0,0 +1 @@ +pillow==12.1.1 diff --git a/examples/blur/run.py b/examples/blur/run.py new file mode 100644 index 0000000000..cc7008f697 --- /dev/null +++ b/examples/blur/run.py @@ -0,0 +1,20 @@ +import tempfile + +from PIL import Image, ImageFilter + +from cog import BaseRunner, Input, Path + + +class Runner(BaseRunner): + def run( + self, + image: Path = Input(description="Input image"), + blur: float = Input(description="Blur radius", default=5), + ) -> Path: + if blur == 0: + return image + im = Image.open(str(image)) + im = im.filter(ImageFilter.BoxBlur(blur)) + out_path = Path(tempfile.mkdtemp()) / "out.png" + im.save(str(out_path)) + return out_path diff --git a/examples/canary/README.md b/examples/canary/README.md new file mode 100644 index 0000000000..9b9f6784b3 --- /dev/null +++ b/examples/canary/README.md @@ -0,0 +1,15 @@ +# Canary + +This simple model takes a string as input and returns a streaming string output. 
+ +## Usage + +First, make sure you've got the [latest version of Cog](https://github.com/replicate/cog#install) installed. + +Run predictions on the model: + +```sh +cog predict -i text=Athena + +cog predict -i text=Zeus +``` diff --git a/examples/canary/cog.yaml b/examples/canary/cog.yaml new file mode 100644 index 0000000000..e19c4aea46 --- /dev/null +++ b/examples/canary/cog.yaml @@ -0,0 +1,3 @@ +build: + python_version: "3.12" +run: "run.py:Runner" diff --git a/examples/canary/run.py b/examples/canary/run.py new file mode 100644 index 0000000000..b75f4a57a3 --- /dev/null +++ b/examples/canary/run.py @@ -0,0 +1,10 @@ +from cog import BaseRunner, ConcatenateIterator, Input + + +class Runner(BaseRunner): + def run( + self, text: str = Input(description="Text to prefix with 'hello there, '") + ) -> ConcatenateIterator[str]: + yield "hello " + yield "there, " + yield text diff --git a/examples/resnet/.dockerignore b/examples/experimental/resnet-managed-weights/.dockerignore similarity index 100% rename from examples/resnet/.dockerignore rename to examples/experimental/resnet-managed-weights/.dockerignore diff --git a/examples/resnet/.gitignore b/examples/experimental/resnet-managed-weights/.gitignore similarity index 100% rename from examples/resnet/.gitignore rename to examples/experimental/resnet-managed-weights/.gitignore diff --git a/examples/experimental/resnet-managed-weights/README.md b/examples/experimental/resnet-managed-weights/README.md new file mode 100644 index 0000000000..aa762768ec --- /dev/null +++ b/examples/experimental/resnet-managed-weights/README.md @@ -0,0 +1,144 @@ +# examples/experimental/resnet-managed-weights + +ResNet50 image classifier (microsoft/resnet-50 from HuggingFace) packaged +with v1 managed weights. Takes an image, returns top-3 ImageNet classes. + +Use this as a starting point for packaging a real model with managed weights. + +## What are managed weights? + +Managed weights separate your model weights from your model image. 
Instead of +baking multi-GB weight files into the Docker image (slow builds, huge layers), +cog packs them into dedicated OCI layers that get mounted at runtime. + +The key idea: your `run.py` reads weights from a path like +`/src/weights/resnet50`, but those files don't live inside the Docker image -- +they arrive separately and get overlaid at that path when the container starts. + +## File layout + +``` +examples/experimental/resnet-managed-weights/ +├── cog.yaml # model config -- declares weights, build settings +├── run.py # runner -- loads weights from target path +├── requirements.txt # python deps +├── weights.lock # generated by `cog weights import` -- don't hand-edit +├── .dockerignore # keeps local weight dirs out of the Docker build context +├── .gitignore # keeps local weight dirs and .cog/ out of git +├── hotdog.png # test image +└── cat.png # test image +``` + +Weight files themselves don't live in the project directory. `cog weights import` +downloads them into a content-addressed store at `~/.cache/cog/weights/` (override +with `$COG_CACHE_DIR`). When you run `cog run`, cog assembles a temporary +directory under `.cog/mounts/` using hardlinks from the store and bind-mounts it +into the container at the `target` path. The mount dir is cleaned up when the +container stops. + +## How `cog.yaml` works + +```yaml +weights: + - name: resnet50 + source: + uri: hf://microsoft/resnet-50 # where to fetch from + exclude: # files to skip + - "pytorch_model.bin" + - "flax_model.msgpack" + - "tf_model.h5" + - "README.md" + - ".gitattributes" + target: /src/weights/resnet50 # where files appear in the container +``` + +**`name`** -- an identifier for this weight set. Used in lockfile entries and +OCI tags. Pick something short and descriptive. + +**`source.uri`** -- where the weights come from. Two formats: + +- `hf://<org>/<repo>` -- pulls from HuggingFace Hub +- A local directory path (e.g. 
`weights/`) -- uses files already on disk + +**`source.exclude`** -- glob patterns for files to skip. Most HF repos ship +weights in multiple formats (PyTorch, TF, Flax, ONNX). Exclude the ones you +don't need -- it'll save gigabytes. + +**`target`** -- the absolute path where weight files land inside the container. +Your `run.py` loads from this path. Must start with `/`. + +## Getting started + +### 1. Import weights + +This downloads weight files from HuggingFace into the local cache and +generates `weights.lock`: + +```sh +cd examples/experimental/resnet-managed-weights +cog weights import +``` + +The lockfile records digests and sizes for every file. It's how cog knows +whether weights have changed on subsequent imports. Commit `weights.lock` +to version control. + +### 2. Run a prediction locally + +```sh +cog run -i image=@hotdog.png +``` + +Locally, cog assembles the weight files from the cache and bind-mounts them +into the container at the `target` path. You don't need to push anything to test. + +### 3. Build and push + +```sh +cog push +``` + +This builds the model image and pushes it as the model named by `model:` +in `cog.yaml`, alongside the weight layers as an OCI image index. The weights +and model image are separate artifacts in the registry -- the image index ties +them together. + +## Important: `.dockerignore` + +The `.dockerignore` excludes `weights/` and `.cog/weights-cache/` from the +Docker build context. This matters if you're using local directory weight +sources -- without it, Docker would send the full weight directory to the +build daemon on every `cog build`. + +## Adapting this for your own model + +1. Copy this directory as a starting point +2. Edit `cog.yaml`: + - Change `source.uri` to your HuggingFace repo (or a local path) + - Adjust `exclude` patterns for the formats you don't need + - Set `target` to wherever your code expects to find the weights + - Set `model` to your model name (required for `cog push`) +3. 
Edit `run.py` to load your model from `WEIGHTS_DIR` +4. Update `requirements.txt` with your dependencies +5. Run `cog weights import` to fetch weights and generate the lockfile +6. Test with `cog run` +7. Push with `cog push` + +### Using local weights instead of HuggingFace + +If you already have weights on disk (downloaded separately, trained locally, +etc.), point the source at a local directory: + +```yaml +weights: + - name: my-model + source: + uri: my-weights-dir/ + include: + - "*.safetensors" + - "*.json" + target: /src/weights/my-model +``` + +Then run `cog weights import` as usual -- it'll hash the local files and +generate the lockfile. diff --git a/examples/experimental/resnet-managed-weights/cat.png b/examples/experimental/resnet-managed-weights/cat.png new file mode 100644 index 0000000000..15296784ac Binary files /dev/null and b/examples/experimental/resnet-managed-weights/cat.png differ diff --git a/examples/experimental/resnet-managed-weights/cog.yaml b/examples/experimental/resnet-managed-weights/cog.yaml new file mode 100644 index 0000000000..7637b05a7f --- /dev/null +++ b/examples/experimental/resnet-managed-weights/cog.yaml @@ -0,0 +1,28 @@ +# ResNet50 image classifier using v1 managed weights. 
+# +# Weights are pulled from HuggingFace at import time: +# cog weights import +# +# Build and push: +# cog push + +model: resnet-managed-weights + +build: + gpu: true + python_version: "3.13" + python_requirements: requirements.txt + +run: "run.py:Runner" + +weights: + - name: resnet50 + source: + uri: hf://microsoft/resnet-50 + exclude: + - "pytorch_model.bin" # legacy format, redundant with model.safetensors + - "flax_model.msgpack" # Flax/JAX weights + - "tf_model.h5" # TensorFlow weights + - "README.md" + - ".gitattributes" + target: /src/weights/resnet50 diff --git a/examples/experimental/resnet-managed-weights/hotdog.png b/examples/experimental/resnet-managed-weights/hotdog.png new file mode 100644 index 0000000000..16a2719693 Binary files /dev/null and b/examples/experimental/resnet-managed-weights/hotdog.png differ diff --git a/examples/experimental/resnet-managed-weights/requirements.txt b/examples/experimental/resnet-managed-weights/requirements.txt new file mode 100644 index 0000000000..72eea68af8 --- /dev/null +++ b/examples/experimental/resnet-managed-weights/requirements.txt @@ -0,0 +1,3 @@ +pillow==12.1.1 +torch==2.8.0 +transformers==4.52.3 diff --git a/examples/experimental/resnet-managed-weights/run.py b/examples/experimental/resnet-managed-weights/run.py new file mode 100644 index 0000000000..5d86d0edfc --- /dev/null +++ b/examples/experimental/resnet-managed-weights/run.py @@ -0,0 +1,27 @@ +import torch +from PIL import Image +from transformers import AutoImageProcessor, ResNetForImageClassification + +from cog import BaseRunner, Input, Path + +WEIGHTS_DIR = "/src/weights/resnet50" + + +class Runner(BaseRunner): + def setup(self) -> None: + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.processor = AutoImageProcessor.from_pretrained(WEIGHTS_DIR) + self.model = ResNetForImageClassification.from_pretrained(WEIGHTS_DIR) + self.model = self.model.to(self.device) + self.model.eval() + + def run(self, image: Path = 
Input(description="Image to classify")) -> dict: + img = Image.open(image).convert("RGB") + inputs = self.processor(img, return_tensors="pt").to(self.device) + + with torch.no_grad(): + logits = self.model(**inputs).logits + + top3 = logits[0].softmax(0).topk(3) + labels = self.model.config.id2label + return {labels[i.item()]: p.item() for p, i in zip(*top3, strict=True)} diff --git a/examples/resnet/weights.lock b/examples/experimental/resnet-managed-weights/weights.lock similarity index 81% rename from examples/resnet/weights.lock rename to examples/experimental/resnet-managed-weights/weights.lock index 3c5f63730c..18f9a6e837 100644 --- a/examples/resnet/weights.lock +++ b/examples/experimental/resnet-managed-weights/weights.lock @@ -5,19 +5,21 @@ { "name": "resnet50", "target": "/src/weights/resnet50", - "source": { - "uri": "hf://microsoft/resnet-50", - "fingerprint": "commit:34c2154c194f829b11125337b98c8f5f9965ff19", - "include": [], - "exclude": [ - ".gitattributes", - "README.md", - "flax_model.msgpack", - "pytorch_model.bin", - "tf_model.h5" - ], - "importedAt": "2026-04-30T21:49:23.515142Z" - }, + "sources": [ + { + "uri": "hf://microsoft/resnet-50", + "fingerprint": "commit:34c2154c194f829b11125337b98c8f5f9965ff19", + "include": [], + "exclude": [ + ".gitattributes", + "README.md", + "flax_model.msgpack", + "pytorch_model.bin", + "tf_model.h5" + ] + } + ], + "importedAt": "2026-06-11T20:45:55.187706Z", "digest": "sha256:d2daafad96409df82d69df3c92192d2e651344f579a12683a59e4a6140a5abf5", "setDigest": "sha256:52924993c7eff45d5d1deaecf1f375d774c30faa1b4ce61379f5d552fd376744", "size": 102552676, diff --git a/examples/hello-concurrency/.dockerignore b/examples/hello-concurrency/.dockerignore new file mode 100644 index 0000000000..ed2db796de --- /dev/null +++ b/examples/hello-concurrency/.dockerignore @@ -0,0 +1,18 @@ +# The .dockerignore file excludes files from the container build process. 
+# +# https://docs.docker.com/engine/reference/builder/#dockerignore-file + +# Exclude Git files +.git +.github +.gitignore + +# Exclude Python cache files +__pycache__ +.mypy_cache +.pytest_cache +.ruff_cache + +# Exclude Python virtual environment +.venv +venv diff --git a/examples/hello-concurrency/.gitignore b/examples/hello-concurrency/.gitignore new file mode 100644 index 0000000000..a1a01e70cf --- /dev/null +++ b/examples/hello-concurrency/.gitignore @@ -0,0 +1,2 @@ +.venv +honeycomb_token.key diff --git a/examples/hello-concurrency/README.md b/examples/hello-concurrency/README.md new file mode 100644 index 0000000000..a3e85c8552 --- /dev/null +++ b/examples/hello-concurrency/README.md @@ -0,0 +1,26 @@ +# hello-concurrency + +This is an example Cog project that demonstrates the newly added concurrency support within +cog >= 0.14.0. + +The key piece is the new `concurrency` field in the cog.yaml. + +```yaml +concurrency: + max: 4 +``` + +This combined with the async setup and run methods in `run.py` allows Cog to run up to +4 concurrent predictions. If Cog reaches the max concurrency threshold it will reject subsequent +predictions with a `409 Conflict` response. + +### Telemetry + +It also uses the open-telemetry package to demonstrate how to collect telemetry for your model. + +This requires a file named `honeycomb_token.key` to be included in the image build. + +It will then start sending events to the `cog-model` data source. You can configure this by +editing the `OTEL_SERVICE_NAME`. If you use a custom endpoint this can be configured via `OTEL_EXPORTER_OTLP_ENDPOINT`. + +Lastly, there is a section in `run.py` that can be uncommented to run telemetry locally and print events to the console for debugging. 
diff --git a/examples/hello-concurrency/cog.yaml b/examples/hello-concurrency/cog.yaml new file mode 100644 index 0000000000..9be8201142 --- /dev/null +++ b/examples/hello-concurrency/cog.yaml @@ -0,0 +1,9 @@ +# Configuration for Cog ⚙️ +# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md +build: + gpu: false + python_version: "3.12" + python_requirements: requirements.txt +run: "run.py:Runner" +concurrency: + max: 4 diff --git a/examples/hello-concurrency/requirements.txt b/examples/hello-concurrency/requirements.txt new file mode 100644 index 0000000000..e0cce909ed --- /dev/null +++ b/examples/hello-concurrency/requirements.txt @@ -0,0 +1,3 @@ +opentelemetry-api +opentelemetry-sdk +opentelemetry-exporter-otlp-proto-http diff --git a/examples/hello-concurrency/run.py b/examples/hello-concurrency/run.py new file mode 100644 index 0000000000..090f5d562e --- /dev/null +++ b/examples/hello-concurrency/run.py @@ -0,0 +1,121 @@ +# Prediction interface for Cog ⚙️ +# https://github.com/replicate/cog/blob/main/docs/python.md + +import asyncio +import logging +import os +import time + +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ( + BatchSpanProcessor, +) + +from cog import ( + AsyncConcatenateIterator, + BaseRunner, + Input, + __version__, + current_scope, +) + +logging.basicConfig( + format="%(asctime)s %(levelname)-8s %(message)s", + level=logging.INFO, + datefmt="%Y-%m-%d %H:%M:%S", +) + +honeycomb_token = "" +try: + with open("./honeycomb_token.key", "r") as f: + honeycomb_token = f.read().strip() +except FileNotFoundError: + logging.info("honeycomb_token.key not found; OTEL will be disabled") + +if not honeycomb_token: + os.environ["OTEL_SDK_DISABLED"] = "true" + +os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://api.honeycomb.io/" 
+os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"x-honeycomb-team={honeycomb_token}" +os.environ["OTEL_SERVICE_NAME"] = "cog-model" + +resource = Resource( + attributes={"model.name": "replicate/hello-concurrency", "cog_version": __version__} +) +provider = TracerProvider(resource=resource) +provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter())) +trace.set_tracer_provider(provider) +tracer = trace.get_tracer("predict") + +# Local OTEL debugging +# from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor + +# os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "http://otel-collector.local-otel.orb.local:4318" +# os.environ["OTEL_SDK_DISABLED"] = "" +# provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter())) + + +class Runner(BaseRunner): + async def setup(self) -> None: + with tracer.start_as_current_span("setup") as span: + self._setup_context = span.get_span_context() + + start_time = time.time() + logging.info(f"starting setup: cog_version={__version__}") + + time.sleep(1) + + duration = time.time() - start_time + logging.info(f"completed setup in {duration} seconds") + span.set_attribute("model.setup_time_seconds", duration) + + async def run( # pyright: ignore + self, + total: int = Input(default=5), + interval: int = Input(default=3), + ) -> AsyncConcatenateIterator[str]: # pyright: ignore + links = [] + if setup_context := getattr(self, "_setup_context", None): + links.append(trace.Link(setup_context)) + + with tracer.start_as_current_span("predict", links=links) as span: + span.set_attribute("inputs.total", total) + span.set_attribute("inputs.interval", interval) + + start_time = time.time() + logging.info( + f"starting prediction: cog_version={__version__} total={total} interval={interval}" + ) + + """Run a single prediction on the model""" + fruits = [ + "Apple", + "Banana", + "Orange", + "Grape", + "Strawberry", + "Mango", + "Pineapple", + "Blueberry", + "Watermelon", + "Peach", + ][:total] + + for index, fruit in 
enumerate(fruits): + if index + 1 == total: + yield f"{fruit}" + else: + yield f"{fruit}\n" + logging.info(f"output fruit: {fruit}") + await asyncio.sleep(interval) + + logging.info(f"emit_metric: output_tokens={total}") + current_scope().record_metric("output_tokens", total) + span.set_attribute("metrics.output_tokens", total) + + duration = time.time() - start_time + logging.info(f"completed prediction in {duration} seconds") + span.set_attribute("model.predict_time_seconds", duration) diff --git a/examples/hello-context/.dockerignore b/examples/hello-context/.dockerignore new file mode 100644 index 0000000000..1d4c71fdac --- /dev/null +++ b/examples/hello-context/.dockerignore @@ -0,0 +1,20 @@ +# The .dockerignore file excludes files from the container build process. +# +# https://docs.docker.com/engine/reference/builder/#dockerignore-file + +# Exclude Git files +**/.git +**/.github +**/.gitignore + +# Exclude Python tooling +.python-version + +# Exclude Python cache files +__pycache__ +.mypy_cache +.pytest_cache +.ruff_cache + +# Exclude Python virtual environment +/venv diff --git a/examples/hello-context/README.md b/examples/hello-context/README.md new file mode 100644 index 0000000000..ea76bbfc8d --- /dev/null +++ b/examples/hello-context/README.md @@ -0,0 +1,3 @@ +## hello-context + +A simple model that echoes the `text` input and any prediction context in the output. 
diff --git a/examples/hello-context/cog.yaml b/examples/hello-context/cog.yaml new file mode 100644 index 0000000000..fb6f97203f --- /dev/null +++ b/examples/hello-context/cog.yaml @@ -0,0 +1,28 @@ +# Configuration for Cog ⚙️ +# Reference: https://cog.run/yaml + +build: + # set to true if your model requires a GPU + gpu: false + + # a list of ubuntu apt packages to install + # system_packages: + # - "libgl1-mesa-glx" + # - "libglib2.0-0" + + # python version in the form '3.11' or '3.11.4' + python_version: "3.11" + + # path to a Python requirements.txt file + python_requirements: requirements.txt + + # enable fast boots + fast: false + + # commands run after the environment is set up + # run: + # - "echo env is ready!" + # - "echo another command if needed" + +# run.py defines how predictions are run on your model +run: "run.py:run" diff --git a/examples/hello-context/requirements.txt b/examples/hello-context/requirements.txt new file mode 100644 index 0000000000..ea75294519 --- /dev/null +++ b/examples/hello-context/requirements.txt @@ -0,0 +1,23 @@ +# This is a normal Python requirements.txt file. 
+ +# You can add dependencies directly from PyPI: +# +# numpy==1.26.4 +# torch==2.2.1 +# torchvision==0.17.1 + + +# You can also add Git repos as dependencies, but you'll need to add git to the system_packages list in cog.yaml: +# +# build: +# system_packages: +# - "git" +# +# Then you can use a URL like this: +# +# git+https://github.com/huggingface/transformers + + +# You can also pin Git repos to a specific commit: +# +# git+https://github.com/huggingface/transformers@2d1602a diff --git a/examples/hello-context/run.py b/examples/hello-context/run.py new file mode 100644 index 0000000000..4f10524b02 --- /dev/null +++ b/examples/hello-context/run.py @@ -0,0 +1,11 @@ +import warnings + +from cog import ExperimentalFeatureWarning, Input, current_scope + +warnings.filterwarnings(action="ignore", category=ExperimentalFeatureWarning) + + +def run( + text: str = Input(description="Example text input"), +) -> dict[str, dict[str, str]]: + return {"inputs": {"text": text}, "context": current_scope().context} diff --git a/examples/hello-image/README.md b/examples/hello-image/README.md new file mode 100644 index 0000000000..366960336f --- /dev/null +++ b/examples/hello-image/README.md @@ -0,0 +1,13 @@ +# Hello image + +This simple model takes no input and returns a static image. + +## Usage + +First, make sure you've got the [latest version of Cog](https://github.com/replicate/cog#install) installed. 
+ +Run predictions on the model: + +```sh +cog predict +``` diff --git a/examples/hello-image/cog.yaml b/examples/hello-image/cog.yaml new file mode 100644 index 0000000000..e19c4aea46 --- /dev/null +++ b/examples/hello-image/cog.yaml @@ -0,0 +1,3 @@ +build: + python_version: "3.12" +run: "run.py:Runner" diff --git a/examples/hello-image/hello.webp b/examples/hello-image/hello.webp new file mode 100644 index 0000000000..df83a4d4f4 Binary files /dev/null and b/examples/hello-image/hello.webp differ diff --git a/examples/hello-image/run.py b/examples/hello-image/run.py new file mode 100644 index 0000000000..71e2659874 --- /dev/null +++ b/examples/hello-image/run.py @@ -0,0 +1,6 @@ +from cog import BaseRunner, Path + + +class Runner(BaseRunner): + def run(self) -> Path: + return Path("hello.webp") diff --git a/examples/hello-train/README.md b/examples/hello-train/README.md new file mode 100644 index 0000000000..d8b5225137 --- /dev/null +++ b/examples/hello-train/README.md @@ -0,0 +1,17 @@ +# Hello, Train 🚂 + +This example demonstrates how to define a training interface in Cog using the `train:` field in `cog.yaml`. + +The training API allows you to define a fine-tuning interface for an existing Cog model, so users of the model can bring their own training data to create derivative fine-tuned models. Real-world examples of this API in use include [fine-tuning SDXL with images](https://replicate.com/blog/fine-tune-sdxl) or [fine-tuning Llama 2 with structured text](https://replicate.com/blog/fine-tune-llama-2). + +This simple trainable model takes a string as input and returns a string as output. + +**Note:** The `cog train` CLI command is deprecated. Training is still supported via the Replicate API and the `train:` field in `cog.yaml`. 
+ +## Usage + +Run predictions with: + +```console +cog predict -i text=world +``` diff --git a/examples/hello-train/cog.yaml b/examples/hello-train/cog.yaml new file mode 100644 index 0000000000..4107278f62 --- /dev/null +++ b/examples/hello-train/cog.yaml @@ -0,0 +1,4 @@ +build: + python_version: "3.12" +run: "run.py:Runner" +train: "train.py:train" diff --git a/examples/hello-train/run.py b/examples/hello-train/run.py new file mode 100644 index 0000000000..cdac7ef666 --- /dev/null +++ b/examples/hello-train/run.py @@ -0,0 +1,18 @@ +from typing import Optional + +import requests + +from cog import BaseRunner, Input, Path + + +class Runner(BaseRunner): + def setup(self, weights: Optional[Path] = None) -> None: + if weights: + self.prefix = requests.get(weights).text + else: + self.prefix = "hello" + + def run( + self, text: str = Input(description="Text to prefix with 'hello ' or weights") + ) -> str: + return self.prefix + " " + text diff --git a/examples/hello-train/train.py b/examples/hello-train/train.py new file mode 100644 index 0000000000..598a8896f9 --- /dev/null +++ b/examples/hello-train/train.py @@ -0,0 +1,15 @@ +from cog import BaseModel, Input, Path + + +class TrainingOutput(BaseModel): + weights: Path + + +def train( + prefix: str = Input(description="data you wish to save"), +) -> TrainingOutput: + weights = Path("output.txt") + with open(weights, "w") as f: + f.write(prefix) + + return TrainingOutput(weights=weights) diff --git a/examples/hello-world/README.md b/examples/hello-world/README.md new file mode 100644 index 0000000000..0991ae3ff2 --- /dev/null +++ b/examples/hello-world/README.md @@ -0,0 +1,15 @@ +# Hello World + +This simple model takes a string as input and returns a string as output. + +## Usage + +First, make sure you've got the [latest version of Cog](https://github.com/replicate/cog#install) installed. 
+ +Run predictions on the model: + +```sh +cog predict -i text=Athena + +cog predict -i text=Zeus +``` diff --git a/examples/hello-world/cog.yaml b/examples/hello-world/cog.yaml new file mode 100644 index 0000000000..e19c4aea46 --- /dev/null +++ b/examples/hello-world/cog.yaml @@ -0,0 +1,3 @@ +build: + python_version: "3.12" +run: "run.py:Runner" diff --git a/examples/hello-world/run.py b/examples/hello-world/run.py new file mode 100644 index 0000000000..36c920b541 --- /dev/null +++ b/examples/hello-world/run.py @@ -0,0 +1,9 @@ +from cog import BaseRunner, Input + + +class Runner(BaseRunner): + def setup(self) -> None: + self.prefix = "hello" + + def run(self, text: str = Input(description="Text to prefix with 'hello '")) -> str: + return self.prefix + " " + text diff --git a/examples/managed-weights/cog.yaml b/examples/managed-weights/cog.yaml index cfbde00fa6..506959595b 100644 --- a/examples/managed-weights/cog.yaml +++ b/examples/managed-weights/cog.yaml @@ -1,4 +1,4 @@ -# Example model for testing the v1 managed-weights OCI artifact format. +# Test fixture for the v1 managed-weights OCI pipeline. # # The weights/ directory is populated by a human (see README.md) with # nvidia/parakeet-tdt-0.6b-v3 from HuggingFace. 
It's listed in .gitignore @@ -10,7 +10,7 @@ # Build and push the full bundle: # cog push -image: /managed-weights +model: managed-weights build: gpu: false diff --git a/examples/managed-weights/weights.lock b/examples/managed-weights/weights.lock index a61dda3245..9931b2c3bb 100644 --- a/examples/managed-weights/weights.lock +++ b/examples/managed-weights/weights.lock @@ -5,32 +5,34 @@ { "name": "parakeet", "target": "/src/weights/parakeet", - "source": { - "uri": "file://./weights", - "fingerprint": "sha256:7ebdcfebeca9a959621601570f1beec20729f23ec721694f2432939789cfad86", - "include": [ - "*.json", - "*.safetensors" - ], - "exclude": [], - "importedAt": "2026-04-29T17:23:00.414483Z" - }, - "digest": "sha256:f2c88df678c449d8fca7def7afd5d9c17bb8963096108a51fd8fb9a92bf6fc8a", - "setDigest": "sha256:05933849eadc5067660cf0d164375fc9a80cf11a2be19a663017af62f5284bf2", - "size": 2509473151, - "sizeCompressed": 2508595098, + "sources": [ + { + "uri": "file://./weights", + "fingerprint": "sha256:818d763a4ae969c4b9876338d3f294c44bcd1a9182a713d787a83f5624b65a7d", + "include": [ + "*.json", + "*.safetensors" + ], + "exclude": [] + } + ], + "importedAt": "2026-06-12T16:41:25.807184Z", + "digest": "sha256:c8d627863b85a8fa8441d1bd54b74be116812030e310c08707455227b8196e65", + "setDigest": "sha256:4ebfc8927425c01119080b2e2ca1cf7d57ca9a598109ae6d17b99b9a4ad42792", + "size": 2509473204, + "sizeCompressed": 2508595123, "files": [ { "path": "config.json", "size": 1153, "digest": "sha256:e747b85e1bdfd300c8b8ac63bac8dd5221f8fe9bc275b48d06c735fcd6971b6e", - "layer": "sha256:5f33cdfadfaf568afd41ce78ac566bda102564454e8211f11370281e9ee9619a" + "layer": "sha256:c8410de988af73e8ace73b9f5b40c3280667296f120d8c37328580c2cf894055" }, { "path": "generation_config.json", - "size": 268, - "digest": "sha256:fc78f636b071231420356dbf70140a81a389132eb49bc81bcb2efdbe8293e7ad", - "layer": "sha256:5f33cdfadfaf568afd41ce78ac566bda102564454e8211f11370281e9ee9619a" + "size": 289, + "digest": 
"sha256:b141de6ec6d7f982ece13f98f604e3fe1807ea9c0e839185d0ab7064604209d0", + "layer": "sha256:c8410de988af73e8ace73b9f5b40c3280667296f120d8c37328580c2cf894055" }, { "path": "model.safetensors", @@ -42,56 +44,58 @@ "path": "processor_config.json", "size": 392, "digest": "sha256:8346a93a3b987fa1dec57a78f045cd0817d21786589a5a096b41a57a446fd1d7", - "layer": "sha256:5f33cdfadfaf568afd41ce78ac566bda102564454e8211f11370281e9ee9619a" + "layer": "sha256:c8410de988af73e8ace73b9f5b40c3280667296f120d8c37328580c2cf894055" }, { "path": "tokenizer.json", "size": 1159960, "digest": "sha256:bd321b096832a3f270bd3b2a88823957920f1a5c5ada71114a26ea729d0cbe91", - "layer": "sha256:5f33cdfadfaf568afd41ce78ac566bda102564454e8211f11370281e9ee9619a" + "layer": "sha256:c8410de988af73e8ace73b9f5b40c3280667296f120d8c37328580c2cf894055" }, { "path": "tokenizer_config.json", - "size": 258, - "digest": "sha256:5e04ae3487a5533224c295622cb206cc6e53914be527503978ce81f2cc75c559", - "layer": "sha256:5f33cdfadfaf568afd41ce78ac566bda102564454e8211f11370281e9ee9619a" + "size": 290, + "digest": "sha256:0b2fe0037599ee335f0b972fa682bf0ece74e4ccfec755cb7daa3405d3d3e874", + "layer": "sha256:c8410de988af73e8ace73b9f5b40c3280667296f120d8c37328580c2cf894055" } ], "layers": [ - { - "digest": "sha256:5f33cdfadfaf568afd41ce78ac566bda102564454e8211f11370281e9ee9619a", - "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", - "size": 280986, - "sizeUncompressed": 1162031 - }, { "digest": "sha256:65f11b0713429c604eb159d02a1c805815d599414e10c1c295e1937472db3a2f", "mediaType": "application/vnd.oci.image.layer.v1.tar", "size": 2508314112, "sizeUncompressed": 2508311120 + }, + { + "digest": "sha256:c8410de988af73e8ace73b9f5b40c3280667296f120d8c37328580c2cf894055", + "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", + "size": 281011, + "sizeUncompressed": 1162084 } ] }, { "name": "minilm", "target": "/src/weights/minilm", - "source": { - "uri": "hf://sentence-transformers/all-MiniLM-L6-v2", - "fingerprint": 
"commit:c9745ed1d9f207416be6d2e6f8de32d1f16199bf", - "include": [], - "exclude": [ - ".gitattributes", - "README.md", - "data_config.json", - "onnx/", - "openvino/", - "pytorch_model.bin", - "rust_model.ot", - "tf_model.h5", - "train_script.py" - ], - "importedAt": "2026-04-29T22:43:36.709798Z" - }, + "sources": [ + { + "uri": "hf://sentence-transformers/all-MiniLM-L6-v2", + "fingerprint": "commit:1110a243fdf4706b3f48f1d95db1a4f5529b4d41", + "include": [], + "exclude": [ + ".gitattributes", + "README.md", + "data_config.json", + "onnx/", + "openvino/", + "pytorch_model.bin", + "rust_model.ot", + "tf_model.h5", + "train_script.py" + ] + } + ], + "importedAt": "2026-06-11T20:44:00.402112Z", "digest": "sha256:6b15a4ac4f7e4dec39939043d32e7c51238516356f810b496735c50d0d7310be", "setDigest": "sha256:18074874da7fd77e7cf1fefcbb2fc7edee0c38417f2255962e8a4fff5567b1d2", "size": 91567913, diff --git a/examples/notebook/README.md b/examples/notebook/README.md new file mode 100644 index 0000000000..7413afad55 --- /dev/null +++ b/examples/notebook/README.md @@ -0,0 +1,31 @@ +# notebook + +A simple example using a Jupyter Notebook with Cog + +## Usage + +First, make sure you've got the [latest version of Cog](https://github.com/replicate/cog#install) installed. 
+ +Run the Jupyter Notebook server with Cog: + +```sh +cog run -p 8888 --debug jupyter notebook --allow-root --ip=0.0.0.0 --NotebookApp.token=mytoken +``` + +Copy the notebook URL to your browser (you can change the `mytoken` to your preferred token or have it autogenerated): + +```sh +http://127.0.0.1:8888/?token=mytoken +``` + +Save any changes you make to your notebook, then export it as a Python script: + +```sh +jupyter nbconvert --to script my_notebook.ipynb # creates my_notebook.py +``` + +Now you can run predictions on the model: + +```sh +cog predict -i name=Alice +``` diff --git a/examples/notebook/cog.yaml b/examples/notebook/cog.yaml new file mode 100644 index 0000000000..663a1fa4a5 --- /dev/null +++ b/examples/notebook/cog.yaml @@ -0,0 +1,7 @@ +build: + gpu: false + python_packages: + - "jupyterlab==3.2.4" + python_version: "3.12" + +run: "run.py:Runner" diff --git a/examples/notebook/my_notebook.ipynb b/examples/notebook/my_notebook.ipynb new file mode 100644 index 0000000000..6d43bc74b0 --- /dev/null +++ b/examples/notebook/my_notebook.ipynb @@ -0,0 +1,36 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "4622e827", + "metadata": {}, + "outputs": [], + "source": [ + "def say_hello(name) -> str:\n", + " return f\"hello, {name}\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/notebook/my_notebook.py b/examples/notebook/my_notebook.py new file mode 100644 index 0000000000..98bf3274e7 --- /dev/null +++ b/examples/notebook/my_notebook.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +def 
say_hello(name) -> str: + return f"hello, {name}" diff --git a/examples/notebook/run.py b/examples/notebook/run.py new file mode 100644 index 0000000000..8a7e942b5c --- /dev/null +++ b/examples/notebook/run.py @@ -0,0 +1,14 @@ +import my_notebook + +from cog import BaseRunner, Input + + +class Runner(BaseRunner): + def setup(self) -> None: + """Prepare the model so multiple predictions run efficiently (optional)""" + + def run(self, name: str = Input(description="name of person to greet")) -> str: + """Run a single prediction""" + + output = my_notebook.say_hello(name) + return output diff --git a/examples/resnet/README.md b/examples/resnet/README.md index faf973b874..d86d3729bc 100644 --- a/examples/resnet/README.md +++ b/examples/resnet/README.md @@ -1,144 +1,19 @@ -# examples/resnet +# resnet -ResNet50 image classifier (microsoft/resnet-50 from HuggingFace) packaged -with v1 managed weights. Takes an image, returns top-3 ImageNet classes. +This model tells you what's in an image. It's configured as a GPU example in `cog.yaml`. -Use this as a starting point for packaging a real model with managed weights. +It uses ResNet50 with ImageNet weights from torchvision. Torchvision fetches and +caches the checkpoint the first time the model starts, so startup requires network +access unless the checkpoint is already cached. Takes an image, returns the top-3 +ImageNet classes. -## What are managed weights? +## Usage -Managed weights separate your model weights from your model image. Instead of -baking multi-GB weight files into the Docker image (slow builds, huge layers), -cog packs them into dedicated OCI layers that get mounted at runtime. +First, make sure you've got the [latest version of +Cog](https://github.com/replicate/cog#install) installed. -The key idea: your `run.py` reads weights from a path like -`/src/weights/resnet50`, but those files don't live inside the Docker image -- -they arrive separately and get overlaid at that path when the container starts. 
- -## File layout - -``` -examples/resnet/ -├── cog.yaml # model config -- declares weights, build settings -├── run.py # runner -- loads weights from target path -├── requirements.txt # python deps -├── weights.lock # generated by `cog weights import` -- don't hand-edit -├── .dockerignore # keeps local weight dirs out of the Docker build context -├── .gitignore # keeps local weight dirs and .cog/ out of git -├── hotdog.png # test image -└── cat.png # test image -``` - -Weight files themselves don't live in the project directory. `cog weights import` -downloads them into a content-addressed store at `~/.cache/cog/weights/` (override -with `$COG_CACHE_DIR`). When you run `cog run`, cog assembles a temporary -directory under `.cog/mounts/` using hardlinks from the store and bind-mounts it -into the container at the `target` path. The mount dir is cleaned up when the -container stops. - -## How `cog.yaml` works - -```yaml -weights: - - name: resnet50 - source: - uri: hf://microsoft/resnet-50 # where to fetch from - exclude: # files to skip - - "pytorch_model.bin" - - "flax_model.msgpack" - - "tf_model.h5" - - "README.md" - - ".gitattributes" - target: /src/weights/resnet50 # where files appear in the container -``` - -**`name`** -- an identifier for this weight set. Used in lockfile entries and -OCI tags. Pick something short and descriptive. - -**`source.uri`** -- where the weights come from. Two formats: - -- `hf:///` -- pulls from HuggingFace Hub -- A local directory path (e.g. `weights/`) -- uses files already on disk - -**`source.exclude`** -- glob patterns for files to skip. Most HF repos ship -weights in multiple formats (PyTorch, TF, Flax, ONNX). Exclude the ones you -don't need -- it'll save gigabytes. - -**`target`** -- the absolute path where weight files land inside the container. -Your `run.py` loads from this path. Must start with `/`. - -## Getting started - -### 1. 
Import weights - -This downloads weight files from HuggingFace into the local cache and -generates `weights.lock`: - -```sh -cd examples/resnet -cog weights import -``` - -The lockfile records digests and sizes for every file. It's how cog knows -whether weights have changed on subsequent imports. Commit `weights.lock` -to version control. - -### 2. Run a prediction locally +Then run predictions on the model: ```sh -cog run -i image=@hotdog.png -``` - -Locally, cog assembles the weight files from the cache and bind-mounts them -into the container at the `target` path. You don't need to push anything to test. - -### 3. Build and push - -```sh -cog push -``` - -This builds the model image and pushes it to the registry specified by `image:` -in `cog.yaml`, alongside the weight layers as an OCI image index. The weights -and model image are separate artifacts in the registry -- the image index ties -them together. - -## Important: `.dockerignore` - -The `.dockerignore` excludes `weights/` and `.cog/weights-cache/` from the -Docker build context. This matters if you're using local directory weight -sources -- without it, Docker would send the full weight directory to the -build daemon on every `cog build`. - -## Adapting this for your own model - -1. Copy this directory as a starting point -2. Edit `cog.yaml`: - - Change `source.uri` to your HuggingFace repo (or a local path) - - Adjust `exclude` patterns for the formats you don't need - - Set `target` to wherever your code expects to find the weights - - Set `image` to your registry destination (required for `cog push`) -3. Edit `run.py` to load your model from `WEIGHTS_DIR` -4. Update `requirements.txt` with your dependencies -5. Run `cog weights import` to fetch weights and generate the lockfile -6. Test with `cog run` -7. 
Push with `cog push` - -### Using local weights instead of HuggingFace - -If you already have weights on disk (downloaded separately, trained locally, -etc.), point the source at a local directory: - -```yaml -weights: - - name: my-model - source: - uri: my-weights-dir/ - include: - - "*.safetensors" - - "*.json" - target: /src/weights/my-model +cog predict -i image=@hotdog.png ``` - -Then run `cog weights import` as usual -- it'll hash the local files and -generate the lockfile. diff --git a/examples/resnet/cog.yaml b/examples/resnet/cog.yaml index f2fa21b3aa..d410af492e 100644 --- a/examples/resnet/cog.yaml +++ b/examples/resnet/cog.yaml @@ -1,28 +1,14 @@ -# ResNet50 image classifier using v1 managed weights. +# ResNet50 image classifier using torchvision's bundled weights. # -# Weights are pulled from HuggingFace at import time: -# cog weights import +# Weights ship with torchvision and download at setup time, so there's +# nothing to import -- just build and run. # # Build and push: # cog push -image: /resnet - build: gpu: true - python_version: "3.13" + python_version: "3.12" python_requirements: requirements.txt run: "run.py:Runner" - -weights: - - name: resnet50 - source: - uri: hf://microsoft/resnet-50 - exclude: - - "pytorch_model.bin" # legacy format, redundant with model.safetensors - - "flax_model.msgpack" # Flax/JAX weights - - "tf_model.h5" # TensorFlow weights - - "README.md" - - ".gitattributes" - target: /src/weights/resnet50 diff --git a/examples/resnet/requirements.txt b/examples/resnet/requirements.txt index 72eea68af8..f9c8d1ef75 100644 --- a/examples/resnet/requirements.txt +++ b/examples/resnet/requirements.txt @@ -1,3 +1,3 @@ pillow==12.1.1 torch==2.8.0 -transformers==4.52.3 +torchvision==0.23.0 diff --git a/examples/resnet/run.py b/examples/resnet/run.py index 5d86d0edfc..1f3f83f797 100644 --- a/examples/resnet/run.py +++ b/examples/resnet/run.py @@ -1,27 +1,29 @@ +import os + +os.environ["TORCH_HOME"] = "." 
+ import torch from PIL import Image -from transformers import AutoImageProcessor, ResNetForImageClassification +from torchvision import models from cog import BaseRunner, Input, Path -WEIGHTS_DIR = "/src/weights/resnet50" +WEIGHTS = models.ResNet50_Weights.IMAGENET1K_V1 class Runner(BaseRunner): def setup(self) -> None: self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - self.processor = AutoImageProcessor.from_pretrained(WEIGHTS_DIR) - self.model = ResNetForImageClassification.from_pretrained(WEIGHTS_DIR) - self.model = self.model.to(self.device) + self.model = models.resnet50(weights=WEIGHTS).to(self.device) self.model.eval() def run(self, image: Path = Input(description="Image to classify")) -> dict: img = Image.open(image).convert("RGB") - inputs = self.processor(img, return_tensors="pt").to(self.device) + inputs = WEIGHTS.transforms()(img).unsqueeze(0).to(self.device) with torch.no_grad(): - logits = self.model(**inputs).logits + preds = self.model(inputs) - top3 = logits[0].softmax(0).topk(3) - labels = self.model.config.id2label - return {labels[i.item()]: p.item() for p, i in zip(*top3, strict=True)} + top3 = preds[0].softmax(0).topk(3) + categories = WEIGHTS.meta["categories"] + return {categories[i]: p.item() for p, i in zip(*top3, strict=True)} diff --git a/examples/streaming-text/cog.yaml b/examples/streaming-text/cog.yaml index 68866b7aeb..d8e89c1cce 100644 --- a/examples/streaming-text/cog.yaml +++ b/examples/streaming-text/cog.yaml @@ -4,4 +4,4 @@ build: python_version: "3.12" python_requirements: requirements.txt -predict: "predict.py:Predictor" +run: "run.py:Runner" diff --git a/examples/streaming-text/predict.py b/examples/streaming-text/run.py similarity index 98% rename from examples/streaming-text/predict.py rename to examples/streaming-text/run.py index c94ddb04f3..b51bee3332 100644 --- a/examples/streaming-text/predict.py +++ b/examples/streaming-text/run.py @@ -9,7 +9,7 @@ MODEL_NAME = 
"HuggingFaceTB/SmolLM2-135M-Instruct" -class Predictor(BaseRunner): +class Runner(BaseRunner): def setup(self) -> None: self.device = "cuda" if torch.cuda.is_available() else "cpu" dtype = torch.float16 if self.device == "cuda" else torch.float32 diff --git a/examples/z-image-turbo/.dockerignore b/examples/z-image-turbo/.dockerignore new file mode 100644 index 0000000000..16d3c4dbbf --- /dev/null +++ b/examples/z-image-turbo/.dockerignore @@ -0,0 +1 @@ +.cache diff --git a/examples/z-image-turbo/README.md b/examples/z-image-turbo/README.md new file mode 100644 index 0000000000..8ab9bfb03e --- /dev/null +++ b/examples/z-image-turbo/README.md @@ -0,0 +1,5 @@ +# Z-Image-Turbo + +A simple version of [Z-Image-Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) to demonstrate how to use Cog for generative models. + + $ cog predict -i prompt="a cat with a hat" diff --git a/examples/z-image-turbo/cog.yaml b/examples/z-image-turbo/cog.yaml new file mode 100644 index 0000000000..4bf7e969a4 --- /dev/null +++ b/examples/z-image-turbo/cog.yaml @@ -0,0 +1,5 @@ +build: + gpu: true + python_version: "3.13" + python_requirements: "requirements.txt" +run: "run.py:Runner" \ No newline at end of file diff --git a/examples/z-image-turbo/requirements.txt b/examples/z-image-turbo/requirements.txt new file mode 100644 index 0000000000..beb415596e --- /dev/null +++ b/examples/z-image-turbo/requirements.txt @@ -0,0 +1,3 @@ +diffusers==0.36.0 +transformers==4.57.3 +torch==2.8.0 diff --git a/examples/z-image-turbo/run.py b/examples/z-image-turbo/run.py new file mode 100644 index 0000000000..15d52f2ab0 --- /dev/null +++ b/examples/z-image-turbo/run.py @@ -0,0 +1,36 @@ +import os + +os.environ["HF_HUB_CACHE"] = "./.cache" +os.environ["HF_XET_HIGH_PERFORMANCE"] = "1" + + +import tempfile + +import torch +from diffusers import ZImagePipeline + +from cog import BaseRunner, Path + + +class Runner(BaseRunner): + def setup(self) -> None: + self.model = ZImagePipeline.from_pretrained( + 
"Tongyi-MAI/Z-Image-Turbo", + torch_dtype=torch.bfloat16, + low_cpu_mem_usage=False, + ) + self.model.to("cuda") + + def run(self, prompt: str) -> Path: + image = self.model( + prompt=prompt, + height=1024, + width=1024, + num_inference_steps=9, # This actually results in 8 DiT forwards + guidance_scale=0.0, # Guidance should be 0 for the Turbo models + generator=torch.Generator("cuda").manual_seed(42), + ).images[0] + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f: + output_path = Path(f.name) + image.save(output_path) + return output_path diff --git a/tools/test-harness/cmd/build.go b/tools/test-harness/cmd/build.go index 615b2464d9..1e02a7ca9f 100644 --- a/tools/test-harness/cmd/build.go +++ b/tools/test-harness/cmd/build.go @@ -3,6 +3,7 @@ package cmd import ( "context" "fmt" + "path/filepath" "github.com/spf13/cobra" @@ -27,7 +28,7 @@ func runBuild(ctx context.Context) error { return err } - _, models, resolved, err := resolveSetup() + _, models, resolved, mfPath, err := resolveSetup() if err != nil { return err } @@ -49,6 +50,7 @@ func runBuild(ctx context.Context) error { CogBinary: resolved.CogBinary, SDKVersion: resolved.SDKPatchVersion, SDKWheel: resolved.SDKWheel, + ManifestDir: filepath.Dir(mfPath), CleanImages: cleanImages, KeepOutputs: keepOutputs, Parallel: parallel, diff --git a/tools/test-harness/cmd/list.go b/tools/test-harness/cmd/list.go index 6aa3d8b687..737d5827e6 100644 --- a/tools/test-harness/cmd/list.go +++ b/tools/test-harness/cmd/list.go @@ -40,7 +40,7 @@ func runList() error { envTag = fmt.Sprintf(" (requires: %s)", strings.Join(m.RequiresEnv, ", ")) } - fmt.Printf(" %-25s %s/%s%s%s\n", m.Name, m.Repo, m.Path, gpuTag, envTag) + fmt.Printf(" %-25s %s%s%s\n", m.Name, m.Source(), gpuTag, envTag) } fmt.Printf("\n%d model(s) total\n", len(mf.Models)) diff --git a/tools/test-harness/cmd/root.go b/tools/test-harness/cmd/root.go index 114f5a77c9..af9440521c 100644 --- a/tools/test-harness/cmd/root.go +++ 
b/tools/test-harness/cmd/root.go @@ -62,10 +62,10 @@ It reads the same manifest.yaml format as the Python version.`, } // resolveSetup loads the manifest, resolves versions, and filters models. -func resolveSetup() (*manifest.Manifest, []manifest.Model, *resolver.Result, error) { +func resolveSetup() (*manifest.Manifest, []manifest.Model, *resolver.Result, string, error) { mf, mfPath, err := manifest.Load(manifestPath) if err != nil { - return nil, nil, nil, fmt.Errorf("loading manifest: %w", err) + return nil, nil, nil, "", fmt.Errorf("loading manifest: %w", err) } fmt.Printf("Loaded manifest: %s\n", mfPath) @@ -75,12 +75,12 @@ func resolveSetup() (*manifest.Manifest, []manifest.Model, *resolver.Result, err "cog_version": mf.Defaults.CogVersion, }) if err != nil { - return nil, nil, nil, fmt.Errorf("resolving versions: %w", err) + return nil, nil, nil, "", fmt.Errorf("resolving versions: %w", err) } fmt.Printf("Using cog CLI: %s (%s)\n", resolved.CogBinary, resolved.CogVersion) models := mf.FilterModels(modelFilter, noGPU, gpuOnly) - return mf, models, resolved, nil + return mf, models, resolved, mfPath, nil } // validateConcurrency checks that the concurrency flag is a valid value. 
diff --git a/tools/test-harness/cmd/run.go b/tools/test-harness/cmd/run.go index c7167d9779..20ea23ebc9 100644 --- a/tools/test-harness/cmd/run.go +++ b/tools/test-harness/cmd/run.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "os" + "path/filepath" "github.com/spf13/cobra" @@ -40,7 +41,7 @@ func runRun(ctx context.Context, outputFormat, outputFile string) error { return err } - _, models, resolved, err := resolveSetup() + _, models, resolved, mfPath, err := resolveSetup() if err != nil { return err } @@ -68,6 +69,7 @@ func runRun(ctx context.Context, outputFormat, outputFile string) error { CogBinary: resolved.CogBinary, SDKVersion: resolved.SDKPatchVersion, SDKWheel: resolved.SDKWheel, + ManifestDir: filepath.Dir(mfPath), CleanImages: cleanImages, KeepOutputs: keepOutputs, Parallel: parallel, diff --git a/tools/test-harness/fixtures/models/complex-output/cog.yaml b/tools/test-harness/fixtures/models/complex-output/cog.yaml index defc03d8cf..e19c4aea46 100644 --- a/tools/test-harness/fixtures/models/complex-output/cog.yaml +++ b/tools/test-harness/fixtures/models/complex-output/cog.yaml @@ -1,3 +1,3 @@ build: python_version: "3.12" -predict: "predict.py:Predictor" +run: "run.py:Runner" diff --git a/tools/test-harness/fixtures/models/complex-output/predict.py b/tools/test-harness/fixtures/models/complex-output/run.py similarity index 71% rename from tools/test-harness/fixtures/models/complex-output/predict.py rename to tools/test-harness/fixtures/models/complex-output/run.py index 34d932d491..1c9439c858 100644 --- a/tools/test-harness/fixtures/models/complex-output/predict.py +++ b/tools/test-harness/fixtures/models/complex-output/run.py @@ -1,4 +1,4 @@ -from cog import BaseModel, BasePredictor, Input +from cog import BaseModel, BaseRunner, Input class Output(BaseModel): @@ -7,8 +7,8 @@ class Output(BaseModel): tags: list[str] -class Predictor(BasePredictor): - def predict( +class Runner(BaseRunner): + def run( self, prompt: str = Input(description="Input prompt"), ) 
-> Output: diff --git a/tools/test-harness/fixtures/models/constraints-and-choices/cog.yaml b/tools/test-harness/fixtures/models/constraints-and-choices/cog.yaml index defc03d8cf..e19c4aea46 100644 --- a/tools/test-harness/fixtures/models/constraints-and-choices/cog.yaml +++ b/tools/test-harness/fixtures/models/constraints-and-choices/cog.yaml @@ -1,3 +1,3 @@ build: python_version: "3.12" -predict: "predict.py:Predictor" +run: "run.py:Runner" diff --git a/tools/test-harness/fixtures/models/constraints-and-choices/predict.py b/tools/test-harness/fixtures/models/constraints-and-choices/run.py similarity index 88% rename from tools/test-harness/fixtures/models/constraints-and-choices/predict.py rename to tools/test-harness/fixtures/models/constraints-and-choices/run.py index 14cb867c46..57cd4a13c6 100644 --- a/tools/test-harness/fixtures/models/constraints-and-choices/predict.py +++ b/tools/test-harness/fixtures/models/constraints-and-choices/run.py @@ -1,8 +1,8 @@ -from cog import BasePredictor, Input +from cog import BaseRunner, Input -class Predictor(BasePredictor): - def predict( +class Runner(BaseRunner): + def run( self, prompt: str = Input(description="The prompt", default="hello"), temperature: float = Input( diff --git a/tools/test-harness/fixtures/models/file-path-types/cog.yaml b/tools/test-harness/fixtures/models/file-path-types/cog.yaml index defc03d8cf..e19c4aea46 100644 --- a/tools/test-harness/fixtures/models/file-path-types/cog.yaml +++ b/tools/test-harness/fixtures/models/file-path-types/cog.yaml @@ -1,3 +1,3 @@ build: python_version: "3.12" -predict: "predict.py:Predictor" +run: "run.py:Runner" diff --git a/tools/test-harness/fixtures/models/file-path-types/predict.py b/tools/test-harness/fixtures/models/file-path-types/run.py similarity index 79% rename from tools/test-harness/fixtures/models/file-path-types/predict.py rename to tools/test-harness/fixtures/models/file-path-types/run.py index 10baedfc9b..e88b79b546 100644 --- 
a/tools/test-harness/fixtures/models/file-path-types/predict.py +++ b/tools/test-harness/fixtures/models/file-path-types/run.py @@ -1,8 +1,8 @@ -from cog import BasePredictor, File, Input, Path +from cog import BaseRunner, File, Input, Path -class Predictor(BasePredictor): - def predict( +class Runner(BaseRunner): + def run( self, image: Path = Input(description="An image path"), document: File = Input(description="A file upload"), diff --git a/tools/test-harness/fixtures/models/list-types/cog.yaml b/tools/test-harness/fixtures/models/list-types/cog.yaml index defc03d8cf..e19c4aea46 100644 --- a/tools/test-harness/fixtures/models/list-types/cog.yaml +++ b/tools/test-harness/fixtures/models/list-types/cog.yaml @@ -1,3 +1,3 @@ build: python_version: "3.12" -predict: "predict.py:Predictor" +run: "run.py:Runner" diff --git a/tools/test-harness/fixtures/models/list-types/predict.py b/tools/test-harness/fixtures/models/list-types/run.py similarity index 79% rename from tools/test-harness/fixtures/models/list-types/predict.py rename to tools/test-harness/fixtures/models/list-types/run.py index b6878a55ec..823f1fb57b 100644 --- a/tools/test-harness/fixtures/models/list-types/predict.py +++ b/tools/test-harness/fixtures/models/list-types/run.py @@ -1,10 +1,10 @@ from typing import List -from cog import BasePredictor, File, Input, Path +from cog import BaseRunner, File, Input, Path -class Predictor(BasePredictor): - def predict( +class Runner(BaseRunner): + def run( self, tags: list[str] = Input(description="List of strings"), numbers: List[int] = Input(description="List of ints"), diff --git a/tools/test-harness/fixtures/models/optional-list-types/cog.yaml b/tools/test-harness/fixtures/models/optional-list-types/cog.yaml index defc03d8cf..e19c4aea46 100644 --- a/tools/test-harness/fixtures/models/optional-list-types/cog.yaml +++ b/tools/test-harness/fixtures/models/optional-list-types/cog.yaml @@ -1,3 +1,3 @@ build: python_version: "3.12" -predict: "predict.py:Predictor" 
+run: "run.py:Runner" diff --git a/tools/test-harness/fixtures/models/optional-list-types/predict.py b/tools/test-harness/fixtures/models/optional-list-types/run.py similarity index 87% rename from tools/test-harness/fixtures/models/optional-list-types/predict.py rename to tools/test-harness/fixtures/models/optional-list-types/run.py index 2cdd60ed35..18e99eae4c 100644 --- a/tools/test-harness/fixtures/models/optional-list-types/predict.py +++ b/tools/test-harness/fixtures/models/optional-list-types/run.py @@ -1,10 +1,10 @@ from typing import List, Optional -from cog import BasePredictor, File, Input, Path +from cog import BaseRunner, File, Input, Path -class Predictor(BasePredictor): - def predict( +class Runner(BaseRunner): + def run( self, text: str = Input(description="Required anchor field"), # PEP 604 optional lists diff --git a/tools/test-harness/fixtures/models/optional-types/cog.yaml b/tools/test-harness/fixtures/models/optional-types/cog.yaml index defc03d8cf..e19c4aea46 100644 --- a/tools/test-harness/fixtures/models/optional-types/cog.yaml +++ b/tools/test-harness/fixtures/models/optional-types/cog.yaml @@ -1,3 +1,3 @@ build: python_version: "3.12" -predict: "predict.py:Predictor" +run: "run.py:Runner" diff --git a/tools/test-harness/fixtures/models/optional-types/predict.py b/tools/test-harness/fixtures/models/optional-types/run.py similarity index 87% rename from tools/test-harness/fixtures/models/optional-types/predict.py rename to tools/test-harness/fixtures/models/optional-types/run.py index c4a34122df..4647b66319 100644 --- a/tools/test-harness/fixtures/models/optional-types/predict.py +++ b/tools/test-harness/fixtures/models/optional-types/run.py @@ -1,10 +1,10 @@ from typing import Optional -from cog import BasePredictor, File, Input, Path +from cog import BaseRunner, File, Input, Path -class Predictor(BasePredictor): - def predict( +class Runner(BaseRunner): + def run( self, text: str = Input(description="Required string"), # PEP 604 style 
optionals diff --git a/tools/test-harness/fixtures/models/scalar-types/cog.yaml b/tools/test-harness/fixtures/models/scalar-types/cog.yaml index defc03d8cf..e19c4aea46 100644 --- a/tools/test-harness/fixtures/models/scalar-types/cog.yaml +++ b/tools/test-harness/fixtures/models/scalar-types/cog.yaml @@ -1,3 +1,3 @@ build: python_version: "3.12" -predict: "predict.py:Predictor" +run: "run.py:Runner" diff --git a/tools/test-harness/fixtures/models/scalar-types/predict.py b/tools/test-harness/fixtures/models/scalar-types/run.py similarity index 69% rename from tools/test-harness/fixtures/models/scalar-types/predict.py rename to tools/test-harness/fixtures/models/scalar-types/run.py index b6bcd3d4d5..2b5f20f569 100644 --- a/tools/test-harness/fixtures/models/scalar-types/predict.py +++ b/tools/test-harness/fixtures/models/scalar-types/run.py @@ -1,13 +1,12 @@ -from cog import BasePredictor, Input, Secret +from cog import BaseRunner, Input -class Predictor(BasePredictor): - def predict( +class Runner(BaseRunner): + def run( self, text: str = Input(description="A string input"), count: int = Input(description="An integer", default=5), temperature: float = Input(description="A float", default=0.7), flag: bool = Input(description="A boolean", default=True), - api_key: Secret = Input(description="A secret key"), ) -> str: return f"{text}-{count}-{temperature}-{flag}" diff --git a/tools/test-harness/internal/manifest/manifest.go b/tools/test-harness/internal/manifest/manifest.go index 8be4a3ef8e..905e403f16 100644 --- a/tools/test-harness/internal/manifest/manifest.go +++ b/tools/test-harness/internal/manifest/manifest.go @@ -26,6 +26,7 @@ type Model struct { Name string `yaml:"name"` Repo string `yaml:"repo"` Path string `yaml:"path"` + BaseDir string `yaml:"base_dir"` GPU bool `yaml:"gpu"` Timeout int `yaml:"timeout"` RequiresEnv []string `yaml:"requires_env"` @@ -73,8 +74,14 @@ func Load(explicitPath string) (*Manifest, string, error) { return nil, "", fmt.Errorf("parsing 
manifest: %w", err) } - // Apply default timeout + // Apply default timeout and validate model resolution fields. for i := range manifest.Models { + if manifest.Models[i].Repo == "local" { + return nil, "", fmt.Errorf( + "model %q: 'repo: local' is no longer supported -- use 'base_dir' for local models instead", + manifest.Models[i].Name, + ) + } if manifest.Models[i].Timeout == 0 { manifest.Models[i].Timeout = 300 } @@ -127,6 +134,19 @@ func resolvePath(explicitPath string) (string, error) { return "", fmt.Errorf("manifest not found: specify --manifest or run from project root") } +// Source returns a human-readable description of where the model is resolved +// from, mirroring the runner's resolution precedence (base_dir first, then repo). +func (m *Model) Source() string { + if m.BaseDir == "" && m.Repo != "" { + return fmt.Sprintf("%s/%s", m.Repo, m.Path) + } + base := m.BaseDir + if base == "" { + base = "fixtures/models" + } + return fmt.Sprintf("%s/%s", base, m.Path) +} + // GetModel returns a model by name func (m *Manifest) GetModel(name string) *Model { for i := range m.Models { diff --git a/tools/test-harness/internal/manifest/manifest_test.go b/tools/test-harness/internal/manifest/manifest_test.go new file mode 100644 index 0000000000..bacc684ddb --- /dev/null +++ b/tools/test-harness/internal/manifest/manifest_test.go @@ -0,0 +1,88 @@ +package manifest + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func writeManifest(t *testing.T, body string) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "manifest.yaml") + require.NoError(t, os.WriteFile(path, []byte(body), 0o644)) + return path +} + +func TestLoadRejectsRepoLocal(t *testing.T) { + path := writeManifest(t, ` +models: + - name: legacy + repo: local + path: legacy +`) + + _, _, err := Load(path) + require.Error(t, err) + assert.Contains(t, err.Error(), "'repo: local' is no longer 
supported") + assert.Contains(t, err.Error(), "legacy") +} + +func TestLoadAcceptsBaseDirAndRepo(t *testing.T) { + path := writeManifest(t, ` +models: + - name: example + base_dir: examples + path: hello-world + - name: remote + repo: replicate/cog-examples + path: hello-world + - name: fixture + base_dir: tools/test-harness/fixtures/models + path: scalar-types +`) + + mf, _, err := Load(path) + require.NoError(t, err) + require.Len(t, mf.Models, 3) + // Default timeout applied. + assert.Equal(t, 300, mf.Models[0].Timeout) +} + +func TestModelSource(t *testing.T) { + tests := []struct { + name string + model Model + want string + }{ + { + name: "base_dir local", + model: Model{BaseDir: "examples", Path: "hello-world"}, + want: "examples/hello-world", + }, + { + name: "repo clone", + model: Model{Repo: "replicate/cog-examples", Path: "hello-world"}, + want: "replicate/cog-examples/hello-world", + }, + { + name: "default fixtures", + model: Model{Path: "scalar-types"}, + want: "fixtures/models/scalar-types", + }, + { + name: "base_dir takes priority over repo", + model: Model{BaseDir: "examples", Repo: "replicate/cog-examples", Path: "hello-world"}, + want: "examples/hello-world", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, tt.model.Source()) + }) + } +} diff --git a/tools/test-harness/internal/runner/runner.go b/tools/test-harness/internal/runner/runner.go index 43c37de7ab..1361a8b9f7 100644 --- a/tools/test-harness/internal/runner/runner.go +++ b/tools/test-harness/internal/runner/runner.go @@ -119,6 +119,7 @@ type Options struct { SDKVersion string SDKWheel string FixturesDir string + ManifestDir string CleanImages bool KeepOutputs bool Parallel bool // Prefix output lines with model name (for parallel execution) @@ -129,6 +130,7 @@ type Options struct { type Runner struct { opts Options fixturesDir string + manifestDir string workDir string cloneGroup singleflight.Group // deduplicates concurrent clones 
of the same repo } @@ -165,6 +167,7 @@ func New(opts Options) (*Runner, error) { return &Runner{ opts: opts, fixturesDir: fixturesDir, + manifestDir: opts.ManifestDir, workDir: workDir, }, nil } @@ -360,36 +363,74 @@ func (r *Runner) BuildModel(ctx context.Context, model manifest.Model) *report.M return result } +// resolveLocalBaseDir returns the directory to resolve model.Path against +// for local models. A relative BaseDir is resolved against the repo root; an +// absolute BaseDir is used as-is. When BaseDir is empty it falls back to +// fixtures/models. +func (r *Runner) resolveLocalBaseDir(model manifest.Model) string { + if model.BaseDir == "" { + return filepath.Join(r.fixturesDir, "models") + } + if filepath.IsAbs(model.BaseDir) { + return model.BaseDir + } + return filepath.Join(r.repoRoot(), model.BaseDir) +} + +// repoRoot returns the repository root that relative base_dir values are +// resolved against. It walks up from manifestDir looking for a .git entry, +// falling back to manifestDir if none is found. +func (r *Runner) repoRoot() string { + if r.manifestDir == "" { + return "" + } + dir := r.manifestDir + for { + if _, err := os.Stat(filepath.Join(dir, ".git")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + return r.manifestDir + } + dir = parent + } +} + func (r *Runner) prepareModel(ctx context.Context, model manifest.Model) (string, error) { var modelDir string - // Local fixture models - if model.Repo == "local" { - fixturesModels := filepath.Join(r.fixturesDir, "models") - srcDir, err := safeSubpath(fixturesModels, model.Path) + // Resolution precedence: base_dir first, then repo. 
+ // - base_dir set -> local only (never clones) + // - repo set, no base_dir -> clone from GitHub + // - neither set -> local under fixtures/models (fixtures) + if model.BaseDir == "" && model.Repo != "" { + // Clone repo (shared cache, thread-safe) + repoDir, err := r.cloneRepo(ctx, model.Repo) if err != nil { return "", err } - // Copy to work dir - dest := filepath.Join(r.workDir, fmt.Sprintf("local-%s", model.Name)) + // Each model gets its own copy so that setup commands (e.g. + // select.sh) don't clobber each other when running in parallel. + srcDir := filepath.Join(repoDir, model.Path) + dest := filepath.Join(r.workDir, fmt.Sprintf("model-%s", model.Name)) if err := copyDir(srcDir, dest); err != nil { - return "", fmt.Errorf("copying model: %w", err) + return "", fmt.Errorf("copying repo for model %s: %w", model.Name, err) } modelDir = dest } else { - // Clone repo (shared cache, thread-safe) - repoDir, err := r.cloneRepo(ctx, model.Repo) + // Local model: resolve model.Path under base_dir (or fixtures/models). + baseDir := r.resolveLocalBaseDir(model) + srcDir, err := safeSubpath(baseDir, model.Path) if err != nil { return "", err } - // Each model gets its own copy so that setup commands (e.g. - // select.sh) don't clobber each other when running in parallel. - srcDir := filepath.Join(repoDir, model.Path) - dest := filepath.Join(r.workDir, fmt.Sprintf("model-%s", model.Name)) + // Copy to work dir + dest := filepath.Join(r.workDir, fmt.Sprintf("local-%s", model.Name)) if err := copyDir(srcDir, dest); err != nil { - return "", fmt.Errorf("copying repo for model %s: %w", model.Name, err) + return "", fmt.Errorf("copying local model %s from %s: %w", model.Name, srcDir, err) } modelDir = dest } @@ -594,9 +635,13 @@ func (r *Runner) runCogTest(ctx context.Context, modelDir string, model manifest timeout = 300 } - // Build command — pass setup-timeout matching the model timeout so - // cog predict doesn't kill the container during model weight downloads. 
- args := []string{command, "--setup-timeout", fmt.Sprintf("%d", timeout)} + // Build command. Pass setup-timeout matching the model timeout so cog + // predict doesn't kill the container during model weight downloads. + // Only `cog predict` supports --setup-timeout; `cog train` does not. + args := []string{command} + if command == "predict" { + args = append(args, "--setup-timeout", fmt.Sprintf("%d", timeout)) + } keys := make([]string, 0, len(tc.Inputs)) for k := range tc.Inputs { keys = append(keys, k) diff --git a/tools/test-harness/internal/runner/runner_test.go b/tools/test-harness/internal/runner/runner_test.go index cdcc9f72c6..3711600e1f 100644 --- a/tools/test-harness/internal/runner/runner_test.go +++ b/tools/test-harness/internal/runner/runner_test.go @@ -1,13 +1,63 @@ package runner import ( + "os" "path/filepath" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/replicate/cog/tools/test-harness/internal/manifest" ) +func TestResolveLocalBaseDir(t *testing.T) { + // Simulate a repo root (has .git) with the manifest nested under it. 
+ repoRoot := t.TempDir() + require.NoError(t, os.Mkdir(filepath.Join(repoRoot, ".git"), 0o755)) + manifestDir := filepath.Join(repoRoot, "tools", "test-harness") + require.NoError(t, os.MkdirAll(manifestDir, 0o755)) + + r := &Runner{ + fixturesDir: "/fixtures", + manifestDir: manifestDir, + } + + tests := []struct { + name string + baseDir string + want string + }{ + { + name: "no base_dir defaults to fixtures/models", + baseDir: "", + want: filepath.Join("/fixtures", "models"), + }, + { + name: "relative base_dir resolves against repo root", + baseDir: "examples", + want: filepath.Join(repoRoot, "examples"), + }, + { + name: "nested relative base_dir resolves against repo root", + baseDir: "tools/test-harness/fixtures/models", + want: filepath.Join(repoRoot, "tools", "test-harness", "fixtures", "models"), + }, + { + name: "absolute base_dir is used as-is", + baseDir: "/abs/examples", + want: "/abs/examples", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := r.resolveLocalBaseDir(manifest.Model{BaseDir: tt.baseDir}) + assert.Equal(t, tt.want, got) + }) + } +} + func TestSafeSubpathAllowsPathInsideRoot(t *testing.T) { root := t.TempDir() inside := filepath.Join("models", "fixture-a") diff --git a/tools/test-harness/manifest.yaml b/tools/test-harness/manifest.yaml index ba7d7f9d0e..a4c36c8b7b 100644 --- a/tools/test-harness/manifest.yaml +++ b/tools/test-harness/manifest.yaml @@ -28,6 +28,14 @@ # install instructions for known tools (yq, envsubst). # Example: # requires_tools: ["yq"] +# +# Model resolution (base_dir takes priority over repo): +# - Set `base_dir` for local models -- `path` is resolved under it. A +# relative `base_dir` is resolved against the repo root; an absolute path +# is used as-is. Local only; never clones. +# - Set `repo: owner/name` (with no `base_dir`) to clone from GitHub. +# - Set neither to default to fixtures/models/ (local fixtures). +# `repo: local` is no longer supported -- use `base_dir` instead. 
defaults: sdk_version: "latest" # "latest" = newest stable from PyPI; or pin e.g. "0.16.12" @@ -37,7 +45,7 @@ models: # ── cog-examples (CPU) ────────────────────────────────────────────── - name: hello-world - repo: replicate/cog-examples + base_dir: examples path: hello-world gpu: false tests: @@ -49,7 +57,7 @@ models: value: "hello world" - name: canary - repo: replicate/cog-examples + base_dir: examples path: canary gpu: false tests: @@ -61,7 +69,7 @@ models: value: "friend" - name: blur - repo: replicate/cog-examples + base_dir: examples path: blur gpu: false tests: @@ -74,7 +82,7 @@ models: mime: "image/png" - name: hello-image - repo: replicate/cog-examples + base_dir: examples path: hello-image gpu: false tests: @@ -84,7 +92,7 @@ models: type: file_exists - name: hello-concurrency - repo: replicate/cog-examples + base_dir: examples path: hello-concurrency gpu: false tests: @@ -97,7 +105,7 @@ models: value: "Apple" - name: hello-context - repo: replicate/cog-examples + base_dir: examples path: hello-context gpu: false # NOTE: This model uses current_scope().context which may not be @@ -113,12 +121,9 @@ models: text: "testing" - name: hello-train - repo: replicate/cog-examples + base_dir: examples path: hello-train gpu: false - # NOTE: `cog train` in the RC may have input validation issues - # (validates against predict schema instead of train schema). - # The train_test below may fail — that's a real compatibility signal. 
train_tests: - description: "train produces weights file" inputs: @@ -133,10 +138,45 @@ models: type: contains value: "world" - # ── cog-examples (GPU required) ───────────────────────────────────── + - name: notebook + base_dir: examples + path: notebook + gpu: false + tests: + - description: "basic predict" + inputs: + name: "world" + expect: + type: contains + value: "hello" + + - name: managed-weights + base_dir: examples + path: managed-weights + gpu: false + requires_env: + - COG_MANAGED_WEIGHTS_READY + tests: + - description: "weight validation smoke test" + inputs: {} + expect: + type: not_empty + + - name: streaming-text + base_dir: examples + path: streaming-text + gpu: false + tests: + - description: "streaming text generation" + inputs: + prompt: "Hello" + expect: + type: not_empty + + # ── cog-examples (GPU) ────────────────────────────────────────────── - name: resnet - repo: replicate/cog-examples + base_dir: examples path: resnet gpu: true tests: @@ -147,7 +187,7 @@ models: type: json_keys - name: z-image-turbo - repo: replicate/cog-examples + base_dir: examples path: z-image-turbo gpu: true timeout: 600 @@ -159,40 +199,25 @@ models: type: file_exists mime: "image/png" - # ── cog-examples (requires external API, optional) ────────────────── - - - name: hello-replicate - repo: replicate/cog-examples - path: hello-replicate - gpu: false - requires_env: - - REPLICATE_API_TOKEN - tests: - - description: "round-trip through replicate API" - inputs: - image: "@test_image.png" - expect: - type: file_exists - # ── Local fixture models ───────────────────────────────────────────── - # These use repo: local and path: relative to fixtures/models/. - # They cover the full input type matrix for build and prediction testing. + # These set base_dir to the fixtures dir (relative to the repo root), so + # path is relative to it. They cover the full input type matrix for build + # and prediction testing. 
- name: fixture-scalar-types - repo: local + base_dir: tools/test-harness/fixtures/models path: scalar-types gpu: false tests: - description: "basic scalar inputs" inputs: text: "hello" - api_key: "secret123" expect: type: contains value: "hello" - name: fixture-optional-types - repo: local + base_dir: tools/test-harness/fixtures/models path: optional-types gpu: false tests: @@ -204,12 +229,12 @@ models: value: "hello" - name: fixture-list-types - repo: local + base_dir: tools/test-harness/fixtures/models path: list-types gpu: false - name: fixture-optional-list-types - repo: local + base_dir: tools/test-harness/fixtures/models path: optional-list-types gpu: false tests: @@ -221,7 +246,7 @@ models: value: "hello" - name: fixture-constraints-and-choices - repo: local + base_dir: tools/test-harness/fixtures/models path: constraints-and-choices gpu: false tests: @@ -232,12 +257,12 @@ models: value: "hello-0.7-50-balanced-1" - name: fixture-file-path-types - repo: local + base_dir: tools/test-harness/fixtures/models path: file-path-types gpu: false - name: fixture-complex-output - repo: local + base_dir: tools/test-harness/fixtures/models path: complex-output gpu: false tests: