From 443e70ff12513029dea17bbc768c656fcd2c01a1 Mon Sep 17 00:00:00 2001 From: jaylfc Date: Sun, 14 Jun 2026 12:19:39 +0100 Subject: [PATCH 01/11] fix(agent): wire generate_image -> canvas so art lands on the board The demo's image step was broken two ways: - generate_image b64-encoded the route's JSON response as if it were PNG bytes, so 'image_b64' was garbage. It now parses the JSON and returns 'image_ref' (the saved filename) + 'url' (the served path). No consumer used image_b64. - canvas_add_image took a 'file_id' that had to already be a project canvas file, but generate_image saves to the workspace -> the image never rendered. It now takes 'image_ref', copies the workspace PNG into the project's canvas files (projects_root/<slug>/files/canvas/<uuid>.png, where ImageShape renders it), then creates the element. Still ownership-checked; '.name' strips path parts. So the agent flow works end to end: generate_image -> canvas_add_image(image_ref) -> the cover art appears live on the ideas board. Manual + skill schema updated; project_tools/image_tool tests green (19). --- docs/agent-manual/09-os-control.md | 7 +++--- docs/taos-agent-manual.md | 7 +++--- tests/test_project_tools.py | 37 ++++++++++++++++++++++++------ tinyagentos/skills.py | 6 ++--- tinyagentos/tools/image_tool.py | 18 +++++++-------- tinyagentos/tools/project_tools.py | 35 ++++++++++++++++++++++++---- 6 files changed, 80 insertions(+), 30 deletions(-) diff --git a/docs/agent-manual/09-os-control.md b/docs/agent-manual/09-os-control.md index 69d955f0..f1780e9e 100644 --- a/docs/agent-manual/09-os-control.md +++ b/docs/agent-manual/09-os-control.md @@ -20,10 +20,11 @@ update the open Projects app in real time): Returns a `project_id` to use in the next calls. - **add_task** — add a to-do task to a project's board. Args: `project_id`, `title`. - **canvas_add_image** — place a generated image on a project's ideas board. Args: - `project_id`, `file_id` (from `generate_image`), optional `alt`. 
+ `project_id`, `image_ref` (the `image_ref` returned by `generate_image`), optional `alt`. -A typical flow: open the Projects app, create_project, add a few tasks, generate -an image, then canvas_add_image it onto the board. +A typical flow: open the Projects app, create_project, add a few tasks, call +generate_image and keep its `image_ref`, then canvas_add_image(project_id, image_ref) +to drop it on the board. These drive the user's own desktop in their session. Use them to make your work visible: open the relevant app so the user can watch, then carry out the task with diff --git a/docs/taos-agent-manual.md b/docs/taos-agent-manual.md index 1dbfebb2..34d0506c 100644 --- a/docs/taos-agent-manual.md +++ b/docs/taos-agent-manual.md @@ -162,10 +162,11 @@ update the open Projects app in real time): Returns a `project_id` to use in the next calls. - **add_task** — add a to-do task to a project's board. Args: `project_id`, `title`. - **canvas_add_image** — place a generated image on a project's ideas board. Args: - `project_id`, `file_id` (from `generate_image`), optional `alt`. + `project_id`, `image_ref` (the `image_ref` returned by `generate_image`), optional `alt`. -A typical flow: open the Projects app, create_project, add a few tasks, generate -an image, then canvas_add_image it onto the board. +A typical flow: open the Projects app, create_project, add a few tasks, call +generate_image and keep its `image_ref`, then canvas_add_image(project_id, image_ref) +to drop it on the board. These drive the user's own desktop in their session. 
Use them to make your work visible: open the relevant app so the user can watch, then carry out the task with diff --git a/tests/test_project_tools.py b/tests/test_project_tools.py index e6397ab9..1fba1ac7 100644 --- a/tests/test_project_tools.py +++ b/tests/test_project_tools.py @@ -22,7 +22,7 @@ async def create_project(self, **kw): async def get_project(self, project_id): if project_id == "missing": return None - return {"id": project_id, "user_id": self._owner} + return {"id": project_id, "user_id": self._owner, "slug": "luna"} class _FakeTaskStore: @@ -43,18 +43,30 @@ async def add_element(self, **kw): return {"id": "el_1"} -def _req(user_id="user-1", owner="user-1", is_admin=False): +def _req(user_id="user-1", owner="user-1", is_admin=False, base=None): state = types.SimpleNamespace( project_store=_FakeProjectStore(owner=owner), project_task_store=_FakeTaskStore(), project_canvas_store=_FakeCanvasStore(), ) + if base is not None: + # config_path.parent is the data dir; projects live under projects_root. 
+ state.config_path = str(base / "config.json") + state.projects_root = base / "projects" app = types.SimpleNamespace(state=state) return types.SimpleNamespace( app=app, state=types.SimpleNamespace(user_id=user_id, is_admin=is_admin) ) +def _seed_generated_image(base, name="img_cover.png"): + """Create a fake generated image where generate_image would have saved it.""" + d = base / "workspace" / "images" / "generated" + d.mkdir(parents=True, exist_ok=True) + (d / name).write_bytes(b"\x89PNG\r\n\x1a\n fake") + return name + + def test_slugify(): assert _slugify("Luna and the Lighthouse") == "luna-and-the-lighthouse" assert _slugify(" ") == "project" @@ -92,15 +104,26 @@ async def test_add_task_requires_fields(): @pytest.mark.asyncio -async def test_canvas_add_image(): - req = _req() - res = await execute_canvas_add_image({"project_id": "proj_1", "file_id": "img_cover", "alt": "cover"}, req) +async def test_canvas_add_image(tmp_path): + ref = _seed_generated_image(tmp_path) + req = _req(base=tmp_path) + res = await execute_canvas_add_image({"project_id": "proj_1", "image_ref": ref, "alt": "cover"}, req) assert res["ok"] and res["element_id"] == "el_1" + # the generated image was copied into the project's canvas files + canvas_dir = tmp_path / "projects" / "luna" / "files" / "canvas" + copied = list(canvas_dir.glob("*.png")) + assert len(copied) == 1 call = req.app.state.project_canvas_store.calls[0] - assert call["project_id"] == "proj_1" assert call["author_kind"] == "agent" and call["author_id"] == "user-1" el = call["element"] - assert el["kind"] == "image" and el["payload"]["file_id"] == "img_cover" + assert el["kind"] == "image" and el["payload"]["file_id"] == copied[0].name + + +@pytest.mark.asyncio +async def test_canvas_add_image_missing_file(tmp_path): + req = _req(base=tmp_path) + res = await execute_canvas_add_image({"project_id": "proj_1", "image_ref": "nope.png"}, req) + assert "error" in res and "not found" in res["error"] @pytest.mark.asyncio diff 
--git a/tinyagentos/skills.py b/tinyagentos/skills.py index d80fd4fb..9a8dc86e 100644 --- a/tinyagentos/skills.py +++ b/tinyagentos/skills.py @@ -348,15 +348,15 @@ async def _seed_defaults(self): "description": "Place a generated image on a project's canvas", "tool_schema": { "name": "canvas_add_image", - "description": "Place a generated image (by file_id from generate_image) on a project's ideas board.", + "description": "Place a generated image on a project's ideas board.", "input_schema": { "type": "object", "properties": { "project_id": {"type": "string", "description": "Id from create_project."}, - "file_id": {"type": "string", "description": "Image file id from generate_image."}, + "image_ref": {"type": "string", "description": "The image_ref returned by generate_image."}, "alt": {"type": "string", "description": "Alt text."}, }, - "required": ["project_id", "file_id"], + "required": ["project_id", "image_ref"], }, }, "frameworks": { diff --git a/tinyagentos/tools/image_tool.py b/tinyagentos/tools/image_tool.py index fe0db58e..666d218b 100644 --- a/tinyagentos/tools/image_tool.py +++ b/tinyagentos/tools/image_tool.py @@ -172,10 +172,9 @@ async def execute_image_generation( fallback omits the model field so the local backend uses whatever checkpoint it has loaded rather than a pinned model name. - Returns dict with 'success', 'image_b64' (base64 PNG), and 'error' - if failed. + Returns dict with 'success', 'image_ref' (the saved filename, usable by + canvas_add_image), 'url' (web path to the PNG), and 'error' if failed. """ - import base64 import httpx import random @@ -203,14 +202,15 @@ async def execute_image_generation( async with httpx.AsyncClient(timeout=120) as client: resp = await client.post(target_url, json=payload) resp.raise_for_status() - # /api/images/generate returns raw PNG bytes - image_bytes = resp.content + # The scheduler route saves the PNG and returns JSON metadata. 
+ data = resp.json() return { "success": True, - "image_b64": base64.b64encode(image_bytes).decode(), - "seed": seed, - "model": model or "", - "size": size, + "image_ref": data.get("filename", ""), + "url": data.get("path", ""), + "seed": data.get("seed", seed), + "model": data.get("model", model or ""), + "size": data.get("size", size), } except httpx.ConnectError: controller_unreachable = True # fall through to direct path below diff --git a/tinyagentos/tools/project_tools.py b/tinyagentos/tools/project_tools.py index e1697e01..59fd88ef 100644 --- a/tinyagentos/tools/project_tools.py +++ b/tinyagentos/tools/project_tools.py @@ -10,6 +10,8 @@ from __future__ import annotations import re +from pathlib import Path +from uuid import uuid4 from fastapi import Request @@ -18,6 +20,14 @@ def _user_id(request: Request) -> str | None: return getattr(request.state, "user_id", None) or None +def _data_dir(request: Request) -> Path: + """Workspace data dir, resolved the same way images.py does.""" + config_path = getattr(request.app.state, "config_path", None) + if config_path is not None: + return Path(config_path).parent + return Path(__file__).parent.parent.parent / "data" + + async def _owned_project(request: Request, project_id: str, user_id: str): """Return (project, None) if the caller owns project_id (or is admin), else (None, error_dict). Prevents writing tasks/images into another user's project.""" @@ -71,9 +81,11 @@ async def execute_add_task(args: dict, request: Request) -> dict: async def execute_canvas_add_image(args: dict, request: Request) -> dict: project_id = (args or {}).get("project_id") - file_id = (args or {}).get("file_id") - if not isinstance(project_id, str) or not project_id or not isinstance(file_id, str) or not file_id: - return {"error": "canvas_add_image requires 'project_id' and 'file_id' strings"} + # `image_ref` is the filename returned by generate_image; accept the legacy + # `file_id` key too for callers that already have a canvas file id. 
+ image_ref = (args or {}).get("image_ref") or (args or {}).get("file_id") + if not isinstance(project_id, str) or not project_id or not isinstance(image_ref, str) or not image_ref: + return {"error": "canvas_add_image requires 'project_id' and 'image_ref' strings"} try: x = float((args or {}).get("x", 80)) y = float((args or {}).get("y", 80)) @@ -82,9 +94,22 @@ async def execute_canvas_add_image(args: dict, request: Request) -> dict: user_id = _user_id(request) if not user_id: return {"error": "no authenticated user"} - _, err = await _owned_project(request, project_id, user_id) + project, err = await _owned_project(request, project_id, user_id) if err: return err + + # Copy the generated image (saved by generate_image under the workspace) into + # the project's canvas files, where the canvas renders it from + # /api/projects/{slug}/files/canvas/{file_id}. `.name` strips any path part. + src = _data_dir(request) / "workspace" / "images" / "generated" / Path(image_ref).name + if not src.is_file(): + return {"error": f"image not found: {image_ref}"} + slug = project.get("slug") or project_id + canvas_dir = Path(request.app.state.projects_root) / slug / "files" / "canvas" + canvas_dir.mkdir(parents=True, exist_ok=True) + file_id = f"{uuid4().hex}{src.suffix or '.png'}" + (canvas_dir / file_id).write_bytes(src.read_bytes()) + store = request.app.state.project_canvas_store el = await store.add_element( project_id=project_id, @@ -99,4 +124,4 @@ async def execute_canvas_add_image(args: dict, request: Request) -> dict: author_kind="agent", author_id=user_id, ) - return {"ok": True, "element_id": el["id"]} + return {"ok": True, "element_id": el["id"], "file_id": file_id} From e94de444bdcb66e70752c80a36db33e98def15cd Mon Sep 17 00:00:00 2001 From: jaylfc Date: Sun, 14 Jun 2026 12:22:42 +0100 Subject: [PATCH 02/11] =?UTF-8?q?feat(agent):=20describe=5Fimage=5Fcapabil?= =?UTF-8?q?ities=20=E2=80=94=20cluster=20tier/tool=20awareness?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Read-only tool so the agent knows what hardware tiers exist (this host's NPU/GPU/CPU + cluster workers like an NVIDIA box) and which image tools/models each has loaded, before calling generate_image. Maps backend type -> tier (rkllama=npu, sd-cpp=cpu/gpu, comfyui=gpu) and reports loaded state from the backend catalog + cluster manager. Defensive when cluster/catalog absent. The agent picks the model by intent (npu draft vs gpu cover) and generate_image routes there; the scheduler + lifecycle manager own load/unload/queue. Seeded as a skill, wired into skill_exec, manual updated; 4 tool tests + 26 related green. --- docs/agent-manual/09-os-control.md | 5 ++ docs/taos-agent-manual.md | 5 ++ tests/test_cluster_tools.py | 81 +++++++++++++++++++++++++ tinyagentos/routes/skill_exec.py | 11 ++++ tinyagentos/skills.py | 18 ++++++ tinyagentos/tools/cluster_tools.py | 95 ++++++++++++++++++++++++++++++ 6 files changed, 215 insertions(+) create mode 100644 tests/test_cluster_tools.py create mode 100644 tinyagentos/tools/cluster_tools.py diff --git a/docs/agent-manual/09-os-control.md b/docs/agent-manual/09-os-control.md index f1780e9e..5ecbafb7 100644 --- a/docs/agent-manual/09-os-control.md +++ b/docs/agent-manual/09-os-control.md @@ -21,6 +21,11 @@ update the open Projects app in real time): - **add_task** — add a to-do task to a project's board. Args: `project_id`, `title`. - **canvas_add_image** — place a generated image on a project's ideas board. Args: `project_id`, `image_ref` (the `image_ref` returned by `generate_image`), optional `alt`. +- **describe_image_capabilities** — see the hardware tiers (this host + any cluster + workers, e.g. an NVIDIA box) and which image tools/models each has loaded. Use it + to pick the right model before `generate_image`: an NPU model for a fast draft, a + GPU model for a quality cover. The system loads/unloads and queues for you — you + just choose the model. 
A typical flow: open the Projects app, create_project, add a few tasks, call generate_image and keep its `image_ref`, then canvas_add_image(project_id, image_ref) diff --git a/docs/taos-agent-manual.md b/docs/taos-agent-manual.md index 34d0506c..942e9245 100644 --- a/docs/taos-agent-manual.md +++ b/docs/taos-agent-manual.md @@ -163,6 +163,11 @@ update the open Projects app in real time): - **add_task** — add a to-do task to a project's board. Args: `project_id`, `title`. - **canvas_add_image** — place a generated image on a project's ideas board. Args: `project_id`, `image_ref` (the `image_ref` returned by `generate_image`), optional `alt`. +- **describe_image_capabilities** — see the hardware tiers (this host + any cluster + workers, e.g. an NVIDIA box) and which image tools/models each has loaded. Use it + to pick the right model before `generate_image`: an NPU model for a fast draft, a + GPU model for a quality cover. The system loads/unloads and queues for you — you + just choose the model. 
A typical flow: open the Projects app, create_project, add a few tasks, call generate_image and keep its `image_ref`, then canvas_add_image(project_id, image_ref) diff --git a/tests/test_cluster_tools.py b/tests/test_cluster_tools.py new file mode 100644 index 00000000..1a87005b --- /dev/null +++ b/tests/test_cluster_tools.py @@ -0,0 +1,81 @@ +import types + +import pytest + +from tinyagentos.tools.cluster_tools import execute_describe_image_capabilities + + +class _Backend: + def __init__(self, name, type_, models, lifecycle="running"): + self.name = name + self.type = type_ + self.models = models + self.lifecycle_state = lifecycle + + +class _Catalog: + def __init__(self, backends): + self._b = backends + + def backends_with_capability(self, cap): + return self._b if cap == "image-generation" else [] + + +class _Worker: + def __init__(self, name, hardware, backends, status="online"): + self.name = name + self.hardware = hardware + self.backends = backends + self.status = status + + +class _Cluster: + def __init__(self, workers): + self._w = workers + + def get_workers(self): + return self._w + + +def _req(catalog=None, cluster=None, hardware=None): + state = types.SimpleNamespace( + backend_catalog=catalog, cluster_manager=cluster, hardware_profile=hardware + ) + return types.SimpleNamespace(app=types.SimpleNamespace(state=state)) + + +@pytest.mark.asyncio +async def test_local_image_backends_listed_with_tier_and_loaded(): + catalog = _Catalog([_Backend("sd", "sd-cpp", [{"id": "sdxl"}], "running")]) + res = await execute_describe_image_capabilities({}, _req(catalog=catalog, hardware={"gpu": "RTX 3060", "vram": "12GB"})) + local = res["tiers"][0] + assert local["node"] == "local" + assert local["hardware"]["gpu"] == "RTX 3060" + be = local["image_backends"][0] + assert be["type"] == "sd-cpp" and be["tier"] == "cpu/gpu" and be["loaded"] is True + assert be["models"] == ["sdxl"] + + +@pytest.mark.asyncio +async def test_cluster_workers_included(): + worker = 
_Worker("nvidia-box", {"gpu": "3060", "vram": "12GB"}, + [{"name": "sd", "type": "sd-cpp", "capabilities": ["image-generation"], "models": ["sdxl"]}]) + res = await execute_describe_image_capabilities({}, _req(cluster=_Cluster([worker]))) + nodes = [t["node"] for t in res["tiers"]] + assert "nvidia-box" in nodes + w = next(t for t in res["tiers"] if t["node"] == "nvidia-box") + assert w["image_backends"][0]["type"] == "sd-cpp" + + +@pytest.mark.asyncio +async def test_offline_worker_skipped(): + worker = _Worker("down", {}, [], status="offline") + res = await execute_describe_image_capabilities({}, _req(cluster=_Cluster([worker]))) + assert all(t["node"] != "down" for t in res["tiers"]) + + +@pytest.mark.asyncio +async def test_empty_state_is_safe(): + res = await execute_describe_image_capabilities({}, _req()) + assert res["tiers"][0]["node"] == "local" + assert res["tiers"][0]["image_backends"] == [] diff --git a/tinyagentos/routes/skill_exec.py b/tinyagentos/routes/skill_exec.py index 4e40c9c8..bb46909e 100644 --- a/tinyagentos/routes/skill_exec.py +++ b/tinyagentos/routes/skill_exec.py @@ -225,6 +225,16 @@ async def _skill_canvas_add_image(args: dict, request: Request) -> dict: return {"error": str(exc)} +async def _skill_describe_image_capabilities(args: dict, request: Request) -> dict: + """Describe the cluster's image-gen tiers + tools (agent OS control).""" + try: + from tinyagentos.tools.cluster_tools import execute_describe_image_capabilities + + return await execute_describe_image_capabilities(args, request) + except Exception as exc: + return {"error": str(exc)} + + SKILL_IMPLEMENTATIONS = { "memory_search": _skill_memory_search, "file_read": _skill_file_read, @@ -239,6 +249,7 @@ async def _skill_canvas_add_image(args: dict, request: Request) -> dict: "create_project": _skill_create_project, "add_task": _skill_add_task, "canvas_add_image": _skill_canvas_add_image, + "describe_image_capabilities": _skill_describe_image_capabilities, } diff --git 
a/tinyagentos/skills.py b/tinyagentos/skills.py index 9a8dc86e..007ef6dd 100644 --- a/tinyagentos/skills.py +++ b/tinyagentos/skills.py @@ -367,6 +367,24 @@ async def _seed_defaults(self): "install_method": "builtin", "install_target": "tinyagentos.tools.project_tools", }, + { + "id": "describe_image_capabilities", + "name": "Describe Image Capabilities", + "category": "media", + "description": "See the cluster's image-generation tiers and tools (NPU/GPU/CPU)", + "tool_schema": { + "name": "describe_image_capabilities", + "description": "List the hardware tiers (this host + cluster workers) and which image-generation tools/models each has loaded, so you can pick the best one before generate_image.", + "input_schema": {"type": "object", "properties": {}}, + }, + "frameworks": { + "smolagents": "adapter", "openclaw": "adapter", "pocketflow": "adapter", + "langroid": "adapter", "hermes": "adapter", "agent-zero": "adapter", + "openai-agents-sdk": "adapter", "generic": "adapter", + }, + "install_method": "builtin", + "install_target": "tinyagentos.tools.cluster_tools", + }, ] for skill in defaults: diff --git a/tinyagentos/tools/cluster_tools.py b/tinyagentos/tools/cluster_tools.py new file mode 100644 index 00000000..51112053 --- /dev/null +++ b/tinyagentos/tools/cluster_tools.py @@ -0,0 +1,95 @@ +"""Agent tool: describe the cluster's image-generation capabilities. + +Gives the agent read-only awareness of what hardware tiers exist (this host's +NPU/GPU/CPU plus any cluster workers like an NVIDIA box) and which image tools +live on each tier, including what's loaded right now. The agent uses this to +pick the best tool by intent — a fast NPU draft vs the good GPU model for a +cover — and to tell the user what it's doing. + +The agent does NOT manage queues/load/unload; the scheduler + lifecycle manager +do that. This tool is the menu, not the controls. 
+""" +from __future__ import annotations + +from fastapi import Request + +# Map a backend type to the hardware tier it runs on, for the agent's benefit. +_TIER = { + "rkllama": "npu", + "rk-llama-cpp": "npu", + "ezrknpu": "npu", + "sd-cpp": "cpu/gpu", + "comfyui": "gpu", + "ollama": "gpu/cpu", +} + + +def _hw_summary(hw) -> dict: + """Best-effort dict summary of a hardware profile (object or dict).""" + if hw is None: + return {} + if isinstance(hw, dict): + return {k: hw.get(k) for k in ("cpu", "gpu", "npu", "vram", "ram", "tier", "platform") if k in hw} + return { + k: getattr(hw, k) + for k in ("cpu", "gpu", "npu", "vram", "ram", "tier", "platform") + if getattr(hw, k, None) is not None + } + + +def _image_backends_from_catalog(catalog) -> list[dict]: + out = [] + if not catalog: + return out + try: + for be in catalog.backends_with_capability("image-generation"): + out.append({ + "name": be.name, + "type": be.type, + "tier": _TIER.get(be.type, "unknown"), + "loaded": getattr(be, "lifecycle_state", "running") == "running", + "models": [m.get("id") or m.get("name") for m in (be.models or [])][:10], + }) + except Exception: + pass + return out + + +def _image_backends_from_worker(worker) -> list[dict]: + out = [] + for b in (getattr(worker, "backends", None) or []): + caps = b.get("capabilities") or [] + if "image-generation" in caps or b.get("type") in ("sd-cpp", "rkllama", "comfyui"): + out.append({ + "name": b.get("name"), + "type": b.get("type"), + "tier": _TIER.get(b.get("type"), "unknown"), + "models": [m.get("id") or m.get("name") if isinstance(m, dict) else m for m in (b.get("models") or [])][:10], + }) + return out + + +async def execute_describe_image_capabilities(args: dict, request: Request) -> dict: + state = request.app.state + tiers = [{ + "node": "local", + "hardware": _hw_summary(getattr(state, "hardware_profile", None)), + "image_backends": _image_backends_from_catalog(getattr(state, "backend_catalog", None)), + }] + cluster = getattr(state, 
"cluster_manager", None) + if cluster is not None: + try: + for w in cluster.get_workers(): + if getattr(w, "status", "online") != "online": + continue + tiers.append({ + "node": w.name, + "hardware": _hw_summary(getattr(w, "hardware", None)), + "image_backends": _image_backends_from_worker(w), + }) + except Exception: + pass + return { + "tiers": tiers, + "hint": "Pick a model on the tier that fits the task (npu = fast draft, gpu = best quality), then call generate_image with that model. The system loads/unloads and queues for you.", + } From 165e0b833f80b487dd44f6565f20560f96eb29ed Mon Sep 17 00:00:00 2001 From: jaylfc Date: Sun, 14 Jun 2026 12:39:45 +0100 Subject: [PATCH 03/11] fix(agent): address image-gen bot review - describe_image_capabilities returned dataclass fields from a real hardware_profile -> JSON-serialise would 500. Coerce hardware values to JSON primitives (_json_safe) + test with an object profile (Gitar). - generate_image now fails instead of false-succeeding when the scheduler response has no filename (CodeRabbit/Kilo). - canvas_add_image: drop the misleading file_id fallback (a real canvas file_id isn't in the workspace); require image_ref (Kilo). - worker image_backends now report a 'loaded' field for parity with local backends (Gitar). 24 tests green. 
--- tests/test_cluster_tools.py | 22 ++++++++++++++++++++++ tests/test_project_tools.py | 4 ++-- tinyagentos/tools/cluster_tools.py | 26 +++++++++++++++++++------- tinyagentos/tools/image_tool.py | 5 ++++- tinyagentos/tools/project_tools.py | 5 ++--- 5 files changed, 49 insertions(+), 13 deletions(-) diff --git a/tests/test_cluster_tools.py b/tests/test_cluster_tools.py index 1a87005b..08fb2e2e 100644 --- a/tests/test_cluster_tools.py +++ b/tests/test_cluster_tools.py @@ -79,3 +79,25 @@ async def test_empty_state_is_safe(): res = await execute_describe_image_capabilities({}, _req()) assert res["tiers"][0]["node"] == "local" assert res["tiers"][0]["image_backends"] == [] + + +@pytest.mark.asyncio +async def test_object_hardware_profile_is_json_safe(): + """A real hardware_profile is an object with nested objects; the summary must + stay JSON-serialisable (else the tool 500s when returned as JSON).""" + import json + + class _Gpu: + def __repr__(self): + return "RTX 3060 12GB" + + class _HW: + gpu = _Gpu() + npu = None + cpu = "x86" + vram = 12 + + res = await execute_describe_image_capabilities({}, _req(hardware=_HW())) + hw = res["tiers"][0]["hardware"] + assert hw["gpu"] == "RTX 3060 12GB" and hw["cpu"] == "x86" and hw["vram"] == 12 + json.dumps(res) # must not raise diff --git a/tests/test_project_tools.py b/tests/test_project_tools.py index 1fba1ac7..bb6af926 100644 --- a/tests/test_project_tools.py +++ b/tests/test_project_tools.py @@ -138,7 +138,7 @@ async def test_add_task_denied_on_other_users_project(): @pytest.mark.asyncio async def test_canvas_add_image_denied_on_other_users_project(): req = _req(user_id="attacker", owner="victim") - res = await execute_canvas_add_image({"project_id": "proj_1", "file_id": "f"}, req) + res = await execute_canvas_add_image({"project_id": "proj_1", "image_ref": "f"}, req) assert res.get("error") == "not your project" assert req.app.state.project_canvas_store.calls == [] @@ -160,4 +160,4 @@ async def 
test_add_task_missing_project(): async def test_tools_refuse_without_user(): assert "error" in await execute_create_project({"name": "x"}, _req(user_id=None)) assert "error" in await execute_add_task({"project_id": "p", "title": "t"}, _req(user_id=None)) - assert "error" in await execute_canvas_add_image({"project_id": "p", "file_id": "f"}, _req(user_id=None)) + assert "error" in await execute_canvas_add_image({"project_id": "p", "image_ref": "f"}, _req(user_id=None)) diff --git a/tinyagentos/tools/cluster_tools.py b/tinyagentos/tools/cluster_tools.py index 51112053..c673b71e 100644 --- a/tinyagentos/tools/cluster_tools.py +++ b/tinyagentos/tools/cluster_tools.py @@ -24,17 +24,26 @@ } +def _json_safe(v): + """Coerce a value to something JSON-serialisable (the tool result is + returned as JSON, so nested dataclasses/objects would 500).""" + if v is None or isinstance(v, (str, int, float, bool)): + return v + if isinstance(v, dict): + return {str(k): _json_safe(x) for k, x in v.items()} + if isinstance(v, (list, tuple)): + return [_json_safe(x) for x in v] + return str(v) + + def _hw_summary(hw) -> dict: - """Best-effort dict summary of a hardware profile (object or dict).""" + """Best-effort, JSON-safe summary of a hardware profile (object or dict).""" if hw is None: return {} + keys = ("cpu", "gpu", "npu", "vram", "ram", "tier", "platform") if isinstance(hw, dict): - return {k: hw.get(k) for k in ("cpu", "gpu", "npu", "vram", "ram", "tier", "platform") if k in hw} - return { - k: getattr(hw, k) - for k in ("cpu", "gpu", "npu", "vram", "ram", "tier", "platform") - if getattr(hw, k, None) is not None - } + return {k: _json_safe(hw.get(k)) for k in keys if hw.get(k) is not None} + return {k: _json_safe(getattr(hw, k)) for k in keys if getattr(hw, k, None) is not None} def _image_backends_from_catalog(catalog) -> list[dict]: @@ -60,10 +69,13 @@ def _image_backends_from_worker(worker) -> list[dict]: for b in (getattr(worker, "backends", None) or []): caps = 
b.get("capabilities") or [] if "image-generation" in caps or b.get("type") in ("sd-cpp", "rkllama", "comfyui"): + ls = b.get("lifecycle_state") out.append({ "name": b.get("name"), "type": b.get("type"), "tier": _TIER.get(b.get("type"), "unknown"), + # mirror the 'loaded' field local backends report; None = unknown + "loaded": b.get("loaded") if "loaded" in b else (ls == "running" if ls else None), "models": [m.get("id") or m.get("name") if isinstance(m, dict) else m for m in (b.get("models") or [])][:10], }) return out diff --git a/tinyagentos/tools/image_tool.py b/tinyagentos/tools/image_tool.py index 666d218b..46381f5c 100644 --- a/tinyagentos/tools/image_tool.py +++ b/tinyagentos/tools/image_tool.py @@ -204,9 +204,12 @@ async def execute_image_generation( resp.raise_for_status() # The scheduler route saves the PNG and returns JSON metadata. data = resp.json() + filename = data.get("filename") + if not filename: + return {"success": False, "error": f"image backend returned no filename: {str(data)[:200]}"} return { "success": True, - "image_ref": data.get("filename", ""), + "image_ref": filename, "url": data.get("path", ""), "seed": data.get("seed", seed), "model": data.get("model", model or ""), diff --git a/tinyagentos/tools/project_tools.py b/tinyagentos/tools/project_tools.py index 59fd88ef..33a1070b 100644 --- a/tinyagentos/tools/project_tools.py +++ b/tinyagentos/tools/project_tools.py @@ -81,9 +81,8 @@ async def execute_add_task(args: dict, request: Request) -> dict: async def execute_canvas_add_image(args: dict, request: Request) -> dict: project_id = (args or {}).get("project_id") - # `image_ref` is the filename returned by generate_image; accept the legacy - # `file_id` key too for callers that already have a canvas file id. - image_ref = (args or {}).get("image_ref") or (args or {}).get("file_id") + # `image_ref` is the filename returned by generate_image (a workspace file). 
+ image_ref = (args or {}).get("image_ref") if not isinstance(project_id, str) or not project_id or not isinstance(image_ref, str) or not image_ref: return {"error": "canvas_add_image requires 'project_id' and 'image_ref' strings"} try: From 10f4732c4b7be9291d9f6af00f10033d351d25c8 Mon Sep 17 00:00:00 2001 From: jaylfc Date: Sun, 14 Jun 2026 12:57:52 +0100 Subject: [PATCH 04/11] fix(agent-image): address bot review findings - skills.py: refresh code-owned (builtin) skill rows after INSERT OR IGNORE so installs seeded by an earlier release (e.g. the Pi, with the pre-image_ref canvas_add_image schema) converge on the current tool_schema. Scoped to install_method='builtin' so user-installed skills are never overwritten. - image_tool.py: type-validate the scheduler 'filename' (not just truthiness) before claiming success; document that the controller-unreachable fallback returns image_b64 (no controller workspace to save into), so the image_ref contract only applies to the in-process scheduler path. - project_tools.py: reject a project slug that isn't its own slugify and verify the resolved canvas dir stays within projects_root (defense-in-depth against a legacy/odd slug escaping the tree). - cluster_tools.py: include HardwareProfile's real 'ram_mb' field in the hardware summary so total RAM (a tier signal) isn't dropped. - tests: scheduler mocks now return real filename/path metadata. 
Refs #884 --- tests/test_image_tool.py | 9 +++++++++ tinyagentos/skills.py | 18 ++++++++++++++++++ tinyagentos/tools/cluster_tools.py | 5 ++++- tinyagentos/tools/image_tool.py | 9 ++++++++- tinyagentos/tools/project_tools.py | 11 ++++++++++- 5 files changed, 49 insertions(+), 3 deletions(-) diff --git a/tests/test_image_tool.py b/tests/test_image_tool.py index 1c272065..8ea6c25e 100644 --- a/tests/test_image_tool.py +++ b/tests/test_image_tool.py @@ -161,6 +161,9 @@ async def test_image_generation_forwards_new_params(): mock_resp.status_code = 200 mock_resp.content = fake_png mock_resp.raise_for_status = MagicMock() + # Scheduler route returns JSON metadata (filename + path), which the tool + # now requires to honour the image_ref contract. + mock_resp.json = MagicMock(return_value={"filename": "gen.png", "path": "/api/images/files/gen.png"}) captured_payload: dict = {} captured_url: dict = {} @@ -205,6 +208,9 @@ async def test_image_generation_default_routes_via_scheduler(): mock_resp.status_code = 200 mock_resp.content = fake_png mock_resp.raise_for_status = MagicMock() + # Scheduler route returns JSON metadata (filename + path), which the tool + # now requires to honour the image_ref contract. + mock_resp.json = MagicMock(return_value={"filename": "gen.png", "path": "/api/images/files/gen.png"}) captured: dict = {} @@ -245,6 +251,9 @@ async def test_image_generation_blank_model_treated_as_omitted(): mock_resp.status_code = 200 mock_resp.content = fake_png mock_resp.raise_for_status = MagicMock() + # Scheduler route returns JSON metadata (filename + path), which the tool + # now requires to honour the image_ref contract. 
+ mock_resp.json = MagicMock(return_value={"filename": "gen.png", "path": "/api/images/files/gen.png"}) captured: dict = {} diff --git a/tinyagentos/skills.py b/tinyagentos/skills.py index 007ef6dd..9e479052 100644 --- a/tinyagentos/skills.py +++ b/tinyagentos/skills.py @@ -400,6 +400,24 @@ async def _seed_defaults(self): time.time(), ), ) + # INSERT OR IGNORE leaves an existing row untouched, so an install + # seeded by an earlier release keeps its stale tool_schema (e.g. the + # pre-image_ref canvas_add_image contract). Refresh the code-owned + # fields for builtin skills so existing installs converge on the + # current definition. Scoped to install_method='builtin' so a user's + # installed/customised skills are never overwritten. + await self._db.execute( + """UPDATE skills + SET name = ?, category = ?, description = ?, tool_schema = ?, + frameworks = ?, requires_services = ?, install_target = ? + WHERE id = ? AND install_method = 'builtin'""", + ( + skill["name"], skill["category"], skill["description"], + json.dumps(skill["tool_schema"]), json.dumps(skill["frameworks"]), + json.dumps(skill.get("requires_services", [])), + skill["install_target"], skill["id"], + ), + ) await self._db.commit() async def list_skills(self, category: str | None = None) -> list[dict]: diff --git a/tinyagentos/tools/cluster_tools.py b/tinyagentos/tools/cluster_tools.py index c673b71e..442a0d89 100644 --- a/tinyagentos/tools/cluster_tools.py +++ b/tinyagentos/tools/cluster_tools.py @@ -40,7 +40,10 @@ def _hw_summary(hw) -> dict: """Best-effort, JSON-safe summary of a hardware profile (object or dict).""" if hw is None: return {} - keys = ("cpu", "gpu", "npu", "vram", "ram", "tier", "platform") + # HardwareProfile stores total RAM as `ram_mb`; include both that and the + # generic `ram`/`vram` keys so dict- and dataclass-shaped profiles both + # surface memory, the main tier-selection signal. 
+ keys = ("cpu", "gpu", "npu", "vram", "ram", "ram_mb", "tier", "platform") if isinstance(hw, dict): return {k: _json_safe(hw.get(k)) for k in keys if hw.get(k) is not None} return {k: _json_safe(getattr(hw, k)) for k in keys if getattr(hw, k, None) is not None} diff --git a/tinyagentos/tools/image_tool.py b/tinyagentos/tools/image_tool.py index 46381f5c..1ac91392 100644 --- a/tinyagentos/tools/image_tool.py +++ b/tinyagentos/tools/image_tool.py @@ -174,6 +174,13 @@ async def execute_image_generation( Returns dict with 'success', 'image_ref' (the saved filename, usable by canvas_add_image), 'url' (web path to the PNG), and 'error' if failed. + + Note: the connect-failure fallback (used only when the controller itself + is unreachable, e.g. an LXC agent that can't see localhost:6969) returns + 'image_b64' instead of 'image_ref' -- it has no controller workspace to + save into. In-process tool calls always take the scheduler path above and + get an 'image_ref', so canvas_add_image works; a caller relying on the + fallback must handle the bytes itself. """ import httpx import random @@ -205,7 +212,7 @@ async def execute_image_generation( # The scheduler route saves the PNG and returns JSON metadata. 
data = resp.json() filename = data.get("filename") - if not filename: + if not isinstance(filename, str) or not filename: return {"success": False, "error": f"image backend returned no filename: {str(data)[:200]}"} return { "success": True, diff --git a/tinyagentos/tools/project_tools.py b/tinyagentos/tools/project_tools.py index 33a1070b..be477605 100644 --- a/tinyagentos/tools/project_tools.py +++ b/tinyagentos/tools/project_tools.py @@ -103,8 +103,17 @@ async def execute_canvas_add_image(args: dict, request: Request) -> dict: src = _data_dir(request) / "workspace" / "images" / "generated" / Path(image_ref).name if not src.is_file(): return {"error": f"image not found: {image_ref}"} + # The slug is the on-disk directory AND the key the canvas render route + # (/api/projects/{slug}/files/canvas/{file_id}) reads back, so it must stay + # the project's real slug. New projects slugify safely, but reject a legacy + # row or fallback id that carries separators rather than escape projects_root. slug = project.get("slug") or project_id - canvas_dir = Path(request.app.state.projects_root) / slug / "files" / "canvas" + if slug != _slugify(slug): + return {"error": f"unsafe project slug: {slug!r}"} + projects_root = Path(request.app.state.projects_root).resolve() + canvas_dir = (projects_root / slug / "files" / "canvas").resolve() + if not canvas_dir.is_relative_to(projects_root): + return {"error": "resolved canvas path escapes projects_root"} canvas_dir.mkdir(parents=True, exist_ok=True) file_id = f"{uuid4().hex}{src.suffix or '.png'}" (canvas_dir / file_id).write_bytes(src.read_bytes()) From fe2b287d296fc20fdc5e5be2282e07eaf3526d50 Mon Sep 17 00:00:00 2001 From: jaylfc Date: Sun, 14 Jun 2026 12:51:51 +0100 Subject: [PATCH 05/11] fix(store): make the rkllama service install entry actually work (#844) The rkllama service manifest declared install.method: script pointing at scripts/install-rkllama.sh, but that file never existed -- only scripts/install-rknpu.sh did. 
ScriptInstaller resolves install.script relative to the repo root, so clicking Install in the store failed with 'script not found'. rkllama is the NPU LLM backend our RK3588 target audience needs from the first boot, so this broke their primary path. - Add scripts/install-rkllama.sh: an idempotent, headless wrapper the store's ScriptInstaller can run non-interactively. It short-circuits to exit 0 when a live rkllama already answers (7833 or legacy 8080), and otherwise delegates to the verified install-rknpu.sh with TAOS_RKNPU_SETUP=1 set explicitly. That env var is required: without it install-rknpu.sh takes its 'non-interactive shell, nothing to confirm -> exit 0' path and would report success while installing nothing. Keeping the heavy NPU runtime install behind a store-triggered script (cloned at install time, not bundled) keeps the arms-length, source-available posture intact. - Harden RkllamaInstaller's /api/tags verification. A 200 from /api/pull is necessary but not sufficient; only /api/tags confirms the weight is loadable. Previously an unreachable /api/tags was swallowed and the install returned a false success. Now it retries a few times and, if the check never succeeds, returns success: False with an actionable error -- a model the agent can't load is worse than a clear failure. - Tests: install() verification (confirmed / absent / unreachable) plus a regression guard asserting the rkllama manifest's install.script exists. --- scripts/install-rkllama.sh | 45 ++++++++++ tests/test_rkllama_installer.py | 93 +++++++++++++++++++++ tinyagentos/installers/rkllama_installer.py | 42 +++++++--- 3 files changed, 169 insertions(+), 11 deletions(-) create mode 100755 scripts/install-rkllama.sh diff --git a/scripts/install-rkllama.sh b/scripts/install-rkllama.sh new file mode 100755 index 00000000..9a542a97 --- /dev/null +++ b/scripts/install-rkllama.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Store entrypoint for the rkllama (RK3588 NPU LLM) service. 
+# +# This is the script the App Store's `rkllama` service manifest points at +# (install.method: script). The store's ScriptInstaller invokes it +# non-interactively as `bash install-rkllama.sh `, so this +# wrapper must be headless, idempotent, and must never report success +# without actually installing. +# +# It is a thin wrapper over the verified NPU installer (install-rknpu.sh): +# 1. If rkllama already answers locally, exit 0 (idempotent no-op). +# 2. Otherwise delegate to install-rknpu.sh in headless mode. We set +# TAOS_RKNPU_SETUP=1 explicitly so install-rknpu.sh does NOT take its +# "non-interactive shell, nothing to confirm -> exit 0" path, which +# would otherwise return success while installing nothing. +# +# install-rknpu.sh handles board detection (it dies on non-RK3588 hosts) +# and uses sudo only for the privileged librknnrt + systemd steps; in a +# store context without a TTY those sudo calls fail loudly (non-zero), +# which ScriptInstaller correctly surfaces as an install failure. +set -euo pipefail + +PROJECT_DIR="${1:-$(pwd)}" +PORT="${TAOS_RKLLAMA_PORT:-7833}" +LEGACY_PORT=8080 + +# 1. Idempotent short-circuit: a live rkllama already satisfies the install. +for p in "$PORT" "$LEGACY_PORT"; do + if curl -fsS --max-time 2 "http://localhost:${p}/api/tags" >/dev/null 2>&1; then + echo "rkllama already running on port ${p} — nothing to install" + exit 0 + fi +done + +NPU_SCRIPT="${PROJECT_DIR}/scripts/install-rknpu.sh" +if [[ ! -f "$NPU_SCRIPT" ]]; then + echo "install-rkllama.sh: expected NPU installer at ${NPU_SCRIPT}" >&2 + exit 1 +fi + +# 2. Delegate to the verified installer in headless mode. TAOS_RKNPU_SETUP=1 +# skips the interactive confirmation AND the false-success exit-0 path. 
+echo "rkllama not detected — running NPU installer (${NPU_SCRIPT})" +exec env TAOS_RKNPU_SETUP=1 TAOS_RKLLAMA_PORT="${PORT}" \ + bash "$NPU_SCRIPT" --yes diff --git a/tests/test_rkllama_installer.py b/tests/test_rkllama_installer.py index 2d743802..00154c0a 100644 --- a/tests/test_rkllama_installer.py +++ b/tests/test_rkllama_installer.py @@ -6,9 +6,12 @@ """ from __future__ import annotations +import httpx import pytest +import respx from tinyagentos.installers.rkllama_installer import ( + RkllamaInstaller, parse_hf_resolve_url, resolve_rkllama_url, rkllama_is_running, @@ -117,3 +120,93 @@ def test_remote_name_becomes_url_7833(self): def test_ip_address_7833(self): assert resolve_rkllama_url("192.168.1.10") == "http://192.168.1.10:7833" + + +_VARIANT = { + "id": "qwen2.5-3b", + "download_url": ( + "https://huggingface.co/c01zaut/Qwen2.5-3B-Instruct-rk3588-1.1.1/" + "resolve/main/Qwen2.5-3B-Instruct-rk3588-w8a8.rkllm" + ), +} + + +class TestInstallVerification: + """install() must only report success once /api/tags confirms the model. + + A 200 from /api/pull alone is necessary but not sufficient -- a model the + agent can't load is worse than a clear error, so an unconfirmable pull + fails rather than returning a false success. + """ + + def _installer(self): + # Pass an explicit URL so __init__ doesn't probe the network. 
+ return RkllamaInstaller(rkllama_url="http://localhost:7833") + + @respx.mock + @pytest.mark.asyncio + async def test_success_when_tags_lists_model(self): + respx.post("http://localhost:7833/api/pull").mock( + return_value=httpx.Response(200, text='{"status":"success"}\n') + ) + respx.get("http://localhost:7833/api/tags").mock( + return_value=httpx.Response(200, json={"models": [{"name": "rkllama-x"}]}) + ) + res = await self._installer().install("rkllama-x", {}, variant=_VARIANT) + assert res["success"] is True + assert res["model_name"] == "rkllama-x" + + @respx.mock + @pytest.mark.asyncio + async def test_failure_when_model_absent_from_tags(self): + respx.post("http://localhost:7833/api/pull").mock( + return_value=httpx.Response(200, text='{"status":"success"}\n') + ) + respx.get("http://localhost:7833/api/tags").mock( + return_value=httpx.Response(200, json={"models": [{"name": "other"}]}) + ) + res = await self._installer().install("rkllama-x", {}, variant=_VARIANT) + assert res["success"] is False + assert "not in" in res["error"] + + @respx.mock + @pytest.mark.asyncio + async def test_failure_when_tags_unreachable(self, monkeypatch): + # Previously this path returned a false success. Now an unreachable + # /api/tags (after retries) is a clean failure. + monkeypatch.setattr(rkllama_installer.asyncio, "sleep", _no_sleep) + respx.post("http://localhost:7833/api/pull").mock( + return_value=httpx.Response(200, text='{"status":"success"}\n') + ) + respx.get("http://localhost:7833/api/tags").mock( + side_effect=httpx.ConnectError("refused") + ) + res = await self._installer().install("rkllama-x", {}, variant=_VARIANT) + assert res["success"] is False + assert "could not be reached" in res["error"] + + +async def _no_sleep(*_a, **_k): + return None + + +class TestRkllamaServiceManifest: + """The rkllama service manifest (install.method: script) must point at a + script that actually exists. 
ScriptInstaller resolves install.script + relative to the repo root (its cwd), so a missing file means the store + install fails with 'script not found'. This is the #844 regression guard. + """ + + def test_install_script_exists(self): + import pathlib + import yaml + + repo = pathlib.Path(__file__).resolve().parent.parent + manifest = yaml.safe_load( + (repo / "app-catalog" / "services" / "rkllama" / "manifest.yaml").read_text() + ) + install = manifest.get("install") or {} + assert install.get("method") == "script" + script = install.get("script") + assert script, "rkllama manifest declares no install.script" + assert (repo / script).is_file(), f"rkllama install script missing: {script}" diff --git a/tinyagentos/installers/rkllama_installer.py b/tinyagentos/installers/rkllama_installer.py index 73aea082..739df0e0 100644 --- a/tinyagentos/installers/rkllama_installer.py +++ b/tinyagentos/installers/rkllama_installer.py @@ -14,6 +14,7 @@ """ from __future__ import annotations +import asyncio import logging import re import socket @@ -184,12 +185,18 @@ async def install( } # Verify the model now appears in /api/tags so we know rkllama - # successfully registered it. - try: - async with httpx.AsyncClient(timeout=10) as client: - tags = await client.get(f"{self.rkllama_url}/api/tags") - tags.raise_for_status() - names = {m.get("name") for m in tags.json().get("models", [])} + # successfully registered it. The pull returning 200 is necessary but + # not sufficient: only /api/tags confirms the weight is loadable. We + # retry a few times to tolerate a transient blip, but if the check + # never succeeds we report failure rather than a false success -- a + # model the agent can't actually load is worse than a clear error. 
+ last_exc: httpx.HTTPError | None = None + for attempt in range(3): + try: + async with httpx.AsyncClient(timeout=10) as client: + tags = await client.get(f"{self.rkllama_url}/api/tags") + tags.raise_for_status() + names = {m.get("name") for m in tags.json().get("models", [])} if app_id not in names: return { "success": False, @@ -198,11 +205,24 @@ async def install( f"/api/tags. Known models: {sorted(names)[:5]}" ), } - except httpx.HTTPError as exc: - logger.warning( - "rkllama install: /api/tags verification failed: %s", exc - ) - # Non-fatal -- pull succeeded; verification problem is likely transient. + break # verified + except httpx.HTTPError as exc: + last_exc = exc + logger.warning( + "rkllama install: /api/tags verification attempt %d/3 failed: %s", + attempt + 1, exc, + ) + if attempt < 2: + await asyncio.sleep(1.0 * (attempt + 1)) + else: + # Exhausted retries without ever confirming the model. + return { + "success": False, + "error": ( + f"rkllama pull returned 200 but /api/tags could not be reached " + f"to confirm {app_id!r} installed: {last_exc}. Retry the install." + ), + } return {"success": True, "app_id": app_id, "model_name": app_id} From 118409a5540af28cd2c6fbf98cc60dd61b94a6c5 Mon Sep 17 00:00:00 2001 From: jaylfc Date: Sun, 14 Jun 2026 13:04:20 +0100 Subject: [PATCH 06/11] docs(status): freshness sweep -- correct branch tips and open-PR list - branch tips: master=51837bed (#887 released #885), dev=d5c089e9 - open PRs: #884 (agent image-gen, review fixes baking), #886 (rkllama store fix #844, off origin/dev), #876 (deps); #885 merged dev->master - record the #844/#884 fix details and the catalog-manifest debt note --- docs/STATUS.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/STATUS.md b/docs/STATUS.md index 699e4c4a..5ef63805 100644 --- a/docs/STATUS.md +++ b/docs/STATUS.md @@ -1,9 +1,10 @@ SINGLE SOURCE OF TRUTH for cross-agent handoff. -Last updated: 2026-06-14 ~12:10 BST, @taOS (active). 
+Last updated: 2026-06-14 ~13:05 BST, @taOS (active). ▶ RELEASED TO MASTER 2026-06-14 (#883, master=c9c5b0c9, Jay asked "merge dev to main so all users get updates"): the whole overnight body of work is now on master — agent OS control framework (#877-882), macOS-dark theme + purple purge (#879), App Store/real-desktop/Agents/chat redesigns, mobile chat #880 + chat-pwa theme #881. Merge-commit (history preserved), dev NOT deleted. master strict-mode + behind required an admin merge. ▶ IN FLIGHT: PR #884 agent-controlled image generation (the storybook demo's image step): generate_image now returns image_ref (fixed a broken b64-of-JSON bug) + canvas_add_image copies the workspace PNG into the project canvas files so art renders on the board; + describe_image_capabilities (read-only cluster tier/tool awareness, agent picks model by intent, system owns load/unload/queue). 26 tests. Baking -> merge to dev -> deploy Pi -> drive the FULL storybook flow to verify. DEMO BLOCKER TO CHECK: an image backend (sd-cpp on 3060 / rkllama on Pi NPU) must be installed+reachable or generate_image fails. Cross-worker image routing (Pi->3060) is a SEPARATE greenlight (TaskRouter exists, not auto-invoked). + -- 2026-06-14 ~12:50: pushed bot-review fixes to #884 (commit 10f4732c): skills.py now REFRESHES builtin skill rows after INSERT OR IGNORE (so the Pi, seeded by #882 with the old file_id canvas schema, converges on image_ref); strict filename type-check in image_tool + honest fallback docstring; project_tools canvas-path slug guard (reject non-slugify slug + assert inside projects_root); cluster_tools includes ram_mb. 67 touched-suite tests green. Re-baking for bot re-review. CAUTION LEARNED: local `dev` had 3 unpushed #884 commits -> a branch cut from it (the rkllama branch) accidentally bundled them; fixed by rebasing #886 --onto origin/dev and resetting local dev to origin/dev. Always branch from origin/dev. 
▶ X POST (Jay, premium): drafted, honest framing (NOT first-ever; Goose/Open-Interpreter/Self-Operating-Computer exist). Angle = agent-native OS so a 4B local model drives the whole thing offline; post WITH the demo video. Add the win to README+website too (draft for approval). Private reasoning only. ▶▶ MORNING MUST-DO (Jay overnight ask, asleep): features tested+working by morning; agent OS control DONE simple; **offline agent RESULTS by morning**. @@ -36,15 +37,17 @@ Last updated: 2026-06-14 ~12:10 BST, @taOS (active). 4. PROMO HERO PROGRAM (memory [[promo-hero-initiative]]): only the agent CHAT + a demo PROJECT stay mock; build everything else REAL. Hero = multi-window (chat + project canvas + store), 5:2 X-cut on all promo. Needs store (#871), project canvas/mind-map (#16, net-new), demo seed (#17), agent window-mgmt API (#18). Mock data PRIVATE on local `marketing` branch (never push/merge; MARKETING.md). 5. Also queued: store popularity LIVE stars backend (#13), per-app install telemetry -> the now-secured stats page (#15), widget redesign (#19, NOT in the shot), mobile audit, wallpaper picker #864, island v2 #854, GitHub #858 ph2, live-wallpaper package brainstorm. -Branch tips: master=6394a3ed. dev=67dceb64 (#877-#882 agent OS control + mobile + theme MERGED). Merged overall this session: #867 #868 #869 #870 (theme/wallpaper), #871 (store redesign), #873 (real desktop: dock right-click + inline New Folder + FS-backed icons + rename API), #874 (window.taosDesktop control API + docs/desktop-control.md); taos-website #5 (stats Basic Auth -> main, set STATS_USER/STATS_PASS in Coolify). Local-only `marketing` branch (private, no upstream; NEVER push/merge). +Branch tips: master=51837bed (#887 released #885 to master), dev=d5c089e9 (#885 mobile branch-dropdown fix merged). 
Merged overall this session: #867 #868 #869 #870 (theme/wallpaper), #871 (store redesign), #873 (real desktop: dock right-click + inline New Folder + FS-backed icons + rename API), #874 (window.taosDesktop control API + docs/desktop-control.md); taos-website #5 (stats Basic Auth -> main, set STATS_USER/STATS_PASS in Coolify). Local-only `marketing` branch (private, no upstream; NEVER push/merge). -Session state: ACTIVE (autonomous overnight). ALL baking PRs MERGED to dev (tip=4ecc7961): #872 (tsParticles wallpaper + sliders), #873 (real desktop), #874 (agent OS controls). Open-PR queue drained (only draft #476 remains; #846 already CLOSED). #872 SWAPS the animated wallpaper renderer from the hand-rolled canvas NeuralWallpaper (component "neural") to tsParticles ParticlesWallpaper (component "particles"); theme-store registers id "neural-live" w/ component "particles" -- VERIFY the tsParticles look LIVE on Pi (headless can't rasterize it). #25 (tiled double-header) CLOSED: not a bug, was the 32px top-bar chrome. SECURITY: dependabot alert #5 (esbuild RCE < 0.28.1) is STALE -- desktop already pins esbuild 0.28.1 via overrides (lockfile + installed both 0.28.1); leave for dependabot to auto-close, no code change. #19 widget redesign HELD for Jay (taste + depends on the desktop/widget/dash mode-switcher brainstorm [[project_desktop_modes]]). FEDORA MODEL TESTS (Jay 2026-06-14 ~02:00): eval harness + runbook built PRIVATE (~/tinyagentos-private/specs/storybook-demo/storybook_toolcall_eval.py) -- scores local models on the storybook tool-call flow incl ID-threading; A2A sent to @taOSmd (msg 431) to coordinate Fedora box (it's mid E-009 sweep, do NOT interrupt); awaiting its ping + local-model list. tsParticles look + Safari dark<->light + live-wallpaper animation + desktop icons/thumbnails are all best checked LIVE on the Pi (preview has no backend; tsParticles canvas does not rasterize headless). +Session state: ACTIVE (autonomous overnight). 
OPEN PRs in flight: #884 (agent image-gen, review fixes pushed, baking), #886 (rkllama store fix #844, off origin/dev, baking), #876 (dependabot SPA deps), draft #476. #885 merged dev->master via #887. #872 SWAPS the animated wallpaper renderer from the hand-rolled canvas NeuralWallpaper (component "neural") to tsParticles ParticlesWallpaper (component "particles"); theme-store registers id "neural-live" w/ component "particles" -- VERIFY the tsParticles look LIVE on Pi (headless can't rasterize it). #25 (tiled double-header) CLOSED: not a bug, was the 32px top-bar chrome. SECURITY: dependabot alert #5 (esbuild RCE < 0.28.1) is STALE -- desktop already pins esbuild 0.28.1 via overrides (lockfile + installed both 0.28.1); leave for dependabot to auto-close, no code change. #19 widget redesign HELD for Jay (taste + depends on the desktop/widget/dash mode-switcher brainstorm [[project_desktop_modes]]). FEDORA MODEL TESTS (Jay 2026-06-14 ~02:00): eval harness + runbook built PRIVATE (~/tinyagentos-private/specs/storybook-demo/storybook_toolcall_eval.py) -- scores local models on the storybook tool-call flow incl ID-threading; A2A sent to @taOSmd (msg 431) to coordinate Fedora box (it's mid E-009 sweep, do NOT interrupt); awaiting its ping + local-model list. tsParticles look + Safari dark<->light + live-wallpaper animation + desktop icons/thumbnails are all best checked LIVE on the Pi (preview has no backend; tsParticles canvas does not rasterize headless). WEBSITE: taos.my live. All 4 taos-website PRs merged (stats/changelog/nav/accessibility). CI: test suite parallelized via #839 (xdist -n auto). CodeRabbit may be out of credits -- do not merge on a fake rate-limit pass. Use @coderabbitai full review to retrigger; manual review OK for tiny already-reviewed PRs. 
OPEN PRs: +- #886 fix(store): rkllama service install entry (#844) -- 3 files off origin/dev, baking; merge dev->master when green +- #884 feat(agent): agent-controlled image generation -- bot-review fixes pushed (10f4732c), baking; merge to dev then deploy Pi - #876 chore(deps): dependabot SPA deps group bump (32 updates) -- review and merge when CI green @@ -52,7 +55,7 @@ OPEN PRs: (#872/#871 MERGED to dev; #846 SUPERSEDED by #849 on dev; taos-website #5 merged to main.) Notable open issues (bugs first): -- #844 rkllama store-UI install chain broken (wrong script + non-interactive false-success) -- unresolved +- #844 rkllama store-UI install chain broken (wrong script + non-interactive false-success) -- FIX IN PR #886 (off origin/dev): adds scripts/install-rkllama.sh (idempotent headless wrapper -> delegates to install-rknpu.sh with TAOS_RKNPU_SETUP=1 so it can't take the false-success exit-0; short-circuits when rkllama already answers 7833/8080) + hardens RkllamaInstaller /api/tags verify (retry then fail, no more swallowed false success) + regression guard test. NOTE found while auditing: ~19 OTHER catalog manifests (stable-diffusion-cpp, wan2gp, dify, agents...) reference install scripts that also don't exist at repo root -- separate follow-up, NOT in #886. 
- #841 update check shows no updates when local branch diverged from origin -- unresolved - #825 taOS agent model swap breaks routing (stale per-agent key preferred over master key) - #840 chat: per-agent framework slash commands (Telegram-style) in DMs and via @agent / From d6960af0890f64dbbb667f3808e7f0d17bd927a3 Mon Sep 17 00:00:00 2001 From: jaylfc Date: Sun, 14 Jun 2026 13:16:06 +0100 Subject: [PATCH 07/11] fix(store): harden rkllama verify + readiness check (bot review) - RkllamaInstaller verify loop: catch ValueError so a 200 with a non-JSON /api/tags body fails the check instead of raising JSONDecodeError out of install(); validate the response shape (dict with a list of dict models); retry on an absent model too, not just on HTTPError, so registration lag after the pull's 200 no longer reports a false failure. - install-rkllama.sh: the idempotent short-circuit now requires an rkllama/Ollama-shaped /api/tags body (a "models" key) rather than any HTTP 200, so an unrelated service on 7833/8080 can't be mistaken for an installed rkllama. Mirrors _port_responds_with_rkllama(). - tests: non-JSON /api/tags and late-appearing model. Refs #844 #886 --- scripts/install-rkllama.sh | 7 ++- tests/test_rkllama_installer.py | 41 ++++++++++++-- tinyagentos/installers/rkllama_installer.py | 59 ++++++++++++--------- 3 files changed, 77 insertions(+), 30 deletions(-) diff --git a/scripts/install-rkllama.sh b/scripts/install-rkllama.sh index 9a542a97..0ab8b104 100755 --- a/scripts/install-rkllama.sh +++ b/scripts/install-rkllama.sh @@ -25,8 +25,13 @@ PORT="${TAOS_RKLLAMA_PORT:-7833}" LEGACY_PORT=8080 # 1. Idempotent short-circuit: a live rkllama already satisfies the install. +# Require an rkllama/Ollama-shaped /api/tags body (a "models" key), not just +# any HTTP 200 -- another local service on these ports must not be mistaken +# for an installed rkllama. Mirrors _port_responds_with_rkllama() in the +# Python installer. 
for p in "$PORT" "$LEGACY_PORT"; do - if curl -fsS --max-time 2 "http://localhost:${p}/api/tags" >/dev/null 2>&1; then + body="$(curl -fsS --max-time 2 "http://localhost:${p}/api/tags" 2>/dev/null || true)" + if printf '%s' "$body" | grep -q '"models"'; then echo "rkllama already running on port ${p} — nothing to install" exit 0 fi diff --git a/tests/test_rkllama_installer.py b/tests/test_rkllama_installer.py index 00154c0a..527b1c44 100644 --- a/tests/test_rkllama_installer.py +++ b/tests/test_rkllama_installer.py @@ -158,7 +158,8 @@ async def test_success_when_tags_lists_model(self): @respx.mock @pytest.mark.asyncio - async def test_failure_when_model_absent_from_tags(self): + async def test_failure_when_model_absent_from_tags(self, monkeypatch): + monkeypatch.setattr(rkllama_installer.asyncio, "sleep", _no_sleep) respx.post("http://localhost:7833/api/pull").mock( return_value=httpx.Response(200, text='{"status":"success"}\n') ) @@ -167,7 +168,7 @@ async def test_failure_when_model_absent_from_tags(self): ) res = await self._installer().install("rkllama-x", {}, variant=_VARIANT) assert res["success"] is False - assert "not in" in res["error"] + assert "could not confirm" in res["error"] @respx.mock @pytest.mark.asyncio @@ -183,7 +184,41 @@ async def test_failure_when_tags_unreachable(self, monkeypatch): ) res = await self._installer().install("rkllama-x", {}, variant=_VARIANT) assert res["success"] is False - assert "could not be reached" in res["error"] + assert "could not confirm" in res["error"] + + @respx.mock + @pytest.mark.asyncio + async def test_failure_when_tags_returns_non_json(self, monkeypatch): + # A 200 with a non-JSON body must be treated as a failed check, not + # raise an uncaught JSONDecodeError out of install(). 
+ monkeypatch.setattr(rkllama_installer.asyncio, "sleep", _no_sleep) + respx.post("http://localhost:7833/api/pull").mock( + return_value=httpx.Response(200, text='{"status":"success"}\n') + ) + respx.get("http://localhost:7833/api/tags").mock( + return_value=httpx.Response(200, text="nginx") + ) + res = await self._installer().install("rkllama-x", {}, variant=_VARIANT) + assert res["success"] is False + assert "could not confirm" in res["error"] + + @respx.mock + @pytest.mark.asyncio + async def test_retries_then_succeeds_when_model_appears_late(self, monkeypatch): + # Registration can lag the pull's 200; the verify loop retries on an + # absent model and succeeds once it appears. + monkeypatch.setattr(rkllama_installer.asyncio, "sleep", _no_sleep) + respx.post("http://localhost:7833/api/pull").mock( + return_value=httpx.Response(200, text='{"status":"success"}\n') + ) + respx.get("http://localhost:7833/api/tags").mock( + side_effect=[ + httpx.Response(200, json={"models": []}), + httpx.Response(200, json={"models": [{"name": "rkllama-x"}]}), + ] + ) + res = await self._installer().install("rkllama-x", {}, variant=_VARIANT) + assert res["success"] is True async def _no_sleep(*_a, **_k): diff --git a/tinyagentos/installers/rkllama_installer.py b/tinyagentos/installers/rkllama_installer.py index 739df0e0..5c0ee486 100644 --- a/tinyagentos/installers/rkllama_installer.py +++ b/tinyagentos/installers/rkllama_installer.py @@ -187,43 +187,50 @@ async def install( # Verify the model now appears in /api/tags so we know rkllama # successfully registered it. The pull returning 200 is necessary but # not sufficient: only /api/tags confirms the weight is loadable. We - # retry a few times to tolerate a transient blip, but if the check - # never succeeds we report failure rather than a false success -- a - # model the agent can't actually load is worse than a clear error. 
- last_exc: httpx.HTTPError | None = None + # retry a few times to tolerate a transient blip AND registration lag + # (the model can take a moment to appear after pull returns), but if the + # check never confirms the model we report failure rather than a false + # success -- a model the agent can't actually load is worse than a clear + # error. Malformed/unexpected /api/tags bodies are treated as a failed + # check, not allowed to raise out of the installer. + last_problem = "verification did not run" + verified = False for attempt in range(3): try: async with httpx.AsyncClient(timeout=10) as client: tags = await client.get(f"{self.rkllama_url}/api/tags") tags.raise_for_status() - names = {m.get("name") for m in tags.json().get("models", [])} - if app_id not in names: - return { - "success": False, - "error": ( - f"rkllama pull returned 200 but {app_id!r} is not in " - f"/api/tags. Known models: {sorted(names)[:5]}" - ), - } - break # verified - except httpx.HTTPError as exc: - last_exc = exc - logger.warning( - "rkllama install: /api/tags verification attempt %d/3 failed: %s", - attempt + 1, exc, + payload = tags.json() + except (httpx.HTTPError, ValueError) as exc: + # ValueError covers a 200 with a non-JSON body (json.JSONDecodeError). + last_problem = f"/api/tags unreachable or not JSON: {exc}" + else: + models = payload.get("models") if isinstance(payload, dict) else None + names = ( + {m.get("name") for m in models if isinstance(m, dict)} + if isinstance(models, list) + else set() ) - if attempt < 2: - await asyncio.sleep(1.0 * (attempt + 1)) - else: - # Exhausted retries without ever confirming the model. 
+ if app_id in names: + verified = True + break + known = sorted(n for n in names if n)[:5] + last_problem = f"{app_id!r} not yet in /api/tags (known: {known})" + logger.warning( + "rkllama install: /api/tags verification attempt %d/3: %s", + attempt + 1, last_problem, + ) + if attempt < 2: + await asyncio.sleep(1.0 * (attempt + 1)) + + if not verified: return { "success": False, "error": ( - f"rkllama pull returned 200 but /api/tags could not be reached " - f"to confirm {app_id!r} installed: {last_exc}. Retry the install." + f"rkllama pull returned 200 but could not confirm {app_id!r} " + f"installed after 3 checks: {last_problem}. Retry the install." ), } - return {"success": True, "app_id": app_id, "model_name": app_id} async def uninstall(self, app_id: str) -> dict: From a578a87036b4c0ce7fd610e3dfdb8265998e7c5d Mon Sep 17 00:00:00 2001 From: jaylfc Date: Sun, 14 Jun 2026 13:17:13 +0100 Subject: [PATCH 08/11] fix(agent-image): per-item resilience in describe_image_capabilities One malformed backend or worker entry no longer drops the whole capability menu (CodeRabbit). Guard each backend/worker independently instead of wrapping the entire loop in a single try/except, and extract a shared _model_id helper for dict-or-str model entries. 
Refs #884 --- tests/test_cluster_tools.py | 22 +++++++++++++++++++ tinyagentos/tools/cluster_tools.py | 35 +++++++++++++++++++++++------- 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/tests/test_cluster_tools.py b/tests/test_cluster_tools.py index 08fb2e2e..7270cbd2 100644 --- a/tests/test_cluster_tools.py +++ b/tests/test_cluster_tools.py @@ -81,6 +81,28 @@ async def test_empty_state_is_safe(): assert res["tiers"][0]["image_backends"] == [] +class _BadBackend: + """A backend whose .models raises when iterated for ids.""" + name = "bad" + type = "sd-cpp" + lifecycle_state = "running" + + @property + def models(self): + raise RuntimeError("boom") + + +@pytest.mark.asyncio +async def test_one_malformed_backend_does_not_drop_the_rest(): + catalog = _Catalog([ + _BadBackend(), + _Backend("good", "sd-cpp", [{"id": "sdxl"}], "running"), + ]) + res = await execute_describe_image_capabilities({}, _req(catalog=catalog)) + names = [b["name"] for b in res["tiers"][0]["image_backends"]] + assert "good" in names # the healthy backend survives the bad one + + @pytest.mark.asyncio async def test_object_hardware_profile_is_json_safe(): """A real hardware_profile is an object with nested objects; the summary must diff --git a/tinyagentos/tools/cluster_tools.py b/tinyagentos/tools/cluster_tools.py index 442a0d89..2cf18292 100644 --- a/tinyagentos/tools/cluster_tools.py +++ b/tinyagentos/tools/cluster_tools.py @@ -49,21 +49,34 @@ def _hw_summary(hw) -> dict: return {k: _json_safe(getattr(hw, k)) for k in keys if getattr(hw, k, None) is not None} +def _model_id(m): + """Best-effort model identifier from a dict-or-str model entry.""" + if isinstance(m, dict): + return m.get("id") or m.get("name") + return m + + def _image_backends_from_catalog(catalog) -> list[dict]: out = [] if not catalog: return out try: - for be in catalog.backends_with_capability("image-generation"): + backends = catalog.backends_with_capability("image-generation") + except Exception: + return out 
+ # Guard each backend independently: one malformed entry must not drop the + # whole capability list (the agent relies on this menu to pick a tier). + for be in backends or []: + try: out.append({ "name": be.name, "type": be.type, "tier": _TIER.get(be.type, "unknown"), "loaded": getattr(be, "lifecycle_state", "running") == "running", - "models": [m.get("id") or m.get("name") for m in (be.models or [])][:10], + "models": [_model_id(m) for m in (be.models or [])][:10], }) - except Exception: - pass + except Exception: + continue return out @@ -79,7 +92,7 @@ def _image_backends_from_worker(worker) -> list[dict]: "tier": _TIER.get(b.get("type"), "unknown"), # mirror the 'loaded' field local backends report; None = unknown "loaded": b.get("loaded") if "loaded" in b else (ls == "running" if ls else None), - "models": [m.get("id") or m.get("name") if isinstance(m, dict) else m for m in (b.get("models") or [])][:10], + "models": [_model_id(m) for m in (b.get("models") or [])][:10], }) return out @@ -94,7 +107,13 @@ async def execute_describe_image_capabilities(args: dict, request: Request) -> d cluster = getattr(state, "cluster_manager", None) if cluster is not None: try: - for w in cluster.get_workers(): + workers = cluster.get_workers() + except Exception: + workers = [] + # Guard each worker independently so one bad worker entry doesn't drop + # the rest of the cluster from the menu. + for w in workers or []: + try: if getattr(w, "status", "online") != "online": continue tiers.append({ @@ -102,8 +121,8 @@ async def execute_describe_image_capabilities(args: dict, request: Request) -> d "hardware": _hw_summary(getattr(w, "hardware", None)), "image_backends": _image_backends_from_worker(w), }) - except Exception: - pass + except Exception: + continue return { "tiers": tiers, "hint": "Pick a model on the tier that fits the task (npu = fast draft, gpu = best quality), then call generate_image with that model. 
The system loads/unloads and queues for you.", From ddeb1bec3939eb1e7da91548e9e387edea453d36 Mon Sep 17 00:00:00 2001 From: jaylfc Date: Sun, 14 Jun 2026 13:20:19 +0100 Subject: [PATCH 09/11] docs(agent): teach the agent good image-prompting technique The agent drives generate_image; result quality is mostly the prompt. Add agent-manual/10-image-prompting.md (compiled into taos-agent-manual.md): prompt structure (subject -> descriptors -> setting -> composition -> style -> lighting), be-specific/front-load/one-scene principles, negative_prompt for common defects, the tool's actual parameters (size/steps/guidance_scale/ seed/model with sensible ranges), model-family differences (FLUX sentences vs SDXL phrases, text-in-image caveat), and deliberate iteration. Also enrich the generate_image 'prompt' field description with a compact inline hint so guidance is present at call time. Refs #884 --- docs/agent-manual/10-image-prompting.md | 89 +++++++++++++++++++++++++ docs/agent-manual/index.md | 1 + docs/taos-agent-manual.md | 89 +++++++++++++++++++++++++ tinyagentos/tools/image_tool.py | 10 ++- 4 files changed, 188 insertions(+), 1 deletion(-) create mode 100644 docs/agent-manual/10-image-prompting.md diff --git a/docs/agent-manual/10-image-prompting.md b/docs/agent-manual/10-image-prompting.md new file mode 100644 index 00000000..5f7923dd --- /dev/null +++ b/docs/agent-manual/10-image-prompting.md @@ -0,0 +1,89 @@ + + +# Generating good images + +When you call `generate_image`, the quality of the result depends mostly on the +prompt. A vague prompt gives a generic image; a specific, well-ordered one gives +what the user actually asked for. Spend a sentence getting it right rather than +regenerating five times. + +## Structure a prompt + +Lead with the subject, then layer detail. A reliable order: + +1. **Subject** — what it is. "a small red sailboat", "a friendly cartoon fox". +2. **Descriptors** — appearance, colour, material, mood. 
"weathered wooden hull, + bright red sail". +3. **Setting / background** — where it is. "on a calm blue lake at sunrise". +4. **Composition** — framing and viewpoint. "wide shot, centred, low angle". +5. **Style** — the look. "watercolour children's book illustration", "flat vector + art", "photorealistic", "oil painting". Naming a concrete style matters more + than any other single word. +6. **Lighting / quality** — "soft warm light, gentle shadows, highly detailed". + +Example: `a friendly cartoon fox reading a book under a tree, autumn leaves, +warm soft light, watercolour children's book illustration, centred, highly detailed`. + +## Principles + +- **Be specific, not long.** Concrete nouns and adjectives beat a wall of vague + words. "golden retriever puppy on grass" beats "a nice cute lovely beautiful + amazing dog". +- **Front-load what matters.** Earlier words carry more weight. Put the subject + and the must-have details first. +- **One clear scene.** Don't pack several unrelated ideas into one prompt; the + model blends them into mush. Generate separate images instead. +- **Name the style explicitly.** If the user wants a storybook look, say + "children's book illustration" or "storybook watercolour". If they want a logo, + say "flat minimalist vector logo". +- **Match the user's intent.** Ask yourself what they pictured and describe that, + not a generic version of it. For a book cover, say "book cover, title space at + the top, central character". + +## Use negative_prompt to remove faults + +`negative_prompt` lists what to avoid (comma-separated). It is the fix for common +defects: + +- General cleanup: `blurry, low quality, jpeg artifacts, watermark, text, signature`. +- People/animals: add `deformed hands, extra fingers, extra limbs, mutated`. +- Keep a clean style: add `cluttered, busy background` if you want simplicity. + +Reach for it when a first result has a recurring flaw rather than rewriting the +whole prompt. 
+ +## Parameters (what the tool exposes) + +- **size** — `256x256`, `384x384`, or `512x512`. Use 512x512 for the final + artwork; a smaller size is only worth it for a quick rough draft. +- **steps** — 1 to 8 (default 4). These backends are tuned for few-step + generation; 4 is a good balance, 6 to 8 for a bit more detail. More is not + always better here. +- **guidance_scale** — 1 to 20 (default 7.5). How strictly the image follows the + prompt. Lower (2 to 5) is looser and more artistic; higher (8 to 12) sticks to + the prompt harder. Raise it when the model ignores a detail you asked for; + lower it if results look over-baked or harsh. +- **seed** — omit for a fresh random image. To make small edits to an image the + user liked, reuse its returned `seed` and tweak the prompt so the composition + stays close. +- **model** — call `describe_image_capabilities` first and pick a model that fits + the task: a fast NPU draft model for iterating, a GPU model for the final cover. + Omit it to let the scheduler choose. + +## Picking a model by intent + +Different model families respond to prompts differently: + +- **FLUX-style models** follow natural-language sentences well and render text + reasonably. Write a full descriptive sentence. +- **SDXL-style models** respond well to comma-separated descriptive phrases and + strong style keywords. +- **Text in the image** (a title, a sign, a label) is unreliable on most models; + prefer a model noted for text if one is loaded, keep the text very short, and + put it in quotes, e.g. `a poster with the title "Brave Little Fox"`. + +## Iterate deliberately + +If the first image is close but not right, change one thing at a time: adjust the +style word, add a missing detail, or add a negative term for the defect, keeping +the same seed. Tell the user what you changed so they can steer. 
diff --git a/docs/agent-manual/index.md b/docs/agent-manual/index.md index b756db5f..7b44fa3a 100644 --- a/docs/agent-manual/index.md +++ b/docs/agent-manual/index.md @@ -18,3 +18,4 @@ Run `python3 scripts/build-agent-manual.py` to compile these into `docs/taos-age | `07-after-update.md` | Breakage-log-first troubleshooting for post-update reports | | `08-answer-templates.md` | Canned answer shapes for common questions | | `09-os-control.md` | Driving the desktop: open_app / arrange_windows tools | +| `10-image-prompting.md` | Writing good prompts for the generate_image tool | diff --git a/docs/taos-agent-manual.md b/docs/taos-agent-manual.md index 942e9245..5fba10db 100644 --- a/docs/taos-agent-manual.md +++ b/docs/taos-agent-manual.md @@ -179,3 +179,92 @@ that app's own tools and your other skills. Keep it purposeful: open what you need, don't rearrange the user's windows without reason, and tell the user what you're doing as you do it. +--- + +# Generating good images + +When you call `generate_image`, the quality of the result depends mostly on the +prompt. A vague prompt gives a generic image; a specific, well-ordered one gives +what the user actually asked for. Spend a sentence getting it right rather than +regenerating five times. + +## Structure a prompt + +Lead with the subject, then layer detail. A reliable order: + +1. **Subject** — what it is. "a small red sailboat", "a friendly cartoon fox". +2. **Descriptors** — appearance, colour, material, mood. "weathered wooden hull, + bright red sail". +3. **Setting / background** — where it is. "on a calm blue lake at sunrise". +4. **Composition** — framing and viewpoint. "wide shot, centred, low angle". +5. **Style** — the look. "watercolour children's book illustration", "flat vector + art", "photorealistic", "oil painting". Naming a concrete style matters more + than any other single word. +6. **Lighting / quality** — "soft warm light, gentle shadows, highly detailed". 
+ +Example: `a friendly cartoon fox reading a book under a tree, autumn leaves, +warm soft light, watercolour children's book illustration, centred, highly detailed`. + +## Principles + +- **Be specific, not long.** Concrete nouns and adjectives beat a wall of vague + words. "golden retriever puppy on grass" beats "a nice cute lovely beautiful + amazing dog". +- **Front-load what matters.** Earlier words carry more weight. Put the subject + and the must-have details first. +- **One clear scene.** Don't pack several unrelated ideas into one prompt; the + model blends them into mush. Generate separate images instead. +- **Name the style explicitly.** If the user wants a storybook look, say + "children's book illustration" or "storybook watercolour". If they want a logo, + say "flat minimalist vector logo". +- **Match the user's intent.** Ask yourself what they pictured and describe that, + not a generic version of it. For a book cover, say "book cover, title space at + the top, central character". + +## Use negative_prompt to remove faults + +`negative_prompt` lists what to avoid (comma-separated). It is the fix for common +defects: + +- General cleanup: `blurry, low quality, jpeg artifacts, watermark, text, signature`. +- People/animals: add `deformed hands, extra fingers, extra limbs, mutated`. +- Keep a clean style: add `cluttered, busy background` if you want simplicity. + +Reach for it when a first result has a recurring flaw rather than rewriting the +whole prompt. + +## Parameters (what the tool exposes) + +- **size** — `256x256`, `384x384`, or `512x512`. Use 512x512 for the final + artwork; a smaller size is only worth it for a quick rough draft. +- **steps** — 1 to 8 (default 4). These backends are tuned for few-step + generation; 4 is a good balance, 6 to 8 for a bit more detail. More is not + always better here. +- **guidance_scale** — 1 to 20 (default 7.5). How strictly the image follows the + prompt. 
Lower (2 to 5) is looser and more artistic; higher (8 to 12) sticks to + the prompt harder. Raise it when the model ignores a detail you asked for; + lower it if results look over-baked or harsh. +- **seed** — omit for a fresh random image. To make small edits to an image the + user liked, reuse its returned `seed` and tweak the prompt so the composition + stays close. +- **model** — call `describe_image_capabilities` first and pick a model that fits + the task: a fast NPU draft model for iterating, a GPU model for the final cover. + Omit it to let the scheduler choose. + +## Picking a model by intent + +Different model families respond to prompts differently: + +- **FLUX-style models** follow natural-language sentences well and render text + reasonably. Write a full descriptive sentence. +- **SDXL-style models** respond well to comma-separated descriptive phrases and + strong style keywords. +- **Text in the image** (a title, a sign, a label) is unreliable on most models; + prefer a model noted for text if one is loaded, keep the text very short, and + put it in quotes, e.g. `a poster with the title "Brave Little Fox"`. + +## Iterate deliberately + +If the first image is close but not right, change one thing at a time: adjust the +style word, add a missing detail, or add a negative term for the defect, keeping +the same seed. Tell the user what you changed so they can steer. diff --git a/tinyagentos/tools/image_tool.py b/tinyagentos/tools/image_tool.py index 1ac91392..2264bba3 100644 --- a/tinyagentos/tools/image_tool.py +++ b/tinyagentos/tools/image_tool.py @@ -10,7 +10,15 @@ "properties": { "prompt": { "type": "string", - "description": "Text description of the image to generate", + "description": ( + "Text description of the image. Lead with the subject, then " + "layer descriptors, setting, composition, and an explicit style " + "(e.g. 'children's book watercolour', 'flat vector', " + "'photorealistic'). 
Be specific, not long; front-load what " + "matters; keep to one clear scene. Example: 'a friendly cartoon " + "fox reading under a tree, autumn leaves, warm soft light, " + "watercolour children's book illustration, centred'." + ), }, "size": { "type": "string", From f4b3ce9f09a84b99dc5ca986c78de130e25f2c84 Mon Sep 17 00:00:00 2001 From: jaylfc Date: Sun, 14 Jun 2026 13:30:21 +0100 Subject: [PATCH 10/11] docs(status): pause for a fresh session -- handoff block + 3060 GO - add a SESSION PAUSED 'NEW SESSION START HERE' block: branch tips, the two in-flight PRs (#884 image-gen tip ddeb1bec, #886 rkllama #844 tip d6960af0, both baking on tests only), the remaining minor non-blocking bot nits with my assessment, the merge gate, and re-arm reminders - record the 3060 SD-backend GO from @taOSmd (task #34 unblocked; do after #884) --- docs/STATUS.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/STATUS.md b/docs/STATUS.md index 5ef63805..7bcaca9c 100644 --- a/docs/STATUS.md +++ b/docs/STATUS.md @@ -1,5 +1,16 @@ SINGLE SOURCE OF TRUTH for cross-agent handoff. -Last updated: 2026-06-14 ~13:05 BST, @taOS (active). +Last updated: 2026-06-14 ~13:30 BST, @taOS (PAUSED for a fresh session). + +▶▶ SESSION PAUSED 2026-06-14 ~13:30 BST (Jay asked to pause + update handoff). NEW SESSION START HERE: + - master=51837bed, dev=118409a5. Working tree clean. NO uncommitted work anywhere. + - TWO PRs IN FLIGHT, both baking (only `test (3.12/3.13)` pending; lint/spa-build/Gitar/Kilo/CodeRabbit all green on the last bake): + • PR #884 feat(agent) agent-controlled image generation. Branch feat/agent-image-gen, tip ddeb1bec. Commits this session: 443e70ff canvas wiring + e94de444 describe_image_capabilities + 165e0b83/10f4732c/a578a870 bot-review hardening + ddeb1bec image-prompting manual. WHEN GREEN: merge to dev, then DEPLOY Pi and drive the storybook flow. + • PR #886 fix(store) rkllama install entry (#844). 
Branch fix/rkllama-store-install, tip d6960af0 (cleanly off origin/dev, 3 code files + tests + manual). WHEN GREEN: merge to dev, then dev->master (Jay wanted #844 fixed for the target audience). + - REMAINING BOT NITS on both PRs are MINOR + non-blocking (judged, not yet actioned, left for your call): #884 kilo wants _image_backends_from_worker hardened per-entry (worker-level guard already contains it; symmetric 1-line isinstance guard would fully satisfy). #886 kilo flags the install-rkllama.sh `"models"` short-circuit on `{"models":[]}` (that is CORRECT: an empty-but-running rkllama IS installed; models are a separate concern) and non-string model names in verify (can't false-match a string app_id, safe). Decide per-nit; none block merge. + - MERGE GATE (handoff 0f): green CI + Kilo + Qodo + Gitar + author. CodeRabbit is legacy/rate-limited, do not block on it. Check INLINE bot comments, not just the check summary. + - Tasks #30 (rkllama/#844, in_progress -> close when #886 merges) and #35 (NEW: ~19 other catalog manifests reference missing install scripts; separate follow-up) capture the store-install debt. + - 3060 SD BACKEND UNBLOCKED (task #34): @taOSmd relayed Jay's GO 2026-06-14 ~12:30 -- the Fedora RTX 3060 window is OURS to install the SD backend ourselves (stable-diffusion.cpp or ComfyUI, our pick); @taOSmd manages nothing outside taOSmd, so we own the SD backend + its model + pointing the controller's image_backend_url at it. Do this AFTER #884 merges so the storybook image step has a real GPU backend. Box access = resolve the Fedora node via our own tailscale (NEVER commit the IP / put it on the bus). + - Re-arm on arrival: freshness cron (:08/:38), A2A SSE monitor, repo-watch (:23). Resume pair for the 15:40Z window is armed (primary 16:42, retry 17:01 local). 
▶ RELEASED TO MASTER 2026-06-14 (#883, master=c9c5b0c9, Jay asked "merge dev to main so all users get updates"): the whole overnight body of work is now on master — agent OS control framework (#877-882), macOS-dark theme + purple purge (#879), App Store/real-desktop/Agents/chat redesigns, mobile chat #880 + chat-pwa theme #881. Merge-commit (history preserved), dev NOT deleted. master strict-mode + behind required an admin merge. From daa78301a94dd431596edeb39f603813688d205d Mon Sep 17 00:00:00 2001 From: jaylfc Date: Sun, 14 Jun 2026 13:34:38 +0100 Subject: [PATCH 11/11] docs(status): #884 and #886 now green + ready to merge (held for fresh session) --- docs/STATUS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/STATUS.md b/docs/STATUS.md index 7bcaca9c..5f31a8ce 100644 --- a/docs/STATUS.md +++ b/docs/STATUS.md @@ -3,7 +3,7 @@ Last updated: 2026-06-14 ~13:30 BST, @taOS (PAUSED for a fresh session). ▶▶ SESSION PAUSED 2026-06-14 ~13:30 BST (Jay asked to pause + update handoff). NEW SESSION START HERE: - master=51837bed, dev=118409a5. Working tree clean. NO uncommitted work anywhere. - - TWO PRs IN FLIGHT, both baking (only `test (3.12/3.13)` pending; lint/spa-build/Gitar/Kilo/CodeRabbit all green on the last bake): + - TWO PRs IN FLIGHT, both now CLEAN + FULLY GREEN as of ~13:35 BST (all checks + Gitar/Kilo/CodeRabbit SUCCESS) -- READY TO MERGE, left for the fresh session per the pause: • PR #884 feat(agent) agent-controlled image generation. Branch feat/agent-image-gen, tip ddeb1bec. Commits this session: 443e70ff canvas wiring + e94de444 describe_image_capabilities + 165e0b83/10f4732c/a578a870 bot-review hardening + ddeb1bec image-prompting manual. WHEN GREEN: merge to dev, then DEPLOY Pi and drive the storybook flow. • PR #886 fix(store) rkllama install entry (#844). Branch fix/rkllama-store-install, tip d6960af0 (cleanly off origin/dev, 3 code files + tests + manual). 
WHEN GREEN: merge to dev, then dev->master (Jay wanted #844 fixed for the target audience). - REMAINING BOT NITS on both PRs are MINOR + non-blocking (judged, not yet actioned, left for your call): #884 kilo wants _image_backends_from_worker hardened per-entry (worker-level guard already contains it; symmetric 1-line isinstance guard would fully satisfy). #886 kilo flags the install-rkllama.sh `"models"` short-circuit on `{"models":[]}` (that is CORRECT: an empty-but-running rkllama IS installed; models are a separate concern) and non-string model names in verify (can't false-match a string app_id, safe). Decide per-nit; none block merge.