Merged
19 commits
ee4b390
refactor(01_hello_world): simplify to flat-file pattern
deanq Feb 19, 2026
ed75b56
refactor(02_cpu_worker): simplify to flat-file pattern
deanq Feb 19, 2026
d369314
refactor(03_mixed_workers): simplify to flat-file pattern with LB pip…
deanq Feb 19, 2026
eff3e3f
refactor(04_dependencies): simplify to flat-file pattern
deanq Feb 19, 2026
1a73ab5
refactor(01_text_to_speech): simplify to flat-file pattern
deanq Feb 19, 2026
1a2357a
refactor(05_load_balancer): simplify to flat-file pattern
deanq Feb 19, 2026
1bd9b09
refactor(01_network_volumes): simplify to flat-file pattern, inline v…
deanq Feb 19, 2026
612f9e6
docs(01_getting_started): simplify READMEs to match flat-file refactor
deanq Feb 20, 2026
43b8f1f
docs(02_ml_inference, 03_advanced_workers): simplify READMEs to match…
deanq Feb 20, 2026
fed4540
refactor(05_network_volumes): update cpu_worker to LB pattern, align …
deanq Feb 20, 2026
fcccfb6
chore: add .flash to gitignore, update CLAUDE.md and lockfile
deanq Feb 20, 2026
def9ce4
chore: remove legacy unified FastAPI discovery app
deanq Feb 20, 2026
c61dde6
chore: remove per-example requirements.txt and .env.example files
deanq Feb 20, 2026
1336fff
chore: remove stale deps from pyproject.toml, regenerate lockfiles
deanq Feb 20, 2026
f8f205f
docs: update CONTRIBUTING, DEVELOPMENT, CLI-REFERENCE for flat-file p…
deanq Feb 20, 2026
abf4844
fix(examples): use typed params in gpu_lb compute_intensive
deanq Feb 20, 2026
cc2894c
chore: simplify pyproject.toml, requirements.txt, and Makefile
deanq Feb 20, 2026
76c5eb6
fix(ci): remove references to deleted targets and scripts
deanq Feb 20, 2026
340c53e
fix: address PR review feedback from Copilot
deanq Feb 20, 2026
Files changed
6 changes: 0 additions & 6 deletions .github/workflows/quality.yml
@@ -30,11 +30,5 @@ jobs:
       - name: Install dependencies
         run: make setup
 
-      - name: Check dependency sync
-        run: |
-          echo "::group::Dependency sync check"
-          uv run python scripts/sync_example_deps.py --check
-          echo "::endgroup::"
-
       - name: Run quality checks
         run: make ci-quality-github
2 changes: 1 addition & 1 deletion .github/workflows/test-examples.yml
@@ -38,4 +38,4 @@ jobs:
         run: make venv-info
 
       - name: Run quality checks
-        run: make quality-check-strict
+        run: make quality-check
4 changes: 0 additions & 4 deletions 01_getting_started/01_hello_world/.env.example

This file was deleted.

1 change: 1 addition & 0 deletions 01_getting_started/01_hello_world/.gitignore
@@ -42,3 +42,4 @@ uv.lock
 # OS
 .DS_Store
 Thumbs.db
+.flash/
18 changes: 8 additions & 10 deletions 01_getting_started/01_hello_world/README.md
@@ -26,21 +26,19 @@ Get your API key from [Runpod Settings](https://www.runpod.io/console/user/setti
 flash run
 ```
 
-Server starts at **http://localhost:8000**
+Server starts at **http://localhost:8888**
 
 ### 4. Test the API
 
-```bash
-# Health check
-curl http://localhost:8000/ping
-
-# GPU worker
-curl -X POST http://localhost:8000/gpu/hello \
+Visit **http://localhost:8888/docs** for interactive API documentation. QB endpoints are auto-generated by `flash run` based on your `@remote` functions.
+
+```bash
+curl -X POST http://localhost:8888/gpu_worker/run_sync \
   -H "Content-Type: application/json" \
   -d '{"message": "Hello GPU!"}'
 ```
 
-Visit **http://localhost:8000/docs** for interactive API documentation.
+Visit **http://localhost:8888/docs** for interactive API documentation.
 
 ### Full CLI Documentation
 
@@ -67,7 +65,9 @@ The worker demonstrates:
 
 ## API Endpoints
 
-### POST /gpu/hello
+QB (queue-based) endpoints are auto-generated from `@remote` functions. Visit `/docs` for the full API schema.
+
+### `gpu_hello`
 
 Executes a simple GPU worker and returns system/GPU information.
 
@@ -100,9 +100,7 @@
 
 ```
 01_hello_world/
-├── main.py              # FastAPI application
 ├── gpu_worker.py        # GPU worker with @remote decorator
-├── mothership.py        # Mothership endpoint configuration
 ├── pyproject.toml       # Project metadata
 ├── requirements.txt     # Dependencies
 ├── .env.example         # Environment variables template
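Note: the curl call added above translates directly to Python. A minimal stdlib sketch, assuming `flash run` is serving locally on port 8888 as this README describes:

```python
# Hypothetical test client for the endpoint shown in the diff above.
import json
from urllib import request

payload = json.dumps({"message": "Hello GPU!"}).encode()
req = request.Request(
    "http://localhost:8888/gpu_worker/run_sync",
    data=payload,
    headers={"Content-Type": "application/json"},
)
with request.urlopen(req) as resp:
    print(json.dumps(json.load(resp), indent=2))
```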
1 change: 0 additions & 1 deletion 01_getting_started/01_hello_world/__init__.py

This file was deleted.

73 changes: 9 additions & 64 deletions 01_getting_started/01_hello_world/gpu_worker.py
@@ -1,46 +1,20 @@
-## Hello world: GPU serverless workers
-# In this part of the example code, we provision a GPU-based worker and have it
-# execute code. We can run the worker directly, or have it handle API requests
-# to the router function. It's registered to a subrouter in the __init__.py
-# file in this folder, and subsequently imported by main.py and attached to the
-# FastAPI app there.
+# GPU serverless worker -- detects available GPU hardware.
+# Run with: flash run
+# Test directly: python gpu_worker.py
+from runpod_flash import GpuGroup, LiveServerless, remote
 
-# Scaling behavior is controlled by configuration passed to the
-# `LiveServerless` class.
-from fastapi import APIRouter
-from pydantic import BaseModel
-from runpod_flash import (
-    GpuGroup,
-    LiveServerless,
-    remote,
-)
-
-# Here, we'll define several variables that change the
-# default behavior of our serverless endpoint. `workersMin` sets our endpoint
-# to scale to 0 active containers; `workersMax` will allow our endpoint to run
-# up to 3 workers in parallel as the endpoint receives more work. We also set
-# an idle timeout of 5 minutes so that any active worker stays alive for 5
-# minutes after completing a request.
 gpu_config = LiveServerless(
     name="01_01_gpu_worker",
-    gpus=[GpuGroup.ANY],  # Run on any GPU
+    gpus=[GpuGroup.ANY],
     workersMin=0,
     workersMax=3,
     idleTimeout=5,
 )
 
 
-# Decorating our function with `remote` will package up the function code and
-# deploy it on the infrastructure according to the passed input config. The
-# results from the worker will be returned to your terminal. In this example
-# the function will return a greeting to the input string passed in the `name`
-# key. The code itself will run on a GPU worker, and information about the GPU
-# the worker has access to will be included in the response.
 @remote(resource_config=gpu_config)
-async def gpu_hello(
-    input_data: dict,
-) -> dict:
-    """Simple GPU worker example with GPU detection."""
+async def gpu_hello(input_data: dict) -> dict:
+    """Simple GPU worker that returns GPU hardware info."""
     import platform
     from datetime import datetime
 
@@ -51,10 +25,7 @@ async def gpu_hello(
     gpu_count = torch.cuda.device_count()
     gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
 
-    message = input_data.get(
-        "message",
-        "Hello from GPU worker!",
-    )
+    message = input_data.get("message", "Hello from GPU worker!")
 
     return {
         "status": "success",
@@ -64,40 +35,14 @@ async def gpu_hello(
             "available": gpu_available,
             "name": gpu_name,
             "count": gpu_count,
-            "memory_gb": round(
-                gpu_memory,
-                2,
-            ),
+            "memory_gb": round(gpu_memory, 2),
         },
         "timestamp": datetime.now().isoformat(),
         "platform": platform.system(),
         "python_version": platform.python_version(),
     }
 
 
-# We define a subrouter for our gpu worker so that our main router in `main.py`
-# can attach it for routing gpu-specific requests.
-gpu_router = APIRouter()
-
-
-class MessageRequest(BaseModel):
-    """Request model for GPU worker."""
-
-    message: str = "Hello from GPU!"
-
-
-@gpu_router.post("/hello")
-async def hello(
-    request: MessageRequest,
-):
-    """Simple GPU worker endpoint."""
-    result = await gpu_hello({"message": request.message})
-    return result
-
-
-# This code is packaged up as a "worker" that will handle requests sent to the
-# endpoint at /gpu/hello, but you can also trigger it directly by running
-# python -m workers.gpu.endpoint
 if __name__ == "__main__":
     import asyncio
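Note: the hunk above truncates right after `import asyncio`, so the direct-run tail of the refactored file is not visible here. A plausible completion, consistent with the new `# Test directly: python gpu_worker.py` header comment — the exact body is an assumption:

```python
# Assumed tail of gpu_worker.py (the diff cuts off after `import asyncio`).
if __name__ == "__main__":
    import asyncio

    # Run the @remote function once with a sample payload and print the result.
    result = asyncio.run(gpu_hello({"message": "Hello from the command line!"}))
    print(result)
```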
71 changes: 0 additions & 71 deletions 01_getting_started/01_hello_world/main.py

This file was deleted.

53 changes: 0 additions & 53 deletions 01_getting_started/01_hello_world/mothership.py

This file was deleted.

1 change: 0 additions & 1 deletion 01_getting_started/01_hello_world/requirements.txt

This file was deleted.

4 changes: 0 additions & 4 deletions 01_getting_started/02_cpu_worker/.env.example

This file was deleted.

18 changes: 7 additions & 11 deletions 01_getting_started/02_cpu_worker/README.md
@@ -26,22 +26,18 @@ Get your API key from [Runpod Settings](https://www.runpod.io/console/user/setti
 flash run
 ```
 
-Server starts at **http://localhost:8000**
+Server starts at **http://localhost:8888**
 
 ### 4. Test the API
 
-```bash
-# Health check
-curl http://localhost:8000/ping
-
-# CPU worker
-curl -X POST http://localhost:8000/cpu/hello \
+Visit **http://localhost:8888/docs** for interactive API documentation. QB endpoints are auto-generated by `flash run` based on your `@remote` functions.
+
+```bash
+curl -X POST http://localhost:8888/cpu_worker/run_sync \
   -H "Content-Type: application/json" \
   -d '{"name": "Flash User"}'
 ```
 
-Visit **http://localhost:8000/docs** for interactive API documentation.
-
 ### Full CLI Documentation
 
 For complete CLI usage including deployment, environment management, and troubleshooting:
@@ -67,7 +63,9 @@ The worker demonstrates:
 
 ## API Endpoints
 
-### POST /cpu/hello
+QB (queue-based) endpoints are auto-generated from `@remote` functions. Visit `/docs` for the full API schema.
+
+### `cpu_hello`
 
 Executes a simple CPU worker and returns a greeting with system information.
 
@@ -94,9 +92,7 @@ Executes a simple CPU worker and returns a greeting with system information.
 
 ```
 02_cpu_worker/
-├── main.py              # FastAPI application
 ├── cpu_worker.py        # CPU worker with @remote decorator
-├── mothership.py        # Mothership endpoint configuration
 ├── pyproject.toml       # Project metadata
 ├── requirements.txt     # Dependencies
 ├── .env.example         # Environment variables template
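Note: this view does not include a diff for `cpu_worker.py` itself, but the README above implies it mirrors the flat-file shape of `gpu_worker.py`. A hypothetical sketch — the config fields, worker name, and return keys are assumptions modeled on the GPU example:

```python
# cpu_worker.py -- hypothetical flat-file CPU worker modeled on gpu_worker.py.
# Run with: flash run
# Test directly: python cpu_worker.py
from runpod_flash import LiveServerless, remote

# Assumed scaling config; a real CPU example may use CPU-specific fields.
cpu_config = LiveServerless(
    name="01_02_cpu_worker",
    workersMin=0,
    workersMax=3,
    idleTimeout=5,
)


@remote(resource_config=cpu_config)
async def cpu_hello(input_data: dict) -> dict:
    """Return a greeting plus basic system information."""
    # Imports stay inside the function so they resolve on the remote worker,
    # matching the pattern in gpu_worker.py.
    import platform

    name = input_data.get("name", "Flash User")
    return {
        "status": "success",
        "message": f"Hello, {name}!",
        "platform": platform.system(),
        "python_version": platform.python_version(),
    }


if __name__ == "__main__":
    import asyncio

    print(asyncio.run(cpu_hello({"name": "Flash User"})))
```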
1 change: 0 additions & 1 deletion 01_getting_started/02_cpu_worker/__init__.py

This file was deleted.
