1 change: 0 additions & 1 deletion 01_getting_started/01_hello_world/__init__.py

This file was deleted.

73 changes: 9 additions & 64 deletions 01_getting_started/01_hello_world/gpu_worker.py
@@ -1,46 +1,20 @@
-## Hello world: GPU serverless workers
-# In this part of the example code, we provision a GPU-based worker and have it
-# execute code. We can run the worker directly, or have it handle API requests
-# to the router function. It's registered to a subrouter in the __init__.py
-# file in this folder, and subsequently imported by main.py and attached to the
-# FastAPI app there.
-# Scaling behavior is controlled by configuration passed to the
-# `LiveServerless` class.
-from fastapi import APIRouter
-from pydantic import BaseModel
-from runpod_flash import (
-    GpuGroup,
-    LiveServerless,
-    remote,
-)
-
-# Here, we'll define several variables that change the
-# default behavior of our serverless endpoint. `workersMin` sets our endpoint
-# to scale to 0 active containers; `workersMax` will allow our endpoint to run
-# up to 3 workers in parallel as the endpoint receives more work. We also set
-# an idle timeout of 5 minutes so that any active worker stays alive for 5
-# minutes after completing a request.
+# GPU serverless worker -- detects available GPU hardware.
+# Run with: flash run
+# Test directly: python gpu_worker.py
+from runpod_flash import GpuGroup, LiveServerless, remote
+
 gpu_config = LiveServerless(
     name="01_01_gpu_worker",
-    gpus=[GpuGroup.ANY],  # Run on any GPU
+    gpus=[GpuGroup.ANY],
     workersMin=0,
     workersMax=3,
     idleTimeout=5,
 )


-# Decorating our function with `remote` will package up the function code and
-# deploy it on the infrastructure according to the passed input config. The
-# results from the worker will be returned to your terminal. In this example
-# the function will return a greeting to the input string passed in the `name`
-# key. The code itself will run on a GPU worker, and information about the GPU
-# the worker has access to will be included in the response.
 @remote(resource_config=gpu_config)
-async def gpu_hello(
-    input_data: dict,
-) -> dict:
-    """Simple GPU worker example with GPU detection."""
+async def gpu_hello(input_data: dict) -> dict:
+    """Simple GPU worker that returns GPU hardware info."""
     import platform
     from datetime import datetime

@@ -51,10 +25,7 @@ async def gpu_hello(
         gpu_count = torch.cuda.device_count()
         gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)

-    message = input_data.get(
-        "message",
-        "Hello from GPU worker!",
-    )
+    message = input_data.get("message", "Hello from GPU worker!")

     return {
         "status": "success",
@@ -64,40 +35,14 @@ async def gpu_hello(
             "available": gpu_available,
             "name": gpu_name,
             "count": gpu_count,
-            "memory_gb": round(
-                gpu_memory,
-                2,
-            ),
+            "memory_gb": round(gpu_memory, 2),
         },
         "timestamp": datetime.now().isoformat(),
         "platform": platform.system(),
         "python_version": platform.python_version(),
     }


-# We define a subrouter for our gpu worker so that our main router in `main.py`
-# can attach it for routing gpu-specific requests.
-gpu_router = APIRouter()
-
-
-class MessageRequest(BaseModel):
-    """Request model for GPU worker."""
-
-    message: str = "Hello from GPU!"
-
-
-@gpu_router.post("/hello")
-async def hello(
-    request: MessageRequest,
-):
-    """Simple GPU worker endpoint."""
-    result = await gpu_hello({"message": request.message})
-    return result
-
-
-# This code is packaged up as a "worker" that will handle requests sent to the
-# endpoint at /gpu/hello, but you can also trigger it directly by running
-# python -m workers.gpu.endpoint
 if __name__ == "__main__":
     import asyncio

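For context, here is a minimal sketch of how the decorated worker might be exercised locally. The actual `if __name__ == "__main__":` block in gpu_worker.py is truncated in this diff, and the exact dispatch semantics of a runpod_flash `remote`-decorated coroutine are assumed, so treat this as an illustration rather than the file's contents:

import asyncio

from gpu_worker import gpu_hello  # assumes the module is on the import path


async def main() -> None:
    # Awaiting the remote-decorated coroutine is assumed to dispatch the call
    # to a provisioned worker and return its JSON-serializable result.
    result = await gpu_hello({"message": "Hello from GPU worker!"})
    print(result)


if __name__ == "__main__":
    asyncio.run(main())

The same pattern applies to the CPU worker below; only the configuration class and instance types differ.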
71 changes: 0 additions & 71 deletions 01_getting_started/01_hello_world/main.py

This file was deleted.

53 changes: 0 additions & 53 deletions 01_getting_started/01_hello_world/mothership.py

This file was deleted.

1 change: 0 additions & 1 deletion 01_getting_started/02_cpu_worker/__init__.py

This file was deleted.

43 changes: 6 additions & 37 deletions 01_getting_started/02_cpu_worker/cpu_worker.py
@@ -1,33 +1,20 @@
-## Hello world: CPU serverless workers
-# In this part of the example code, we provision a CPU-based worker (no GPUs) and have it execute code.
-# We can run the worker directly, or have it handle API requests to the router function.
-# It's registered to a subrouter in the __init__.py file in this folder, and subsequently
-# imported by main.py and attached to the FastAPI app there.
-from fastapi import APIRouter
-from pydantic import BaseModel
+# CPU serverless worker -- lightweight processing without GPU.
+# Run with: flash run
+# Test directly: python cpu_worker.py
 from runpod_flash import CpuInstanceType, CpuLiveServerless, remote

-# Scaling behavior is controlled by configuration passed to the `CpuLiveServerless` class.
-# Here, we'll define several variables that change the default behavior of our serverless endpoint.
-# `workersMin` sets our endpoint to scale to 0 active containers; `workersMax` will allow our endpoint
-# to run up to 5 workers in parallel as the endpoint receives more work.
-# We also set an idle timeout of 5 minutes so that any active worker stays alive for 5 minutes after completing a request.
 cpu_config = CpuLiveServerless(
     name="01_02_cpu_worker",
     instanceIds=[CpuInstanceType.CPU3C_1_2],
-    workersMin=0,  # Scale to zero
+    workersMin=0,
     workersMax=3,
-    idleTimeout=5,  # Leave workers alive for 5 minutes after they serve a request
+    idleTimeout=5,
 )


-# Decorating our function with `remote` will package up the function code and deploy it on the infrastructure
-# according to the passed input config.
-# In this example the function will return a greeting to the input string passed in the `name` key.
-# The results are displayed in your terminal, but the work was performed by CPU workers on runpod infra.
 @remote(resource_config=cpu_config)
 async def cpu_hello(input_data: dict) -> dict:
-    """Simple CPU worker example."""
+    """Simple CPU worker that returns a greeting."""
     import platform
     from datetime import datetime

@@ -43,24 +30,6 @@ async def cpu_hello(input_data: dict) -> dict:
     }


-cpu_router = APIRouter()
-
-
-class MessageRequest(BaseModel):
-    """Request model for CPU worker."""
-
-    name: str = "Flash expert"
-
-
-@cpu_router.post("/hello")
-async def hello(request: MessageRequest):
-    """Simple CPU worker endpoint."""
-    result = await cpu_hello({"message": request.name})
-    return result
-
-
-# This code is packaged up as a "worker" that will handle requests sent to the endpoint at
-# /cpu/hello, but you can also trigger it locally by running python -m workers.cpu.endpoint.
 if __name__ == "__main__":
     import asyncio

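With main.py and the subrouters removed, these examples no longer ship an HTTP entry point of their own; workers are run via flash run or invoked directly. If an HTTP wrapper is still wanted, a minimal sketch in the spirit of the removed router code follows (the app object, route path, and import path are illustrative assumptions, not part of this PR):

from cpu_worker import cpu_hello  # assumes the module is on the import path
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class MessageRequest(BaseModel):
    """Request model mirroring the removed router's schema."""

    name: str = "Flash expert"


@app.post("/cpu/hello")
async def hello(request: MessageRequest) -> dict:
    # Delegate to the remote-decorated worker defined in cpu_worker.py.
    return await cpu_hello({"message": request.name})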
60 changes: 0 additions & 60 deletions 01_getting_started/02_cpu_worker/main.py

This file was deleted.
