From f502c2cd404e1e64808a7e1e1272aec93fc34cc9 Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 1 Mar 2026 01:09:25 +0800 Subject: [PATCH 1/5] Refactor HTTP/2 transport to use builder pattern for remote transport creation - Updated `Http2RemoteTransport` to utilize a builder pattern, enhancing flexibility in specifying transport targets (by name, ID, or hierarchical path). - Introduced `TransportTarget` enum to clearly define the target type for remote actors. - Refactored related code in `ActorSystem` to align with the new transport creation method, improving readability and maintainability. - Enhanced tests to validate the new builder functionality and ensure correct behavior across various transport scenarios. --- CONTRIBUTING.md | 202 +++--- README.md | 32 +- README.zh.md | 32 +- crates/pulsing-actor/src/system/resolve.rs | 25 +- .../pulsing-actor/src/transport/http2/mod.rs | 161 +++-- crates/pulsing-actor/src/transport/mod.rs | 5 +- .../tests/unit/transport/client_tests.rs | 15 +- crates/pulsing-py/src/actor.rs | 50 -- crates/pulsing-py/src/errors.rs | 168 ++--- .../pulsing-py/src/python_error_converter.rs | 134 ++-- pyproject.toml | 2 +- python/pulsing/core/__init__.py | 4 +- python/pulsing/core/helpers.py | 133 +++- python/pulsing/core/remote.py | 633 +++++------------- .../pulsing/integrations/autogen/runtime.py | 2 +- python/pulsing/integrations/ray_compat.py | 2 +- python/pulsing/serving/load_stream.py | 42 +- python/pulsing/serving/router.py | 247 ++++--- python/pulsing/serving/scheduler.py | 15 +- python/pulsing/streaming/__init__.py | 10 +- python/pulsing/streaming/backend.py | 90 ++- python/pulsing/streaming/broker.py | 289 ++++---- python/pulsing/streaming/manager.py | 172 ++--- python/pulsing/streaming/pubsub.py | 34 +- python/pulsing/streaming/queue.py | 46 +- python/pulsing/streaming/storage.py | 94 +-- python/pulsing/streaming/sync_queue.py | 23 +- .../python/apis/actor/test_actor_behavior.py | 12 +- .../actor_system/test_actor_system_api.py | 6 
+- tests/python/core/test_helpers.py | 19 +- tests/python/core/test_helpers_coverage.py | 177 +++++ tests/python/core/test_init_coverage.py | 180 +++++ tests/python/core/test_remote_edge_cases.py | 251 +++++++ tests/python/core/test_remote_system_ops.py | 331 +++++++++ tests/python/core/test_remote_unit.py | 509 ++++++++++++++ tests/python/streaming/test_queue.py | 23 +- tests/python/streaming/test_queue_backends.py | 23 +- tests/python/test_actor_list.py | 10 +- tests/python/test_exceptions.py | 192 ++++++ tests/python/test_resolve_as_any.py | 6 +- tests/python/test_sealed_message.py | 77 +-- tests/python/test_system_actor.py | 2 +- 42 files changed, 2928 insertions(+), 1552 deletions(-) create mode 100644 tests/python/core/test_helpers_coverage.py create mode 100644 tests/python/core/test_init_coverage.py create mode 100644 tests/python/core/test_remote_system_ops.py create mode 100644 tests/python/core/test_remote_unit.py create mode 100644 tests/python/test_exceptions.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 976e182bd..4a16e21c4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,139 +1,169 @@ # Contributing to Pulsing -感谢你对 Pulsing 的兴趣!我们欢迎各种形式的贡献。 +感谢你对 Pulsing 的贡献兴趣!本文档介绍如何搭建开发环境、运行测试以及提交代码。 -## 开发环境设置 +## 前置要求 -### 前置要求 +| 工具 | 版本 | 安装方式 | +|------|------|----------| +| Rust | ≥ 1.75 | `curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \| sh` | +| Python | ≥ 3.10 | [python.org](https://python.org) 或 `uv python install 3.11` | +| uv | 最新 | `curl -LsSf https://astral.sh/uv/install.sh \| sh` | +| just | 最新 | `cargo install just` 或 `brew install just` | -- Rust 1.75+ -- Python 3.10+ -- maturin (`pip install maturin`) - -### 构建项目 +## 快速开始(三步) ```bash -# 克隆仓库 -git clone https://github.com/DeepLink-org/Pulsing.git -cd pulsing - -# 安装 Python 依赖 -pip install -e . -# 或使用 maturin -maturin develop - -# 运行测试 -cargo test -pytest tests/ -``` +# 1. 克隆仓库 +git clone https://github.com/DeepLink-org/Pulsing.git && cd Pulsing -## 贡献流程 +# 2. 
创建并激活 Python 虚拟环境,安装开发依赖 +uv sync --extra dev -### 1. 创建 Issue - -在开始工作之前,请先创建一个 Issue 讨论你想要做的改动。这有助于避免重复工作并确保你的贡献与项目方向一致。 +# 3. 编译 Rust 核心并安装到当前环境 +uv run maturin develop +``` -### 2. Fork 和 Clone +完成后可以验证安装: ```bash -git clone https://github.com/YOUR_USERNAME/pulsing.git -cd pulsing -git remote add upstream https://github.com/DeepLink-org/Pulsing.git +uv run python -c "import pulsing; print(pulsing.__version__)" ``` -### 3. 创建分支 +## 常用开发命令 + +项目使用 [just](https://github.com/casey/just) 作为任务运行器,所有常用命令都在 `Justfile` 中定义。 ```bash -git checkout -b feature/your-feature-name -# 或 -git checkout -b fix/your-fix-name +just dev # 编译并安装(开发模式,等同于 maturin develop) +just test # 运行全部测试(Rust + Python) +just test-python # 仅运行 Python 测试 +just test-rust # 仅运行 Rust 测试 +just fmt # 格式化代码(Rust + Python) +just lint # 代码检查 +just check # 提交前完整检查(格式 + lint + 测试) +just cov # 生成覆盖率报告 +just clean # 清理构建产物 ``` -### 4. 开发 +> **提示**:提交代码前请运行 `just check`,确保所有检查通过。 -- 遵循现有的代码风格 -- 添加必要的测试 -- 更新相关文档 +## 项目结构 -### 5. 提交 +``` +Pulsing/ +├── crates/ +│ ├── pulsing-actor/ # Rust 核心:Actor、Cluster、Transport +│ └── pulsing-py/ # PyO3 绑定:将 Rust 类型暴露给 Python +├── python/pulsing/ # Python 包 +│ ├── core/ # @remote 装饰器、ActorProxy +│ ├── serving/ # LLM 服务路由、调度 +│ ├── streaming/ # 分布式队列与发布/订阅 +│ ├── agent/ # Agent 运行时工具 +│ └── integrations/ # Ray / AutoGen / LangGraph 集成 +├── tests/python/ # Python 测试 +├── examples/ # 示例代码 +└── docs/ # 文档(MkDocs) +``` -我们使用 [Conventional Commits](https://www.conventionalcommits.org/) 规范: +## 开发工作流 +### 修改 Python 代码 + +Python 代码无需重新编译,修改后直接运行测试: + +```bash +just test-python +# 或者运行单个文件 +uv run pytest tests/python/test_remote_decorator.py -v ``` -feat: 添加新功能 -fix: 修复 bug -docs: 更新文档 -test: 添加或修改测试 -refactor: 代码重构 -chore: 构建过程或辅助工具的变动 -``` -示例: +### 修改 Rust 代码 + +修改 Rust 代码后需要重新编译: + ```bash -git commit -m "feat: add streaming support to ActorRef" -git commit -m "fix: resolve memory leak in mailbox" -git commit -m "docs: update README with new examples" +just dev +just test ``` -### 
6. 提交 PR +### 添加新特性 -- 确保所有测试通过 -- 确保代码格式正确 (`cargo fmt`, `ruff format`) -- 提供清晰的 PR 描述 +1. 在 `crates/pulsing-actor/` 实现 Rust 逻辑 +2. 在 `crates/pulsing-py/src/` 添加 PyO3 绑定 +3. 在 `python/pulsing/` 添加 Python 封装(如需要) +4. 在 `tests/python/` 添加测试 +5. 运行 `just check` 确认无误 -## 代码风格 +## 代码规范 ### Rust -- 使用 `cargo fmt` 格式化代码 -- 使用 `cargo clippy` 检查代码质量 -- 遵循 [Rust API Guidelines](https://rust-lang.github.io/api-guidelines/) +- 使用 `cargo fmt` 格式化(`just fmt` 会自动运行) +- 通过 `cargo clippy` 检查(`just lint` 会自动运行) +- 公共 API 必须有文档注释(`///`) +- 错误类型使用 `thiserror` 定义,避免使用 `anyhow` 作为库的公共 API ### Python -- 使用 `ruff` 进行格式化和检查 -- 遵循 PEP 8 -- 使用类型注解 +- 使用 `ruff format` 格式化(行宽 88) +- 使用 `ruff check` 检查(遵循 E/F/W/I/UP/B 规则集) +- 类型注解尽量完整 +- 异步函数优先使用 `async def` -## 测试 +### 测试 -### Rust 测试 +- Python 测试使用 `pytest-asyncio`,配置 `asyncio_mode = "auto"` +- 测试函数命名:`test_<功能描述>_<场景>` +- 避免测试间共享全局状态(每个测试通过 fixture 独立初始化系统) + +## 运行文档 ```bash -# 运行所有测试 -cargo test +cd docs +uv run mkdocs serve +# 访问 http://localhost:8000 +``` -# 运行特定测试 -cargo test test_name +## 提交 PR -# 运行 Actor System 测试 -cargo test -p pulsing-actor -``` +1. Fork 仓库并创建特性分支:`git checkout -b feat/your-feature` +2. 编写代码和测试 +3. 运行 `just check` 确保全部通过 +4. Push 并在 GitHub 上创建 Pull Request +5. PR 描述中说明改动目的和测试方式 -### Python 测试 +## 常见问题 -```bash -# 运行所有测试 -pytest tests/ +**Q: `maturin develop` 报错 `linker 'cc' not found`** + +Linux 上需要安装 gcc: -# 运行特定测试 -pytest tests/actor_system/ +```bash +# Ubuntu/Debian +sudo apt install build-essential +# CentOS/Fedora +sudo dnf install gcc gcc-c++ ``` -## 文档 +**Q: 运行测试时报 `ImportError: cannot import name '_core' from 'pulsing'`** -- API 文档使用 rustdoc / docstring -- 设计文档放在 `docs/design/` -- 示例代码放在 `examples/` +需要先编译 Rust 核心: -## 行为准则 +```bash +just dev +``` -请阅读并遵守我们的 [行为准则](CODE_OF_CONDUCT.md)。 +**Q: macOS 上 `maturin develop` 很慢** -## 许可证 +可以尝试只编译当前架构: -通过贡献代码,你同意你的贡献将在 Apache-2.0 许可证下发布。 +```bash +maturin develop --target $(rustc -vV | grep host | cut -d' ' -f2) +``` -## 问题? 
+**Q: 如何只跑某一个测试?** -如果你有任何问题,请通过 [GitHub Issues](https://github.com/DeepLink-org/Pulsing/issues) 联系我们。 +```bash +uv run pytest tests/python/test_remote_decorator.py::test_spawn_actor -v +``` diff --git a/README.md b/README.md index be34e43a7..0ca3e4576 100644 --- a/README.md +++ b/README.md @@ -205,15 +205,37 @@ Pulsing/ ## 🛠️ Development +### Prerequisites + +- [Rust](https://rustup.rs/) ≥ 1.75 +- Python ≥ 3.10 +- [uv](https://docs.astral.sh/uv/) (recommended package manager) +- [just](https://github.com/casey/just) (task runner: `cargo install just` or `brew install just`) + +### Quick Setup + ```bash -# Development build -maturin develop +# 1. Install Python dependencies +uv sync --extra dev + +# 2. Compile Rust core and install (run again after any Rust changes) +uv run maturin develop +``` + +### Common Commands -# Run tests -pytest tests/python/ -cargo test --workspace +```bash +just dev # Compile and install in development mode +just test # Run all tests (Rust + Python) +just test-python # Python tests only +just fmt # Format code (Rust + Python) +just lint # Lint check +just check # Full pre-commit check (format + lint + test) +just cov # Generate coverage report ``` +See [CONTRIBUTING.md](CONTRIBUTING.md) for a detailed guide on the development workflow. + ## 📄 License Apache-2.0 diff --git a/README.zh.md b/README.zh.md index e476e3795..6e2f095f8 100644 --- a/README.zh.md +++ b/README.zh.md @@ -205,15 +205,37 @@ Pulsing/ ## 🛠️ 开发 +### 前置依赖 + +- [Rust](https://rustup.rs/) ≥ 1.75 +- Python ≥ 3.10 +- [uv](https://docs.astral.sh/uv/)(推荐的包管理器) +- [just](https://github.com/casey/just)(任务运行器:`cargo install just` 或 `brew install just`) + +### 快速搭建 + ```bash -# 开发构建 -maturin develop +# 1. 安装 Python 依赖 +uv sync --extra dev + +# 2. 
编译 Rust 核心并安装(修改 Rust 代码后需重新执行) +uv run maturin develop +``` + +### 常用命令 -# 运行测试 -pytest tests/python/ -cargo test --workspace +```bash +just dev # 编译并安装(开发模式) +just test # 运行全部测试(Rust + Python) +just test-python # 仅运行 Python 测试 +just fmt # 格式化代码 +just lint # 代码检查 +just check # 提交前完整检查(格式 + lint + 测试) +just cov # 生成覆盖率报告 ``` +详细开发指南请参阅 [CONTRIBUTING.md](CONTRIBUTING.md)。 + ## 📄 License Apache-2.0 diff --git a/crates/pulsing-actor/src/system/resolve.rs b/crates/pulsing-actor/src/system/resolve.rs index 214260d24..8af097a0d 100644 --- a/crates/pulsing-actor/src/system/resolve.rs +++ b/crates/pulsing-actor/src/system/resolve.rs @@ -12,7 +12,7 @@ use crate::policies::LoadBalancingPolicy; use crate::system::config::ResolveOptions; use crate::system::load_balancer::{MemberWorker, NodeLoadTracker}; use crate::system::ActorSystem; -use crate::transport::Http2RemoteTransport; +use crate::transport::{Http2RemoteTransport, TransportTarget}; use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; @@ -42,8 +42,12 @@ impl ActorSystem { // Lookup actor location in cluster if let Some(member_info) = cluster.lookup_actor(id).await { - let transport = - Http2RemoteTransport::new_by_id(self.transport.client(), member_info.addr, *id); + let transport = Http2RemoteTransport::builder( + self.transport.client(), + member_info.addr, + TransportTarget::ById(*id), + ) + .build(); return Ok(ActorRef::remote(*id, member_info.addr, Arc::new(transport))); } @@ -124,11 +128,12 @@ impl ActorSystem { if nid != self.node_id { if let Some(member) = cluster.get_member(&nid).await { if !options.filter_alive || member.status == MemberStatus::Alive { - let transport = Http2RemoteTransport::new_named( + let transport = Http2RemoteTransport::builder( self.transport.client(), member.addr, - path.clone(), - ); + TransportTarget::Named(path.clone()), + ) + .build(); let actor_id = ActorId::generate(); return Ok(ActorRef::remote( actor_id, @@ -188,8 +193,12 @@ impl ActorSystem { return 
Ok(ActorRef::local(handle.actor_id, handle.sender.clone())); } - let transport = - Http2RemoteTransport::new_named(self.transport.client(), target.addr, path.clone()); + let transport = Http2RemoteTransport::builder( + self.transport.client(), + target.addr, + TransportTarget::Named(path.clone()), + ) + .build(); // For named actors, we don't have a specific ActorId until we resolve // Use a placeholder ID (this will be replaced when the actor is actually accessed) diff --git a/crates/pulsing-actor/src/transport/http2/mod.rs b/crates/pulsing-actor/src/transport/http2/mod.rs index ac0460fab..e5dcbd470 100644 --- a/crates/pulsing-actor/src/transport/http2/mod.rs +++ b/crates/pulsing-actor/src/transport/http2/mod.rs @@ -224,17 +224,71 @@ impl ResponseType { } } +/// Describes which actor on the remote node to reach. +#[derive(Debug, Clone)] +pub enum TransportTarget { + /// Actor registered by name (path: `/actors/{name}`) + ByName(String), + /// Actor identified by its UUID (path: `/actors/{id}`) + ById(ActorId), + /// Actor registered under a hierarchical named path (path: `/named/{path}`) + Named(ActorPath), +} + +impl TransportTarget { + fn to_path(&self) -> String { + match self { + TransportTarget::ByName(name) => format!("/actors/{}", name), + TransportTarget::ById(id) => format!("/actors/{}", id), + TransportTarget::Named(path) => format!("/named/{}", path.as_str()), + } + } +} + +/// Builder for [`Http2RemoteTransport`]. 
+/// +/// ```ignore +/// let transport = Http2RemoteTransport::builder( +/// client, addr, TransportTarget::ByName("my_actor".into())) +/// .circuit_breaker(CircuitBreakerConfig::default()) +/// .build(); +/// ``` +pub struct Http2RemoteTransportBuilder { + client: Arc, + remote_addr: SocketAddr, + target: TransportTarget, + cb_config: Option, +} + +impl Http2RemoteTransportBuilder { + pub fn circuit_breaker(mut self, config: CircuitBreakerConfig) -> Self { + self.cb_config = Some(config); + self + } + + pub fn build(self) -> Http2RemoteTransport { + Http2RemoteTransport { + client: self.client, + remote_addr: self.remote_addr, + path: self.target.to_path(), + circuit_breaker: match self.cb_config { + Some(cfg) => CircuitBreaker::with_config(cfg), + None => CircuitBreaker::new(), + }, + } + } +} + /// HTTP/2 Remote Transport for ActorRef /// /// Implements the `RemoteTransport` trait, enabling `ActorRef` to communicate /// with remote actors over HTTP/2, including streaming support. /// -/// Features: -/// - Automatic connection pooling and reuse -/// - Retry with exponential backoff for transient failures -/// - Circuit breaker for fault tolerance -/// - Configurable timeouts -/// - Streaming response support +/// Build via [`Http2RemoteTransport::builder`]: +/// ```ignore +/// let transport = Http2RemoteTransport::builder( +/// client, addr, TransportTarget::ByName("my_actor".into())).build(); +/// ``` pub struct Http2RemoteTransport { client: Arc, remote_addr: SocketAddr, @@ -244,63 +298,17 @@ pub struct Http2RemoteTransport { } impl Http2RemoteTransport { - /// Create a new remote transport targeting an actor by name - pub fn new(client: Arc, remote_addr: SocketAddr, actor_name: String) -> Self { - Self { - client, - remote_addr, - path: format!("/actors/{}", actor_name), - circuit_breaker: CircuitBreaker::new(), - } - } - - /// Create a new remote transport targeting an actor by ID - pub fn new_by_id(client: Arc, remote_addr: SocketAddr, actor_id: ActorId) -> 
Self { - Self { - client, - remote_addr, - path: format!("/actors/{}", actor_id), - circuit_breaker: CircuitBreaker::new(), - } - } - - /// Create a new remote transport targeting a named actor by path - pub fn new_named(client: Arc, remote_addr: SocketAddr, path: ActorPath) -> Self { - Self { - client, - remote_addr, - path: format!("/named/{}", path.as_str()), - circuit_breaker: CircuitBreaker::new(), - } - } - - /// Create a new remote transport with custom circuit breaker configuration - pub fn with_circuit_breaker( + /// Start building a transport for the given target. + pub fn builder( client: Arc, remote_addr: SocketAddr, - actor_name: String, - cb_config: CircuitBreakerConfig, - ) -> Self { - Self { + target: TransportTarget, + ) -> Http2RemoteTransportBuilder { + Http2RemoteTransportBuilder { client, remote_addr, - path: format!("/actors/{}", actor_name), - circuit_breaker: CircuitBreaker::with_config(cb_config), - } - } - - /// Create a new remote transport targeting a named actor with custom circuit breaker - pub fn new_named_with_circuit_breaker( - client: Arc, - remote_addr: SocketAddr, - path: ActorPath, - cb_config: CircuitBreakerConfig, - ) -> Self { - Self { - client, - remote_addr, - path: format!("/named/{}", path.as_str()), - circuit_breaker: CircuitBreaker::with_config(cb_config), + target, + cb_config: None, } } @@ -419,11 +427,17 @@ mod tests { let client = Arc::new(Http2Client::new(Http2Config::default())); let addr: SocketAddr = "127.0.0.1:8080".parse().unwrap(); - let transport = Http2RemoteTransport::new(client.clone(), addr, "my_actor".to_string()); + let transport = Http2RemoteTransport::builder( + client.clone(), + addr, + TransportTarget::ByName("my_actor".into()), + ) + .build(); assert_eq!(transport.path(), "/actors/my_actor"); let path = ActorPath::new("services/llm").unwrap(); - let transport = Http2RemoteTransport::new_named(client, addr, path); + let transport = + Http2RemoteTransport::builder(client, addr, 
TransportTarget::Named(path)).build(); assert_eq!(transport.path(), "/named/services/llm"); } @@ -484,7 +498,8 @@ mod tests { let addr: SocketAddr = "127.0.0.1:8080".parse().unwrap(); let actor_id = ActorId::generate(); - let transport = Http2RemoteTransport::new_by_id(client, addr, actor_id); + let transport = + Http2RemoteTransport::builder(client, addr, TransportTarget::ById(actor_id)).build(); // Path should be /actors/{uuid} where uuid is 32 hex chars assert!(transport.path().starts_with("/actors/")); assert_eq!(transport.path().len(), 8 + 32); // "/actors/" + 32 hex chars @@ -497,12 +512,10 @@ mod tests { let addr: SocketAddr = "127.0.0.1:8080".parse().unwrap(); let cb_config = CircuitBreakerConfig::default(); - let transport = Http2RemoteTransport::with_circuit_breaker( - client, - addr, - "my_actor".to_string(), - cb_config, - ); + let transport = + Http2RemoteTransport::builder(client, addr, TransportTarget::ByName("my_actor".into())) + .circuit_breaker(cb_config) + .build(); assert_eq!(transport.path(), "/actors/my_actor"); assert!(transport.circuit_breaker().can_execute()); } @@ -514,8 +527,9 @@ mod tests { let path = ActorPath::new("services/llm").unwrap(); let cb_config = CircuitBreakerConfig::default(); - let transport = - Http2RemoteTransport::new_named_with_circuit_breaker(client, addr, path, cb_config); + let transport = Http2RemoteTransport::builder(client, addr, TransportTarget::Named(path)) + .circuit_breaker(cb_config) + .build(); assert_eq!(transport.path(), "/named/services/llm"); } @@ -524,7 +538,12 @@ mod tests { let client = Arc::new(Http2Client::new(Http2Config::default())); let addr: SocketAddr = "192.168.1.100:9000".parse().unwrap(); - let transport = Http2RemoteTransport::new(client.clone(), addr, "test_actor".to_string()); + let transport = Http2RemoteTransport::builder( + client.clone(), + addr, + TransportTarget::ByName("test_actor".into()), + ) + .build(); assert_eq!(transport.remote_addr(), addr); assert_eq!(transport.path(), 
"/actors/test_actor"); diff --git a/crates/pulsing-actor/src/transport/mod.rs b/crates/pulsing-actor/src/transport/mod.rs index 0c4be1fdf..c38e38bc3 100644 --- a/crates/pulsing-actor/src/transport/mod.rs +++ b/crates/pulsing-actor/src/transport/mod.rs @@ -17,6 +17,7 @@ pub mod http2; // HTTP/2 exports pub use http2::{ BinaryFrameParser, Http2Client, Http2ClientBuilder, Http2Config, Http2RemoteTransport, - Http2Server, Http2ServerHandler, Http2Transport, MessageMode, PoolConfig, PoolStats, - RequestType, RetryConfig, RetryableError, StreamFrame, StreamHandle, FLAG_END, FLAG_ERROR, + Http2RemoteTransportBuilder, Http2Server, Http2ServerHandler, Http2Transport, MessageMode, + PoolConfig, PoolStats, RequestType, RetryConfig, RetryableError, StreamFrame, StreamHandle, + TransportTarget, FLAG_END, FLAG_ERROR, }; diff --git a/crates/pulsing-actor/tests/unit/transport/client_tests.rs b/crates/pulsing-actor/tests/unit/transport/client_tests.rs index 0db6a10ee..9d1719e2c 100644 --- a/crates/pulsing-actor/tests/unit/transport/client_tests.rs +++ b/crates/pulsing-actor/tests/unit/transport/client_tests.rs @@ -2,7 +2,9 @@ use crate::common::fixtures::{StreamingHandler, TestCounters, TestHandler}; use pulsing_actor::actor::{ActorId, Message}; -use pulsing_actor::transport::{Http2Client, Http2Config, Http2RemoteTransport, Http2Server}; +use pulsing_actor::transport::{ + Http2Client, Http2Config, Http2RemoteTransport, Http2Server, TransportTarget, +}; use std::sync::atomic::Ordering; use std::sync::Arc; use tokio_util::sync::CancellationToken; @@ -181,7 +183,9 @@ async fn test_http2_remote_transport_ask() { let addr = server.local_addr(); let client = Arc::new(Http2Client::new(Http2Config::default())); - let transport = Http2RemoteTransport::new(client, addr, "test-actor".to_string()); + let transport = + Http2RemoteTransport::builder(client, addr, TransportTarget::ByName("test-actor".into())) + .build(); use pulsing_actor::actor::RemoteTransport; @@ -216,7 +220,9 @@ async fn 
test_http2_remote_transport_tell() { let addr = server.local_addr(); let client = Arc::new(Http2Client::new(Http2Config::default())); - let transport = Http2RemoteTransport::new(client, addr, "fire-actor".to_string()); + let transport = + Http2RemoteTransport::builder(client, addr, TransportTarget::ByName("fire-actor".into())) + .build(); use pulsing_actor::actor::RemoteTransport; @@ -252,7 +258,8 @@ async fn test_http2_remote_transport_named_path() { let client = Arc::new(Http2Client::new(Http2Config::default())); use pulsing_actor::actor::ActorPath; let path = ActorPath::new("services/llm/worker").unwrap(); - let transport = Http2RemoteTransport::new_named(client, addr, path); + let transport = + Http2RemoteTransport::builder(client, addr, TransportTarget::Named(path)).build(); use pulsing_actor::actor::RemoteTransport; diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs index b4c16e579..f39175571 100644 --- a/crates/pulsing-py/src/actor.rs +++ b/crates/pulsing-py/src/actor.rs @@ -395,54 +395,6 @@ impl PyMessage { } } -// ============================================================================ -// SealedPyMessage - Pickle-encoded Python objects for Python-to-Python communication -// ============================================================================ - -/// Pickle-encoded Python object wrapper for transparent Python object passing. -/// -/// This allows Python actors to send and receive arbitrary Python objects -/// without the need for JSON serialization. The object is serialized using -/// Python's pickle module. 
-#[pyclass(name = "SealedPyMessage")] -#[derive(Clone)] -pub struct PySealedMessage { - /// Pickle-encoded Python object bytes - data: Vec, -} - -#[pymethods] -impl PySealedMessage { - /// Create a SealedPyMessage by pickling any Python object - #[staticmethod] - fn seal(py: Python<'_>, obj: PyObject) -> PyResult { - let pickle = py.import("pickle")?; - let dumped = pickle.call_method1("dumps", (&obj,))?; - let bytes = dumped.downcast::()?; - Ok(Self { - data: bytes.as_bytes().to_vec(), - }) - } - - /// Unseal (unpickle) the message back to a Python object - fn unseal(&self, py: Python<'_>) -> PyResult { - let pickle = py.import("pickle")?; - let bytes = PyBytes::new(py, &self.data); - let obj = pickle.call_method1("loads", (bytes,))?; - Ok(obj.into()) - } - - /// Get raw pickle bytes - #[getter] - fn data<'py>(&self, py: Python<'py>) -> Bound<'py, PyBytes> { - PyBytes::new(py, &self.data) - } - - fn __repr__(&self) -> String { - format!("SealedPyMessage(data_len={})", self.data.len()) - } -} - /// Descriptor object for optional zerocopy payload transport. #[pyclass(name = "ZeroCopyDescriptor")] #[derive(Clone)] @@ -2111,8 +2063,6 @@ pub fn add_to_module(m: &Bound<'_, pyo3::types::PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - // Sealed message support (for Python-to-Python communication) - m.add_class::()?; m.add_class::()?; Ok(()) } diff --git a/crates/pulsing-py/src/errors.rs b/crates/pulsing-py/src/errors.rs index b777cd3f6..1dc16a10d 100644 --- a/crates/pulsing-py/src/errors.rs +++ b/crates/pulsing-py/src/errors.rs @@ -1,113 +1,85 @@ //! Python exception bindings for Pulsing errors //! -//! This module converts Rust error types to Python exceptions using -//! JSON-structured error envelopes instead of string prefixes. -//! -//! The JSON envelope format: -//! ```json -//! { -//! "category": "actor" | "runtime", -//! // For actor errors (category="actor"): -//! "error": { "type": "business", "code": 400, ... }, -//! 
// For runtime errors (category="runtime"): -//! "kind": "actor_not_found", -//! "message": "Actor not found: my-actor", -//! "actor_name": "my-actor" // optional -//! } -//! ``` +//! This module converts Rust error types directly to Python exception instances +//! by importing `pulsing.exceptions` and calling the appropriate class constructor. +//! No intermediate JSON/string encoding is needed. -use pulsing_actor::error::PulsingError; +use pulsing_actor::error::{ActorError, PulsingError}; use pyo3::exceptions::PyRuntimeError; use pyo3::prelude::*; +use std::sync::OnceLock; -/// JSON marker prefix for structured error envelopes. -/// Python layer detects this prefix and parses the JSON payload. -pub const ERROR_ENVELOPE_PREFIX: &str = "__PULSING_ERROR__:"; +static EXCEPTIONS_MODULE: OnceLock> = OnceLock::new(); -/// Convert Rust PulsingError to Python exception using JSON envelope. -/// -/// Instead of string-prefix-based encoding (fragile, requires regex parsing), -/// this uses a JSON-structured envelope that Python can reliably decode. -pub fn pulsing_error_to_py_err(err: PulsingError) -> PyErr { - let json_str = match &err { - PulsingError::Actor(actor_err) => { - // ActorError already derives Serialize with serde(tag = "type") - let actor_json = serde_json::to_value(actor_err).unwrap_or_else(|_| { - serde_json::json!({"type": "system", "error": err.to_string(), "recoverable": true}) - }); - serde_json::json!({ - "category": "actor", - "error": actor_json, - }) - .to_string() +fn get_exceptions(py: Python<'_>) -> PyResult> { + if let Some(m) = EXCEPTIONS_MODULE.get() { + return Ok(m.bind(py).clone()); + } + let module = py.import("pulsing.exceptions")?; + // Another thread may have set it concurrently; that's fine — both see the same module. 
+ let _ = EXCEPTIONS_MODULE.set(module.clone().unbind()); + Ok(module) +} + +fn try_create_py_err(py: Python<'_>, err: &PulsingError) -> PyResult { + let exc = get_exceptions(py)?; + match err { + PulsingError::Runtime(re) => { + let cls = exc.getattr("PulsingRuntimeError")?; + Ok(PyErr::from_value(cls.call1((re.to_string(),))?)) } - PulsingError::Runtime(runtime_err) => serde_json::json!({ - "category": "runtime", - "kind": runtime_err.kind(), - "message": runtime_err.to_string(), - "actor_name": runtime_err.actor_name(), - }) - .to_string(), - }; + PulsingError::Actor(ae) => match ae { + ActorError::Business { + code, + message, + details, + } => { + let cls = exc.getattr("PulsingBusinessError")?; + let exc_obj = if let Some(d) = details { + cls.call1((*code, message.as_str(), d.as_str()))? + } else { + cls.call1((*code, message.as_str()))? + }; + Ok(PyErr::from_value(exc_obj)) + } + ActorError::System { error, recoverable } => { + let cls = exc.getattr("PulsingSystemError")?; + Ok(PyErr::from_value( + cls.call1((error.as_str(), *recoverable))?, + )) + } + ActorError::Timeout { + operation, + duration_ms, + } => { + let cls = exc.getattr("PulsingTimeoutError")?; + Ok(PyErr::from_value( + cls.call1((operation.as_str(), *duration_ms))?, + )) + } + ActorError::Unsupported { operation } => { + let cls = exc.getattr("PulsingUnsupportedError")?; + Ok(PyErr::from_value(cls.call1((operation.as_str(),))?)) + } + }, + } +} - PyRuntimeError::new_err(format!("{}{}", ERROR_ENVELOPE_PREFIX, json_str)) +/// Convert Rust PulsingError to Python exception. +/// +/// Directly instantiates the matching `pulsing.exceptions` class so the caller +/// receives a typed exception (PulsingRuntimeError, PulsingBusinessError, …) +/// without any intermediate JSON envelope or string parsing. 
+pub fn pulsing_error_to_py_err(err: PulsingError) -> PyErr { + Python::with_gil(|py| { + try_create_py_err(py, &err).unwrap_or_else(|_| PyRuntimeError::new_err(err.to_string())) + }) } -/// Add error classes to Python module +/// Add error classes to Python module. /// -/// Note: In abi3 mode, we can't create custom exception classes directly. -/// Exception classes are defined in Python (pulsing/exceptions.py). -/// This function is kept for API consistency. +/// Exception classes are defined in Python (`pulsing/exceptions.py`); nothing +/// to register here. pub fn add_to_module(_m: &Bound<'_, PyModule>) -> PyResult<()> { - // Error classes are defined in Python layer Ok(()) } - -#[cfg(test)] -mod tests { - use super::*; - use pulsing_actor::error::{ActorError, RuntimeError}; - - #[test] - fn test_actor_error_envelope() { - let err = PulsingError::Actor(ActorError::business(400, "Invalid input", None)); - let py_err = pulsing_error_to_py_err(err); - let msg = py_err.to_string(); - assert!(msg.starts_with(ERROR_ENVELOPE_PREFIX)); - - let json_str = &msg[ERROR_ENVELOPE_PREFIX.len()..]; - let envelope: serde_json::Value = serde_json::from_str(json_str).unwrap(); - assert_eq!(envelope["category"], "actor"); - assert_eq!(envelope["error"]["type"], "business"); - assert_eq!(envelope["error"]["code"], 400); - } - - #[test] - fn test_runtime_error_envelope() { - let err = PulsingError::Runtime(RuntimeError::actor_not_found("my-actor")); - let py_err = pulsing_error_to_py_err(err); - let msg = py_err.to_string(); - assert!(msg.starts_with(ERROR_ENVELOPE_PREFIX)); - - let json_str = &msg[ERROR_ENVELOPE_PREFIX.len()..]; - let envelope: serde_json::Value = serde_json::from_str(json_str).unwrap(); - assert_eq!(envelope["category"], "runtime"); - assert_eq!(envelope["kind"], "actor_not_found"); - assert_eq!(envelope["actor_name"], "my-actor"); - } - - #[test] - fn test_anyhow_error_conversion() { - let anyhow_err = anyhow::anyhow!("something went wrong"); - let py_err = 
pulsing_error_to_py_err(PulsingError::from(RuntimeError::Other( - anyhow_err.to_string(), - ))); - let msg = py_err.to_string(); - assert!(msg.starts_with(ERROR_ENVELOPE_PREFIX)); - - let json_str = &msg[ERROR_ENVELOPE_PREFIX.len()..]; - let envelope: serde_json::Value = serde_json::from_str(json_str).unwrap(); - assert_eq!(envelope["category"], "runtime"); - assert_eq!(envelope["kind"], "other"); - } -} diff --git a/crates/pulsing-py/src/python_error_converter.rs b/crates/pulsing-py/src/python_error_converter.rs index abc4eb41c..387832022 100644 --- a/crates/pulsing-py/src/python_error_converter.rs +++ b/crates/pulsing-py/src/python_error_converter.rs @@ -1,111 +1,81 @@ //! Convert Python exceptions to Rust ActorError //! -//! This module provides automatic conversion from Python exceptions -//! to unified ActorError types, enabling seamless error handling -//! across Rust and Python boundaries. +//! Uses `isinstance` checks against `pulsing.exceptions` classes — matching +//! most-specific types first — then falls back to standard Python exception +//! types for interoperability. use pulsing_actor::error::ActorError; use pyo3::exceptions::{PyTimeoutError, PyTypeError, PyValueError}; use pyo3::prelude::*; -/// Convert Python exception (PyErr) to ActorError +/// Convert Python exception to ActorError. /// -/// This function automatically classifies Python exceptions: -/// - ValueError, TypeError -> Business error -/// - TimeoutError -> Timeout error -/// - Other exceptions -> System error +/// Matching order (most-specific first): +/// 1. Pulsing custom types: Business → System → Timeout → Unsupported +/// 2. 
Standard Python types: TimeoutError → ValueError/TypeError → fallback pub fn convert_python_exception_to_actor_error( py: Python, err: &PyErr, ) -> anyhow::Result { - // Try to extract exception type and message - let err_type = err.get_type(py); - let type_name = err_type.name()?.to_string(); - let err_msg = err.to_string(); - - // Check for specific exception types - if err.is_instance_of::(py) { - // Timeout error - return Ok(ActorError::timeout("python_operation", 0)); - } - - if err.is_instance_of::(py) || err.is_instance_of::(py) { - // Business error: validation/type errors - return Ok(ActorError::business(400, err_msg, None)); - } - - // Check if it's a custom Pulsing exception - // Try to extract error details from exception attributes - let py_err_obj = err.value(py); - - // Check for PulsingBusinessError - if let Ok(code_attr) = py_err_obj.getattr("code") { - if let Ok(code) = code_attr.extract::() { - let message_attr = py_err_obj.getattr("message").ok(); - let message = message_attr - .and_then(|m| m.extract::().ok()) - .unwrap_or_else(|| err_msg.clone()); - - let details_attr = py_err_obj.getattr("details").ok(); - let details = details_attr.and_then(|d| d.extract::().ok()); - - return Ok(ActorError::business(code, message, details)); + let err_obj = err.value(py); + + if let Ok(exc) = py.import("pulsing.exceptions") { + // PulsingBusinessError — check before PulsingActorError (more specific) + if let Ok(cls) = exc.getattr("PulsingBusinessError") { + if err_obj.is_instance(&cls).unwrap_or(false) { + let code = err_obj.getattr("code")?.extract::()?; + let message = err_obj.getattr("message")?.extract::()?; + let details = err_obj + .getattr("details") + .ok() + .and_then(|d| d.extract::>().ok()) + .flatten(); + return Ok(ActorError::business(code, message, details)); + } } - } - // Check for PulsingSystemError - if let Ok(error_attr) = py_err_obj.getattr("error") { - if let Ok(error_msg) = error_attr.extract::() { - let recoverable_attr = 
py_err_obj.getattr("recoverable").ok(); - let recoverable = recoverable_attr - .and_then(|r| r.extract::().ok()) - .unwrap_or(true); - - return Ok(ActorError::system(error_msg, recoverable)); + if let Ok(cls) = exc.getattr("PulsingSystemError") { + if err_obj.is_instance(&cls).unwrap_or(false) { + let error = err_obj.getattr("error")?.extract::()?; + let recoverable = err_obj + .getattr("recoverable") + .ok() + .and_then(|r| r.extract::().ok()) + .unwrap_or(true); + return Ok(ActorError::system(error, recoverable)); + } } - } - // Check for PulsingTimeoutError (has both operation and duration_ms) - if let Ok(operation_attr) = py_err_obj.getattr("operation") { - if let Ok(operation) = operation_attr.extract::() { - let duration_attr = py_err_obj.getattr("duration_ms").ok(); - if let Some(duration_ms) = duration_attr.and_then(|d| d.extract::().ok()) { - // Has duration_ms -> Timeout error + if let Ok(cls) = exc.getattr("PulsingTimeoutError") { + if err_obj.is_instance(&cls).unwrap_or(false) { + let operation = err_obj.getattr("operation")?.extract::()?; + let duration_ms = err_obj + .getattr("duration_ms") + .ok() + .and_then(|d| d.extract::().ok()) + .unwrap_or(0); return Ok(ActorError::timeout(operation, duration_ms)); } } - } - // Check for PulsingUnsupportedError (by type name or operation attribute without duration_ms) - if type_name.contains("Unsupported") || type_name.contains("unsupported") { - if let Ok(operation_attr) = py_err_obj.getattr("operation") { - if let Ok(operation) = operation_attr.extract::() { + if let Ok(cls) = exc.getattr("PulsingUnsupportedError") { + if err_obj.is_instance(&cls).unwrap_or(false) { + let operation = err_obj.getattr("operation")?.extract::()?; return Ok(ActorError::unsupported(operation)); } } - // Fallback: use error message as operation - return Ok(ActorError::unsupported(err_msg)); } - // Default: classify based on exception type name - match type_name.as_str() { - "TimeoutError" | "asyncio.TimeoutError" => 
Ok(ActorError::timeout("python_operation", 0)), - "ValueError" | "TypeError" | "KeyError" | "AttributeError" => { - // Business errors: user input errors - Ok(ActorError::business(400, err_msg, None)) - } - "RuntimeError" | "SystemError" | "OSError" | "IOError" => { - // System errors: internal errors - Ok(ActorError::system(err_msg, true)) - } - _ => { - // Unknown exception type: treat as system error - Ok(ActorError::system( - format!("{}: {}", type_name, err_msg), - true, - )) - } + // Standard Python exception fallback + if err.is_instance_of::(py) { + return Ok(ActorError::timeout("python_operation", 0)); + } + + if err.is_instance_of::(py) || err.is_instance_of::(py) { + return Ok(ActorError::business(400, err.to_string(), None)); } + + Ok(ActorError::system(err.to_string(), true)) } #[cfg(test)] diff --git a/pyproject.toml b/pyproject.toml index a30dcbd72..8658c61cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ license = "Apache-2.0" requires-python = ">=3.10" dependencies = [ - "pydantic>=2.10.6,<=2.11.7", + "pydantic>=2.10.6", "uvloop>=0.21.0", "aiohttp>=3.9.0", "hyperparameter>=0.5.0", diff --git a/python/pulsing/core/__init__.py b/python/pulsing/core/__init__.py index da67eee19..a205ed347 100644 --- a/python/pulsing/core/__init__.py +++ b/python/pulsing/core/__init__.py @@ -29,7 +29,6 @@ def incr(self): self.value += 1; return self.value ActorRef, ActorSystem, NodeId, - SealedPyMessage, ZeroCopyDescriptor, StreamReader, StreamWriter, @@ -214,12 +213,11 @@ async def tell_with_timeout( get_system_actor, health_check, list_actors, - mount, - unmount, ping, remote, resolve, ) +from .helpers import mount, unmount # Import exceptions for convenience from pulsing.exceptions import ( diff --git a/python/pulsing/core/helpers.py b/python/pulsing/core/helpers.py index f6d162f57..d148ae408 100644 --- a/python/pulsing/core/helpers.py +++ b/python/pulsing/core/helpers.py @@ -1,9 +1,10 @@ -"""Actor helper functions - simplify actor 
creation and lifecycle management""" +"""Actor helper functions - lifecycle management and sync/async bridge.""" import asyncio +import concurrent.futures import signal import sys -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from . import Actor, ActorSystem @@ -86,3 +87,131 @@ async def spawn_and_run( print(f"[{name}] Started at {system.addr}") await run_until_signal(name) + + +# --------------------------------------------------------------------------- +# Sync/async bridge — single implementation used by mount/unmount +# --------------------------------------------------------------------------- + + +def run_sync(coro) -> Any: + """Execute a coroutine synchronously on the Pulsing background event loop. + + Handles three environments: + - Ray: submits to the background loop started by ``init_in_ray`` + - Standalone (no running loop): uses ``asyncio.run`` + - Inside a running loop (e.g. a Jupyter cell): runs in a thread-pool worker + + Raises: + TimeoutError: if the coroutine does not complete within 30 s. 
+ """ + try: + from pulsing.integrations.ray import _loop + + if _loop is not None: + fut = asyncio.run_coroutine_threadsafe(coro, _loop) + return fut.result(timeout=30) + except ImportError: + pass + + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + + if loop is None: + return asyncio.run(coro) + + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: + return pool.submit(asyncio.run, coro).result(timeout=30) + + +# --------------------------------------------------------------------------- +# mount / unmount — sync API to expose Python objects as Pulsing actors +# --------------------------------------------------------------------------- + + +def _auto_init_pulsing() -> None: + """Auto-detect environment and initialize Pulsing.""" + try: + import ray + + if ray.is_initialized(): + from pulsing.integrations.ray import init_in_ray + + init_in_ray() + return + except ImportError: + pass + + raise RuntimeError( + "Pulsing not initialized. Please call await pul.init() or run in Ray environment." + ) + + +def mount(instance: Any, *, name: str, public: bool = True) -> None: + """Mount an existing Python object to the Pulsing communication network. + + Synchronous interface, can be called in ``__init__``. Automatically: + 1. Initialize Pulsing (if not already, auto-detects Ray environment) + 2. Wrap instance as a Pulsing actor + 3. Register to Pulsing network — other nodes can discover via ``pul.resolve(name)`` + + Args: + instance: Object to mount (any Python instance) + name: Pulsing name, other nodes resolve via this name + public: Whether discoverable by other cluster nodes (default True) + + Example:: + + @ray.remote + class Counter: + def __init__(self, name, peers): + self.name = name + pul.mount(self, name=name) + + async def greet(self, msg): + return f"Hello from {self.name}: {msg}" + """ + from . import _global_system + + if _global_system is None: + _auto_init_pulsing() + + from . 
import _global_system as system + + if system is None: + raise RuntimeError( + "Pulsing initialization failed. Please call pul.init() or run in Ray environment." + ) + + from .remote import _WrappedActor, _register_actor_metadata + + actor_name = name if "/" in name else f"actors/{name}" + wrapped = _WrappedActor(instance) + + async def _do_mount(): + return await system.spawn(wrapped, name=actor_name, public=public) + + actor_ref = run_sync(_do_mount()) + wrapped._inject_delayed(actor_ref) + _register_actor_metadata(actor_name, type(instance)) + + +def unmount(name: str) -> None: + """Unmount a previously mounted actor from the Pulsing network. + + Args: + name: Name used during mounting + """ + from . import _global_system + + if _global_system is None: + return + + actor_name = name if "/" in name else f"actors/{name}" + + async def _do_unmount(): + await _global_system.stop(actor_name) + + run_sync(_do_unmount()) diff --git a/python/pulsing/core/remote.py b/python/pulsing/core/remote.py index 16f22457f..fafc7db63 100644 --- a/python/pulsing/core/remote.py +++ b/python/pulsing/core/remote.py @@ -84,8 +84,9 @@ def _wrap_response(result: Any = None, error: str | None = None) -> dict: def _unwrap_response(resp: dict) -> tuple[Any, str | None]: """Unwrap response. Returns (result, error) - one of them will be None. - Accepts: wire format (__pulsing__.result/error), legacy (__result__/__error__), - and top-level "result"/"error" (e.g. from Message payload JSON). 
+ Accepts: + - Wire format: {"__pulsing__": {"result": ..., "error": ...}} + - Message JSON: {"result": ..., "error": ...} (Rust actor responses) """ pulsing = resp.get("__pulsing__", {}) if isinstance(pulsing, dict): @@ -93,10 +94,6 @@ def _unwrap_response(resp: dict) -> tuple[Any, str | None]: return (None, pulsing["error"]) if "result" in pulsing: return (pulsing["result"], None) - if "__error__" in resp: - return (None, resp["__error__"]) - if "__result__" in resp: - return (resp["__result__"], None) if "error" in resp: return (None, resp["error"]) if "result" in resp: @@ -104,85 +101,9 @@ def _unwrap_response(resp: dict) -> tuple[Any, str | None]: return (None, None) -_PULSING_ERROR_PREFIX = "__PULSING_ERROR__:" - - -def _convert_rust_error(err: RuntimeError) -> Exception: - """Convert Rust-raised RuntimeError to appropriate Pulsing exception. - - Rust layer encodes errors as JSON envelopes with prefix "__PULSING_ERROR__:". - The JSON format: - Actor errors: {"category": "actor", "error": {"type": "business", "code": 400, ...}} - Runtime errors: {"category": "runtime", "kind": "actor_not_found", "message": "...", ...} - - This replaces the previous regex-based string prefix parsing with - reliable JSON deserialization. 
- """ - import json - - from pulsing.exceptions import ( - PulsingBusinessError, - PulsingSystemError, - PulsingTimeoutError, - PulsingUnsupportedError, - ) - - err_msg = str(err) - - if not err_msg.startswith(_PULSING_ERROR_PREFIX): - # Not a structured Pulsing error, wrap as generic RuntimeError - return PulsingRuntimeError(err_msg) - - json_str = err_msg[len(_PULSING_ERROR_PREFIX) :] - try: - envelope = json.loads(json_str) - except (json.JSONDecodeError, ValueError): - # JSON parse failed, fall back to generic error - return PulsingRuntimeError(err_msg) - - category = envelope.get("category") - - if category == "actor": - actor_err = envelope.get("error", {}) - err_type = actor_err.get("type") - - if err_type == "business": - code = actor_err.get("code", 0) - message = actor_err.get("message", "Unknown error") - details = actor_err.get("details") - return PulsingBusinessError(code, message, details=details) - - if err_type == "system": - error = actor_err.get("error", "Unknown error") - recoverable = actor_err.get("recoverable", True) - return PulsingSystemError(error, recoverable=recoverable) - - if err_type == "timeout": - operation = actor_err.get("operation", "unknown") - duration_ms = actor_err.get("duration_ms", 0) - return PulsingTimeoutError(operation, duration_ms) - - if err_type == "unsupported": - operation = actor_err.get("operation", "unknown") - return PulsingUnsupportedError(operation) - - # Unknown actor error type, generic fallback - return PulsingActorError(str(actor_err)) - - if category == "runtime": - message = envelope.get("message", "Unknown runtime error") - return PulsingRuntimeError(message) - - # Unknown category - return PulsingRuntimeError(err_msg) - - async def _ask_convert_errors(ref, msg) -> Any: - """Call ref.ask(msg) and convert Rust RuntimeError to Pulsing exceptions.""" - try: - return await ref.ask(msg) - except RuntimeError as e: - raise _convert_rust_error(e) from e + """Call ref.ask(msg); Rust raises typed Pulsing 
exceptions directly.""" + return await ref.ask(msg) logger = logging.getLogger(__name__) @@ -338,24 +259,17 @@ def __await__(self): async def _sync_call(self, *args, **kwargs) -> Any: """Synchronous method call.""" call_msg = _wrap_call(self._method, args, kwargs, False) - resp = await _ask_convert_errors(self._ref, call_msg) if isinstance(resp, dict): result, error = _unwrap_response(resp) if error: - # Actor execution error - try: - raise PulsingActorError( - error, actor_name=str(self._ref.actor_id.id) - ) - except RuntimeError as e: - # If it's a Rust error, convert it - raise _convert_rust_error(e) from e + raise PulsingActorError(error, actor_name=str(self._ref.actor_id.id)) return result elif isinstance(resp, Message): if resp.is_stream: - return _SyncGeneratorStreamReader(resp) + # Sync generator: return an awaitable/iterable stream reader + return _AsyncMethodCall.from_message(self._ref, resp) data = resp.to_json() if not isinstance(data, dict): return resp @@ -366,10 +280,7 @@ async def _sync_call(self, *args, **kwargs) -> Any: ) result, error = _unwrap_response(data) if error: - raise PulsingActorError( - error, - actor_name=str(self._ref.actor_id.id), - ) + raise PulsingActorError(error, actor_name=str(self._ref.actor_id.id)) if result is not None: return result return data.get("result") @@ -377,14 +288,11 @@ async def _sync_call(self, *args, **kwargs) -> Any: class _AsyncMethodCall: - """Async method call - supports await and async for + """Async method call — supports await (final result) and async for (stream). 
Usage: - # Directly await to get final result - result = await service.generate("hello") - - # Stream intermediate results - async for chunk in service.generate("hello"): + result = await actor.generate("hello") # get final result + async for chunk in actor.generate("hello"): # stream chunks print(chunk) """ @@ -399,135 +307,101 @@ def __init__( self._final_result = None self._got_result = False - async def _get_stream(self): - """Get stream (lazy initialization)""" - if self._stream_reader is None: - call_msg = _wrap_call(self._method, self._args, self._kwargs, True) - resp = await _ask_convert_errors(self._ref, call_msg) - - # Response may be PyMessage (streaming) or direct Python object - if isinstance(resp, Message): - # Check if it's a streaming message - if resp.is_stream: - self._stream_reader = resp.stream_reader() - else: - # Not streaming, might be an error - data = resp.to_json() - if resp.msg_type == "Error": - # Actor execution error + @classmethod + def from_message(cls, ref: ActorRef, message: Message) -> "_AsyncMethodCall": + """Build from a pre-acquired streaming Message (sync generator return path).""" + obj = cls.__new__(cls) + obj._ref = ref + obj._method = "" + obj._args = () + obj._kwargs = {} + obj._stream_reader = message.stream_reader() + obj._final_result = None + obj._got_result = False + return obj + + async def _ensure_stream(self) -> None: + """Send RPC and resolve the response. + + For streaming responses, initialises _stream_reader. + For direct responses (non-streaming), resolves _final_result immediately + so __anext__ can stop without an extra iterator allocation. 
+ """ + if self._stream_reader is not None or self._got_result: + return + + call_msg = _wrap_call(self._method, self._args, self._kwargs, True) + resp = await _ask_convert_errors(self._ref, call_msg) + + if isinstance(resp, Message): + if resp.is_stream: + self._stream_reader = resp.stream_reader() + else: + data = resp.to_json() + if resp.msg_type == "Error": + raise PulsingActorError( + data.get("error", "Remote call failed"), + actor_name=str(self._ref.actor_id.id), + ) + result, error = _unwrap_response(data) + if error: + raise PulsingActorError( + error, actor_name=str(self._ref.actor_id.id) + ) + self._final_result = result + self._got_result = True + else: + # Direct dict from Python actor called with is_async=True + if isinstance(resp, dict): + pulsing = resp.get("__pulsing__", {}) + if isinstance(pulsing, dict): + if "error" in pulsing: raise PulsingActorError( - data.get("error", "Remote call failed"), - actor_name=str(self._ref.actor_id.id), + pulsing["error"], actor_name=str(self._ref.actor_id.id) ) - # Wrap as single-value iterator - self._stream_reader = _SingleValueIterator(data) - else: - # Regular Python object (might be dict) - self._stream_reader = _SingleValueIterator(resp) - - return self._stream_reader + self._final_result = pulsing.get("result") + self._got_result = True + return + self._final_result = resp + self._got_result = True def __aiter__(self): - """Support async iteration, get intermediate results""" return self async def __anext__(self): - """Get next streaming data""" - reader = await self._get_stream() + await self._ensure_stream() + if self._got_result: + raise StopAsyncIteration try: - item = await reader.__anext__() + item = await self._stream_reader.__anext__() if isinstance(item, dict): - # Wire format (__pulsing__.result/error) or legacy (__result__/__error__) - result, error = _unwrap_response(item) - if error is not None: - raise PulsingActorError( - error, actor_name=str(self._ref.actor_id.id) - ) - if ( - result is 
not None - and "__yield__" not in item - and "__final__" not in item - ): - # Single-value response (non-streaming) - self._final_result = result - self._got_result = True - raise StopAsyncIteration - if "__final__" in item: - self._final_result = item.get("__result__") - self._got_result = True - raise StopAsyncIteration - if "__error__" in item: - raise PulsingActorError( - item["__error__"], actor_name=str(self._ref.actor_id.id) - ) + pulsing = item.get("__pulsing__", {}) + if isinstance(pulsing, dict): + if "error" in pulsing: + raise PulsingActorError( + pulsing["error"], actor_name=str(self._ref.actor_id.id) + ) + if pulsing.get("final"): + self._final_result = pulsing.get("result") + self._got_result = True + raise StopAsyncIteration if "__yield__" in item: return item["__yield__"] - if "__result__" in item: - self._final_result = item.get("__result__") - self._got_result = True - raise StopAsyncIteration return item except StopAsyncIteration: raise def __await__(self): - """Support await, get final result""" return self._await_result().__await__() async def _await_result(self): - """Consume entire stream, return final result""" async for _ in self: - pass # Consume all yielded intermediate values + pass if self._got_result: return self._final_result return None -class _SingleValueIterator: - """Single-value async iterator - wraps a single value as async iterator""" - - def __init__(self, value): - self._value = value - self._consumed = False - - def __aiter__(self): - return self - - async def __anext__(self): - if self._consumed: - raise StopAsyncIteration - self._consumed = True - return self._value - - -class _SyncGeneratorStreamReader: - """Stream reader for sync generator returned from non-async method""" - - def __init__(self, message: Message): - self._reader = message.stream_reader() - self._final_result = None - self._got_result = False - - def __aiter__(self): - return self - - async def __anext__(self): - try: - item = await 
self._reader.__anext__() - if isinstance(item, dict): - if "__final__" in item: - self._final_result = item.get("__result__") - self._got_result = True - raise StopAsyncIteration - if "__error__" in item: - raise PulsingActorError(item["__error__"]) - if "__yield__" in item: - return item["__yield__"] - return item - except StopAsyncIteration: - raise - - class _DelayedCallProxy: """Proxy returned by ``self.delayed(sec)`` — any method call becomes a delayed message to self. @@ -690,7 +564,7 @@ async def receive(self, msg) -> Any: except Exception as e: return Message.from_json("Error", {"error": str(e)}) - return {"__error__": f"Unknown message type: {type(msg)}"} + return _wrap_response(error=f"Unknown message type: {type(msg)}") @staticmethod async def _safe_stream_write(writer, obj: dict) -> bool: @@ -730,10 +604,12 @@ async def execute(): ): return await self._safe_stream_write( - writer, {"__final__": True, "__result__": None} + writer, {"__pulsing__": {"final": True, "result": None}} ) except Exception as e: - await self._safe_stream_write(writer, {"__error__": str(e)}) + await self._safe_stream_write( + writer, {"__pulsing__": {"error": str(e)}} + ) finally: await self._safe_stream_close(writer) @@ -757,12 +633,12 @@ async def execute(): ): return await self._safe_stream_write( - writer, {"__final__": True, "__result__": None} + writer, {"__pulsing__": {"final": True, "result": None}} ) elif asyncio.iscoroutine(result): final_result = await result await self._safe_stream_write( - writer, {"__final__": True, "__result__": final_result} + writer, {"__pulsing__": {"final": True, "result": final_result}} ) elif inspect.isgenerator(result): for item in result: @@ -771,14 +647,16 @@ async def execute(): ): return await self._safe_stream_write( - writer, {"__final__": True, "__result__": None} + writer, {"__pulsing__": {"final": True, "result": None}} ) else: await self._safe_stream_write( - writer, {"__final__": True, "__result__": result} + writer, 
{"__pulsing__": {"final": True, "result": result}} ) except Exception as e: - await self._safe_stream_write(writer, {"__error__": str(e)}) + await self._safe_stream_write( + writer, {"__pulsing__": {"error": str(e)}} + ) finally: await self._safe_stream_close(writer) @@ -877,22 +755,20 @@ def factory(): class ActorClass: - """Actor class wrapper + """Actor class wrapper. - Provides two ways to create actors: + Usage:: - 1. Simple API (uses global system): await init() - counter = await Counter.spawn(init=10) - - 2. Explicit system: - system = await pul.actor_system() - counter = await Counter.local(system, init=10) + counter = await Counter.spawn(init=10) # local, global system + counter = await Counter.spawn(system=s, init=10) # local, explicit system + counter = await Counter.spawn(placement="remote") # random remote node + counter = await Counter.spawn(placement=node_id) # specific node """ @staticmethod def _unwrap_ray_class(cls): - """Extract original user class if cls is a Ray ActorClass""" + """Extract original user class if cls is a Ray ActorClass.""" try: from ray.actor import ActorClass as RayActorClass except ImportError: @@ -912,7 +788,7 @@ def __init__( max_backoff: float = 30.0, ): unwrapped = self._unwrap_ray_class(cls) - # Keep Ray handle so .remote() remains available + # Keep Ray handle so .remote() remains available for Ray-wrapped classes self._ray_cls = cls if unwrapped is not cls else None cls = unwrapped self._cls = cls @@ -922,7 +798,6 @@ def __init__( self._min_backoff = min_backoff self._max_backoff = max_backoff - # Collect all public methods self._methods = [] self._async_methods = set() @@ -930,38 +805,43 @@ def __init__( if name.startswith("_"): continue self._methods.append(name) - # Detect if it's an async method (including async functions and async generators) if inspect.iscoroutinefunction(method) or inspect.isasyncgenfunction( method ): self._async_methods.add(name) - # Register class _actor_class_registry[self._class_name] = cls 
- # If original class was decorated with @ray.remote, override with Ray's .remote() method + # If original class was decorated with @ray.remote, expose Ray's .remote() if self._ray_cls is not None: self.remote = self._ray_cls.remote async def spawn( self, *args, + system: ActorSystem | None = None, name: str | None = None, public: bool | None = None, + placement: "str | int" = "local", **kwargs, ) -> ActorProxy: - """Create actor using global system (simple API) - - Must call `await init()` before using this method. + """Create an actor and return its proxy. Args: - *args: Positional arguments for the class constructor - name: Optional actor name (if provided, defaults to public=True) - public: Whether the actor should be publicly resolvable (default: True if name provided) - **kwargs: Keyword arguments for the class constructor - - Example: - from pulsing.core import init, remote + *args: Positional arguments forwarded to the class constructor. + system: ActorSystem to use. Defaults to the global system + (requires ``await init()`` to have been called first). + name: Optional actor name. When given, ``public`` defaults to True. + public: Whether the actor is cluster-discoverable. + Defaults to True when *name* is set, False otherwise. + placement: Where to place the actor. + - ``"local"`` *(default)*: spawn on the current node. + - ``"remote"``: spawn on a randomly-chosen remote node; + falls back to local if no remote nodes are available. + - ``int``: spawn on the node with that specific node_id. + **kwargs: Keyword arguments forwarded to the class constructor. + + Example:: await init() @@ -973,52 +853,52 @@ def incr(self): self.value += 1; return self.value counter = await Counter.spawn(init=10) result = await counter.incr() """ - # Import here to avoid circular import from . import _global_system - if _global_system is None: + if system is None: + system = _global_system + if system is None: raise PulsingRuntimeError( "Actor system not initialized. 
Call 'await init()' first." ) - # Default public=True if name is provided if public is None: public = name is not None - return await self.local( - _global_system, *args, name=name, public=public, **kwargs - ) + if placement == "local": + return await self._spawn_local( + system, *args, name=name, public=public, **kwargs + ) + elif placement == "remote": + return await self._spawn_remote( + system, None, *args, name=name, public=public, **kwargs + ) + elif isinstance(placement, int): + return await self._spawn_remote( + system, placement, *args, name=name, public=public, **kwargs + ) + else: + raise ValueError( + f"Invalid placement {placement!r}. Use 'local', 'remote', or an int node_id." + ) - async def local( + async def _spawn_local( self, system: ActorSystem, *args, name: str | None = None, - public: bool | None = None, + public: bool = False, **kwargs, ) -> ActorProxy: - """Create actor locally with explicit system. - - Args: - system: The ActorSystem to spawn the actor in - *args: Positional arguments for the class constructor - name: Optional actor name (if provided, defaults to public=True) - public: Whether the actor should be publicly resolvable (default: True if name provided) - **kwargs: Keyword arguments for the class constructor - - Note: Use pul.actor_system() to create ActorSystem, - which automatically registers PythonActorService. 
- """ - # Default public=True if name is provided - if public is None: - public = name is not None - - # Actor name must follow namespace/name format - if name: - # Ensure user-provided name has namespace - actor_name = name if "/" in name else f"actors/{name}" - else: - actor_name = f"actors/{self._cls.__name__}_{uuid.uuid4().hex[:8]}" + actor_name = ( + name + if (name and "/" in name) + else ( + f"actors/{name}" + if name + else f"actors/{self._cls.__name__}_{uuid.uuid4().hex[:8]}" + ) + ) if self._restart_policy != "never": _wrapped_holder: list[_WrappedActor] = [] @@ -1046,64 +926,44 @@ def factory(): actor_ref = await system.spawn(actor, name=actor_name, public=public) actor._inject_delayed(actor_ref) - # Register actor metadata _register_actor_metadata(actor_name, self._cls) - return ActorProxy(actor_ref, self._methods, self._async_methods) - async def remote( + async def _spawn_remote( self, system: ActorSystem, + node_id: int | None, *args, name: str | None = None, - public: bool | None = None, + public: bool = False, **kwargs, ) -> ActorProxy: - """Create actor remotely (randomly selects a remote node). - - Args: - system: The ActorSystem to spawn the actor in - *args: Positional arguments for the class constructor - name: Optional actor name (if provided, defaults to public=True) - public: Whether the actor should be publicly resolvable (default: True if name provided) - **kwargs: Keyword arguments for the class constructor - - Note: Use pul.actor_system() to create ActorSystem, - which automatically registers PythonActorService. 
- """ - # Default public=True if name is provided - if public is None: - public = name is not None - - members = await system.members() - # members["node_id"] is string, convert local_id to string for comparison - local_id = str(system.node_id.id) - - # Filter out remote nodes (node_id is string) - remote_nodes = [m for m in members if m["node_id"] != local_id] - - if not remote_nodes: - # No remote nodes, fallback to local creation - logger.warning("No remote nodes, fallback to local") - return await self.local(system, *args, name=name, public=public, **kwargs) - - # Randomly select one - target = random.choice(remote_nodes) - # Convert back to int for resolve_named - target_id = int(target["node_id"]) + """Spawn on a specific remote node (node_id=None means random).""" + if node_id is None: + members = await system.members() + local_id = str(system.node_id.id) + remote_nodes = [m for m in members if m["node_id"] != local_id] + if not remote_nodes: + logger.warning("No remote nodes available, falling back to local spawn") + return await self._spawn_local( + system, *args, name=name, public=public, **kwargs + ) + node_id = int(random.choice(remote_nodes)["node_id"]) - # Get target node's Python actor creation service service_ref = await system.resolve_named( - PYTHON_ACTOR_SERVICE_NAME, node_id=target_id + PYTHON_ACTOR_SERVICE_NAME, node_id=node_id ) - # Actor name must follow namespace/name format - if name: - actor_name = name if "/" in name else f"actors/{name}" - else: - actor_name = f"actors/{self._cls.__name__}_{uuid.uuid4().hex[:8]}" + actor_name = ( + name + if (name and "/" in name) + else ( + f"actors/{name}" + if name + else f"actors/{self._cls.__name__}_{uuid.uuid4().hex[:8]}" + ) + ) - # Send creation request resp = await _ask_convert_errors( service_ref, Message.from_json( @@ -1114,7 +974,6 @@ async def remote( "args": list(args), "kwargs": kwargs, "public": public, - # Supervision config "restart_policy": self._restart_policy, "max_restarts": 
self._max_restarts, "min_backoff": self._min_backoff, @@ -1125,19 +984,14 @@ async def remote( data = resp.to_json() if resp.msg_type == "Error": - # System error: actor creation failed raise PulsingRuntimeError(f"Remote create failed: {data.get('error')}") - # Build remote ActorRef from pulsing._core import ActorId - # actor_id is now a UUID (u128), may be transmitted as string actor_id = data["actor_id"] if isinstance(actor_id, str): actor_id = int(actor_id) - remote_id = ActorId(actor_id) - actor_ref = await system.actor_ref(remote_id) - + actor_ref = await system.actor_ref(ActorId(actor_id)) return ActorProxy( actor_ref, data.get("methods", self._methods), self._async_methods ) @@ -1146,22 +1000,6 @@ def __call__(self, *args, **kwargs): """Direct call returns local instance (not an Actor)""" return self._cls(*args, **kwargs) - def proxy(self, actor_ref: ActorRef) -> ActorProxy: - """Wrap ActorRef into typed ActorProxy - - Args: - actor_ref: Underlying actor reference - - Returns: - ActorProxy: Proxy with method type information - - Example: - ref = await system.resolve_named("my_counter") - counter = Counter.proxy(ref) - await counter.increment() - """ - return ActorProxy(actor_ref, self._methods, self._async_methods) - async def resolve( self, name: str, @@ -1518,10 +1356,7 @@ async def resolve( if _global_system is None: raise RuntimeError("Actor system not initialized. Call 'await init()' first.") - try: - return await _global_system.resolve(name, node_id=node_id, timeout=timeout) - except RuntimeError as e: - raise _convert_rust_error(e) from e + return await _global_system.resolve(name, node_id=node_id, timeout=timeout) def as_any(ref: ActorRef) -> ActorProxy: @@ -1541,122 +1376,6 @@ def as_any(ref: ActorRef) -> ActorProxy: return ref.as_any() -def mount(instance: Any, *, name: str, public: bool = True) -> None: - """Mount an existing Python object to the Pulsing communication network. - - Synchronous interface, can be called in ``__init__``. 
Automatically: - 1. Initialize Pulsing (if not already, auto-detects Ray environment) - 2. Wrap instance as a Pulsing actor - 3. Register to Pulsing network, other nodes can discover via ``pul.resolve(name)`` - - Args: - instance: Object to mount (any Python instance) - name: Pulsing name, other nodes resolve via this name - public: Whether discoverable by other cluster nodes (default True) - - Example:: - - @ray.remote - class Counter: - def __init__(self, name, peers): - self.name = name - self.peers = sorted(peers) - pul.mount(self, name=name) - - async def greet(self, msg): - return f"Hello from {self.name}: {msg}" - """ - from . import _global_system - - # Auto-initialize Pulsing - if _global_system is None: - _auto_init_pulsing() - - from . import _global_system as system - - if system is None: - raise RuntimeError( - "Pulsing initialization failed. Please call pul.init() or run in Ray environment." - ) - - actor_name = name if "/" in name else f"actors/{name}" - wrapped = _WrappedActor(instance) - - async def _do_mount(): - ref = await system.spawn(wrapped, name=actor_name, public=public) - return ref - - actor_ref = _run_sync_on_pulsing_loop(_do_mount()) - wrapped._inject_delayed(actor_ref) - _register_actor_metadata(actor_name, type(instance)) - - -def unmount(name: str) -> None: - """Unmount a previously mounted actor from the Pulsing network. - - Args: - name: Name used during mounting - """ - from . import _global_system - - if _global_system is None: - return - - actor_name = name if "/" in name else f"actors/{name}" - - async def _do_unmount(): - await _global_system.stop(actor_name) - - _run_sync_on_pulsing_loop(_do_unmount()) - - -def _auto_init_pulsing(): - """Auto-detect environment and initialize Pulsing.""" - try: - import ray - - if ray.is_initialized(): - from pulsing.integrations.ray import init_in_ray - - init_in_ray() - return - except ImportError: - pass - - raise RuntimeError( - "Pulsing not initialized. 
Please call await pul.init() or run in Ray environment." - ) - - -def _run_sync_on_pulsing_loop(coro): - """Execute coroutine synchronously on Pulsing's background event loop.""" - import asyncio - import concurrent.futures - - # Try to use pulsing.integrations.ray's background loop (Ray environment) - try: - from pulsing.integrations.ray import _loop - - if _loop is not None: - fut = asyncio.run_coroutine_threadsafe(coro, _loop) - return fut.result(timeout=30) - except ImportError: - pass - - # Non-Ray environment: try to create new loop in current thread - try: - loop = asyncio.get_running_loop() - except RuntimeError: - loop = None - - if loop is None: - return asyncio.run(coro) - - # Already have running loop (e.g., async context), run in new thread - with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: - return pool.submit(asyncio.run, coro).result(timeout=30) - - RemoteClass = ActorClass # Keep old name as alias (backward compatibility) SystemActor = PythonActorService diff --git a/python/pulsing/integrations/autogen/runtime.py b/python/pulsing/integrations/autogen/runtime.py index e02782739..d345d2dd4 100644 --- a/python/pulsing/integrations/autogen/runtime.py +++ b/python/pulsing/integrations/autogen/runtime.py @@ -318,7 +318,7 @@ async def publish_message( actor_ref = self._agent_refs.get(full_key) if actor_ref: # Use tell (don't wait for response) - task = asyncio.create_task(actor_ref.ask(envelope)) + task = asyncio.ensure_future(actor_ref.ask(envelope)) tasks.append(task) if tasks: diff --git a/python/pulsing/integrations/ray_compat.py b/python/pulsing/integrations/ray_compat.py index b8737bd5e..7d1d0ea17 100644 --- a/python/pulsing/integrations/ray_compat.py +++ b/python/pulsing/integrations/ray_compat.py @@ -198,7 +198,7 @@ def remote(self, *args, **kwargs) -> _ActorHandle: self._ensure_wrapped() async def create(): - proxy = await self._pulsing_class.local(_system, *args, **kwargs) + proxy = await self._pulsing_class.spawn(*args, 
system=_system, **kwargs) return _ActorHandle(proxy, self._methods) return _run_coro_sync(create()) diff --git a/python/pulsing/serving/load_stream.py b/python/pulsing/serving/load_stream.py index 3f6ae554e..b87e103cf 100644 --- a/python/pulsing/serving/load_stream.py +++ b/python/pulsing/serving/load_stream.py @@ -1,16 +1,15 @@ """Load information subscription based on streaming requests Architecture: - Router sends SubscribeLoad request → Worker returns StreamMessage - Worker continuously pushes load updates in stream → Router reads asynchronously + Scheduler (LoadStreamConsumer) calls worker_ref.as_any().subscribe_load() + and iterates the async generator; Worker yields load snapshot dicts. ┌─────────┐ ┌─────────┐ - │ Router │ ─── SubscribeLoad ─► │ Worker │ - │ │ │ │ - │ │ ◄─── Stream ──────── │ │ + │ Scheduler│ ─ subscribe_load() ►│ Worker │ + │ (Consumer)│ │ @remote │ + │ │ ◄── async for ────── │ │ │ │ {load: 5} │ │ - │ │ ◄─── Stream ──────── │ │ - │ │ {load: 3} │ │ + │ │ ◄── {load: 3} ────── │ │ └─────────┘ └─────────┘ Usage: @@ -25,7 +24,9 @@ from collections.abc import Callable from dataclasses import dataclass -from pulsing.core import ActorRef, Message +from pulsing.core import ActorRef + +from .scheduler import Scheduler @dataclass @@ -70,17 +71,19 @@ def __init__(self, stale_timeout: float = 10.0): self._on_disconnect: Callable[[str], None] | None = None async def subscribe(self, worker_ref: ActorRef, worker_id: str = None): - """Subscribe to Worker's load stream""" + """Subscribe to Worker's load stream via @remote subscribe_load().""" wid = worker_id or str(worker_ref.actor_id) await self.unsubscribe(wid) async def consume(): try: - stream_msg = await worker_ref.ask( - Message.from_json("SubscribeLoad", {}) - ) - async for chunk in stream_msg: - data = chunk.to_json() + proxy = worker_ref.as_any() + async for chunk in proxy.subscribe_load(): + data = ( + chunk + if isinstance(chunk, dict) + else (chunk.to_json() if hasattr(chunk, "to_json") else {}) 
+ ) snapshot = LoadSnapshot.from_dict(data) async with self._lock: self._loads[snapshot.worker_id] = snapshot @@ -145,7 +148,7 @@ def on_update(self, callback: Callable[[LoadSnapshot], None]): self._on_update = callback -class StreamLoadScheduler: +class StreamLoadScheduler(Scheduler): """Load-aware scheduler based on stream subscription - Automatically discovers new Workers and subscribes @@ -160,8 +163,7 @@ def __init__( auto_discover: bool = True, discover_interval: float = 10.0, ): - self._system = actor_system - self._worker_name = worker_name + super().__init__(actor_system, worker_name) self._auto_discover = auto_discover self._discover_interval = discover_interval @@ -170,7 +172,6 @@ def __init__( self._subscribed_workers: set = set() self._running = False self._discover_task: asyncio.Task | None = None - self._lock = asyncio.Lock() self._on_worker_added: Callable[[str], None] | None = None self._on_worker_removed: Callable[[str], None] | None = None @@ -280,9 +281,12 @@ async def select_worker( def get_all_loads(self) -> dict[str, LoadSnapshot]: return self._consumer.get_all_loads() - def get_worker_count(self) -> int: + async def get_worker_count(self) -> int: return len(self._subscribed_workers) + async def get_healthy_worker_count(self) -> int: + return len(self._consumer.get_all_loads()) + def get_subscribed_workers(self) -> set: return self._subscribed_workers.copy() diff --git a/python/pulsing/serving/router.py b/python/pulsing/serving/router.py index dfba1a920..952b1c457 100644 --- a/python/pulsing/serving/router.py +++ b/python/pulsing/serving/router.py @@ -9,6 +9,7 @@ from aiohttp import web from pulsing.core import ActorId, ActorSystem, get_system, remote +from pulsing.serving.scheduler import Scheduler # noqa: F401 (used in type annotation) @dataclass @@ -54,7 +55,9 @@ def from_dict(cls, data: dict) -> "CompletionRequest": class _OpenAIHandler: """OpenAI-compatible HTTP request handler.""" - def __init__(self, actor_system: ActorSystem, 
model_name: str, scheduler): + def __init__( + self, actor_system: ActorSystem, model_name: str, scheduler: Scheduler + ): self._actor_system = actor_system self.model_name = model_name self._request_count = 0 @@ -69,21 +72,8 @@ async def index(self, request: web.Request) -> web.Response: ) async def health_check(self, request: web.Request) -> web.Response: - if hasattr(self._scheduler, "get_worker_count"): - count = self._scheduler.get_worker_count() - if hasattr(count, "__await__"): - total_workers = await count - else: - total_workers = count - else: - total_workers = 0 - - if hasattr(self._scheduler, "get_healthy_worker_count"): - healthy_workers = await self._scheduler.get_healthy_worker_count() - elif hasattr(self._scheduler, "get_all_loads"): - healthy_workers = len(self._scheduler.get_all_loads()) - else: - healthy_workers = total_workers + total_workers = await self._scheduler.get_worker_count() + healthy_workers = await self._scheduler.get_healthy_worker_count() return web.json_response( { @@ -321,36 +311,8 @@ async def _stream_generate( return stream_response -async def start_router( - system: ActorSystem, - http_host: str = "0.0.0.0", - http_port: int = 8080, - model_name: str = "pulsing-model", - worker_name: str = "worker", - scheduler_type: str = "stream_load", - scheduler=None, - scheduler_class=None, # Backward compatibility -) -> web.AppRunner: - """Start Router HTTP server, returns AppRunner - - Args: - system: ActorSystem instance - http_host: HTTP listen address - http_port: HTTP listen port - model_name: Model name - worker_name: Worker actor name - scheduler_type: Scheduler type, supports: - - "stream_load": Stream load-aware (default, recommended) - - "random": Random - - "round_robin": Round robin - - "power_of_two": Power-of-Two Choices - - "cache_aware": Cache-aware - scheduler: Custom scheduler instance (takes priority) - scheduler_class: [Deprecated] Use scheduler parameter instead - - Returns: - AppRunner instance - """ +def 
_build_scheduler(system: ActorSystem, worker_name: str, scheduler_type: str): + """Create a Scheduler instance from scheduler_type string.""" from .load_stream import StreamLoadScheduler from .scheduler import ( RUST_POLICIES_AVAILABLE, @@ -360,77 +322,38 @@ async def start_router( RustPowerOfTwoScheduler, ) - # Backward compatibility: scheduler_class -> scheduler - if scheduler_class is not None and scheduler is None: - scheduler = scheduler_class(system, worker_name) - - # Create scheduler - if scheduler is None: - scheduler_map = { - "stream_load": StreamLoadScheduler, - "random": RandomScheduler, - "round_robin": RoundRobinScheduler, - } - - # Rust high-performance schedulers (requires compilation) - if RUST_POLICIES_AVAILABLE: - scheduler_map["power_of_two"] = RustPowerOfTwoScheduler - scheduler_map["cache_aware"] = RustCacheAwareScheduler - - scheduler_class = scheduler_map.get(scheduler_type, StreamLoadScheduler) - scheduler = scheduler_class(system, worker_name) - - # Start scheduler (if has start method) - if hasattr(scheduler, "start"): - await scheduler.start() - - handler = _OpenAIHandler(system, model_name, scheduler) - - app = web.Application() - app.router.add_get("/", handler.index) - app.router.add_get("/health", handler.health_check) - app.router.add_get("/v1/models", handler.list_models) - app.router.add_post("/v1/chat/completions", handler.chat_completions) - app.router.add_post("/v1/completions", handler.completions) - - # Save scheduler reference for cleanup - app["scheduler"] = scheduler - - runner = web.AppRunner(app) - await runner.setup() - site = web.TCPSite(runner, http_host, http_port) - await site.start() - - print(f"[Router] HTTP server started at http://{http_host}:{http_port}") - print(f"[Router] Using scheduler: {scheduler_type}") - return runner - - -async def stop_router(runner: web.AppRunner): - """Stop Router HTTP server""" - if runner: - # Stop scheduler (if has stop method) - app = runner.app - scheduler = 
app.get("scheduler") - if scheduler and hasattr(scheduler, "stop"): - await scheduler.stop() + scheduler_map = { + "stream_load": StreamLoadScheduler, + "random": RandomScheduler, + "round_robin": RoundRobinScheduler, + } + if RUST_POLICIES_AVAILABLE: + scheduler_map["power_of_two"] = RustPowerOfTwoScheduler + scheduler_map["cache_aware"] = RustCacheAwareScheduler - await runner.cleanup() - print("[Router] HTTP server stopped") + cls = scheduler_map.get(scheduler_type, StreamLoadScheduler) + return cls(system, worker_name) @remote class Router: - """Router - OpenAI 兼容 HTTP API 路由,通过 pulsing.remote 暴露 health_check / get_config。 + """OpenAI-compatible HTTP API router Actor. - 包装 start_router/stop_router,支持 CLI:pulsing actor pulsing.serving.Router。 + Starts an HTTP server on ``on_start`` and stops it on ``on_stop``. + Exposes ``health_check`` and ``get_config`` as remote-callable methods. + + CLI usage:: + + pulsing actor pulsing.serving.Router --addr 0.0.0.0:8000 -- \\ + --http_port 8080 --model_name my-model Args: - http_host: HTTP 监听地址 (default: "0.0.0.0") - http_port: HTTP 监听端口 (default: 8080) - model_name: API 响应中的模型名 (default: "pulsing-model") - worker_name: 路由目标 worker 名称 (default: "worker") - scheduler_type: 调度策略,支持 stream_load / random / round_robin / power_of_two / cache_aware + http_host: HTTP listen address (default: "0.0.0.0") + http_port: HTTP listen port (default: 8080) + model_name: Model name returned in API responses (default: "pulsing-model") + worker_name: Worker actor name to route to (default: "worker") + scheduler_type: Scheduling policy — stream_load (default) / random / + round_robin / power_of_two / cache_aware """ def __init__( @@ -439,7 +362,7 @@ def __init__( http_port: int = 8080, model_name: str = "pulsing-model", worker_name: str = "worker", - scheduler_type: str = "round_robin", + scheduler_type: str = "stream_load", ): self.http_host = http_host self.http_port = http_port @@ -448,41 +371,46 @@ def __init__( self.scheduler_type = 
scheduler_type self._runner: web.AppRunner | None = None - self._actor_id: ActorId | None = None + self._scheduler: Scheduler | None = None async def on_start(self, actor_id: ActorId) -> None: - """Start the HTTP server when actor starts""" - self._actor_id = actor_id - - # Get global system (set by CLI via init()) system = get_system() - - # Start HTTP server - self._runner = await start_router( - system=system, - http_host=self.http_host, - http_port=self.http_port, - model_name=self.model_name, - worker_name=self.worker_name, - scheduler_type=self.scheduler_type, + self._scheduler = _build_scheduler( + system, self.worker_name, self.scheduler_type + ) + await self._scheduler.start() + + handler = _OpenAIHandler(system, self.model_name, self._scheduler) + app = web.Application() + app.router.add_get("/", handler.index) + app.router.add_get("/health", handler.health_check) + app.router.add_get("/v1/models", handler.list_models) + app.router.add_post("/v1/chat/completions", handler.chat_completions) + app.router.add_post("/v1/completions", handler.completions) + + self._runner = web.AppRunner(app) + await self._runner.setup() + await web.TCPSite(self._runner, self.http_host, self.http_port).start() + + print( + f"[Router] HTTP server started at http://{self.http_host}:{self.http_port}" ) - print(f"[Router] Actor started: {actor_id}") def on_stop(self) -> None: - """Stop the HTTP server when actor stops""" if self._runner: - # Schedule cleanup in background (on_stop is sync) - asyncio.create_task(self._cleanup()) + asyncio.create_task(self._shutdown()) - async def _cleanup(self): - """Async cleanup helper""" + async def _shutdown(self) -> None: + if self._scheduler: + await self._scheduler.stop() + self._scheduler = None if self._runner: - await stop_router(self._runner) + await self._runner.cleanup() self._runner = None + print("[Router] HTTP server stopped") def metadata(self) -> dict[str, str]: - """Return router metadata for diagnostics""" return { "type": 
"router", "http_host": self.http_host, @@ -493,7 +421,6 @@ def metadata(self) -> dict[str, str]: } def health_check(self) -> dict: - """健康检查。""" return { "status": "healthy", "http_port": self.http_port, @@ -501,7 +428,6 @@ def health_check(self) -> dict: } def get_config(self) -> dict: - """路由配置。""" return { "http_host": self.http_host, "http_port": self.http_port, @@ -509,3 +435,56 @@ def get_config(self) -> dict: "worker_name": self.worker_name, "scheduler_type": self.scheduler_type, } + + +async def start_router( + system: ActorSystem, + http_host: str = "0.0.0.0", + http_port: int = 8080, + model_name: str = "pulsing-model", + worker_name: str = "worker", + scheduler_type: str = "stream_load", + scheduler=None, + scheduler_class=None, +) -> web.AppRunner: + """Start an OpenAI-compatible HTTP server without creating a Router actor. + + Useful for embedding the router in an existing asyncio application. + For CLI / Actor-lifecycle usage prefer the ``Router`` actor class instead. + + Returns: + ``web.AppRunner`` — pass to ``stop_router()`` for cleanup. 
+ """ + if scheduler_class is not None and scheduler is None: + scheduler = scheduler_class(system, worker_name) + if scheduler is None: + scheduler = _build_scheduler(system, worker_name, scheduler_type) + + await scheduler.start() + + handler = _OpenAIHandler(system, model_name, scheduler) + app = web.Application() + app.router.add_get("/", handler.index) + app.router.add_get("/health", handler.health_check) + app.router.add_get("/v1/models", handler.list_models) + app.router.add_post("/v1/chat/completions", handler.chat_completions) + app.router.add_post("/v1/completions", handler.completions) + app["scheduler"] = scheduler + + runner = web.AppRunner(app) + await runner.setup() + await web.TCPSite(runner, http_host, http_port).start() + + print(f"[Router] HTTP server started at http://{http_host}:{http_port}") + return runner + + +async def stop_router(runner: web.AppRunner) -> None: + """Stop a router started via ``start_router()``.""" + if not runner: + return + scheduler = runner.app.get("scheduler") + if scheduler: + await scheduler.stop() + await runner.cleanup() + print("[Router] HTTP server stopped") diff --git a/python/pulsing/serving/scheduler.py b/python/pulsing/serving/scheduler.py index b5672e50b..be4528936 100644 --- a/python/pulsing/serving/scheduler.py +++ b/python/pulsing/serving/scheduler.py @@ -45,17 +45,27 @@ class Scheduler(ABC): - """Scheduler base class""" + """Scheduler base class + + All scheduler implementations must inherit from this class. + Provides default lifecycle (start/stop) and health query methods. + """ def __init__(self, actor_system, worker_name: str = "worker"): self._system = actor_system self._worker_name = worker_name self._lock = asyncio.Lock() + async def start(self): + """Start the scheduler. Override for schedulers that need background tasks.""" + + async def stop(self): + """Stop the scheduler. 
Override for schedulers that need cleanup.""" + async def get_available_workers(self): try: return await self._system.get_named_instances(self._worker_name) - except Exception as e: + except Exception: return [] async def get_worker_count(self) -> int: @@ -67,7 +77,6 @@ async def get_healthy_worker_count(self) -> int: async def _resolve_worker(self, node_id: int | None = None): try: - # node_id is now u128 integer from members() return await self._system.resolve_named(self._worker_name, node_id=node_id) except Exception: return None diff --git a/python/pulsing/streaming/__init__.py b/python/pulsing/streaming/__init__.py index f236fc79c..e09252164 100644 --- a/python/pulsing/streaming/__init__.py +++ b/python/pulsing/streaming/__init__.py @@ -33,9 +33,9 @@ subscribe_to_topic, write_topic, ) -from .queue import Queue, QueueReader, QueueWriter, read_queue, write_queue +from .queue import Queue, QueueReader, read_queue, write_queue from .storage import BucketStorage -from .sync_queue import SyncQueue, SyncQueueReader, SyncQueueWriter +from .sync_queue import SyncQueue, SyncQueueReader if TYPE_CHECKING: from pulsing._core import ActorSystem @@ -57,8 +57,8 @@ async def write( storage_path: str | None = None, backend: str | type = "memory", backend_options: dict[str, Any] | None = None, - ) -> QueueWriter: - """Open queue for writing""" + ) -> Queue: + """Open queue for writing, returns a ``Queue``.""" return await write_queue( self._system, topic, @@ -130,12 +130,10 @@ async def read( "QueueAPI", "TopicAPI", "Queue", - "QueueWriter", "QueueReader", "write_queue", "read_queue", "SyncQueue", - "SyncQueueWriter", "SyncQueueReader", "StorageManager", "BucketStorage", diff --git a/python/pulsing/streaming/backend.py b/python/pulsing/streaming/backend.py index d1e7383a4..c4cfa59f6 100644 --- a/python/pulsing/streaming/backend.py +++ b/python/pulsing/streaming/backend.py @@ -30,24 +30,25 @@ @runtime_checkable class StorageBackend(Protocol): - """Storage Backend Protocol + 
"""Core Storage Backend Protocol. - All storage backends must implement this protocol. Can be implemented via inheritance or duck typing. + Every backend must implement these seven methods. + Duck typing is fine — inheritance from this class is not required. """ @abstractmethod async def put(self, record: dict[str, Any]) -> None: - """Write a single record""" + """Write a single record.""" ... @abstractmethod async def put_batch(self, records: list[dict[str, Any]]) -> None: - """Write records in batch""" + """Write records in batch.""" ... @abstractmethod async def get(self, limit: int, offset: int) -> list[dict[str, Any]]: - """Read records""" + """Read records.""" ... @abstractmethod @@ -58,32 +59,46 @@ async def get_stream( wait: bool = False, timeout: float | None = None, ) -> AsyncIterator[list[dict[str, Any]]]: - """Stream read records""" + """Stream records.""" ... @abstractmethod async def flush(self) -> None: - """Flush buffer to persistent storage""" + """Flush buffer to persistent storage.""" ... @abstractmethod async def stats(self) -> dict[str, Any]: - """Get statistics""" + """Return statistics dict.""" ... @abstractmethod def total_count(self) -> int: - """Total record count""" + """Total record count (synchronous).""" ... + +@runtime_checkable +class TensorBackend(Protocol): + """Extension protocol for tensor-native backends. + + Backends that store tensor/array data efficiently should implement this. + BucketStorage checks ``isinstance(backend, TensorBackend)`` once at startup + and delegates tensor operations to the typed reference; it never falls back + to generic ``get``/``put`` for these paths. + """ + + @abstractmethod async def put_tensor(self, data: Any, **kwargs: Any) -> Any: - """Optional tensor-native put API.""" - raise NotImplementedError + """Write tensor data; return metadata describing stored indexes.""" + ... 
+ @abstractmethod async def get_data(self, batch_meta: Any, fields: list[str] | None = None) -> Any: - """Optional tensor-native batch data API.""" - raise NotImplementedError + """Fetch tensor data for a batch described by batch_meta.""" + ... + @abstractmethod async def get_meta( self, fields: list[str], @@ -92,8 +107,38 @@ async def get_meta( sampler: Any = None, **sampling_kwargs: Any, ) -> Any: - """Optional tensor-native metadata API.""" - raise NotImplementedError + """Return sampling metadata for a training batch.""" + ... + + +@runtime_checkable +class ConsumptionBackend(Protocol): + """Extension protocol for backends that track consumption state. + + Implementing this allows BucketStorage to delegate consumption bookkeeping + to the backend (e.g. for persistent replay or deduplication). + When absent, BucketStorage maintains its own in-memory tracking. + """ + + @abstractmethod + async def mark_consumed(self, task_name: str, global_indexes: list[int]) -> None: + """Mark indexes as consumed for a given task.""" + ... + + @abstractmethod + async def reset_consumption(self, task_name: str) -> None: + """Reset consumption state for a given task.""" + ... + + @abstractmethod + async def clear(self, global_indexes: list[int]) -> None: + """Remove records at the given global indexes.""" + ... + + @abstractmethod + async def get_by_indices(self, indexes: list[int]) -> list[dict[str, Any]]: + """Fetch records by their global indexes (more efficient than repeated get).""" + ... 
class MemoryBackend: @@ -235,6 +280,21 @@ async def get_meta( "global_indexes": sampled, } + # ---- ConsumptionBackend methods ---- + + async def get_by_indices(self, indexes: list[int]) -> list[dict[str, Any]]: + return [self.buffer[i] for i in indexes if 0 <= i < len(self.buffer)] + + async def mark_consumed(self, task_name: str, global_indexes: list[int]) -> None: + pass # BucketStorage owns the consumption state for MemoryBackend + + async def reset_consumption(self, task_name: str) -> None: + pass + + async def clear(self, global_indexes: list[int]) -> None: + to_remove = set(global_indexes) + self.buffer = [r for i, r in enumerate(self.buffer) if i not in to_remove] + # ============================================================ # Backend Registry diff --git a/python/pulsing/streaming/broker.py b/python/pulsing/streaming/broker.py index 82ae9c4fa..2bcd22668 100644 --- a/python/pulsing/streaming/broker.py +++ b/python/pulsing/streaming/broker.py @@ -144,17 +144,19 @@ async def publish( } if mode == "fire_and_forget": - return await self._fanout_tell(envelope, sender_id) + return await self._fanout_sequential(envelope, sender_id) + elif mode == "best_effort": + return await self._fanout_sequential( + envelope, sender_id, per_sub_timeout=5.0 + ) elif mode == "wait_all_acks": - return await self._fanout_ask( + return await self._fanout_concurrent( envelope, sender_id, wait_all=True, timeout=timeout ) elif mode == "wait_any_ack": - return await self._fanout_ask( + return await self._fanout_concurrent( envelope, sender_id, wait_all=False, timeout=timeout ) - elif mode == "best_effort": - return await self._fanout_best_effort(envelope, sender_id) else: raise ValueError(f"Unknown mode: {mode}") @@ -176,10 +178,8 @@ def get_stats(self) -> dict: async def _resolve(self, sub: _Subscriber) -> "ActorRef | None": now = time.time() - if sub._ref is not None and (now - sub._ref_resolved_at) < REF_TTL_SECONDS: return sub._ref - try: sub._ref = await 
self.system.resolve_named( sub.actor_name, node_id=sub.node_id @@ -212,176 +212,167 @@ async def _evict_zombies(self, zombie_ids: list[str]) -> None: f"TopicBroker[{self.topic}] evicted zombie subscriber: {sub_id}" ) - async def _fanout_tell(self, envelope: dict, sender_id: str | None) -> dict: - sent = 0 - failed = 0 - zombies: list[str] = [] + async def _resolve_targets( + self, sender_id: str | None + ) -> tuple[list[tuple[str, "_Subscriber", "ActorRef"]], list[str]]: + """Resolve all eligible subscriber refs. + Returns (targets, zombie_ids) where targets is a list of + (sub_id, sub, ref) triples and zombie_ids are subs that hit the + consecutive-failure threshold during resolution. + """ + targets: list[tuple[str, "_Subscriber", "ActorRef"]] = [] + zombies: list[str] = [] for sub_id, sub in list(self._subscribers.items()): if sender_id and sub_id == sender_id: continue + ref = await self._resolve(sub) + if ref: + targets.append((sub_id, sub, ref)) + else: + if self._record_failure(sub): + zombies.append(sub_id) + return targets, zombies + + def _fanout_result( + self, + delivered: int, + failed: int, + failed_ids: list[str] | None, + always_success: bool = False, + timed_out: bool = False, + ) -> dict: + """Build the standard publish result dict and update running totals.""" + self._total_delivered += delivered + self._total_failed += failed + result: dict = { + "success": True if always_success else (delivered > 0 or failed == 0), + "delivered": delivered, + "failed": failed, + "subscriber_count": len(self._subscribers), + } + if failed_ids is not None: + result["failed_subscribers"] = failed_ids + if timed_out: + result["timed_out"] = True + return result + + async def _fanout_sequential( + self, + envelope: dict, + sender_id: str | None, + per_sub_timeout: float | None = None, + ) -> dict: + """Sequential fanout engine used by fire_and_forget and best_effort. 
+ + When *per_sub_timeout* is None the message is sent fire-and-forget via + ``ref.tell``; otherwise each subscriber is awaited via ``ref.ask`` with + the given per-subscriber timeout. + """ + targets, zombies = await self._resolve_targets(sender_id) + delivered = failed = 0 + failed_ids: list[str] | None = [] if per_sub_timeout is not None else None + + for sub_id, sub, ref in targets: try: - ref = await self._resolve(sub) - if ref: - await ref.tell(envelope) - sent += 1 - self._record_success(sub) + if per_sub_timeout is not None: + await asyncio.wait_for(ref.ask(envelope), timeout=per_sub_timeout) else: - failed += 1 - if self._record_failure(sub): - zombies.append(sub_id) + await ref.tell(envelope) + delivered += 1 + self._record_success(sub) except Exception: failed += 1 + if failed_ids is not None: + failed_ids.append(sub_id) if self._record_failure(sub): zombies.append(sub_id) await self._evict_zombies(zombies) + return self._fanout_result(delivered, failed, failed_ids, always_success=True) - self._total_delivered += sent - self._total_failed += failed - - return { - "success": True, - "delivered": sent, - "failed": failed, - "subscriber_count": len(self._subscribers), - } - - async def _fanout_ask( + async def _fanout_concurrent( self, envelope: dict, sender_id: str | None, wait_all: bool, timeout: float = DEFAULT_FANOUT_TIMEOUT, ) -> dict: - """Wait for ack mode.""" - tasks = [] - sub_ids = [] - resolve_failed: list[str] = [] + """Concurrent fanout engine used by wait_all_acks and wait_any_ack. - for sub_id, sub in list(self._subscribers.items()): - if sender_id and sub_id == sender_id: - continue - ref = await self._resolve(sub) - if ref: - tasks.append(ref.ask(envelope)) - sub_ids.append(sub_id) - else: - if self._record_failure(sub): - resolve_failed.append(sub_id) + All subscriber asks are launched as Tasks simultaneously; *wait_all* + controls whether we wait for every ack or only the first. 
+ """ + targets, zombies = await self._resolve_targets(sender_id) - if not tasks: - await self._evict_zombies(resolve_failed) + if not targets: + await self._evict_zombies(zombies) return {"success": True, "delivered": 0, "failed": 0, "subscriber_count": 0} - delivered = 0 - failed = 0 - failed_ids = [] - zombies: list[str] = resolve_failed.copy() + sub_ids = [sub_id for sub_id, _, _ in targets] + subs = {sub_id: sub for sub_id, sub, _ in targets} + # ref.ask() returns a pyo3 Future (not a native coroutine); ensure_future + # handles both Futures and coroutines, unlike create_task which rejects Futures. + tasks = [asyncio.ensure_future(ref.ask(envelope)) for _, _, ref in targets] - if wait_all: - # wait_all_acks: wait for all responses with overall timeout - try: - results = await asyncio.wait_for( - asyncio.gather(*tasks, return_exceptions=True), - timeout=timeout, - ) - for i, result in enumerate(results): - sub = self._subscribers.get(sub_ids[i]) - if isinstance(result, Exception): - failed += 1 - failed_ids.append(sub_ids[i]) - if sub and self._record_failure(sub): - zombies.append(sub_ids[i]) - else: - delivered += 1 - if sub: - self._record_success(sub) - except asyncio.TimeoutError: - # Timeout: all tasks considered failed - logger.warning( - f"TopicBroker[{self.topic}] wait_all_acks timeout after {timeout}s" - ) - failed = len(tasks) - failed_ids = sub_ids.copy() - # Cancel all pending tasks - for task in tasks: - if not task.done(): - task.cancel() - else: - # wait_any_ack: wait for any response with overall timeout - try: - done, pending = await asyncio.wait( + delivered = failed = 0 + failed_ids: list[str] = [] + + try: + if wait_all: + try: + results = await asyncio.wait_for( + asyncio.gather(*tasks, return_exceptions=True), + timeout=timeout, + ) + for i, result in enumerate(results): + sub = subs.get(sub_ids[i]) + if isinstance(result, Exception): + failed += 1 + failed_ids.append(sub_ids[i]) + if sub and self._record_failure(sub): + 
zombies.append(sub_ids[i]) + else: + delivered += 1 + if sub: + self._record_success(sub) + except asyncio.TimeoutError: + logger.warning( + f"TopicBroker[{self.topic}] wait_all_acks timeout after {timeout}s" + ) + await self._evict_zombies(zombies) + return self._fanout_result( + 0, len(tasks), sub_ids.copy(), timed_out=True + ) + else: + # asyncio.wait with timeout returns empty done-set on timeout (no exception) + done, _ = await asyncio.wait( tasks, return_when=asyncio.FIRST_COMPLETED, timeout=timeout, ) - for task in done: - if not task.exception(): - delivered = 1 - break - # Cancel other pending tasks - for task in pending: - task.cancel() - except asyncio.TimeoutError: - logger.warning( - f"TopicBroker[{self.topic}] wait_any_ack timeout after {timeout}s" - ) - failed = len(tasks) - failed_ids = sub_ids.copy() - for task in tasks: - if not task.done(): - task.cancel() - - await self._evict_zombies(zombies) - - self._total_delivered += delivered - self._total_failed += failed - - return { - "success": delivered > 0 or failed == 0, - "delivered": delivered, - "failed": failed, - "failed_subscribers": failed_ids, - "subscriber_count": len(self._subscribers), - } - - async def _fanout_best_effort(self, envelope: dict, sender_id: str | None) -> dict: - """Best-effort: try to send, record failures""" - delivered = 0 - failed = 0 - failed_ids = [] - zombies: list[str] = [] - - for sub_id, sub in list(self._subscribers.items()): - if sender_id and sub_id == sender_id: - continue - try: - ref = await self._resolve(sub) - if ref: - await asyncio.wait_for(ref.ask(envelope), timeout=5.0) - delivered += 1 - self._record_success(sub) + if done: + for task in done: + try: + task.result() + delivered = 1 + break + except Exception: + failed += 1 + if not delivered: + failed_ids = sub_ids[: len(done)] else: - failed += 1 - failed_ids.append(sub_id) - if self._record_failure(sub): - zombies.append(sub_id) - except Exception: - failed += 1 - failed_ids.append(sub_id) - if 
self._record_failure(sub): - zombies.append(sub_id) + logger.warning( + f"TopicBroker[{self.topic}] wait_any_ack timeout after {timeout}s" + ) + failed = len(tasks) + failed_ids = sub_ids.copy() + finally: + pending = [t for t in tasks if not t.done()] + for t in pending: + t.cancel() + if pending: + await asyncio.gather(*pending, return_exceptions=True) await self._evict_zombies(zombies) - - self._total_delivered += delivered - self._total_failed += failed - - return { - "success": True, - "delivered": delivered, - "failed": failed, - "failed_subscribers": failed_ids, - "subscriber_count": len(self._subscribers), - } + return self._fanout_result(delivered, failed, failed_ids) diff --git a/python/pulsing/streaming/manager.py b/python/pulsing/streaming/manager.py index 3f48ac1dc..f8b93c948 100644 --- a/python/pulsing/streaming/manager.py +++ b/python/pulsing/streaming/manager.py @@ -158,13 +158,13 @@ async def _get_or_create_bucket( self._buckets[key] = await self.system.resolve_named(actor_name) logger.debug(f"Resolved existing bucket: {actor_name}") except Exception: - proxy = await BucketStorage.local( - self.system, + proxy = await BucketStorage.spawn( bucket_id=bucket_id, storage_path=bucket_storage_path, batch_size=batch_size, backend=backend or self.default_backend, backend_options=backend_options, + system=self.system, name=actor_name, public=True, ) @@ -192,8 +192,12 @@ async def _get_or_create_topic_broker(self, topic_name: str) -> ActorRef: except Exception: from pulsing.streaming.broker import TopicBroker - proxy = await TopicBroker.local( - self.system, topic_name, self.system, name=actor_name, public=True + proxy = await TopicBroker.spawn( + topic_name, + self.system, + system=self.system, + name=actor_name, + public=True, ) self._topics[topic_name] = proxy.ref logger.info(f"Created topic broker: {actor_name}") @@ -364,10 +368,9 @@ async def get_storage_manager(system: ActorSystem) -> "ActorProxy": except Exception: pass - # Create new StorageManager 
using .local() try: - return await StorageManager.local( - system, system, name=STORAGE_MANAGER_NAME, public=True + return await StorageManager.spawn( + system, system=system, name=STORAGE_MANAGER_NAME, public=True ) except Exception as e: if "already exists" in str(e).lower(): @@ -388,6 +391,32 @@ async def ensure_storage_managers(system: ActorSystem) -> None: logger.debug(f"Local StorageManager ensured on node {system.node_id.id}") +async def _get_remote_manager( + system: ActorSystem, + owner_node_id_str: str, + retries: int = 10, +) -> "ActorProxy": + """Resolve StorageManager on a remote node, retrying until it appears.""" + owner_node_id_int = int(owner_node_id_str) + last_exc: Exception | None = None + for attempt in range(retries): + try: + return await StorageManager.resolve( + STORAGE_MANAGER_NAME, system=system, node_id=owner_node_id_int + ) + except Exception as e: + last_exc = e + if attempt < retries - 1: + logger.debug( + f"StorageManager not on node {owner_node_id_str}, " + f"retry {attempt + 1}/{retries}" + ) + await asyncio.sleep(0.5) + raise RuntimeError( + f"StorageManager not found on node {owner_node_id_str} after {retries} retries: {last_exc}" + ) from last_exc + + async def get_bucket_ref( system: ActorSystem, topic: str, @@ -398,31 +427,13 @@ async def get_bucket_ref( backend_options: dict | None = None, max_redirects: int = 3, ) -> "ActorProxy": - """Get ActorProxy for specified bucket - - Automatically handles redirects to ensure getting the bucket on the correct node. - Returns ActorProxy for direct method calls on BucketStorage. 
- - Args: - system: Actor system - topic: Queue topic - bucket_id: Bucket ID - batch_size: Batch size - storage_path: Custom storage path (optional) - backend: Storage backend name or class (optional) - backend_options: Additional backend options (optional) - max_redirects: Maximum redirect count - """ - # Request from local StorageManager first + """Get ActorProxy for the specified bucket, following redirects automatically.""" manager = await get_storage_manager(system) - - # Convert backend class to name if needed - backend_name = None - if backend: - backend_name = backend if isinstance(backend, str) else backend.__name__ + backend_name = ( + (backend if isinstance(backend, str) else backend.__name__) if backend else None + ) for redirect_count in range(max_redirects + 1): - # Call manager.get_bucket() via proxy resp_data = await manager.get_bucket( topic=topic, bucket_id=bucket_id, @@ -431,60 +442,28 @@ async def get_bucket_ref( backend=backend_name, backend_options=backend_options, ) - msg_type = resp_data.get("_type", "") if msg_type == "BucketReady": - # Successfully got bucket - resolve by actor name for typed proxy - actor_name = f"bucket_{topic}_{bucket_id}" - # Use BucketStorage.resolve to get typed ActorProxy - return await BucketStorage.resolve(actor_name, system=system) - - elif msg_type == "Redirect": - # Need to redirect to other node - # owner_node_id transmitted as string, keep as string for comparison - owner_node_id_str = str(resp_data.get("owner_node_id")) - owner_addr = resp_data.get("owner_addr") - - logger.debug( - f"Redirecting bucket {topic}:{bucket_id} to node {owner_node_id_str} @ {owner_addr}" + return await BucketStorage.resolve( + f"bucket_{topic}_{bucket_id}", system=system ) + if msg_type == "Redirect": + owner_node_id_str = str(resp_data.get("owner_node_id")) if redirect_count >= max_redirects: raise RuntimeError(f"Too many redirects for bucket {topic}:{bucket_id}") - - # Check if redirecting to self (avoid infinite loop) - # 
Compare as strings for consistency - if str(owner_node_id_str) == str(system.node_id.id): + if owner_node_id_str == str(system.node_id.id): raise RuntimeError( f"Redirect loop detected for bucket {topic}:{bucket_id}" ) + logger.debug( + f"Redirecting bucket {topic}:{bucket_id} to node {owner_node_id_str}" + ) + manager = await _get_remote_manager(system, owner_node_id_str) + continue - # Get owner node's StorageManager (with retry, wait for remote node initialization) - # Convert to int for resolve_named which expects int - owner_node_id_int = int(owner_node_id_str) - max_resolve_retries = 10 - for resolve_retry in range(max_resolve_retries): - try: - manager = await StorageManager.resolve( - STORAGE_MANAGER_NAME, system=system, node_id=owner_node_id_int - ) - break - except Exception as e: - if resolve_retry < max_resolve_retries - 1: - logger.debug( - f"StorageManager not found on node {owner_node_id_str}, " - f"retry {resolve_retry + 1}/{max_resolve_retries}" - ) - await asyncio.sleep(0.5) - else: - raise RuntimeError( - f"StorageManager not found on node {owner_node_id_str} after " - f"{max_resolve_retries} retries: {e}" - ) from e - - else: - raise RuntimeError(f"Unexpected response: {msg_type}") + raise RuntimeError(f"Unexpected response type: {msg_type}") raise RuntimeError(f"Failed to get bucket {topic}:{bucket_id}") @@ -494,61 +473,28 @@ async def get_topic_broker( topic: str, max_redirects: int = 3, ) -> "ActorProxy": - """Get broker ActorProxy for specified topic - - Automatically handles redirects to ensure getting the broker on the correct node. - Returns ActorProxy for direct method calls on TopicBroker. 
- - Args: - system: Actor system - topic: Topic name - max_redirects: Maximum redirect count - """ + """Get broker ActorProxy for the specified topic, following redirects automatically.""" from pulsing.streaming.broker import TopicBroker manager = await get_storage_manager(system) for redirect_count in range(max_redirects + 1): - # Call manager.get_topic() via proxy resp_data = await manager.get_topic(topic=topic) msg_type = resp_data.get("_type", "") if msg_type == "TopicReady": - # Successfully got topic - resolve by actor name for typed proxy - actor_name = f"_topic_broker_{topic}" - return await TopicBroker.resolve(actor_name, system=system) + return await TopicBroker.resolve(f"_topic_broker_{topic}", system=system) - elif msg_type == "Redirect": - # owner_node_id transmitted as string, keep as string for comparison + if msg_type == "Redirect": owner_node_id_str = str(resp_data["owner_node_id"]) - - logger.debug(f"Redirecting topic {topic} to node {owner_node_id_str}") - if redirect_count >= max_redirects: raise RuntimeError(f"Too many redirects for topic: {topic}") - - # Compare as strings for consistency - if str(owner_node_id_str) == str(system.node_id.id): + if owner_node_id_str == str(system.node_id.id): raise RuntimeError(f"Redirect loop for topic: {topic}") + logger.debug(f"Redirecting topic {topic} to node {owner_node_id_str}") + manager = await _get_remote_manager(system, owner_node_id_str) + continue - # Get owner node's StorageManager via proxy - # Convert to int for resolve_named which expects int - owner_node_id_int = int(owner_node_id_str) - for retry in range(10): - try: - manager = await StorageManager.resolve( - STORAGE_MANAGER_NAME, system=system, node_id=owner_node_id_int - ) - break - except Exception as e: - if retry < 9: - await asyncio.sleep(0.5) - else: - raise RuntimeError( - f"StorageManager not found on node {owner_node_id_str}: {e}" - ) from e - - else: - raise RuntimeError(f"Unexpected response: {msg_type}") + raise 
RuntimeError(f"Unexpected response type: {msg_type}") raise RuntimeError(f"Failed to get topic broker: {topic}") diff --git a/python/pulsing/streaming/pubsub.py b/python/pulsing/streaming/pubsub.py index ba54d7cf3..cc2bb5a89 100644 --- a/python/pulsing/streaming/pubsub.py +++ b/python/pulsing/streaming/pubsub.py @@ -116,32 +116,32 @@ async def publish( Args: message: Message to publish (any Python object) mode: Publish mode - timeout: Timeout in seconds. None means use default timeout. - For WAIT_ANY_ACK and WAIT_ALL_ACKS modes, local task will be cancelled after timeout, - but remote handler may still be executing (relies on HTTP/2 RST_STREAM to propagate cancellation). + timeout: Fanout timeout forwarded to the broker (seconds). + The broker respects this deadline when waiting for subscriber acks + and returns a structured PublishResult after the timeout. + None uses the broker's default (DEFAULT_FANOUT_TIMEOUT). Returns: - PublishResult: Publish result + PublishResult: Publish result with delivery statistics. Raises: - asyncio.TimeoutError: Timeout - RuntimeError: Other errors + PulsingRuntimeError: If the broker is unreachable. 
""" broker = await self._broker_ref() - - # Determine timeout value effective_timeout = timeout if timeout is not None else DEFAULT_PUBLISH_TIMEOUT - async def _do_publish(): - # Direct method call on broker proxy - return await broker.publish( - message, - mode=mode.value, - sender_id=self._writer_id, - timeout=effective_timeout, - ) + data = await broker.publish( + message, + mode=mode.value, + sender_id=self._writer_id, + timeout=effective_timeout, + ) - data = await asyncio.wait_for(_do_publish(), timeout=effective_timeout) + if data.get("timed_out"): + raise asyncio.TimeoutError( + f"Publish timed out after {effective_timeout}s " + f"({data.get('delivered', 0)}/{data.get('subscriber_count', 0)} acks received)" + ) return PublishResult( success=data.get("success", False), diff --git a/python/pulsing/streaming/queue.py b/python/pulsing/streaming/queue.py index 7f1a7a283..6db5a3570 100644 --- a/python/pulsing/streaming/queue.py +++ b/python/pulsing/streaming/queue.py @@ -14,7 +14,7 @@ from .manager import get_bucket_ref, get_storage_manager if TYPE_CHECKING: - from .sync_queue import SyncQueue, SyncQueueReader, SyncQueueWriter + from .sync_queue import SyncQueue, SyncQueueReader logger = logging.getLogger(__name__) @@ -221,27 +221,6 @@ def sync(self) -> "SyncQueue": return SyncQueue(self) -class QueueWriter: - """Queue write handle""" - - def __init__(self, queue: Queue): - self.queue = queue - - async def put( - self, record: dict[str, Any] | list[dict[str, Any]] - ) -> dict[str, Any] | list[dict[str, Any]]: - return await self.queue.put(record) - - async def flush(self) -> None: - await self.queue.flush() - - def sync(self) -> "SyncQueueWriter": - """Return synchronous wrapper""" - from .sync_queue import SyncQueueWriter - - return SyncQueueWriter(self) - - class QueueReader: """Queue read handle @@ -314,8 +293,8 @@ async def write_queue( storage_path: str | None = None, backend: str | type = "memory", backend_options: dict[str, Any] | None = None, -) -> 
QueueWriter: - """Open queue for writing +) -> Queue: + """Open queue for writing, returns a ``Queue`` object. Args: system: Actor system @@ -324,26 +303,18 @@ async def write_queue( num_buckets: Number of buckets batch_size: Batch size storage_path: Storage path - backend: Storage backend - - "memory": Pure in-memory backend (default) - - Custom: register_backend() or pass StorageBackend class + backend: Storage backend ("memory" by default; or a registered name / class) backend_options: Additional backend parameters - Example: - writer = await write_queue(system, "my_queue") + Example:: - # Custom backend from a plugin - from my_plugin import MyBackend - from .backend import register_backend - register_backend("my_backend", MyBackend) - writer = await write_queue(system, "my_queue", backend="my_backend") + queue = await write_queue(system, "my_queue") + await queue.put({"id": "1", "value": 42}) """ - # Ensure all nodes in cluster have StorageManager from .manager import ensure_storage_managers await ensure_storage_managers(system) - - queue = Queue( + return Queue( system=system, topic=topic, bucket_column=bucket_column, @@ -353,7 +324,6 @@ async def write_queue( backend=backend, backend_options=backend_options, ) - return QueueWriter(queue) def _assign_buckets(num_buckets: int, rank: int, world_size: int) -> list[int]: diff --git a/python/pulsing/streaming/storage.py b/python/pulsing/streaming/storage.py index 35fc33fd3..ac73206df 100644 --- a/python/pulsing/streaming/storage.py +++ b/python/pulsing/streaming/storage.py @@ -6,7 +6,12 @@ from pulsing.core import ActorId, StreamMessage, remote -from .backend import StorageBackend, get_backend_class +from .backend import ( + ConsumptionBackend, + StorageBackend, + TensorBackend, + get_backend_class, +) logger = logging.getLogger(__name__) @@ -43,12 +48,14 @@ def __init__( # Backend instance (initialized in on_start) self._backend: StorageBackend | None = None + # Typed extension references — set once in on_start 
via isinstance checks + self._tensor_backend: TensorBackend | None = None + self._consumption_backend: ConsumptionBackend | None = None self._production_status: dict[int, dict[str, str]] = {} self._consumption_status: dict[str, set[int]] = {} self._key_to_index: dict[str, int] = {} def on_start(self, actor_id: ActorId) -> None: - # Create backend instance backend_class = get_backend_class(self._backend_type) self._backend = backend_class( bucket_id=self.bucket_id, @@ -56,9 +63,19 @@ def on_start(self, actor_id: ActorId) -> None: batch_size=self.batch_size, **self._backend_options, ) + # Capability detection — done once here, never repeated via hasattr elsewhere + self._tensor_backend = ( + self._backend if isinstance(self._backend, TensorBackend) else None + ) + self._consumption_backend = ( + self._backend if isinstance(self._backend, ConsumptionBackend) else None + ) backend_name = getattr(backend_class, "__name__", str(self._backend_type)) logger.info( - f"BucketStorage[{self.bucket_id}] started with {backend_name} at {self.storage_path}" + f"BucketStorage[{self.bucket_id}] started with {backend_name} " + f"(tensor={self._tensor_backend is not None}, " + f"consumption={self._consumption_backend is not None}) " + f"at {self.storage_path}" ) def on_stop(self) -> None: @@ -104,17 +121,17 @@ async def put_batch(self, records: list[dict]) -> dict: async def put_tensor( self, data: Any, partition_id: str = "default", **kwargs: Any ) -> dict: - if hasattr(self._backend, "put_tensor"): - meta = await self._backend.put_tensor( - data, partition_id=partition_id, **kwargs - ) - if hasattr(meta, "global_indexes") and hasattr(meta, "field_names"): - for idx in meta.global_indexes: - self._production_status[idx] = { - field: "ready" for field in meta.field_names - } - return {"status": "ok"} - raise NotImplementedError("Backend does not support put_tensor") + if self._tensor_backend is None: + raise NotImplementedError("Backend does not support put_tensor") + meta = await 
self._tensor_backend.put_tensor( + data, partition_id=partition_id, **kwargs + ) + if hasattr(meta, "global_indexes") and hasattr(meta, "field_names"): + for idx in meta.global_indexes: + self._production_status[idx] = { + field: "ready" for field in meta.field_names + } + return {"status": "ok"} async def get(self, limit: int = 100, offset: int = 0) -> list[dict]: """Get records. @@ -174,27 +191,24 @@ async def get_meta( sampler: Any = None, **sampling_kwargs: Any, ) -> dict: - if hasattr(self._backend, "get_meta"): - meta = await self._backend.get_meta( + if self._tensor_backend is not None: + meta = await self._tensor_backend.get_meta( fields=fields, batch_size=batch_size, task_name=task_name, sampler=sampler, **sampling_kwargs, ) - if hasattr(meta, "to_dict"): - return meta.to_dict() - return meta + return meta.to_dict() if hasattr(meta, "to_dict") else meta + # Generic fallback: use in-memory production/consumption tracking consumed = self._consumption_status.setdefault(task_name, set()) - ready = [] - for idx in sorted(self._production_status): - if idx in consumed: - continue - status = self._production_status[idx] - if all(status.get(field) == "ready" for field in fields): - ready.append(idx) - + ready = [ + idx + for idx in sorted(self._production_status) + if idx not in consumed + and all(self._production_status[idx].get(f) == "ready" for f in fields) + ] if sampler is not None: sampled, marked = sampler.sample(ready, batch_size, **sampling_kwargs) else: @@ -207,13 +221,13 @@ async def get_meta( "partition_id": sampling_kwargs.get("partition_id", "default"), "global_index": idx, "fields": { - field: { - "name": field, + f: { + "name": f, "dtype": None, "shape": None, "production_status": "ready", } - for field in fields + for f in fields }, } for idx in sampled @@ -222,14 +236,14 @@ async def get_meta( } async def get_data(self, batch_meta: dict, fields: list[str] | None = None) -> Any: - if hasattr(self._backend, "get_data"): - return await 
self._backend.get_data(batch_meta, fields=fields) + if self._tensor_backend is not None: + return await self._tensor_backend.get_data(batch_meta, fields=fields) indexes = batch_meta.get("global_indexes") or [ sample.get("global_index", -1) for sample in batch_meta.get("samples", []) ] - if hasattr(self._backend, "get_by_indices"): - rows = await self._backend.get_by_indices(indexes) + if self._consumption_backend is not None: + rows = await self._consumption_backend.get_by_indices(indexes) else: rows = [] for idx in indexes: @@ -240,19 +254,19 @@ async def get_data(self, batch_meta: dict, fields: list[str] | None = None) -> A async def mark_consumed(self, task_name: str, global_indexes: list[int]) -> dict: self._consumption_status.setdefault(task_name, set()).update(global_indexes) - if hasattr(self._backend, "mark_consumed"): - await self._backend.mark_consumed(task_name, global_indexes) + if self._consumption_backend is not None: + await self._consumption_backend.mark_consumed(task_name, global_indexes) return {"status": "ok"} async def reset_consumption(self, task_name: str) -> dict: self._consumption_status.pop(task_name, None) - if hasattr(self._backend, "reset_consumption"): - await self._backend.reset_consumption(task_name) + if self._consumption_backend is not None: + await self._consumption_backend.reset_consumption(task_name) return {"status": "ok"} async def clear(self, global_indexes: list[int]) -> dict: - if hasattr(self._backend, "clear"): - await self._backend.clear(global_indexes) + if self._consumption_backend is not None: + await self._consumption_backend.clear(global_indexes) return {"status": "ok"} async def kv_register(self, key: str, global_index: int) -> dict: diff --git a/python/pulsing/streaming/sync_queue.py b/python/pulsing/streaming/sync_queue.py index 212e6ecd6..1af1fc3af 100644 --- a/python/pulsing/streaming/sync_queue.py +++ b/python/pulsing/streaming/sync_queue.py @@ -10,7 +10,7 @@ from typing import TYPE_CHECKING, Any if 
TYPE_CHECKING: - from .queue import Queue, QueueReader, QueueWriter + from .queue import Queue, QueueReader class SyncQueue: @@ -48,27 +48,6 @@ def stats(self) -> dict[str, Any]: return self._run(self._queue.stats()) -class SyncQueueWriter: - """Synchronous writer wrapper""" - - def __init__(self, writer: "QueueWriter"): - self._writer = writer - self._loop = writer.queue._loop - - def _run(self, coro): - if self._loop is None or not self._loop.is_running(): - raise RuntimeError( - "Event loop not running. Sync wrapper requires a running event loop." - ) - return asyncio.run_coroutine_threadsafe(coro, self._loop).result() - - def put(self, record: dict[str, Any] | list[dict[str, Any]]): - return self._run(self._writer.put(record)) - - def flush(self) -> None: - self._run(self._writer.flush()) - - class SyncQueueReader: """Synchronous reader wrapper""" diff --git a/tests/python/apis/actor/test_actor_behavior.py b/tests/python/apis/actor/test_actor_behavior.py index b9f2ea262..f6b2ea590 100644 --- a/tests/python/apis/actor/test_actor_behavior.py +++ b/tests/python/apis/actor/test_actor_behavior.py @@ -135,7 +135,7 @@ def reset(self): @pytest.mark.asyncio async def test_remote_sync_method(system): """Test @pul.remote class sync method.""" - counter = await Counter.local(system, init=0) + counter = await Counter.spawn(system=system, init=0) result = await counter.incr() assert result == 1 @@ -147,7 +147,7 @@ async def test_remote_sync_method(system): @pytest.mark.asyncio async def test_remote_async_method(system): """Test @pul.remote class async method.""" - counter = await Counter.local(system, init=10) + counter = await Counter.spawn(system=system, init=10) result = await counter.async_incr() assert result == 11 @@ -156,7 +156,7 @@ async def test_remote_async_method(system): @pytest.mark.asyncio async def test_remote_no_return_method(system): """Test @pul.remote class method with no return value.""" - counter = await Counter.local(system, init=100) + counter = await 
Counter.spawn(system=system, init=100) # reset() has no return value await counter.reset() @@ -169,7 +169,7 @@ async def test_remote_no_return_method(system): @pytest.mark.asyncio async def test_remote_sync_method_sequential(system): """Test that sync methods are processed sequentially.""" - counter = await Counter.local(system, init=0) + counter = await Counter.spawn(system=system, init=0) # Multiple calls should be sequential results = [] @@ -314,7 +314,7 @@ def sync_stream(self, n): @pytest.mark.asyncio async def test_remote_async_generator_stream(system): """Test @pul.remote with async generator for streaming.""" - service = await StreamingService.local(system) + service = await StreamingService.spawn(system=system) chunks = [] async for chunk in service.async_stream(5): @@ -328,7 +328,7 @@ async def test_remote_async_generator_stream(system): @pytest.mark.asyncio async def test_remote_sync_generator_stream(system): """Test @pul.remote with sync generator for streaming.""" - service = await StreamingService.local(system) + service = await StreamingService.spawn(system=system) # For sync generator methods, need to await then iterate result = await service.sync_stream(3) diff --git a/tests/python/apis/actor_system/test_actor_system_api.py b/tests/python/apis/actor_system/test_actor_system_api.py index bfcda1ef4..1c0e477dc 100644 --- a/tests/python/apis/actor_system/test_actor_system_api.py +++ b/tests/python/apis/actor_system/test_actor_system_api.py @@ -225,7 +225,7 @@ def get(self): @pytest.mark.asyncio async def test_remote_decorator_spawn(system): """Test @pul.remote class spawn.""" - counter = await Counter.local(system, init=10) + counter = await Counter.spawn(system=system, init=10) assert counter is not None result = await counter.get() assert result == 10 @@ -234,7 +234,7 @@ async def test_remote_decorator_spawn(system): @pytest.mark.asyncio async def test_remote_decorator_sync_method(system): """Test calling sync method on @pul.remote class.""" - 
counter = await Counter.local(system, init=0) + counter = await Counter.spawn(system=system, init=0) result = await counter.incr() assert result == 1 result = await counter.incr() @@ -244,7 +244,7 @@ async def test_remote_decorator_sync_method(system): @pytest.mark.asyncio async def test_remote_decorator_async_method(system): """Test calling async method on @pul.remote class.""" - counter = await Counter.local(system, init=5) + counter = await Counter.spawn(system=system, init=5) result = await counter.decr() assert result == 4 diff --git a/tests/python/core/test_helpers.py b/tests/python/core/test_helpers.py index 1855c29ff..68a0778cd 100644 --- a/tests/python/core/test_helpers.py +++ b/tests/python/core/test_helpers.py @@ -61,11 +61,26 @@ async def test_unwrap_response(): # ============================================================================ +class _SingleValueIterator: + """Local test helper: minimal single-value async iterator.""" + + def __init__(self, value): + self._value = value + self._consumed = False + + def __aiter__(self): + return self + + async def __anext__(self): + if self._consumed: + raise StopAsyncIteration + self._consumed = True + return self._value + + @pytest.mark.asyncio async def test_single_value_iterator(): """Test _SingleValueIterator yields one value then stops.""" - from pulsing.core.remote import _SingleValueIterator - it = _SingleValueIterator("single_value") results = [] async for v in it: diff --git a/tests/python/core/test_helpers_coverage.py b/tests/python/core/test_helpers_coverage.py new file mode 100644 index 000000000..2c4b96dde --- /dev/null +++ b/tests/python/core/test_helpers_coverage.py @@ -0,0 +1,177 @@ +"""Tests for core/helpers.py — covers run_until_signal and spawn_and_run with mocking.""" + +import asyncio +import signal +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from pulsing.core.helpers import run_until_signal, spawn_and_run + + +# 
============================================================================ +# run_until_signal +# ============================================================================ + + +class TestRunUntilSignal: + @pytest.mark.asyncio + async def test_signal_triggers_shutdown(self): + """Simulate SIGTERM by directly calling the registered handler.""" + captured_handlers = {} + + def mock_add_signal_handler(sig, handler): + captured_handlers[sig] = handler + + mock_system = MagicMock() + mock_system.stop = AsyncMock() + + with ( + patch("pulsing.core.helpers.asyncio.get_running_loop") as mock_loop_fn, + patch("pulsing.core.get_system", return_value=mock_system), + patch("pulsing.core.shutdown", new_callable=AsyncMock) as mock_shutdown, + ): + loop = MagicMock() + loop.add_signal_handler = mock_add_signal_handler + mock_loop_fn.return_value = loop + + async def run_with_trigger(): + task = asyncio.create_task(run_until_signal("test_actor")) + await asyncio.sleep(0.01) + captured_handlers[signal.SIGTERM]() + await task + + await run_with_trigger() + + mock_system.stop.assert_awaited_once_with("test_actor") + mock_shutdown.assert_awaited_once() + + @pytest.mark.asyncio + async def test_signal_without_actor_name(self): + captured_handlers = {} + + def mock_add_signal_handler(sig, handler): + captured_handlers[sig] = handler + + mock_system = MagicMock() + mock_system.stop = AsyncMock() + + with ( + patch("pulsing.core.helpers.asyncio.get_running_loop") as mock_loop_fn, + patch("pulsing.core.get_system", return_value=mock_system), + patch("pulsing.core.shutdown", new_callable=AsyncMock), + ): + loop = MagicMock() + loop.add_signal_handler = mock_add_signal_handler + mock_loop_fn.return_value = loop + + task = asyncio.create_task(run_until_signal(None)) + await asyncio.sleep(0.01) + captured_handlers[signal.SIGINT]() + await task + + mock_system.stop.assert_not_awaited() + + @pytest.mark.asyncio + async def test_stop_error_handled(self): + captured_handlers = {} + + def 
mock_add_signal_handler(sig, handler): + captured_handlers[sig] = handler + + mock_system = MagicMock() + mock_system.stop = AsyncMock(side_effect=RuntimeError("stop failed")) + + with ( + patch("pulsing.core.helpers.asyncio.get_running_loop") as mock_loop_fn, + patch("pulsing.core.get_system", return_value=mock_system), + patch("pulsing.core.shutdown", new_callable=AsyncMock), + ): + loop = MagicMock() + loop.add_signal_handler = mock_add_signal_handler + mock_loop_fn.return_value = loop + + task = asyncio.create_task(run_until_signal("err_actor")) + await asyncio.sleep(0.01) + captured_handlers[signal.SIGTERM]() + await task # should not raise + + @pytest.mark.asyncio + async def test_shutdown_error_handled(self): + captured_handlers = {} + + def mock_add_signal_handler(sig, handler): + captured_handlers[sig] = handler + + mock_system = MagicMock() + mock_system.stop = AsyncMock() + + with ( + patch("pulsing.core.helpers.asyncio.get_running_loop") as mock_loop_fn, + patch("pulsing.core.get_system", return_value=mock_system), + patch( + "pulsing.core.shutdown", + new_callable=AsyncMock, + side_effect=RuntimeError("shutdown failed"), + ), + ): + loop = MagicMock() + loop.add_signal_handler = mock_add_signal_handler + mock_loop_fn.return_value = loop + + task = asyncio.create_task(run_until_signal("actor")) + await asyncio.sleep(0.01) + captured_handlers[signal.SIGTERM]() + await task # should not raise + + +# ============================================================================ +# spawn_and_run +# ============================================================================ + + +class TestSpawnAndRun: + @pytest.mark.asyncio + async def test_spawn_and_run_calls_init_and_spawn(self): + mock_system = MagicMock() + mock_system.spawn = AsyncMock() + mock_system.addr = "127.0.0.1:8000" + + mock_actor = MagicMock() + + with ( + patch( + "pulsing.core.init", new_callable=AsyncMock, return_value=mock_system + ) as mock_init, + patch( + 
"pulsing.core.helpers.run_until_signal", new_callable=AsyncMock + ) as mock_signal, + ): + await spawn_and_run( + mock_actor, + name="test", + addr="0.0.0.0:9000", + seeds=["seed:8000"], + public=True, + ) + + mock_init.assert_awaited_once_with(addr="0.0.0.0:9000", seeds=["seed:8000"]) + mock_system.spawn.assert_awaited_once_with( + mock_actor, name="test", public=True + ) + mock_signal.assert_awaited_once_with("test") + + @pytest.mark.asyncio + async def test_spawn_and_run_defaults(self): + mock_system = MagicMock() + mock_system.spawn = AsyncMock() + mock_system.addr = "127.0.0.1:0" + + with ( + patch( + "pulsing.core.init", new_callable=AsyncMock, return_value=mock_system + ), + patch("pulsing.core.helpers.run_until_signal", new_callable=AsyncMock), + ): + await spawn_and_run(MagicMock(), name="default_actor") + mock_system.spawn.assert_awaited_once() diff --git a/tests/python/core/test_init_coverage.py b/tests/python/core/test_init_coverage.py new file mode 100644 index 000000000..fef24c9b6 --- /dev/null +++ b/tests/python/core/test_init_coverage.py @@ -0,0 +1,180 @@ +"""Tests for core/__init__.py — cover uncovered branches. 
+ +Targets: +- get_system() when not initialized +- is_initialized() +- init() with head_node + head_addr conflict +- init() idempotency (double init returns same system) +- shutdown() when no system +- Actor base class +- ask_with_timeout / tell_with_timeout +""" + +import asyncio + +import pytest + +from pulsing.core import ( + Actor, + ActorId, + ActorRef, + ActorSystem, + Message, + StreamMessage, + SystemConfig, + get_system, + init, + is_initialized, + shutdown, +) +from pulsing.exceptions import PulsingRuntimeError + + +# ============================================================================ +# Global system lifecycle +# ============================================================================ + + +class TestGetSystem: + @pytest.mark.asyncio + async def test_get_system_before_init_raises(self): + assert not is_initialized() + with pytest.raises(PulsingRuntimeError, match="not initialized"): + get_system() + + @pytest.mark.asyncio + async def test_is_initialized_false_by_default(self): + assert is_initialized() is False + + +class TestInit: + @pytest.mark.asyncio + async def test_head_node_and_head_addr_conflict(self): + with pytest.raises(ValueError, match="Cannot set both"): + await init(addr="0.0.0.0:9999", is_head_node=True, head_addr="1.2.3.4:8000") + + @pytest.mark.asyncio + async def test_init_and_shutdown(self): + system = await init() + assert is_initialized() is True + assert get_system() is system + await shutdown() + assert is_initialized() is False + + @pytest.mark.asyncio + async def test_double_init_returns_same(self): + system1 = await init() + system2 = await init() + assert system1 is system2 + await shutdown() + + @pytest.mark.asyncio + async def test_shutdown_when_not_initialized(self): + assert not is_initialized() + await shutdown() # should not raise + + +# ============================================================================ +# Actor base class +# 
============================================================================ + + +class TestActorBaseClass: + def test_on_start_default_noop(self): + class MyActor(Actor): + async def receive(self, msg): + return msg + + actor = MyActor() + result = actor.on_start(ActorId(1)) + assert result is None + + def test_on_stop_default_noop(self): + class MyActor(Actor): + async def receive(self, msg): + return msg + + actor = MyActor() + result = actor.on_stop() + assert result is None + + def test_metadata_default_empty(self): + class MyActor(Actor): + async def receive(self, msg): + return msg + + actor = MyActor() + assert actor.metadata() == {} + + def test_cannot_instantiate_without_receive(self): + with pytest.raises(TypeError): + + class BadActor(Actor): + pass + + BadActor() + + +# ============================================================================ +# ask_with_timeout / tell_with_timeout +# ============================================================================ + + +class TestTimeoutUtilities: + @pytest.mark.asyncio + async def test_ask_with_timeout_success(self): + from pulsing.core import ask_with_timeout + + system = await init() + try: + system_ref = await system.system() + result = await ask_with_timeout( + system_ref, + Message.from_json("SystemMessage", {"type": "Ping"}), + timeout=5.0, + ) + assert result is not None + finally: + await shutdown() + + @pytest.mark.asyncio + async def test_ask_with_timeout_expired(self): + from pulsing.core import ask_with_timeout + + system = await init() + try: + system_ref = await system.system() + + class NeverRespond: + async def ask(self, msg): + await asyncio.sleep(100) + + with pytest.raises(asyncio.TimeoutError): + await ask_with_timeout(NeverRespond(), "msg", timeout=0.01) + finally: + await shutdown() + + +# ============================================================================ +# Module exports +# ============================================================================ + + +class 
TestModuleExports: + def test_core_exports(self): + from pulsing.core import ( + ActorClass, + ActorProxy, + PulsingError, + PulsingRuntimeError, + PulsingActorError, + remote, + resolve, + mount, + unmount, + as_any, + ) + + assert ActorClass is not None + assert ActorProxy is not None + assert remote is not None diff --git a/tests/python/core/test_remote_edge_cases.py b/tests/python/core/test_remote_edge_cases.py index 4b4b8e772..14a20de5a 100644 --- a/tests/python/core/test_remote_edge_cases.py +++ b/tests/python/core/test_remote_edge_cases.py @@ -580,5 +580,256 @@ def get_events(self): await shutdown() +# ============================================================================ +# _WrappedActor.receive: attribute access (non-callable) +# ============================================================================ + + +@pytest.mark.asyncio +async def test_attribute_read_via_protocol(): + """Test accessing a non-callable attribute returns its value.""" + + @remote + class AttrActor: + def __init__(self): + self.name = "alice" + self.count = 42 + + def get_name(self): + return self.name + + await init() + try: + actor = await AttrActor.spawn() + proxy = actor.as_any() + result = await proxy.name + assert result == "alice" + result = await proxy.count + assert result == 42 + finally: + await shutdown() + + +# ============================================================================ +# ActorClass.spawn without init raises +# ============================================================================ + + +@pytest.mark.asyncio +async def test_spawn_without_init_raises(): + """Calling spawn before init() should raise PulsingRuntimeError.""" + from pulsing.exceptions import PulsingRuntimeError + + @remote + class NeverSpawned: + def ping(self): + return "pong" + + with pytest.raises(PulsingRuntimeError, match="not initialized"): + await NeverSpawned.spawn() + + +# ============================================================================ +# ActorClass.local 
with name namespace handling +# ============================================================================ + + +@pytest.mark.asyncio +async def test_local_name_with_namespace(): + """Name already containing / should be used as-is.""" + + @remote + class NsActor: + def ping(self): + return "pong" + + await init() + try: + actor = await NsActor.spawn(name="custom/my_actor") + assert await actor.ping() == "pong" + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_local_without_name(): + """Spawning without name should auto-generate one.""" + + @remote + class AutoNameActor: + def ping(self): + return "pong" + + await init() + try: + actor = await AutoNameActor.spawn() + assert await actor.ping() == "pong" + finally: + await shutdown() + + +# ============================================================================ +# ActorClass.resolve +# ============================================================================ + + +@pytest.mark.asyncio +async def test_actor_class_resolve(): + """Test ActorClass.resolve returns typed proxy.""" + + @remote + class ResolvableActor: + def greet(self): + return "hello" + + await init() + try: + await ResolvableActor.spawn(name="resolvable_test") + proxy = await ResolvableActor.resolve("resolvable_test") + assert await proxy.greet() == "hello" + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_actor_class_resolve_without_init(): + """Resolve without init raises RuntimeError.""" + + @remote + class NeverResolved: + def ping(self): + return "pong" + + with pytest.raises(RuntimeError, match="not initialized"): + await NeverResolved.resolve("nonexistent") + + +# ============================================================================ +# ActorClass.proxy wraps existing ref +# ============================================================================ + + +@pytest.mark.asyncio +async def test_actor_class_resolve_typed(): + """Test resolving a named actor into a typed proxy.""" + + @remote + 
class ProxyTestActor: + def double(self, x): + return x * 2 + + await init() + try: + await ProxyTestActor.spawn(name="proxy_wrap_test", public=True) + typed = await ProxyTestActor.resolve("proxy_wrap_test") + assert await typed.double(7) == 14 + finally: + await shutdown() + + +# ============================================================================ +# Method that raises specific exception types +# ============================================================================ + + +@pytest.mark.asyncio +async def test_method_raises_value_error(): + """Verify that ValueError from actor method is propagated.""" + + @remote + class ValidatingActor: + def validate(self, x): + if x < 0: + raise ValueError("must be non-negative") + return x + + await init() + try: + actor = await ValidatingActor.spawn() + assert await actor.validate(5) == 5 + from pulsing.exceptions import PulsingActorError + + with pytest.raises(PulsingActorError, match="non-negative"): + await actor.validate(-1) + finally: + await shutdown() + + +# ============================================================================ +# Async method with direct await (non-streaming) +# ============================================================================ + + +@pytest.mark.asyncio +async def test_async_method_await_returns_value(): + """Async method awaited directly should return final value.""" + + @remote + class AsyncValueActor: + async def compute(self, x): + await asyncio.sleep(0.01) + return x**2 + + await init() + try: + actor = await AsyncValueActor.spawn() + result = await actor.compute(5) + assert result == 25 + finally: + await shutdown() + + +# ============================================================================ +# Actor with supervision (restart_policy) +# ============================================================================ + + +@pytest.mark.asyncio +async def test_supervised_actor_spawn(): + """Test spawning an actor with supervision parameters.""" + + 
@remote(restart_policy="on-failure", max_restarts=2) + class SupervisedActor: + def __init__(self): + self.value = 0 + + def incr(self): + self.value += 1 + return self.value + + await init() + try: + actor = await SupervisedActor.spawn() + assert await actor.incr() == 1 + assert await actor.incr() == 2 + finally: + await shutdown() + + +# ============================================================================ +# as_any top-level function +# ============================================================================ + + +@pytest.mark.asyncio +async def test_as_any_function(): + """Test the module-level as_any() function.""" + from pulsing.core import as_any + + @remote + class AsAnyActor: + def greet(self): + return "hi" + + await init() + try: + actor = await AsAnyActor.spawn(name="as_any_test", public=True) + ref = await get_system().resolve("as_any_test") + proxy = as_any(ref) + assert await proxy.greet() == "hi" + finally: + await shutdown() + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/tests/python/core/test_remote_system_ops.py b/tests/python/core/test_remote_system_ops.py new file mode 100644 index 000000000..ab44f009e --- /dev/null +++ b/tests/python/core/test_remote_system_ops.py @@ -0,0 +1,331 @@ +"""Tests for remote.py system operation helpers and legacy functions. + +Covers: list_actors, get_metrics, get_node_info, health_check, ping, +resolve, SystemActorProxy, PythonActorServiceProxy, get_system_actor, +get_python_actor_service. 
+""" + +import asyncio + +import pytest + +from pulsing.core import init, shutdown, get_system + + +# ============================================================================ +# Legacy helper functions (call SystemActor under the hood) +# ============================================================================ + + +@pytest.mark.asyncio +async def test_list_actors(): + from pulsing.core.remote import list_actors + + system = await init() + try: + actors = await list_actors(system) + assert isinstance(actors, list) + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_get_metrics(): + from pulsing.core.remote import get_metrics + + system = await init() + try: + metrics = await get_metrics(system) + assert isinstance(metrics, dict) + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_get_node_info(): + from pulsing.core.remote import get_node_info + + system = await init() + try: + info = await get_node_info(system) + assert isinstance(info, dict) + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_health_check(): + from pulsing.core.remote import health_check + + system = await init() + try: + result = await health_check(system) + assert isinstance(result, dict) + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_ping(): + from pulsing.core.remote import ping + + system = await init() + try: + result = await ping(system) + assert isinstance(result, dict) + finally: + await shutdown() + + +# ============================================================================ +# SystemActorProxy +# ============================================================================ + + +@pytest.mark.asyncio +async def test_system_actor_proxy_all_methods(): + from pulsing.core.remote import get_system_actor + + system = await init() + try: + proxy = await get_system_actor(system) + assert proxy.ref is not None + + actors = await proxy.list_actors() + assert isinstance(actors, list) + + metrics = await 
proxy.get_metrics() + assert isinstance(metrics, dict) + + node_info = await proxy.get_node_info() + assert isinstance(node_info, dict) + + health = await proxy.health_check() + assert isinstance(health, dict) + + pong = await proxy.ping() + assert isinstance(pong, dict) + finally: + await shutdown() + + +# ============================================================================ +# PythonActorServiceProxy +# ============================================================================ + + +@pytest.mark.asyncio +async def test_python_actor_service_proxy_list_registry(): + from pulsing.core.remote import get_python_actor_service, remote + + @remote + class RegisteredActor: + def hello(self): + return "hi" + + system = await init() + try: + service = await get_python_actor_service(system) + assert service.ref is not None + + classes = await service.list_registry() + assert isinstance(classes, list) + assert any("RegisteredActor" in c for c in classes) + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_python_actor_service_proxy_create_actor(): + from pulsing.core.remote import get_python_actor_service, remote + + @remote + class CreatableActor: + def __init__(self, val=0): + self.val = val + + def get_val(self): + return self.val + + system = await init() + try: + service = await get_python_actor_service(system) + class_name = f"{CreatableActor._cls.__module__}.{CreatableActor._cls.__name__}" + result = await service.create_actor(class_name, name="created_test", val=42) + assert "actor_id" in result + assert "node_id" in result + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_python_actor_service_proxy_create_unknown_class(): + from pulsing.core.remote import get_python_actor_service + from pulsing.exceptions import PulsingRuntimeError + + system = await init() + try: + service = await get_python_actor_service(system) + with pytest.raises(PulsingRuntimeError): + await service.create_actor( + 
"nonexistent.module.FakeClass", name="should_fail" + ) + finally: + await shutdown() + + +# ============================================================================ +# resolve() function +# ============================================================================ + + +@pytest.mark.asyncio +async def test_resolve_function(): + from pulsing.core import remote + from pulsing.core.remote import resolve + + @remote + class ResolveTarget: + def echo(self, msg): + return msg + + system = await init() + try: + await ResolveTarget.spawn(name="resolve_target_test", public=True) + ref = await resolve("resolve_target_test") + assert ref is not None + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_resolve_without_init(): + from pulsing.core.remote import resolve + + with pytest.raises(RuntimeError, match="not initialized"): + await resolve("anything") + + +# ============================================================================ +# _WrappedActor async on_start / on_stop +# ============================================================================ + + +@pytest.mark.asyncio +async def test_async_on_start(): + """Test that async on_start is properly handled.""" + from pulsing.core import remote + + on_start_called = [] + + @remote + class AsyncOnStartActor: + async def on_start(self, actor_id): + on_start_called.append(str(actor_id)) + + def ping(self): + return "pong" + + system = await init() + try: + actor = await AsyncOnStartActor.spawn() + assert await actor.ping() == "pong" + await asyncio.sleep(0.05) + assert len(on_start_called) >= 1 + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_async_on_stop(): + """Test that async on_stop is properly handled.""" + from pulsing.core import remote + + on_stop_called = [] + + @remote + class AsyncOnStopActor: + async def on_stop(self): + on_stop_called.append("stopped") + + def ping(self): + return "pong" + + system = await init() + try: + actor = await 
AsyncOnStopActor.spawn(name="async_stop_test") + assert await actor.ping() == "pong" + await get_system().stop("async_stop_test") + await asyncio.sleep(0.1) + assert "stopped" in on_stop_called + finally: + await shutdown() + + +# ============================================================================ +# _WrappedActor receive with invalid/private method via raw ask +# ============================================================================ + + +@pytest.mark.asyncio +async def test_receive_empty_method_name(): + """Empty method name in call should return error response.""" + from pulsing.core import remote + from pulsing.core.remote import _wrap_call + + @remote + class RawActor: + def ping(self): + return "pong" + + system = await init() + try: + actor = await RawActor.spawn() + msg = _wrap_call("", (), {}, False) + resp = await actor.ref.ask(msg) + assert isinstance(resp, dict) + # Should contain error about invalid method + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_receive_private_method_via_raw(): + """Private method call via raw ask should return error.""" + from pulsing.core import remote + from pulsing.core.remote import _wrap_call + + @remote + class RawActor2: + def ping(self): + return "pong" + + system = await init() + try: + actor = await RawActor2.spawn() + msg = _wrap_call("_secret", (), {}, False) + resp = await actor.ref.ask(msg) + assert isinstance(resp, dict) + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_receive_nonexistent_method_via_raw(): + """Nonexistent method call via raw ask should return error.""" + from pulsing.core import remote + from pulsing.core.remote import _wrap_call + + @remote + class RawActor3: + def ping(self): + return "pong" + + system = await init() + try: + actor = await RawActor3.spawn() + msg = _wrap_call("does_not_exist", (), {}, False) + resp = await actor.ref.ask(msg) + assert isinstance(resp, dict) + finally: + await shutdown() diff --git 
a/tests/python/core/test_remote_unit.py b/tests/python/core/test_remote_unit.py new file mode 100644 index 000000000..0359507b4 --- /dev/null +++ b/tests/python/core/test_remote_unit.py @@ -0,0 +1,509 @@ +"""Pure unit tests for core/remote.py — no ActorSystem required. + +Covers wire format helpers, SingleValueIterator, _extract_methods, +_register_actor_metadata, ActorProxy attribute validation, and +_DelayedCallProxy edge cases. +""" + +import asyncio + +import pytest + +from pulsing.core.remote import ( + _PULSING_WIRE_VERSION, + _extract_methods, + _register_actor_metadata, + _unwrap_call, + _unwrap_response, + _wrap_call, + _wrap_response, + get_actor_metadata, + ActorClass, + ActorProxy, + _actor_metadata_registry, +) + + +class _SingleValueIterator: + """Local test helper: minimal single-value async iterator.""" + + def __init__(self, value): + self._value = value + self._consumed = False + + def __aiter__(self): + return self + + async def __anext__(self): + if self._consumed: + raise StopAsyncIteration + self._consumed = True + return self._value + + +# ============================================================================ +# _wrap_call / _unwrap_call +# ============================================================================ + + +class TestWrapCall: + def test_basic(self): + msg = _wrap_call("greet", ("hello",), {"lang": "en"}, False) + assert msg["__pulsing_proto__"] == _PULSING_WIRE_VERSION + assert msg["__pulsing__"]["call"] == "greet" + assert msg["__pulsing__"]["async"] is False + assert msg["user_data"]["args"] == ("hello",) + assert msg["user_data"]["kwargs"] == {"lang": "en"} + + def test_async_flag(self): + msg = _wrap_call("stream", (), {}, True) + assert msg["__pulsing__"]["async"] is True + + def test_empty_args(self): + msg = _wrap_call("no_args", (), {}, False) + assert msg["user_data"]["args"] == () + assert msg["user_data"]["kwargs"] == {} + + +class TestUnwrapCall: + def test_roundtrip(self): + msg = _wrap_call("method", (1, 2, 
3), {"key": "val"}, True) + method, args, kwargs, is_async = _unwrap_call(msg) + assert method == "method" + assert args == (1, 2, 3) + assert kwargs == {"key": "val"} + assert is_async is True + + def test_missing_fields(self): + method, args, kwargs, is_async = _unwrap_call({}) + assert method == "" + assert args == () + assert kwargs == {} + assert is_async is False + + def test_partial_message(self): + msg = {"__pulsing__": {"call": "foo"}, "user_data": {}} + method, args, kwargs, is_async = _unwrap_call(msg) + assert method == "foo" + assert args == () + assert kwargs == {} + assert is_async is False + + +# ============================================================================ +# _wrap_response / _unwrap_response +# ============================================================================ + + +class TestWrapResponse: + def test_success(self): + resp = _wrap_response(result=42) + assert resp["__pulsing__"]["result"] == 42 + assert resp["__pulsing_proto__"] == _PULSING_WIRE_VERSION + + def test_error(self): + resp = _wrap_response(error="something broke") + assert resp["__pulsing__"]["error"] == "something broke" + + def test_none_result(self): + resp = _wrap_response(result=None) + assert resp["__pulsing__"]["result"] is None + + +class TestUnwrapResponse: + def test_wire_format_result(self): + resp = _wrap_response(result="ok") + result, error = _unwrap_response(resp) + assert result == "ok" + assert error is None + + def test_wire_format_error(self): + resp = _wrap_response(error="fail") + result, error = _unwrap_response(resp) + assert result is None + assert error == "fail" + + def test_message_json_result(self): + result, error = _unwrap_response({"result": "top"}) + assert result == "top" + assert error is None + + def test_message_json_error(self): + result, error = _unwrap_response({"error": "top_err"}) + assert result is None + assert error == "top_err" + + def test_empty_dict(self): + result, error = _unwrap_response({}) + assert result is 
None + assert error is None + + def test_wire_takes_precedence_over_message_json(self): + resp = {"__pulsing__": {"error": "wire"}, "result": "message_json"} + result, error = _unwrap_response(resp) + assert error == "wire" + assert result is None + + def test_non_dict_pulsing_field_falls_back_to_message_json(self): + resp = {"__pulsing__": "not a dict", "result": "fallback"} + result, error = _unwrap_response(resp) + assert result == "fallback" + + def test_stream_frame_final(self): + # Stream frames now use __pulsing__ namespace + frame = {"__pulsing__": {"final": True, "result": 42}} + result, error = _unwrap_response(frame) + assert result == 42 + assert error is None + + def test_stream_frame_error(self): + frame = {"__pulsing__": {"error": "stream failed"}} + result, error = _unwrap_response(frame) + assert result is None + assert error == "stream failed" + + +# ============================================================================ +# _SingleValueIterator +# ============================================================================ + + +class TestSingleValueIterator: + @pytest.mark.asyncio + async def test_yields_one_value(self): + it = _SingleValueIterator(42) + assert await it.__anext__() == 42 + with pytest.raises(StopAsyncIteration): + await it.__anext__() + + @pytest.mark.asyncio + async def test_aiter_protocol(self): + it = _SingleValueIterator("hello") + assert it.__aiter__() is it + items = [] + async for item in it: + items.append(item) + assert items == ["hello"] + + @pytest.mark.asyncio + async def test_none_value(self): + it = _SingleValueIterator(None) + assert await it.__anext__() is None + with pytest.raises(StopAsyncIteration): + await it.__anext__() + + @pytest.mark.asyncio + async def test_dict_value(self): + val = {"key": "val"} + it = _SingleValueIterator(val) + result = await it.__anext__() + assert result == val + + +# ============================================================================ +# _extract_methods +# 
============================================================================ + + +class TestExtractMethods: + def test_basic_class(self): + class MyClass: + def public_a(self): + pass + + async def public_b(self): + pass + + def _private(self): + pass + + methods, async_methods = _extract_methods(MyClass) + assert "public_a" in methods + assert "public_b" in methods + assert "_private" not in methods + assert "public_b" in async_methods + assert "public_a" not in async_methods + + def test_async_generator(self): + class GenClass: + async def stream(self): + yield 1 + + methods, async_methods = _extract_methods(GenClass) + assert "stream" in methods + assert "stream" in async_methods + + def test_actor_class_unwrap(self): + class Inner: + def method_x(self): + pass + + ac = ActorClass(Inner) + methods, async_methods = _extract_methods(ac) + assert "method_x" in methods + + def test_empty_class(self): + class Empty: + pass + + methods, async_methods = _extract_methods(Empty) + assert methods == [] + assert async_methods == set() + + +# ============================================================================ +# _register_actor_metadata / get_actor_metadata +# ============================================================================ + + +class TestActorMetadataRegistry: + def test_register_and_get(self): + class FakeActor: + pass + + _register_actor_metadata("test/fake", FakeActor) + meta = get_actor_metadata("test/fake") + assert meta is not None + assert "python_class" in meta + assert "FakeActor" in meta["python_class"] + + def test_get_nonexistent(self): + assert get_actor_metadata("nonexistent/actor") is None + + +# ============================================================================ +# ActorProxy (without ActorSystem — attribute validation only) +# ============================================================================ + + +class TestActorProxyAttributes: + def _make_proxy(self, methods=None, async_methods=None): + class FakeRef: + class 
actor_id: + id = 12345 + + return ActorProxy(FakeRef(), methods, async_methods) + + def test_private_attr_raises(self): + proxy = self._make_proxy(["foo"]) + with pytest.raises(AttributeError, match="private"): + _ = proxy._internal + + def test_unknown_method_raises(self): + proxy = self._make_proxy(["foo", "bar"]) + with pytest.raises(AttributeError, match="No method"): + _ = proxy.nonexistent + + def test_valid_method_returns_caller(self): + proxy = self._make_proxy(["greet"], {"greet"}) + caller = proxy.greet + assert caller is not None + + def test_any_proxy_allows_all(self): + proxy = self._make_proxy(None, None) + caller = proxy.any_method_name + assert caller is not None + + def test_as_any(self): + proxy = self._make_proxy(["foo"], {"foo"}) + any_proxy = proxy.as_any() + assert any_proxy._method_names is None + assert any_proxy._async_methods is None + + def test_ref_property(self): + class FakeRef: + class actor_id: + id = 1 + + ref = FakeRef() + proxy = ActorProxy(ref) + assert proxy.ref is ref + + def test_from_ref(self): + class FakeRef: + class actor_id: + id = 1 + + ref = FakeRef() + proxy = ActorProxy.from_ref(ref, methods=["a", "b"], async_methods={"b"}) + assert "a" in proxy._method_names + assert "b" in proxy._async_methods + + +# ============================================================================ +# ActorClass (class-level, no system) +# ============================================================================ + + +class TestActorClassUnit: + def test_direct_call_returns_instance(self): + class Counter: + def __init__(self, init=0): + self.value = init + + ac = ActorClass(Counter) + instance = ac(init=5) + assert instance.value == 5 + assert isinstance(instance, Counter) + + def test_methods_collected(self): + class Svc: + def method_a(self): + pass + + async def method_b(self): + pass + + def _private(self): + pass + + ac = ActorClass(Svc) + assert "method_a" in ac._methods + assert "method_b" in ac._methods + assert "_private" not 
in ac._methods + assert "method_b" in ac._async_methods + + def test_registered_in_registry(self): + from pulsing.core.remote import _actor_class_registry + + class UniqueTestCls: + def ping(self): + return "pong" + + ac = ActorClass(UniqueTestCls) + key = f"{UniqueTestCls.__module__}.{UniqueTestCls.__name__}" + assert key in _actor_class_registry + assert _actor_class_registry[key] is UniqueTestCls + + def test_supervision_params(self): + class Supervised: + pass + + ac = ActorClass( + Supervised, + restart_policy="on-failure", + max_restarts=10, + min_backoff=0.5, + max_backoff=60.0, + ) + assert ac._restart_policy == "on-failure" + assert ac._max_restarts == 10 + assert ac._min_backoff == 0.5 + assert ac._max_backoff == 60.0 + + +# ============================================================================ +# remote() decorator +# ============================================================================ + + +class TestRemoteDecorator: + def test_plain_decorator(self): + from pulsing.core.remote import remote + + @remote + class MyActor: + def hello(self): + return "hi" + + assert isinstance(MyActor, ActorClass) + assert "hello" in MyActor._methods + + def test_decorator_with_params(self): + from pulsing.core.remote import remote + + @remote(restart_policy="always", max_restarts=5) + class MyActor2: + def hello(self): + return "hi" + + assert isinstance(MyActor2, ActorClass) + assert MyActor2._restart_policy == "always" + assert MyActor2._max_restarts == 5 + + def test_decorator_preserves_class_name(self): + from pulsing.core.remote import remote + + @remote + class SpecificName: + pass + + assert "SpecificName" in SpecificName._class_name + + +# ============================================================================ +# _consume_task_exception +# ============================================================================ + + +class TestConsumeTaskException: + @pytest.mark.asyncio + async def test_cancelled_task(self): + from pulsing.core.remote import 
_consume_task_exception + + async def cancel_me(): + await asyncio.sleep(100) + + task = asyncio.create_task(cancel_me()) + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + _consume_task_exception(task) + + @pytest.mark.asyncio + async def test_stream_closed_error(self): + from pulsing.core.remote import _consume_task_exception + + async def stream_closed(): + raise RuntimeError("stream closed by peer") + + task = asyncio.create_task(stream_closed()) + try: + await task + except RuntimeError: + pass + _consume_task_exception(task) + + @pytest.mark.asyncio + async def test_generic_exception(self): + from pulsing.core.remote import _consume_task_exception + + async def fail(): + raise ValueError("unexpected") + + task = asyncio.create_task(fail()) + try: + await task + except ValueError: + pass + _consume_task_exception(task) + + @pytest.mark.asyncio + async def test_connection_error(self): + from pulsing.core.remote import _consume_task_exception + + async def conn_err(): + raise ConnectionError("connection closed unexpectedly") + + task = asyncio.create_task(conn_err()) + try: + await task + except ConnectionError: + pass + _consume_task_exception(task) + + @pytest.mark.asyncio + async def test_os_error_non_stream(self): + from pulsing.core.remote import _consume_task_exception + + async def os_err(): + raise OSError("disk full") + + task = asyncio.create_task(os_err()) + try: + await task + except OSError: + pass + _consume_task_exception(task) diff --git a/tests/python/streaming/test_queue.py b/tests/python/streaming/test_queue.py index ae9a0a624..22b70a151 100644 --- a/tests/python/streaming/test_queue.py +++ b/tests/python/streaming/test_queue.py @@ -28,7 +28,6 @@ BucketStorage, Queue, QueueReader, - QueueWriter, read_queue, write_queue, ) @@ -282,7 +281,7 @@ async def test_write_queue_api(actor_system, temp_storage_path): storage_path=temp_storage_path, ) - assert isinstance(writer, QueueWriter) + assert isinstance(writer, Queue) # 
Write data result = await writer.put({"id": "test", "value": 1}) @@ -482,9 +481,8 @@ async def test_explicit_bucket_ids(actor_system, temp_storage_path): records = await reader.get(limit=100) # All records should be from bucket 0 - q = writer.queue for record in records: - bucket_id = q.get_bucket_id(record["id"]) + bucket_id = writer.get_bucket_id(record["id"]) assert bucket_id == 0 @@ -818,7 +816,7 @@ async def test_many_buckets(actor_system, temp_storage_path): await writer.flush() # Get stats - stats = await writer.queue.stats() + stats = await writer.stats() # Count non-empty buckets non_empty = sum(1 for b in stats["buckets"].values() if b.get("total_count", 0) > 0) @@ -947,13 +945,12 @@ async def test_data_integrity_under_stress(actor_system, temp_storage_path): @pytest.mark.asyncio async def test_bucket_storage_direct(actor_system, temp_storage_path): """Test BucketStorage actor directly with memory backend via proxy.""" - # Use BucketStorage.local() to create properly wrapped actor with proxy - bucket = await BucketStorage.local( - actor_system, + bucket = await BucketStorage.spawn( bucket_id=0, storage_path=f"{temp_storage_path}/direct_bucket", batch_size=5, backend="memory", + system=actor_system, name="test_bucket", ) @@ -980,12 +977,12 @@ async def test_bucket_storage_direct(actor_system, temp_storage_path): @pytest.mark.asyncio async def test_bucket_storage_get(actor_system, temp_storage_path): """Test BucketStorage get method via proxy.""" - bucket = await BucketStorage.local( - actor_system, + bucket = await BucketStorage.spawn( bucket_id=0, storage_path=f"{temp_storage_path}/get_bucket", batch_size=5, backend="memory", + system=actor_system, name="test_bucket_get", ) @@ -1005,12 +1002,12 @@ async def test_bucket_storage_get(actor_system, temp_storage_path): @pytest.mark.asyncio async def test_bucket_storage_put_batch(actor_system, temp_storage_path): """Test BucketStorage put_batch method via proxy.""" - bucket = await BucketStorage.local( - 
actor_system, + bucket = await BucketStorage.spawn( bucket_id=0, storage_path=f"{temp_storage_path}/batch_bucket", batch_size=100, backend="memory", + system=actor_system, name="test_bucket_batch", ) @@ -1104,7 +1101,7 @@ async def cleanup(): def test_sync_writer_reader_standalone(): - """Test SyncQueueWriter and SyncQueueReader.""" + """Test SyncQueue (write) and SyncQueueReader.""" import tempfile import shutil import threading diff --git a/tests/python/streaming/test_queue_backends.py b/tests/python/streaming/test_queue_backends.py index 1f87310b2..dc75d263d 100644 --- a/tests/python/streaming/test_queue_backends.py +++ b/tests/python/streaming/test_queue_backends.py @@ -250,13 +250,12 @@ async def test_bucket_storage_with_memory_backend( self, actor_system, temp_storage_path ): """Test BucketStorage with memory backend via proxy.""" - # Use BucketStorage.local() for proper @remote wrapping - bucket = await BucketStorage.local( - actor_system, + bucket = await BucketStorage.spawn( bucket_id=0, storage_path=f"{temp_storage_path}/bucket_memory", batch_size=10, backend="memory", + system=actor_system, name="bucket_memory_test", ) @@ -275,13 +274,12 @@ async def test_bucket_storage_with_memory_backend( @pytest.mark.asyncio async def test_bucket_storage_put_batch(self, actor_system, temp_storage_path): """Test BucketStorage put_batch method via proxy.""" - # Use BucketStorage.local() for proper @remote wrapping - bucket = await BucketStorage.local( - actor_system, + bucket = await BucketStorage.spawn( bucket_id=0, storage_path=f"{temp_storage_path}/bucket_batch", batch_size=100, backend="memory", + system=actor_system, name="bucket_batch_test", ) @@ -324,7 +322,7 @@ async def test_write_queue_with_memory_backend( assert result["status"] == "ok" # Check stats - stats = await writer.queue.stats() + stats = await writer.stats() total = sum(b.get("total_count", 0) for b in stats["buckets"].values()) assert total == 10 @@ -377,7 +375,7 @@ async def 
test_queue_with_registered_backend(self, actor_system, temp_storage_pa result = await writer.put({"id": f"test_{i}", "value": i}) assert result["status"] == "ok" - stats = await writer.queue.stats() + stats = await writer.stats() assert any(b.get("backend") == "memory" for b in stats["buckets"].values()) @pytest.mark.asyncio @@ -394,7 +392,7 @@ async def test_default_backend_is_memory(self, actor_system, temp_storage_path): await writer.put({"id": "test", "value": 1}) - stats = await writer.queue.stats() + stats = await writer.stats() assert any(b.get("backend") == "memory" for b in stats["buckets"].values()) @@ -538,13 +536,12 @@ def total_count(self) -> int: # Register and use register_backend("tracking", TrackingBackend) - # Use BucketStorage.local() for proper @remote wrapping - bucket = await BucketStorage.local( - actor_system, + bucket = await BucketStorage.spawn( bucket_id=0, storage_path=f"{temp_storage_path}/tracking_test", batch_size=100, backend="tracking", + system=actor_system, name="tracking_bucket", ) @@ -652,7 +649,7 @@ async def write_batch(writer_id: int): await asyncio.gather(*tasks) # Verify all written - stats = await writer.queue.stats() + stats = await writer.stats() total = sum(b.get("total_count", 0) for b in stats["buckets"].values()) assert total == num_writers * records_per_writer diff --git a/tests/python/test_actor_list.py b/tests/python/test_actor_list.py index b8ebbca63..f2980d3b1 100644 --- a/tests/python/test_actor_list.py +++ b/tests/python/test_actor_list.py @@ -28,9 +28,9 @@ async def test_actor_list_basic(): system = get_system() # Create some actors locally (list_actors only returns local actors) - await TestCounter.local(system, name="counter-1") - await TestCounter.local(system, name="counter-2") - await TestCalculator.local(system, name="calc") + await TestCounter.spawn(system=system, name="counter-1") + await TestCounter.spawn(system=system, name="counter-2") + await TestCalculator.spawn(system=system, name="calc") # Wait 
a bit for actors to be registered in the system await asyncio.sleep(0.2) @@ -101,7 +101,7 @@ async def test_actor_list_all(): system = get_system() # Create one user actor locally (list_actors only returns local actors) - await TestCounter.local(system, name="test-counter") + await TestCounter.spawn(system=system, name="test-counter") # Wait a bit for actors to be registered in the system await asyncio.sleep(0.2) @@ -160,7 +160,7 @@ async def test_actor_list_json(): system = get_system() # Create actor locally (list_actors only returns local actors) - await TestCounter.local(system, name="json-test") + await TestCounter.spawn(system=system, name="json-test") # Wait a bit for actors to be registered in the system await asyncio.sleep(0.2) diff --git a/tests/python/test_exceptions.py b/tests/python/test_exceptions.py new file mode 100644 index 000000000..88c2af431 --- /dev/null +++ b/tests/python/test_exceptions.py @@ -0,0 +1,192 @@ +"""Tests for pulsing.exceptions — cover all exception classes, constructors, and attributes.""" + +import pytest + +from pulsing.exceptions import ( + PulsingActorError, + PulsingBusinessError, + PulsingError, + PulsingRuntimeError, + PulsingSystemError, + PulsingTimeoutError, + PulsingUnsupportedError, +) + + +class TestPulsingError: + def test_base_exception(self): + err = PulsingError("base error") + assert str(err) == "base error" + assert isinstance(err, Exception) + + def test_is_catchable_as_exception(self): + with pytest.raises(Exception): + raise PulsingError("catch me") + + +class TestPulsingRuntimeError: + def test_basic(self): + err = PulsingRuntimeError("system down") + assert str(err) == "system down" + assert err.cause is None + assert isinstance(err, PulsingError) + + def test_with_cause(self): + cause = ConnectionError("refused") + err = PulsingRuntimeError("transport failed", cause=cause) + assert str(err) == "transport failed" + assert err.cause is cause + + def test_cause_none_explicit(self): + err = 
PulsingRuntimeError("msg", cause=None) + assert err.cause is None + + +class TestPulsingActorError: + def test_basic(self): + err = PulsingActorError("actor failed") + assert str(err) == "actor failed" + assert err.actor_name is None + assert err.cause is None + assert isinstance(err, PulsingError) + + def test_with_actor_name(self): + err = PulsingActorError("fail", actor_name="my_actor") + assert err.actor_name == "my_actor" + + def test_with_cause(self): + cause = ValueError("bad value") + err = PulsingActorError("fail", cause=cause) + assert err.cause is cause + + def test_with_all_params(self): + cause = RuntimeError("inner") + err = PulsingActorError("fail", actor_name="worker/1", cause=cause) + assert str(err) == "fail" + assert err.actor_name == "worker/1" + assert err.cause is cause + + +class TestPulsingBusinessError: + def test_basic(self): + err = PulsingBusinessError(400, "Bad Request") + assert str(err) == "[400] Bad Request" + assert err.code == 400 + assert err.message == "Bad Request" + assert err.details is None + assert isinstance(err, PulsingActorError) + + def test_with_details(self): + err = PulsingBusinessError( + 422, "Validation failed", details="age must be >= 18" + ) + assert err.code == 422 + assert err.message == "Validation failed" + assert err.details == "age must be >= 18" + assert str(err) == "[422] Validation failed" + + def test_inherits_actor_error(self): + err = PulsingBusinessError(500, "Server error") + assert isinstance(err, PulsingActorError) + assert isinstance(err, PulsingError) + assert err.cause is None + + def test_zero_code(self): + err = PulsingBusinessError(0, "Unknown") + assert err.code == 0 + assert str(err) == "[0] Unknown" + + +class TestPulsingSystemError: + def test_basic(self): + err = PulsingSystemError("OOM") + assert str(err) == "OOM" + assert err.error == "OOM" + assert err.recoverable is True + assert isinstance(err, PulsingActorError) + + def test_not_recoverable(self): + err = PulsingSystemError("fatal 
crash", recoverable=False) + assert err.recoverable is False + assert err.error == "fatal crash" + + def test_explicitly_recoverable(self): + err = PulsingSystemError("transient", recoverable=True) + assert err.recoverable is True + + +class TestPulsingTimeoutError: + def test_basic(self): + err = PulsingTimeoutError("fetch") + assert err.operation == "fetch" + assert err.duration_ms == 0 + assert "fetch" in str(err) + assert "0ms" in str(err) + assert isinstance(err, PulsingActorError) + + def test_with_duration(self): + err = PulsingTimeoutError("db_query", duration_ms=5000) + assert err.operation == "db_query" + assert err.duration_ms == 5000 + assert str(err) == "Operation 'db_query' timed out after 5000ms" + + def test_zero_duration(self): + err = PulsingTimeoutError("op", duration_ms=0) + assert err.duration_ms == 0 + + +class TestPulsingUnsupportedError: + def test_basic(self): + err = PulsingUnsupportedError("legacy_rpc") + assert err.operation == "legacy_rpc" + assert str(err) == "Unsupported operation: legacy_rpc" + assert isinstance(err, PulsingActorError) + + def test_inherits_full_chain(self): + err = PulsingUnsupportedError("op") + assert isinstance(err, PulsingActorError) + assert isinstance(err, PulsingError) + assert isinstance(err, Exception) + assert err.cause is None + + +class TestExceptionHierarchy: + """Verify the full inheritance chain for catch-all patterns.""" + + def test_catch_all_pulsing_error(self): + exceptions = [ + PulsingRuntimeError("rt"), + PulsingActorError("actor"), + PulsingBusinessError(400, "biz"), + PulsingSystemError("sys"), + PulsingTimeoutError("op"), + PulsingUnsupportedError("op"), + ] + for exc in exceptions: + assert isinstance( + exc, PulsingError + ), f"{type(exc).__name__} not PulsingError" + + def test_catch_actor_errors_only(self): + actor_errors = [ + PulsingActorError("a"), + PulsingBusinessError(400, "b"), + PulsingSystemError("s"), + PulsingTimeoutError("t"), + PulsingUnsupportedError("u"), + ] + for exc in 
actor_errors: + assert isinstance(exc, PulsingActorError) + + runtime_err = PulsingRuntimeError("rt") + assert not isinstance(runtime_err, PulsingActorError) + + def test_raise_and_catch_business(self): + with pytest.raises(PulsingBusinessError) as exc_info: + raise PulsingBusinessError(403, "Forbidden", details="not allowed") + assert exc_info.value.code == 403 + assert exc_info.value.details == "not allowed" + + def test_raise_and_catch_as_parent(self): + with pytest.raises(PulsingError): + raise PulsingTimeoutError("slow_op", duration_ms=10000) diff --git a/tests/python/test_resolve_as_any.py b/tests/python/test_resolve_as_any.py index 2a408887a..e90077c20 100644 --- a/tests/python/test_resolve_as_any.py +++ b/tests/python/test_resolve_as_any.py @@ -14,6 +14,8 @@ import pytest +from pulsing.exceptions import PulsingRuntimeError + import pulsing as pul from pulsing.core import Actor, ActorRef, as_any, remote @@ -306,6 +308,6 @@ async def test_counter_resolve_with_timeout(initialized_pul): @pytest.mark.asyncio async def test_counter_resolve_timeout_not_found(initialized_pul): - """Counter.resolve(name, timeout=...) raises after timeout if not found.""" - with pytest.raises(RuntimeError): + """Counter.resolve(name, timeout=...) raises PulsingRuntimeError if not found.""" + with pytest.raises(PulsingRuntimeError): await _ServiceWithMethods.resolve("nonexistent_actor", timeout=0.3) diff --git a/tests/python/test_sealed_message.py b/tests/python/test_sealed_message.py index aa1206ee0..5b7c66c09 100644 --- a/tests/python/test_sealed_message.py +++ b/tests/python/test_sealed_message.py @@ -1,9 +1,7 @@ -""" -Tests for SealedPyMessage - Python object serialization for Python-to-Python actor communication. +"""Tests for Python object serialization in Python-to-Python actor communication. 
Covers: -- SealedPyMessage seal/unseal functionality -- ask/tell with arbitrary Python objects +- ask/tell with arbitrary Python objects (pickle 由运行时内部处理) - receive returning arbitrary Python objects - Python-to-Python actor communication with isinstance-based dispatch - Backward compatibility with Message.from_json @@ -17,7 +15,6 @@ from pulsing.core import ( Actor, Message, - SealedPyMessage, ZeroCopyDescriptor, ) import pulsing as pul @@ -206,76 +203,6 @@ async def actor_system(): await system.shutdown() -# ============================================================================ -# SealedPyMessage Unit Tests -# ============================================================================ - - -def test_sealed_message_seal_unseal_dict(): - """Test sealing and unsealing a dict.""" - original = {"key": "value", "number": 42, "nested": {"a": 1}} - sealed = SealedPyMessage.seal(original) - - assert sealed is not None - assert len(sealed.data) > 0 - - unsealed = sealed.unseal() - assert unsealed == original - - -def test_sealed_message_seal_unseal_dataclass(): - """Test sealing and unsealing a dataclass.""" - original = IncrementCommand(n=10) - sealed = SealedPyMessage.seal(original) - - unsealed = sealed.unseal() - assert unsealed == original - assert isinstance(unsealed, IncrementCommand) - assert unsealed.n == 10 - - -def test_sealed_message_seal_unseal_list(): - """Test sealing and unsealing a list.""" - original = [1, 2, 3, "hello", {"key": "value"}] - sealed = SealedPyMessage.seal(original) - - unsealed = sealed.unseal() - assert unsealed == original - - -def test_sealed_message_seal_unseal_tuple(): - """Test sealing and unsealing a tuple.""" - original = (1, "two", 3.0) - sealed = SealedPyMessage.seal(original) - - unsealed = sealed.unseal() - assert unsealed == original - - -def test_sealed_message_seal_unseal_set(): - """Test sealing and unsealing a set.""" - original = {1, 2, 3, 4, 5} - sealed = SealedPyMessage.seal(original) - - unsealed = sealed.unseal() 
- assert unsealed == original - - -def test_sealed_message_seal_unseal_none(): - """Test sealing and unsealing None.""" - sealed = SealedPyMessage.seal(None) - unsealed = sealed.unseal() - assert unsealed is None - - -def test_sealed_message_repr(): - """Test SealedPyMessage repr.""" - sealed = SealedPyMessage.seal({"test": "data"}) - repr_str = repr(sealed) - assert "SealedPyMessage" in repr_str - assert "data_len=" in repr_str - - # ============================================================================ # Actor Communication Tests - Dataclass Messages # ============================================================================ diff --git a/tests/python/test_system_actor.py b/tests/python/test_system_actor.py index 92a1e2860..0bf71a4a3 100644 --- a/tests/python/test_system_actor.py +++ b/tests/python/test_system_actor.py @@ -231,7 +231,7 @@ def get_value(self): @pytest.mark.asyncio async def test_remote_local_creation(system): """@remote should allow local actor creation.""" - counter = await TestCounter.local(system, init_value=10) + counter = await TestCounter.spawn(system=system, init_value=10) # Should be able to call methods result = await counter.increment(5) From aacac22f61d2ac6deb7565695d0bf9afff458168 Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 1 Mar 2026 01:51:57 +0800 Subject: [PATCH 2/5] Refactor Pulsing framework for improved actor management and API clarity - Removed unused imports and refactored actor service references in `__init__.py` for better readability. - Consolidated actor service registration in `actor_system` to enhance clarity and maintainability. - Streamlined the `Remote` API in `router.py` by introducing a helper method for chunk building, reducing code duplication. - Updated `RustSchedulerBase` and its subclasses to improve worker selection logic and enhance code organization. - Refactored `StorageManager` to simplify bucket and topic management, improving locking mechanisms and resource handling. 
- Enhanced `SyncQueue` and `SyncQueueReader` to utilize a common synchronous execution method, improving code consistency. - Updated tests to reflect changes in actor proxy handling and ensure robust functionality across various scenarios. --- python/pulsing/__init__.py | 12 +- python/pulsing/core/__init__.py | 131 +------- python/pulsing/core/remote.py | 334 +++++--------------- python/pulsing/serving/router.py | 101 +++--- python/pulsing/serving/scheduler.py | 194 ++---------- python/pulsing/streaming/__init__.py | 85 +---- python/pulsing/streaming/manager.py | 244 ++++++-------- python/pulsing/streaming/sync_queue.py | 35 +- tests/python/core/test_init_coverage.py | 42 --- tests/python/core/test_remote_edge_cases.py | 14 +- tests/python/core/test_remote_system_ops.py | 67 +--- tests/python/core/test_remote_unit.py | 4 +- tests/python/streaming/test_topic.py | 51 +-- tests/python/test_actor_list.py | 3 +- tests/python/test_remote_decorator.py | 23 +- tests/python/test_resolve_as_any.py | 19 +- 16 files changed, 358 insertions(+), 1001 deletions(-) diff --git a/python/pulsing/__init__.py b/python/pulsing/__init__.py index 7513aed5f..4305fb7bb 100644 --- a/python/pulsing/__init__.py +++ b/python/pulsing/__init__.py @@ -33,7 +33,6 @@ def incr(self): self.value += 1; return self.value remote, # Resolve function resolve, - as_any, # Mount (attach existing object to Pulsing network) mount, unmount, @@ -46,9 +45,9 @@ def incr(self): self.value += 1; return self.value Message, StreamMessage, SystemConfig, - # Service - PythonActorService, - PYTHON_ACTOR_SERVICE_NAME, + # Service (internal, used by actor_system()) + PythonActorService as _PythonActorService, + PYTHON_ACTOR_SERVICE_NAME as _PYTHON_ACTOR_SERVICE_NAME, ) @@ -205,8 +204,8 @@ async def actor_system( system = ActorSystem(inner) # Automatically register PythonActorService (for remote actor creation) - service = PythonActorService(inner) - await inner.spawn(service, name=PYTHON_ACTOR_SERVICE_NAME, public=True) + 
service = _PythonActorService(inner) + await inner.spawn(service, name=_PYTHON_ACTOR_SERVICE_NAME, public=True) return system @@ -324,7 +323,6 @@ async def read(self, topic, **kwargs): "spawn", "refer", "resolve", - "as_any", "get_system", "is_initialized", # Decorator diff --git a/python/pulsing/core/__init__.py b/python/pulsing/core/__init__.py index a205ed347..f2f4e5a6a 100644 --- a/python/pulsing/core/__init__.py +++ b/python/pulsing/core/__init__.py @@ -21,8 +21,6 @@ def incr(self): self.value += 1; return self.value """ import asyncio -from abc import ABC, abstractmethod -from typing import Any from pulsing._core import ( ActorId, @@ -104,7 +102,6 @@ async def init( loop = asyncio.get_running_loop() _global_system = await ActorSystem.create(config, loop) # Automatically register PythonActorService for remote actor creation - from .remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService service = PythonActorService(_global_system) await _global_system.spawn(service, name=PYTHON_ACTOR_SERVICE_NAME, public=True) @@ -136,84 +133,17 @@ def is_initialized() -> bool: return _global_system is not None -# ============================================================================= -# Timeout utilities for cancellation support -# ============================================================================= - -# Default timeout for ask operations (seconds) -DEFAULT_ASK_TIMEOUT = 30.0 - - -async def ask_with_timeout( - actor_ref: ActorRef, - msg: Any, - timeout: float = DEFAULT_ASK_TIMEOUT, -) -> Any: - """Send a message and wait for response with timeout. - - This is a convenience wrapper around ActorRef.ask() that adds timeout support. - When timeout occurs, the local task is cancelled. Note that this does NOT - guarantee the remote handler will stop - it relies on HTTP/2 RST_STREAM - propagation for stream cancellation. - - For handlers that may run long, implement idempotent operations and/or - check for stream closure in streaming scenarios. 
- - Args: - actor_ref: Target actor reference - msg: Message to send (any Python object or Message) - timeout: Timeout in seconds (default: 30.0) - - Returns: - Response from the actor - - Raises: - asyncio.TimeoutError: If timeout expires before response - Exception: Any error from the actor - - Example: - try: - result = await ask_with_timeout(actor_ref, {"action": "compute"}, timeout=10.0) - except asyncio.TimeoutError: - print("Request timed out") - """ - return await asyncio.wait_for(actor_ref.ask(msg), timeout=timeout) - - -async def tell_with_timeout( - actor_ref: ActorRef, - msg: Any, - timeout: float = DEFAULT_ASK_TIMEOUT, -) -> None: - """Send a fire-and-forget message with timeout. - - Args: - actor_ref: Target actor reference - msg: Message to send - timeout: Timeout in seconds (default: 30.0) - - Raises: - asyncio.TimeoutError: If timeout expires - """ - await asyncio.wait_for(actor_ref.tell(msg), timeout=timeout) - - from . import helpers from .remote import ( PYTHON_ACTOR_SERVICE_NAME, + Actor, ActorClass, ActorProxy, PythonActorService, PythonActorServiceProxy, SystemActorProxy, - as_any, - get_metrics, - get_node_info, get_python_actor_service, get_system_actor, - health_check, - list_actors, - ping, remote, resolve, ) @@ -244,68 +174,9 @@ async def tell_with_timeout( "ActorRef", "ActorId", "ActorProxy", - "as_any", "SystemActorProxy", - "PythonActorService", - "PYTHON_ACTOR_SERVICE_NAME", "ZeroCopyDescriptor", "PulsingError", "PulsingRuntimeError", "PulsingActorError", ] - - -class Actor(ABC): - """Base class for Python actors. Implement `receive` to handle messages. - - Python actors can receive and return arbitrary Python objects when communicating - with other Python actors. The objects are automatically pickled and unpickled. - - For communication with Rust actors, use Message.from_json() and msg.to_json(). - """ - - def on_start(self, actor_id: ActorId) -> None: # noqa: B027 - """Called when actor starts. 
Override to handle actor startup.""" - pass - - def on_stop(self) -> None: # noqa: B027 - """Called when actor stops. Override to handle actor cleanup.""" - pass - - def metadata(self) -> dict[str, str]: - """Return actor metadata for diagnostics""" - return {} - - @abstractmethod - async def receive(self, msg): - """ - Handle incoming message - - Args: - msg: Incoming message. Can be: - - Any Python object (when called from Python actors with ask/tell) - - Message object (when called from Rust actors or with Message.from_json) - - Returns: - - Any Python object: automatically pickled for Python-to-Python communication - - Message.from_json("Type", {...}): JSON response for Rust actor communication - - StreamMessage.create(...): Streaming response - - None: No response - - Example (Python-to-Python, simple objects): - # Caller: - result = await counter.ask({"action": "increment", "n": 10}) - - # Actor receive: - async def receive(self, msg): - if isinstance(msg, dict) and msg.get("action") == "increment": - self.value += msg["n"] - return {"value": self.value} - - Example (Rust actor communication): - async def receive(self, msg): - if isinstance(msg, Message) and msg.msg_type == "Ping": - return Message.from_json("Pong", {"count": 1}) - return None - """ - pass diff --git a/python/pulsing/core/remote.py b/python/pulsing/core/remote.py index fafc7db63..891b7b1fd 100644 --- a/python/pulsing/core/remote.py +++ b/python/pulsing/core/remote.py @@ -101,21 +101,52 @@ def _unwrap_response(resp: dict) -> tuple[Any, str | None]: return (None, None) -async def _ask_convert_errors(ref, msg) -> Any: - """Call ref.ask(msg); Rust raises typed Pulsing exceptions directly.""" - return await ref.ask(msg) +def _check_response(resp, ref) -> Any: + """Unwrap response dict/Message, raise PulsingActorError on errors, return result.""" + if isinstance(resp, dict): + result, error = _unwrap_response(resp) + if error: + raise PulsingActorError(error, actor_name=str(ref.actor_id.id)) + 
return result + if isinstance(resp, Message): + if resp.is_stream: + return resp + data = resp.to_json() + if resp.msg_type == "Error": + raise PulsingActorError( + data.get("error", "Remote call failed"), + actor_name=str(ref.actor_id.id), + ) + if isinstance(data, dict): + result, error = _unwrap_response(data) + if error: + raise PulsingActorError(error, actor_name=str(ref.actor_id.id)) + if result is not None: + return result + return data.get("result") + return resp + return resp + + +def _normalize_actor_name(cls_name: str, name: str | None) -> str: + """Build actor path from optional name and class name.""" + if name and "/" in name: + return name + if name: + return f"actors/{name}" + return f"actors/{cls_name}_{uuid.uuid4().hex[:8]}" logger = logging.getLogger(__name__) -class _ActorBase(ABC): - """Actor base class.""" +class Actor(ABC): + """Base class for Python actors. Implement `receive` to handle messages.""" - def on_start(self, actor_id) -> None: + def on_start(self, actor_id) -> None: # noqa: B027 pass - def on_stop(self) -> None: + def on_stop(self) -> None: # noqa: B027 pass def metadata(self) -> dict[str, str]: @@ -224,16 +255,6 @@ def ref(self) -> ActorRef: """Get underlying ActorRef.""" return self._ref - @classmethod - def from_ref( - cls, - actor_ref: ActorRef, - methods: list[str] | None = None, - async_methods: set[str] | None = None, - ) -> "ActorProxy": - """Create ActorProxy from ActorRef.""" - return cls(actor_ref, methods, async_methods) - class _MethodCaller: """Method caller. 
Supports two usage patterns: @@ -259,32 +280,11 @@ def __await__(self): async def _sync_call(self, *args, **kwargs) -> Any: """Synchronous method call.""" call_msg = _wrap_call(self._method, args, kwargs, False) - resp = await _ask_convert_errors(self._ref, call_msg) - - if isinstance(resp, dict): - result, error = _unwrap_response(resp) - if error: - raise PulsingActorError(error, actor_name=str(self._ref.actor_id.id)) - return result - elif isinstance(resp, Message): - if resp.is_stream: - # Sync generator: return an awaitable/iterable stream reader - return _AsyncMethodCall.from_message(self._ref, resp) - data = resp.to_json() - if not isinstance(data, dict): - return resp - if resp.msg_type == "Error": - raise PulsingActorError( - data.get("error", "Remote call failed"), - actor_name=str(self._ref.actor_id.id), - ) - result, error = _unwrap_response(data) - if error: - raise PulsingActorError(error, actor_name=str(self._ref.actor_id.id)) - if result is not None: - return result - return data.get("result") - return resp + resp = await self._ref.ask(call_msg) + result = _check_response(resp, self._ref) + if isinstance(result, Message) and result.is_stream: + return _AsyncMethodCall.from_message(self._ref, result) + return result class _AsyncMethodCall: @@ -331,38 +331,13 @@ async def _ensure_stream(self) -> None: return call_msg = _wrap_call(self._method, self._args, self._kwargs, True) - resp = await _ask_convert_errors(self._ref, call_msg) + resp = await self._ref.ask(call_msg) + result = _check_response(resp, self._ref) - if isinstance(resp, Message): - if resp.is_stream: - self._stream_reader = resp.stream_reader() - else: - data = resp.to_json() - if resp.msg_type == "Error": - raise PulsingActorError( - data.get("error", "Remote call failed"), - actor_name=str(self._ref.actor_id.id), - ) - result, error = _unwrap_response(data) - if error: - raise PulsingActorError( - error, actor_name=str(self._ref.actor_id.id) - ) - self._final_result = result - 
self._got_result = True + if isinstance(result, Message) and result.is_stream: + self._stream_reader = result.stream_reader() else: - # Direct dict from Python actor called with is_async=True - if isinstance(resp, dict): - pulsing = resp.get("__pulsing__", {}) - if isinstance(pulsing, dict): - if "error" in pulsing: - raise PulsingActorError( - pulsing["error"], actor_name=str(self._ref.actor_id.id) - ) - self._final_result = pulsing.get("result") - self._got_result = True - return - self._final_result = resp + self._final_result = result self._got_result = True def __aiter__(self): @@ -436,7 +411,7 @@ async def _send(): return caller -class _WrappedActor(_ActorBase): +class _WrappedActor(Actor): """Wraps user class as an Actor""" def __init__(self, instance: Any): @@ -520,17 +495,13 @@ async def receive(self, msg) -> Any: ) ) - # For async methods, use streaming response if is_async_method and is_async_call: - return self._handle_async_method(func, args, kwargs) + return self._stream_result(func(*args, **kwargs)) - # Regular method or not marked as async call try: result = func(*args, **kwargs) - # Check if result is a generator (sync or async) FIRST - # This must come before the coroutine check to avoid awaiting generators if inspect.isgenerator(result) or inspect.isasyncgen(result): - return self._handle_generator_result(result) + return self._stream_result(result) if asyncio.iscoroutine(result): result = await result return _wrap_response(result=result) @@ -585,26 +556,32 @@ async def _safe_stream_close(writer) -> None: except (RuntimeError, OSError, ConnectionError): pass - def _handle_generator_result(self, gen) -> StreamMessage: - """Handle generator result, return streaming response""" - stream_msg, writer = StreamMessage.create("GeneratorStream") + def _stream_result(self, result_or_gen) -> StreamMessage: + """Stream a generator, coroutine, or plain value back to the caller.""" + stream_msg, writer = StreamMessage.create("Stream") + + async def 
_iter_to_stream(gen): + if inspect.isasyncgen(gen): + async for item in gen: + if not await self._safe_stream_write(writer, {"__yield__": item}): + return + else: + for item in gen: + if not await self._safe_stream_write(writer, {"__yield__": item}): + return async def execute(): try: - if inspect.isasyncgen(gen): - async for item in gen: - if not await self._safe_stream_write( - writer, {"__yield__": item} - ): - return + r = result_or_gen + if inspect.isasyncgen(r) or inspect.isgenerator(r): + await _iter_to_stream(r) + final = None + elif asyncio.iscoroutine(r): + final = await r else: - for item in gen: - if not await self._safe_stream_write( - writer, {"__yield__": item} - ): - return + final = r await self._safe_stream_write( - writer, {"__pulsing__": {"final": True, "result": None}} + writer, {"__pulsing__": {"final": True, "result": final}} ) except Exception as e: await self._safe_stream_write( @@ -617,55 +594,8 @@ async def execute(): task.add_done_callback(_consume_task_exception) return stream_msg - def _handle_async_method(self, func, args, kwargs) -> StreamMessage: - """Handle async method, return streaming response""" - stream_msg, writer = StreamMessage.create("AsyncMethodStream") - - async def execute(): - try: - result = func(*args, **kwargs) - # Check result type - if inspect.isasyncgen(result): - async for item in result: - if not await self._safe_stream_write( - writer, {"__yield__": item} - ): - return - await self._safe_stream_write( - writer, {"__pulsing__": {"final": True, "result": None}} - ) - elif asyncio.iscoroutine(result): - final_result = await result - await self._safe_stream_write( - writer, {"__pulsing__": {"final": True, "result": final_result}} - ) - elif inspect.isgenerator(result): - for item in result: - if not await self._safe_stream_write( - writer, {"__yield__": item} - ): - return - await self._safe_stream_write( - writer, {"__pulsing__": {"final": True, "result": None}} - ) - else: - await self._safe_stream_write( - 
writer, {"__pulsing__": {"final": True, "result": result}} - ) - except Exception as e: - await self._safe_stream_write( - writer, {"__pulsing__": {"error": str(e)}} - ) - finally: - await self._safe_stream_close(writer) - - task = asyncio.create_task(execute()) - task.add_done_callback(_consume_task_exception) - return stream_msg - - -class PythonActorService(_ActorBase): +class PythonActorService(Actor): """Python Actor creation service - one per node, handles Python actor creation requests. Note: Rust SystemActor (path "system/core") handles system-level operations, @@ -853,14 +783,10 @@ def incr(self): self.value += 1; return self.value counter = await Counter.spawn(init=10) result = await counter.incr() """ - from . import _global_system - - if system is None: - system = _global_system if system is None: - raise PulsingRuntimeError( - "Actor system not initialized. Call 'await init()' first." - ) + from . import get_system + + system = get_system() if public is None: public = name is not None @@ -890,15 +816,7 @@ async def _spawn_local( public: bool = False, **kwargs, ) -> ActorProxy: - actor_name = ( - name - if (name and "/" in name) - else ( - f"actors/{name}" - if name - else f"actors/{self._cls.__name__}_{uuid.uuid4().hex[:8]}" - ) - ) + actor_name = _normalize_actor_name(self._cls.__name__, name) if self._restart_policy != "never": _wrapped_holder: list[_WrappedActor] = [] @@ -954,18 +872,9 @@ async def _spawn_remote( PYTHON_ACTOR_SERVICE_NAME, node_id=node_id ) - actor_name = ( - name - if (name and "/" in name) - else ( - f"actors/{name}" - if name - else f"actors/{self._cls.__name__}_{uuid.uuid4().hex[:8]}" - ) - ) + actor_name = _normalize_actor_name(self._cls.__name__, name) - resp = await _ask_convert_errors( - service_ref, + resp = await service_ref.ask( Message.from_json( "CreateActor", { @@ -1039,14 +948,10 @@ async def generate(self, prompt): ... 
# async method, streaming response # Or directly await to get final result final = await counter.generate("hello") """ - from . import _global_system - if system is None: - if _global_system is None: - raise RuntimeError( - "Actor system not initialized. Call 'await init()' first." - ) - system = _global_system + from . import get_system + + system = get_system() actor_ref = await system.resolve_named(name, node_id=node_id, timeout=timeout) return ActorProxy(actor_ref, self._methods, self._async_methods) @@ -1116,8 +1021,7 @@ def ref(self) -> ActorRef: async def _ask(self, msg_type: str) -> dict: """Send SystemMessage and return response.""" - resp = await _ask_convert_errors( - self._ref, + resp = await self._ref.ask( Message.from_json("SystemMessage", {"type": msg_type}), ) return resp.to_json() @@ -1194,9 +1098,7 @@ async def list_registry(self) -> list[str]: Returns: List of registered class names """ - resp = await _ask_convert_errors( - self._ref, Message.from_json("ListRegistry", {}) - ) + resp = await self._ref.ask(Message.from_json("ListRegistry", {})) data = resp.to_json() return data.get("classes", []) @@ -1231,8 +1133,7 @@ async def create_actor( Raises: RuntimeError: If creation fails """ - resp = await _ask_convert_errors( - self._ref, + resp = await self._ref.ask( Message.from_json( "CreateActor", { @@ -1275,42 +1176,6 @@ async def get_python_actor_service( return PythonActorServiceProxy(service_ref) -# Legacy helper functions (for backwards compatibility) -async def list_actors(system: ActorSystem) -> list[dict]: - """List all actors on the current node.""" - proxy = await get_system_actor(system) - return await proxy.list_actors() - - -async def get_metrics(system: ActorSystem) -> dict: - """Get system metrics.""" - proxy = await get_system_actor(system) - return await proxy.get_metrics() - - -async def get_node_info(system: ActorSystem) -> dict: - """Get node info.""" - proxy = await get_system_actor(system) - return await proxy.get_node_info() - 
- -async def health_check(system: ActorSystem) -> dict: - """Health check.""" - proxy = await get_system_actor(system) - return await proxy.health_check() - - -async def ping(system: ActorSystem, node_id: int | None = None) -> dict: - """Ping node. - - Args: - system: ActorSystem instance - node_id: Target node ID (None means local node) - """ - proxy = await get_system_actor(system, node_id) - return await proxy.ping() - - async def resolve( name: str, *, @@ -1351,31 +1216,6 @@ async def resolve( ref = await resolve("my_counter") result = await ref.ask({"__call__": "increment", "args": [], "kwargs": {}}) """ - from . import _global_system - - if _global_system is None: - raise RuntimeError("Actor system not initialized. Call 'await init()' first.") - - return await _global_system.resolve(name, node_id=node_id, timeout=timeout) - - -def as_any(ref: ActorRef) -> ActorProxy: - """Return an untyped proxy that forwards any method call to the remote actor. - - Use when you have an ActorRef and want to call methods by name - without the typed class. - - Args: - ref: ActorRef from resolve(name). - - Example: - ref = await resolve("channel.discord") - proxy = as_any(ref) # or proxy = ref.as_any() - await proxy.send_text(chat_id, content) - """ - return ref.as_any() - + from . 
import get_system -RemoteClass = ActorClass -# Keep old name as alias (backward compatibility) -SystemActor = PythonActorService + return await get_system().resolve(name, node_id=node_id, timeout=timeout) diff --git a/python/pulsing/serving/router.py b/python/pulsing/serving/router.py index 952b1c457..0399069a7 100644 --- a/python/pulsing/serving/router.py +++ b/python/pulsing/serving/router.py @@ -217,6 +217,22 @@ async def _sync_generate( res_data["choices"][0]["text"] = text return web.json_response(res_data) + def _build_chunk( + self, request_id, obj_type, created, model, text, finish_reason, is_chat + ) -> dict: + choice: dict = {"index": 0, "finish_reason": finish_reason} + if is_chat: + choice["delta"] = {"content": text} if text else {} + else: + choice["text"] = text or "" + return { + "id": request_id, + "object": obj_type, + "created": created, + "model": model or self.model_name, + "choices": [choice], + } + async def _stream_generate( self, request: web.Request, @@ -235,78 +251,65 @@ async def _stream_generate( obj_type = "chat.completion.chunk" if is_chat else "text_completion" + async def _send(data: dict): + await stream_response.write(f"data: {json.dumps(data)}\n\n".encode()) + try: worker = worker_ref.as_any() stream = worker.generate_stream(prompt=prompt, max_new_tokens=max_tokens) async for chunk in stream: if isinstance(chunk, dict) and chunk.get("error"): - await stream_response.write( - f"data: {json.dumps({'error': chunk['error']})}\n\n".encode() - ) + await _send({"error": chunk["error"]}) await stream_response.write(b"data: [DONE]\n\n") return stream_response try: finish_reason = chunk.get("finish_reason") text = chunk.get("text", "") - # Check if finished if finish_reason: - # Send final chunk (if has text) if text: - data = { - "id": request_id, - "object": obj_type, - "created": created, - "model": model or self.model_name, - "choices": [ - {"index": 0, "finish_reason": finish_reason} - ], - } - if is_chat: - 
data["choices"][0]["delta"] = {"content": text} - else: - data["choices"][0]["text"] = text - await stream_response.write( - f"data: {json.dumps(data)}\n\n".encode() + await _send( + self._build_chunk( + request_id, + obj_type, + created, + model, + text, + finish_reason, + is_chat, + ) ) break - # Only send non-empty text if text: - data = { - "id": request_id, - "object": obj_type, - "created": created, - "model": model or self.model_name, - "choices": [{"index": 0, "finish_reason": None}], - } - if is_chat: - data["choices"][0]["delta"] = {"content": text} - else: - data["choices"][0]["text"] = text - await stream_response.write( - f"data: {json.dumps(data)}\n\n".encode() + await _send( + self._build_chunk( + request_id, + obj_type, + created, + model, + text, + None, + is_chat, + ) ) except json.JSONDecodeError: continue except Exception as e: - await stream_response.write( - f"data: {json.dumps({'error': str(e)})}\n\n".encode() - ) + await _send({"error": str(e)}) - final = { - "id": request_id, - "object": obj_type, - "created": created, - "model": model or self.model_name, - "choices": [{"index": 0, "finish_reason": "stop"}], - } - if is_chat: - final["choices"][0]["delta"] = {} - else: - final["choices"][0]["text"] = "" - await stream_response.write(f"data: {json.dumps(final)}\n\n".encode()) + await _send( + self._build_chunk( + request_id, + obj_type, + created, + model, + "", + "stop", + is_chat, + ) + ) await stream_response.write(b"data: [DONE]\n\n") return stream_response diff --git a/python/pulsing/serving/scheduler.py b/python/pulsing/serving/scheduler.py index be4528936..a7bb760e9 100644 --- a/python/pulsing/serving/scheduler.py +++ b/python/pulsing/serving/scheduler.py @@ -173,50 +173,34 @@ async def select_worker( class RustSchedulerBase(Scheduler): - """Rust scheduler base class""" + """Rust scheduler base class — provides shared select_worker implementation.""" def __init__(self, actor_system, worker_name: str = "worker"): + if not 
RUST_POLICIES_AVAILABLE: + raise ImportError("Rust policies not available. Rebuild with maturin.") super().__init__(actor_system, worker_name) self._worker_info_cache: dict[str, WorkerInfo] = {} + self._policy = None # subclasses set this def _get_worker_info(self, worker_data: dict) -> WorkerInfo: - """Get or create WorkerInfo object""" node_id = worker_data.get("node_id", "") - if node_id not in self._worker_info_cache: url = worker_data.get("addr", f"http://{node_id}") model_id = worker_data.get("model_id", "default") self._worker_info_cache[node_id] = WorkerInfo(url, model_id) - worker_info = self._worker_info_cache[node_id] - - # Update health status - is_healthy = worker_data.get("status") == "Alive" - worker_info.is_healthy = is_healthy - + worker_info.is_healthy = worker_data.get("status") == "Alive" return worker_info def _workers_to_info_list(self, workers: list) -> list: - """Convert worker data to WorkerInfo list""" return [self._get_worker_info(w) for w in workers] - @abstractmethod - def _get_policy(self): - """Get Rust policy object""" - pass - - -class RustRandomScheduler(RustSchedulerBase): - """Random scheduler (Rust implementation, high performance)""" - - def __init__(self, actor_system, worker_name: str = "worker"): - if not RUST_POLICIES_AVAILABLE: - raise ImportError("Rust policies not available. Rebuild with maturin.") - super().__init__(actor_system, worker_name) - self._policy = RandomPolicy() + def _pre_select(self, worker_infos: list) -> None: + """Hook for subclasses that need setup before selection (e.g. CacheAware).""" - def _get_policy(self): - return self._policy + def _do_select(self, worker_infos, request_text, headers): + """Invoke the Rust policy. Override for policies that need extra args (e.g. 
headers).""" + return self._policy.select_worker(worker_infos, request_text) async def select_worker( self, @@ -228,161 +212,67 @@ async def select_worker( return None worker_infos = self._workers_to_info_list(workers) - selected_idx = self._policy.select_worker(worker_infos, request_text) + self._pre_select(worker_infos) + selected_idx = self._do_select(worker_infos, request_text, headers) if selected_idx is None: return None + return await pulsing.refer(workers[selected_idx].get("actor_id")) - selected_worker = workers[selected_idx] - return await pulsing.refer(selected_worker.get("actor_id")) + +class RustRandomScheduler(RustSchedulerBase): + """Random scheduler (Rust implementation, high performance)""" + + def __init__(self, actor_system, worker_name: str = "worker"): + super().__init__(actor_system, worker_name) + self._policy = RandomPolicy() class RustRoundRobinScheduler(RustSchedulerBase): """Round-robin scheduler (Rust implementation)""" def __init__(self, actor_system, worker_name: str = "worker"): - if not RUST_POLICIES_AVAILABLE: - raise ImportError("Rust policies not available. Rebuild with maturin.") super().__init__(actor_system, worker_name) self._policy = RoundRobinPolicy() - def _get_policy(self): - return self._policy - - async def select_worker( - self, - request_text: str | None = None, - headers: dict[str, str] | None = None, - ): - workers = await self.get_available_workers() - if not workers: - return None - - worker_infos = self._workers_to_info_list(workers) - selected_idx = self._policy.select_worker(worker_infos, request_text) - - if selected_idx is None: - return None - - selected_worker = workers[selected_idx] - return await pulsing.refer(selected_worker.get("actor_id")) - def reset(self): - """Reset round-robin counter""" self._policy.reset() class RustPowerOfTwoScheduler(RustSchedulerBase): - """Power-of-Two Choices scheduler (Rust implementation) - - Randomly selects two workers, then chooses the one with lower load. 
- Provides near-optimal load balancing in large-scale clusters. - """ + """Power-of-Two Choices scheduler (Rust implementation)""" def __init__(self, actor_system, worker_name: str = "worker"): - if not RUST_POLICIES_AVAILABLE: - raise ImportError("Rust policies not available. Rebuild with maturin.") super().__init__(actor_system, worker_name) self._policy = PowerOfTwoPolicy() - def _get_policy(self): - return self._policy - - async def select_worker( - self, - request_text: str | None = None, - headers: dict[str, str] | None = None, - ): - workers = await self.get_available_workers() - if not workers: - return None - - worker_infos = self._workers_to_info_list(workers) - selected_idx = self._policy.select_worker(worker_infos, request_text) - - if selected_idx is None: - return None - - selected_worker = workers[selected_idx] - return await pulsing.refer(selected_worker.get("actor_id")) - def update_loads(self, loads: dict[str, int]): - """Update cached load information - - Args: - loads: Mapping from worker URL to load value - """ self._policy.update_loads(loads) class RustConsistentHashScheduler(RustSchedulerBase): """Consistent hash scheduler (Rust implementation) - Routes based on session ID or user ID, ensuring requests from the same user are always routed to the same worker. - Supports extracting routing key from HTTP headers or request body. - - HTTP header priority (checked in order): - - x-session-id - - x-user-id - - x-tenant-id - - x-request-id - - x-correlation-id - - x-trace-id - - Request body field priority: - - session_params.session_id - - user - - session_id - - user_id + Routes based on session ID or user ID, ensuring requests from the same user + are always routed to the same worker. """ def __init__(self, actor_system, worker_name: str = "worker"): - if not RUST_POLICIES_AVAILABLE: - raise ImportError("Rust policies not available. 
Rebuild with maturin.") super().__init__(actor_system, worker_name) self._policy = ConsistentHashPolicy() - def _get_policy(self): - return self._policy - - async def select_worker( - self, - request_text: str | None = None, - headers: dict[str, str] | None = None, - ): - workers = await self.get_available_workers() - if not workers: - return None - - worker_infos = self._workers_to_info_list(workers) - selected_idx = self._policy.select_worker(worker_infos, request_text, headers) - - if selected_idx is None: - return None - - selected_worker = workers[selected_idx] - return await pulsing.refer(selected_worker.get("actor_id")) + def _do_select(self, worker_infos, request_text, headers): + return self._policy.select_worker(worker_infos, request_text, headers) def reset(self): - """Reset hash ring""" self._policy.reset() class RustCacheAwareScheduler(RustSchedulerBase): """Cache-aware scheduler (Rust implementation) - Combines cache affinity and load balancing strategies: - 1. When system load is balanced, use cache-aware routing (based on Radix Tree prefix matching) - 2. When system load is unbalanced, switch to shortest queue routing - - Particularly suitable for LLM inference scenarios, can improve KV Cache hit rate. - - Args: - cache_threshold: Prefix match threshold (0.0-1.0), use cache affinity routing when exceeded - balance_abs_threshold: Absolute threshold for load imbalance - balance_rel_threshold: Relative threshold for load imbalance - eviction_interval_secs: Cache eviction interval (seconds) - max_tree_size: Maximum number of Radix Tree nodes + Combines cache affinity and load balancing. Suitable for LLM inference + scenarios to improve KV Cache hit rate. """ def __init__( @@ -395,10 +285,7 @@ def __init__( eviction_interval_secs: int = 60, max_tree_size: int = 100000, ): - if not RUST_POLICIES_AVAILABLE: - raise ImportError("Rust policies not available. 
Rebuild with maturin.") super().__init__(actor_system, worker_name) - config = CacheAwareConfig( cache_threshold=cache_threshold, balance_abs_threshold=balance_abs_threshold, @@ -408,41 +295,16 @@ def __init__( ) self._policy = CacheAwarePolicy(config) - def _get_policy(self): - return self._policy - - async def select_worker( - self, - request_text: str | None = None, - headers: dict[str, str] | None = None, - ): - workers = await self.get_available_workers() - if not workers: - return None - - worker_infos = self._workers_to_info_list(workers) - - # Initialize workers (if first call) + def _pre_select(self, worker_infos: list) -> None: self._policy.init_workers(worker_infos) - selected_idx = self._policy.select_worker(worker_infos, request_text) - - if selected_idx is None: - return None - - selected_worker = workers[selected_idx] - return await pulsing.refer(selected_worker.get("actor_id")) - def add_worker(self, url: str, model_id: str = "default"): - """Add worker to cache tree""" self._policy.add_worker(url, model_id) def remove_worker(self, url: str): - """Remove worker from cache tree""" self._policy.remove_worker(url) def evict_cache(self, max_size: int): - """Manually trigger cache eviction""" self._policy.evict_cache(max_size) diff --git a/python/pulsing/streaming/__init__.py b/python/pulsing/streaming/__init__.py index e09252164..44cae4eb3 100644 --- a/python/pulsing/streaming/__init__.py +++ b/python/pulsing/streaming/__init__.py @@ -9,7 +9,7 @@ reader = await system.topic.read("events") """ -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING from .backend import ( MemoryBackend, @@ -42,88 +42,29 @@ class QueueAPI: - """Queue API entry point via system.queue""" + """Queue API entry point via system.queue — delegates to write_queue/read_queue.""" def __init__(self, system: "ActorSystem"): self._system = system - async def write( - self, - topic: str, - *, - bucket_column: str = "id", - num_buckets: int = 4, - batch_size: int = 100, 
- storage_path: str | None = None, - backend: str | type = "memory", - backend_options: dict[str, Any] | None = None, - ) -> Queue: - """Open queue for writing, returns a ``Queue``.""" - return await write_queue( - self._system, - topic, - bucket_column=bucket_column, - num_buckets=num_buckets, - batch_size=batch_size, - storage_path=storage_path, - backend=backend, - backend_options=backend_options, - ) - - async def read( - self, - topic: str, - *, - bucket_id: int | None = None, - bucket_ids: list[int] | None = None, - rank: int | None = None, - world_size: int | None = None, - num_buckets: int = 4, - storage_path: str | None = None, - backend: str | type = "memory", - backend_options: dict[str, Any] | None = None, - ) -> QueueReader: - """Open queue for reading""" - return await read_queue( - self._system, - topic, - bucket_id=bucket_id, - bucket_ids=bucket_ids, - rank=rank, - world_size=world_size, - num_buckets=num_buckets, - storage_path=storage_path, - backend=backend, - backend_options=backend_options, - ) + async def write(self, topic: str, **kwargs) -> Queue: + return await write_queue(self._system, topic, **kwargs) + + async def read(self, topic: str, **kwargs) -> QueueReader: + return await read_queue(self._system, topic, **kwargs) class TopicAPI: - """Topic API entry point via system.topic""" + """Topic API entry point via system.topic — delegates to write_topic/read_topic.""" def __init__(self, system: "ActorSystem"): self._system = system - async def write( - self, - topic: str, - *, - writer_id: str | None = None, - ) -> TopicWriter: - """Open topic for writing""" - return await write_topic(self._system, topic, writer_id=writer_id) - - async def read( - self, - topic: str, - *, - reader_id: str | None = None, - auto_start: bool = False, - ) -> TopicReader: - """Open topic for reading""" - return await read_topic( - self._system, topic, reader_id=reader_id, auto_start=auto_start - ) + async def write(self, topic: str, **kwargs) -> TopicWriter: + 
return await write_topic(self._system, topic, **kwargs) + + async def read(self, topic: str, **kwargs) -> TopicReader: + return await read_topic(self._system, topic, **kwargs) __all__ = [ diff --git a/python/pulsing/streaming/manager.py b/python/pulsing/streaming/manager.py index f8b93c948..ff1cabdfb 100644 --- a/python/pulsing/streaming/manager.py +++ b/python/pulsing/streaming/manager.py @@ -3,7 +3,7 @@ import asyncio import hashlib import logging -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING from pulsing.core import ActorId, ActorRef, ActorSystem, remote @@ -92,12 +92,9 @@ def __init__( self._buckets: dict[tuple[str, int], ActorRef] = {} # Topic brokers managed by this node: {topic_name: ActorRef} self._topics: dict[str, ActorRef] = {} - # Per-resource locks so different buckets/topics can be created in parallel - self._bucket_locks: dict[tuple[str, int], asyncio.Lock] = {} - self._topic_locks: dict[str, asyncio.Lock] = {} + self._locks: dict[str, asyncio.Lock] = {} self._locks_meta = asyncio.Lock() - # Cached cluster member information self._members: list[dict] = [] self._members_updated_at: float = 0 @@ -125,6 +122,30 @@ def _topic_key(self, topic_name: str) -> str: """Generate unique key for topic""" return f"topic:{topic_name}" + async def _get_or_create( + self, cache: dict, cache_key, actor_name: str, spawn_fn + ) -> ActorRef: + """Get or create a local Actor with per-key locking.""" + if cache_key in cache: + return cache[cache_key] + + async with self._locks_meta: + if actor_name not in self._locks: + self._locks[actor_name] = asyncio.Lock() + lock = self._locks[actor_name] + + async with lock: + if cache_key in cache: + return cache[cache_key] + try: + cache[cache_key] = await self.system.resolve_named(actor_name) + logger.debug(f"Resolved existing: {actor_name}") + except Exception: + proxy = await spawn_fn(actor_name) + cache[cache_key] = proxy.ref + logger.info(f"Created: {actor_name}") + return cache[cache_key] + async def 
_get_or_create_bucket( self, topic: str, @@ -134,77 +155,76 @@ async def _get_or_create_bucket( backend: str | type | None = None, backend_options: dict | None = None, ) -> ActorRef: - """Get or create local BucketStorage Actor. Per-key lock allows parallel creation.""" key = (topic, bucket_id) - if key in self._buckets: - return self._buckets[key] - - async with self._locks_meta: - if key not in self._bucket_locks: - self._bucket_locks[key] = asyncio.Lock() - lock = self._bucket_locks[key] + if storage_path: + bucket_storage_path = f"{storage_path}/bucket_{bucket_id}" + else: + bucket_storage_path = f"{self.base_storage_path}/{topic}/bucket_{bucket_id}" + + async def spawn(name): + return await BucketStorage.spawn( + bucket_id=bucket_id, + storage_path=bucket_storage_path, + batch_size=batch_size, + backend=backend or self.default_backend, + backend_options=backend_options, + system=self.system, + name=name, + public=True, + ) - async with lock: - if key in self._buckets: - return self._buckets[key] - actor_name = f"bucket_{topic}_{bucket_id}" - if storage_path: - bucket_storage_path = f"{storage_path}/bucket_{bucket_id}" - else: - bucket_storage_path = ( - f"{self.base_storage_path}/{topic}/bucket_{bucket_id}" - ) - try: - self._buckets[key] = await self.system.resolve_named(actor_name) - logger.debug(f"Resolved existing bucket: {actor_name}") - except Exception: - proxy = await BucketStorage.spawn( - bucket_id=bucket_id, - storage_path=bucket_storage_path, - batch_size=batch_size, - backend=backend or self.default_backend, - backend_options=backend_options, - system=self.system, - name=actor_name, - public=True, - ) - self._buckets[key] = proxy.ref - logger.info(f"Created bucket: {actor_name} at {bucket_storage_path}") - return self._buckets[key] + return await self._get_or_create( + self._buckets, key, f"bucket_{topic}_{bucket_id}", spawn + ) async def _get_or_create_topic_broker(self, topic_name: str) -> ActorRef: - """Get or create local TopicBroker Actor. 
Per-topic lock allows parallel creation.""" - if topic_name in self._topics: - return self._topics[topic_name] - - async with self._locks_meta: - if topic_name not in self._topic_locks: - self._topic_locks[topic_name] = asyncio.Lock() - lock = self._topic_locks[topic_name] + async def spawn(name): + from pulsing.streaming.broker import TopicBroker + + return await TopicBroker.spawn( + topic_name, + self.system, + system=self.system, + name=name, + public=True, + ) - async with lock: - if topic_name in self._topics: - return self._topics[topic_name] - actor_name = f"_topic_broker_{topic_name}" - try: - self._topics[topic_name] = await self.system.resolve_named(actor_name) - logger.debug(f"Resolved existing topic broker: {actor_name}") - except Exception: - from pulsing.streaming.broker import TopicBroker - - proxy = await TopicBroker.spawn( - topic_name, - self.system, - system=self.system, - name=actor_name, - public=True, - ) - self._topics[topic_name] = proxy.ref - logger.info(f"Created topic broker: {actor_name}") - return self._topics[topic_name] + return await self._get_or_create( + self._topics, topic_name, f"_topic_broker_{topic_name}", spawn + ) # ========== Public Remote Methods ========== + async def _route_resource( + self, resource_key: str, ready_type: str, extra_ready: dict, create_fn + ) -> dict: + """Common routing logic: check ownership via consistent hashing, create locally or redirect.""" + members = await self._refresh_members() + owner_node_id = _compute_owner(resource_key, members) + local_node_id = str(self.system.node_id.id) + + if owner_node_id is None or str(owner_node_id) == local_node_id: + ref = await create_fn() + return { + "_type": ready_type, + "actor_id": str(ref.actor_id.id), + "node_id": local_node_id, + **extra_ready, + } + + owner_addr = None + for m in members: + m_node_id = m.get("node_id") + if m_node_id is not None and str(m_node_id) == str(owner_node_id): + owner_addr = m.get("addr") + break + return { + "_type": 
"Redirect", + "owner_node_id": str(owner_node_id), + "owner_addr": owner_addr, + **extra_ready, + } + async def get_bucket( self, topic: str, @@ -214,84 +234,22 @@ async def get_bucket( backend: str | None = None, backend_options: dict | None = None, ) -> dict: - """Get bucket reference. - - Returns: - - {"_type": "BucketReady", "topic": ..., "bucket_id": ..., "actor_id": ..., "node_id": ...} - - {"_type": "Redirect", "topic": ..., "bucket_id": ..., "owner_node_id": ..., "owner_addr": ...} - """ - # Compute owner - bucket_key = self._bucket_key(topic, bucket_id) - members = await self._refresh_members() - owner_node_id = _compute_owner(bucket_key, members) - local_node_id = str(self.system.node_id.id) - - if owner_node_id is None or str(owner_node_id) == local_node_id: - # This node is responsible, create/return bucket - bucket_ref = await self._get_or_create_bucket( + return await self._route_resource( + self._bucket_key(topic, bucket_id), + "BucketReady", + {"topic": topic, "bucket_id": bucket_id}, + lambda: self._get_or_create_bucket( topic, bucket_id, batch_size, storage_path, backend, backend_options - ) - return { - "_type": "BucketReady", - "topic": topic, - "bucket_id": bucket_id, - "actor_id": str(bucket_ref.actor_id.id), - "node_id": str(local_node_id), - } - else: - # Not owned by this node, return redirect - owner_addr = None - for m in members: - m_node_id = m.get("node_id") - if m_node_id is not None and str(m_node_id) == str(owner_node_id): - owner_addr = m.get("addr") - break - - return { - "_type": "Redirect", - "topic": topic, - "bucket_id": bucket_id, - "owner_node_id": str(owner_node_id), - "owner_addr": owner_addr, - } + ), + ) async def get_topic(self, topic: str) -> dict: - """Get topic broker reference. 
- - Returns: - - {"_type": "TopicReady", "topic": ..., "actor_id": ..., "node_id": ...} - - {"_type": "Redirect", "topic": ..., "owner_node_id": ..., "owner_addr": ...} - """ - # Compute owner - topic_key = self._topic_key(topic) - members = await self._refresh_members() - owner_node_id = _compute_owner(topic_key, members) - local_node_id = str(self.system.node_id.id) - - if owner_node_id is None or str(owner_node_id) == local_node_id: - # This node is responsible, create/return topic broker - broker_ref = await self._get_or_create_topic_broker(topic) - return { - "_type": "TopicReady", - "topic": topic, - "actor_id": str(broker_ref.actor_id.id), - "node_id": str(local_node_id), - } - else: - # Not owned by this node, return redirect - owner_addr = None - for m in members: - m_node_id = m.get("node_id") - if m_node_id is not None and str(m_node_id) == str(owner_node_id): - owner_addr = m.get("addr") - break - - return { - "_type": "Redirect", - "topic": topic, - "owner_node_id": str(owner_node_id), - "owner_addr": owner_addr, - } + return await self._route_resource( + self._topic_key(topic), + "TopicReady", + {"topic": topic}, + lambda: self._get_or_create_topic_broker(topic), + ) async def list_buckets(self) -> list[dict]: """List all buckets managed by this node. diff --git a/python/pulsing/streaming/sync_queue.py b/python/pulsing/streaming/sync_queue.py index 1af1fc3af..1222093c0 100644 --- a/python/pulsing/streaming/sync_queue.py +++ b/python/pulsing/streaming/sync_queue.py @@ -13,6 +13,14 @@ from .queue import Queue, QueueReader +def _run_sync(loop: asyncio.AbstractEventLoop | None, coro): + if loop is None or not loop.is_running(): + raise RuntimeError( + "Event loop not running. Sync wrapper requires a running event loop." 
+ ) + return asyncio.run_coroutine_threadsafe(coro, loop).result() + + class SyncQueue: """Synchronous queue wrapper""" @@ -20,16 +28,8 @@ def __init__(self, queue: "Queue"): self._queue = queue self._loop = queue._loop - def _run(self, coro): - """Run coroutine synchronously""" - if self._loop is None or not self._loop.is_running(): - raise RuntimeError( - "Event loop not running. Sync wrapper requires a running event loop." - ) - return asyncio.run_coroutine_threadsafe(coro, self._loop).result() - def put(self, record: dict[str, Any] | list[dict[str, Any]]): - return self._run(self._queue.put(record)) + return _run_sync(self._loop, self._queue.put(record)) def get( self, @@ -39,13 +39,15 @@ def get( wait: bool = False, timeout: float | None = None, ) -> list[dict[str, Any]]: - return self._run(self._queue.get(bucket_id, limit, offset, wait, timeout)) + return _run_sync( + self._loop, self._queue.get(bucket_id, limit, offset, wait, timeout) + ) def flush(self) -> None: - self._run(self._queue.flush()) + _run_sync(self._loop, self._queue.flush()) def stats(self) -> dict[str, Any]: - return self._run(self._queue.stats()) + return _run_sync(self._loop, self._queue.stats()) class SyncQueueReader: @@ -55,15 +57,8 @@ def __init__(self, reader: "QueueReader"): self._reader = reader self._loop = reader.queue._loop - def _run(self, coro): - if self._loop is None or not self._loop.is_running(): - raise RuntimeError( - "Event loop not running. Sync wrapper requires a running event loop." 
- ) - return asyncio.run_coroutine_threadsafe(coro, self._loop).result() - def get(self, limit: int = 100, wait: bool = False, timeout: float | None = None): - return self._run(self._reader.get(limit, wait, timeout)) + return _run_sync(self._loop, self._reader.get(limit, wait, timeout)) def reset(self) -> None: self._reader.reset() diff --git a/tests/python/core/test_init_coverage.py b/tests/python/core/test_init_coverage.py index fef24c9b6..ccfd6c56d 100644 --- a/tests/python/core/test_init_coverage.py +++ b/tests/python/core/test_init_coverage.py @@ -7,7 +7,6 @@ - init() idempotency (double init returns same system) - shutdown() when no system - Actor base class -- ask_with_timeout / tell_with_timeout """ import asyncio @@ -115,46 +114,6 @@ class BadActor(Actor): BadActor() -# ============================================================================ -# ask_with_timeout / tell_with_timeout -# ============================================================================ - - -class TestTimeoutUtilities: - @pytest.mark.asyncio - async def test_ask_with_timeout_success(self): - from pulsing.core import ask_with_timeout - - system = await init() - try: - system_ref = await system.system() - result = await ask_with_timeout( - system_ref, - Message.from_json("SystemMessage", {"type": "Ping"}), - timeout=5.0, - ) - assert result is not None - finally: - await shutdown() - - @pytest.mark.asyncio - async def test_ask_with_timeout_expired(self): - from pulsing.core import ask_with_timeout - - system = await init() - try: - system_ref = await system.system() - - class NeverRespond: - async def ask(self, msg): - await asyncio.sleep(100) - - with pytest.raises(asyncio.TimeoutError): - await ask_with_timeout(NeverRespond(), "msg", timeout=0.01) - finally: - await shutdown() - - # ============================================================================ # Module exports # ============================================================================ @@ -172,7 +131,6 @@ def 
test_core_exports(self): resolve, mount, unmount, - as_any, ) assert ActorClass is not None diff --git a/tests/python/core/test_remote_edge_cases.py b/tests/python/core/test_remote_edge_cases.py index 14a20de5a..e74329e68 100644 --- a/tests/python/core/test_remote_edge_cases.py +++ b/tests/python/core/test_remote_edge_cases.py @@ -693,14 +693,15 @@ def greet(self): @pytest.mark.asyncio async def test_actor_class_resolve_without_init(): - """Resolve without init raises RuntimeError.""" + """Resolve without init raises PulsingRuntimeError.""" + from pulsing.exceptions import PulsingRuntimeError @remote class NeverResolved: def ping(self): return "pong" - with pytest.raises(RuntimeError, match="not initialized"): + with pytest.raises(PulsingRuntimeError, match="not initialized"): await NeverResolved.resolve("nonexistent") @@ -807,14 +808,13 @@ def incr(self): # ============================================================================ -# as_any top-level function +# ref.as_any() instance method # ============================================================================ @pytest.mark.asyncio -async def test_as_any_function(): - """Test the module-level as_any() function.""" - from pulsing.core import as_any +async def test_ref_as_any(): + """Test the ref.as_any() instance method.""" @remote class AsAnyActor: @@ -825,7 +825,7 @@ def greet(self): try: actor = await AsAnyActor.spawn(name="as_any_test", public=True) ref = await get_system().resolve("as_any_test") - proxy = as_any(ref) + proxy = ref.as_any() assert await proxy.greet() == "hi" finally: await shutdown() diff --git a/tests/python/core/test_remote_system_ops.py b/tests/python/core/test_remote_system_ops.py index ab44f009e..d16350b6d 100644 --- a/tests/python/core/test_remote_system_ops.py +++ b/tests/python/core/test_remote_system_ops.py @@ -1,8 +1,7 @@ -"""Tests for remote.py system operation helpers and legacy functions. +"""Tests for remote.py system operation helpers. 
-Covers: list_actors, get_metrics, get_node_info, health_check, ping, -resolve, SystemActorProxy, PythonActorServiceProxy, get_system_actor, -get_python_actor_service. +Covers: SystemActorProxy, PythonActorServiceProxy, get_system_actor, +get_python_actor_service, resolve. """ import asyncio @@ -13,66 +12,33 @@ # ============================================================================ -# Legacy helper functions (call SystemActor under the hood) +# SystemActorProxy operations # ============================================================================ @pytest.mark.asyncio -async def test_list_actors(): - from pulsing.core.remote import list_actors +async def test_system_actor_proxy_legacy_ops(): + """Test system operations via SystemActorProxy (replaces legacy helper functions).""" + from pulsing.core.remote import get_system_actor system = await init() try: - actors = await list_actors(system) - assert isinstance(actors, list) - finally: - await shutdown() - + proxy = await get_system_actor(system) -@pytest.mark.asyncio -async def test_get_metrics(): - from pulsing.core.remote import get_metrics + actors = await proxy.list_actors() + assert isinstance(actors, list) - system = await init() - try: - metrics = await get_metrics(system) + metrics = await proxy.get_metrics() assert isinstance(metrics, dict) - finally: - await shutdown() - -@pytest.mark.asyncio -async def test_get_node_info(): - from pulsing.core.remote import get_node_info - - system = await init() - try: - info = await get_node_info(system) + info = await proxy.get_node_info() assert isinstance(info, dict) - finally: - await shutdown() - - -@pytest.mark.asyncio -async def test_health_check(): - from pulsing.core.remote import health_check - system = await init() - try: - result = await health_check(system) + result = await proxy.health_check() assert isinstance(result, dict) - finally: - await shutdown() - -@pytest.mark.asyncio -async def test_ping(): - from pulsing.core.remote import ping - - 
system = await init() - try: - result = await ping(system) - assert isinstance(result, dict) + pong = await proxy.ping() + assert isinstance(pong, dict) finally: await shutdown() @@ -201,8 +167,9 @@ def echo(self, msg): @pytest.mark.asyncio async def test_resolve_without_init(): from pulsing.core.remote import resolve + from pulsing.exceptions import PulsingRuntimeError - with pytest.raises(RuntimeError, match="not initialized"): + with pytest.raises(PulsingRuntimeError, match="not initialized"): await resolve("anything") diff --git a/tests/python/core/test_remote_unit.py b/tests/python/core/test_remote_unit.py index 0359507b4..d2eb0ea37 100644 --- a/tests/python/core/test_remote_unit.py +++ b/tests/python/core/test_remote_unit.py @@ -319,13 +319,13 @@ class actor_id: proxy = ActorProxy(ref) assert proxy.ref is ref - def test_from_ref(self): + def test_constructor_with_methods(self): class FakeRef: class actor_id: id = 1 ref = FakeRef() - proxy = ActorProxy.from_ref(ref, methods=["a", "b"], async_methods={"b"}) + proxy = ActorProxy(ref, method_names=["a", "b"], async_methods={"b"}) assert "a" in proxy._method_names assert "b" in proxy._async_methods diff --git a/tests/python/streaming/test_topic.py b/tests/python/streaming/test_topic.py index 08f64e195..37ba8c528 100644 --- a/tests/python/streaming/test_topic.py +++ b/tests/python/streaming/test_topic.py @@ -908,65 +908,45 @@ async def receive(self, msg): @pytest.mark.asyncio async def test_ask_with_timeout_success(actor_system): - """Test ask_with_timeout helper function (success case).""" - from pulsing.core import Actor, ActorId, ask_with_timeout + """Test ask with asyncio.wait_for (success case).""" + from pulsing.core import Actor, ActorId class EchoActor(Actor): - def on_start(self, actor_id: ActorId) -> None: - pass - - def on_stop(self) -> None: - pass - async def receive(self, msg): return {"echo": msg} echo = EchoActor() ref = await actor_system.spawn(echo, name="echo_timeout_test") - # ask_with_timeout 
success scenario - result = await ask_with_timeout(ref, {"hello": "world"}, timeout=5.0) + result = await asyncio.wait_for(ref.ask({"hello": "world"}), timeout=5.0) assert result["echo"]["hello"] == "world" @pytest.mark.asyncio async def test_ask_with_timeout_error(actor_system): - """Test ask_with_timeout raises TimeoutError when timeout expires.""" - from pulsing.core import Actor, ActorId, ask_with_timeout + """Test ask raises TimeoutError when timeout expires.""" + from pulsing.core import Actor, ActorId class SlowActor(Actor): - def on_start(self, actor_id: ActorId) -> None: - pass - - def on_stop(self) -> None: - pass - async def receive(self, msg): - await asyncio.sleep(5.0) # Intentionally slow + await asyncio.sleep(5.0) return {"done": True} slow = SlowActor() ref = await actor_system.spawn(slow, name="slow_timeout_test") - # ask_with_timeout timeout scenario with pytest.raises(asyncio.TimeoutError): - await ask_with_timeout(ref, {"hello": "world"}, timeout=0.1) + await asyncio.wait_for(ref.ask({"hello": "world"}), timeout=0.1) @pytest.mark.asyncio async def test_tell_with_timeout_success(actor_system): - """Test tell_with_timeout helper function (success case).""" - from pulsing.core import Actor, ActorId, tell_with_timeout + """Test tell with asyncio.wait_for (success case).""" + from pulsing.core import Actor, ActorId received = [] class CollectorActor(Actor): - def on_start(self, actor_id: ActorId) -> None: - pass - - def on_stop(self) -> None: - pass - async def receive(self, msg): received.append(msg) return None @@ -974,10 +954,8 @@ async def receive(self, msg): collector = CollectorActor() ref = await actor_system.spawn(collector, name="collector_timeout_test") - # tell_with_timeout success scenario (fire-and-forget doesn't wait for response) - await tell_with_timeout(ref, {"hello": "world"}, timeout=5.0) + await asyncio.wait_for(ref.tell({"hello": "world"}), timeout=5.0) - # Wait for message processing await asyncio.sleep(0.1) assert len(received) 
== 1 @@ -991,15 +969,6 @@ async def test_default_publish_timeout(): assert DEFAULT_PUBLISH_TIMEOUT == 30.0 -@pytest.mark.asyncio -async def test_default_ask_timeout(): - """Test that DEFAULT_ASK_TIMEOUT is reasonable.""" - from pulsing.core import DEFAULT_ASK_TIMEOUT - - # Default timeout should be a reasonable value (30 seconds) - assert DEFAULT_ASK_TIMEOUT == 30.0 - - # ============================================================================ # Subscriber Lifecycle Tests (P0-3 Fix Verification) # ============================================================================ diff --git a/tests/python/test_actor_list.py b/tests/python/test_actor_list.py index f2980d3b1..00d82a88a 100644 --- a/tests/python/test_actor_list.py +++ b/tests/python/test_actor_list.py @@ -3,7 +3,8 @@ import asyncio import pytest import json -from pulsing.core import init, remote, get_system, list_actors +from pulsing.core import init, remote, get_system +from pulsing.core.remote import get_system_actor from pulsing.cli.inspect import _print_actors_table import io import sys diff --git a/tests/python/test_remote_decorator.py b/tests/python/test_remote_decorator.py index aca7d26ab..156477268 100644 --- a/tests/python/test_remote_decorator.py +++ b/tests/python/test_remote_decorator.py @@ -48,7 +48,7 @@ def valid_method(self): # Dynamic proxy (no method list) allows any method system = get_system() raw_ref = await system.resolve("my_service") - dynamic_proxy = ActorProxy.from_ref(raw_ref) + dynamic_proxy = ActorProxy(raw_ref) # This creates the method caller but will fail on actual call caller = dynamic_proxy.any_method_name @@ -113,13 +113,13 @@ async def will_fail(self): # ============================================================================ -# ActorProxy.from_ref Tests +# ActorProxy constructor tests # ============================================================================ @pytest.mark.asyncio -async def test_actor_proxy_from_ref_dynamic_mode(): - """Test ActorProxy.from_ref 
in dynamic mode (no method list).""" +async def test_actor_proxy_dynamic_mode(): + """Test ActorProxy in dynamic mode (no method list).""" from pulsing.core import init, shutdown, remote, ActorProxy, get_system @remote @@ -138,10 +138,8 @@ def method_b(self): system = get_system() raw_ref = await system.resolve("dynamic_svc") - # Dynamic mode - any method name is allowed - proxy = ActorProxy.from_ref(raw_ref) + proxy = ActorProxy(raw_ref) - # These should work assert await proxy.method_a() == "a" assert await proxy.method_b() == "b" @@ -150,8 +148,8 @@ def method_b(self): @pytest.mark.asyncio -async def test_actor_proxy_from_ref_with_async_methods(): - """Test ActorProxy.from_ref with explicit async_methods set.""" +async def test_actor_proxy_with_async_methods(): + """Test ActorProxy with explicit async_methods set.""" from pulsing.core import init, shutdown, remote, ActorProxy, get_system @remote @@ -171,17 +169,14 @@ async def async_method(self): system = get_system() raw_ref = await system.resolve("hybrid_svc") - # Create proxy with async method info - proxy = ActorProxy.from_ref( + proxy = ActorProxy( raw_ref, - methods=["sync_method", "async_method"], + method_names=["sync_method", "async_method"], async_methods={"async_method"}, ) - # Sync method assert await proxy.sync_method() == "sync" - # Async method result = await proxy.async_method() assert result == "async" diff --git a/tests/python/test_resolve_as_any.py b/tests/python/test_resolve_as_any.py index e90077c20..d5fd74a70 100644 --- a/tests/python/test_resolve_as_any.py +++ b/tests/python/test_resolve_as_any.py @@ -1,11 +1,10 @@ """ -Tests for resolve().as_any() / .as_type() and as_any(ref): proxy generation on ActorRef. +Tests for resolve().as_any() / .as_type(): proxy generation on ActorRef. 
Covers: - resolve(name) returns ActorRef with .as_any() and .as_type() - ref.as_any() returns an untyped proxy - ref.as_type(cls) returns a typed proxy -- as_any(ref) function works with ref from resolve() or raw ActorRef - typed_proxy.as_any() returns an any proxy with the same underlying ref - ref.ask() / ref.tell() still work (backward compatibility) """ @@ -17,7 +16,7 @@ from pulsing.exceptions import PulsingRuntimeError import pulsing as pul -from pulsing.core import Actor, ActorRef, as_any, remote +from pulsing.core import Actor, ActorRef, remote # ============================================================================ @@ -149,32 +148,32 @@ async def test_as_any_proxy_method_with_args(initialized_pul): # ============================================================================ -# Test: as_any(ref) function +# Test: ref.as_any() instance method # ============================================================================ @pytest.mark.asyncio -async def test_as_any_function_with_ref_from_resolve(initialized_pul): - """as_any(ref) works when ref is from pul.resolve().""" +async def test_as_any_with_ref_from_resolve(initialized_pul): + """ref.as_any() works when ref is from pul.resolve().""" await _ServiceWithMethods.spawn(name="as_any_fn_svc", public=True) ref = await pul.resolve("as_any_fn_svc") - proxy = as_any(ref) + proxy = ref.as_any() result = await proxy.get_value() assert result == 0 @pytest.mark.asyncio -async def test_as_any_function_with_raw_ref(initialized_pul): - """as_any(ref) works when ref is raw ActorRef from system.resolve().""" +async def test_as_any_with_raw_ref(initialized_pul): + """ref.as_any() works when ref is raw ActorRef from system.resolve().""" from pulsing.core import get_system await _ServiceWithMethods.spawn(name="as_any_raw_svc", public=True) system = get_system() raw_ref = await system.resolve("as_any_raw_svc") - proxy = as_any(raw_ref) + proxy = raw_ref.as_any() result = await proxy.get_value() assert result == 0 From 
aa69ea17906def716330d3be12140026fc5e6a14 Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 1 Mar 2026 15:00:20 +0800 Subject: [PATCH 3/5] Refactor Pulsing framework to enhance API clarity and actor management - Removed unused imports and consolidated actor service references in `__init__.py` for improved readability. - Updated `_GlobalQueueAPI` and `_GlobalTopicAPI` to utilize direct queue and topic writing/reading functions, enhancing clarity. - Introduced new `protocol.py` and `proxy.py` files to encapsulate message serialization and actor proxy functionalities, streamlining the codebase. - Refactored `remote.py` to remove redundant methods and improve organization, focusing on actor lifecycle management. - Enhanced `service.py` with a dedicated `PythonActorService` for better actor creation handling and improved error management. - Updated documentation and tests to reflect changes in actor management and API structure, ensuring robust functionality. --- python/pulsing/__init__.py | 24 +- python/pulsing/core/__init__.py | 48 +- python/pulsing/core/protocol.py | 94 ++ python/pulsing/core/proxy.py | 193 ++++ python/pulsing/core/remote.py | 824 ++---------------- python/pulsing/core/service.py | 222 +++++ python/pulsing/integrations/__init__.py | 4 - .../pulsing/integrations/autogen/runtime.py | 24 +- .../integrations/langgraph/executor.py | 2 +- .../pulsing/integrations/langgraph/wrapper.py | 2 +- python/pulsing/integrations/ray_compat.py | 372 -------- python/pulsing/serving/router.py | 59 +- python/pulsing/serving/scheduler.py | 91 +- python/pulsing/streaming/backend.py | 47 +- python/pulsing/streaming/manager.py | 105 +-- python/pulsing/streaming/storage.py | 23 +- tests/python/apis/ray_compat/__init__.py | 1 - .../apis/ray_compat/test_ray_compat_api.py | 284 ------ tests/python/core/test_init_coverage.py | 24 - tests/python/core/test_remote_edge_cases.py | 12 +- tests/python/core/test_remote_system_ops.py | 211 ++--- tests/python/core/test_remote_unit.py | 49 
+- .../test_ray_compat_running_loop.py | 31 - tests/python/streaming/test_queue.py | 382 +------- tests/python/streaming/test_queue_stress.py | 387 ++++++++ 25 files changed, 1229 insertions(+), 2286 deletions(-) create mode 100644 python/pulsing/core/protocol.py create mode 100644 python/pulsing/core/proxy.py create mode 100644 python/pulsing/core/service.py delete mode 100644 python/pulsing/integrations/ray_compat.py delete mode 100644 tests/python/apis/ray_compat/__init__.py delete mode 100644 tests/python/apis/ray_compat/test_ray_compat_api.py delete mode 100644 tests/python/integrations/test_ray_compat_running_loop.py create mode 100644 tests/python/streaming/test_queue_stress.py diff --git a/python/pulsing/__init__.py b/python/pulsing/__init__.py index 4305fb7bb..fa8e9f62a 100644 --- a/python/pulsing/__init__.py +++ b/python/pulsing/__init__.py @@ -42,8 +42,6 @@ def incr(self): self.value += 1; return self.value ActorRef, ActorId, ActorProxy, - Message, - StreamMessage, SystemConfig, # Service (internal, used by actor_system()) PythonActorService as _PythonActorService, @@ -279,32 +277,28 @@ class _GlobalQueueAPI: """Lazy proxy for pul.queue that uses the global system.""" async def write(self, topic, **kwargs): - """Open queue for writing. See QueueAPI.write() for args.""" - from pulsing.streaming import QueueAPI + from pulsing.streaming import write_queue - return await QueueAPI(get_system()).write(topic, **kwargs) + return await write_queue(get_system(), topic, **kwargs) async def read(self, topic, **kwargs): - """Open queue for reading. See QueueAPI.read() for args.""" - from pulsing.streaming import QueueAPI + from pulsing.streaming import read_queue - return await QueueAPI(get_system()).read(topic, **kwargs) + return await read_queue(get_system(), topic, **kwargs) class _GlobalTopicAPI: """Lazy proxy for pul.topic that uses the global system.""" async def write(self, topic, **kwargs): - """Open topic for writing. 
See TopicAPI.write() for args.""" - from pulsing.streaming import TopicAPI + from pulsing.streaming import write_topic - return await TopicAPI(get_system()).write(topic, **kwargs) + return await write_topic(get_system(), topic, **kwargs) async def read(self, topic, **kwargs): - """Open topic for reading. See TopicAPI.read() for args.""" - from pulsing.streaming import TopicAPI + from pulsing.streaming import read_topic - return await TopicAPI(get_system()).read(topic, **kwargs) + return await read_topic(get_system(), topic, **kwargs) queue = _GlobalQueueAPI() @@ -346,8 +340,6 @@ async def read(self, topic, **kwargs): "ActorRef", "ActorId", "ActorProxy", - "Message", - "StreamMessage", # Exceptions "PulsingError", "PulsingRuntimeError", diff --git a/python/pulsing/core/__init__.py b/python/pulsing/core/__init__.py index f2f4e5a6a..1bbd8ca06 100644 --- a/python/pulsing/core/__init__.py +++ b/python/pulsing/core/__init__.py @@ -15,9 +15,6 @@ def incr(self): self.value += 1; return self.value result = await counter.incr() await shutdown() - -Advanced API: - from pulsing.core import ActorSystem, Actor, Message, SystemConfig """ import asyncio @@ -27,13 +24,15 @@ def incr(self): self.value += 1; return self.value ActorRef, ActorSystem, NodeId, - ZeroCopyDescriptor, StreamReader, StreamWriter, SystemConfig, + ZeroCopyDescriptor, ) -from .messaging import Message, StreamMessage - +from .messaging import ( + Message, + StreamMessage, +) # internal: used by service.py / integrations # ============================================================================= # Global system for simple API @@ -61,19 +60,6 @@ async def init( Returns: ActorSystem instance - - Example: - # Standalone mode - await init() - - # Cluster mode (Gossip + seed) - await init(addr="0.0.0.0:8001", seeds=["192.168.1.1:8000"]) - - # Head node - await init(addr="0.0.0.0:8000", is_head_node=True) - - # Worker node - await init(addr="0.0.0.0:8001", head_addr="192.168.1.1:8000") """ global _global_system @@ 
-83,7 +69,6 @@ async def init( if is_head_node and head_addr: raise ValueError("Cannot set both is_head_node and head_addr") - # Build config if addr: config = SystemConfig.with_addr(addr) else: @@ -101,7 +86,6 @@ async def init( loop = asyncio.get_running_loop() _global_system = await ActorSystem.create(config, loop) - # Automatically register PythonActorService for remote actor creation service = PythonActorService(_global_system) await _global_system.spawn(service, name=PYTHON_ACTOR_SERVICE_NAME, public=True) @@ -133,24 +117,25 @@ def is_initialized() -> bool: return _global_system is not None -from . import helpers -from .remote import ( - PYTHON_ACTOR_SERVICE_NAME, +from . import helpers # noqa: E402 +from .helpers import mount, unmount # noqa: E402 +from .proxy import ActorProxy # noqa: E402 +from .remote import ( # noqa: E402 Actor, ActorClass, - ActorProxy, + remote, + resolve, +) +from .service import ( # noqa: E402 + PYTHON_ACTOR_SERVICE_NAME, PythonActorService, PythonActorServiceProxy, SystemActorProxy, get_python_actor_service, get_system_actor, - remote, - resolve, ) -from .helpers import mount, unmount -# Import exceptions for convenience -from pulsing.exceptions import ( +from pulsing.exceptions import ( # noqa: E402 PulsingError, PulsingRuntimeError, PulsingActorError, @@ -167,15 +152,12 @@ def is_initialized() -> bool: "get_system_actor", "is_initialized", "Actor", - "Message", - "StreamMessage", "SystemConfig", "ActorSystem", "ActorRef", "ActorId", "ActorProxy", "SystemActorProxy", - "ZeroCopyDescriptor", "PulsingError", "PulsingRuntimeError", "PulsingActorError", diff --git a/python/pulsing/core/protocol.py b/python/pulsing/core/protocol.py new file mode 100644 index 000000000..b270d22cf --- /dev/null +++ b/python/pulsing/core/protocol.py @@ -0,0 +1,94 @@ +"""Wire protocol — message serialization helpers for the actor call/response format.""" + +import asyncio +import logging +import uuid +from typing import Any + +from pulsing._core import 
Message +from pulsing.exceptions import PulsingActorError + +logger = logging.getLogger(__name__) + + +def _consume_task_exception(task: asyncio.Task) -> None: + """Consume exception from background task to avoid 'Task exception was never retrieved'.""" + try: + task.result() + except asyncio.CancelledError: + pass + except (RuntimeError, OSError, ConnectionError) as e: + if "closed" in str(e).lower() or "stream" in str(e).lower(): + logger.debug("Stream closed before response: %s", e) + else: + logger.exception("Stream task failed: %s", e) + except Exception: + logger.exception("Stream task failed") + + +def _wrap_call(method: str, args: tuple, kwargs: dict, is_async: bool) -> dict: + """Build a flat call message. + + Format: {"__call__": method, "__async__": bool, "args": (...), "kwargs": {...}} + """ + return {"__call__": method, "__async__": is_async, "args": args, "kwargs": kwargs} + + +def _unwrap_call(msg: dict) -> tuple[str, tuple, dict, bool]: + """Parse a flat call message. Returns (method, args, kwargs, is_async).""" + return ( + msg.get("__call__", ""), + tuple(msg.get("args", ())), + dict(msg.get("kwargs", {})), + msg.get("__async__", False), + ) + + +def _wrap_response(result: Any = None, error: str | None = None) -> dict: + """Build a flat response message.""" + if error: + return {"__error__": error} + return {"__result__": result} + + +def _unwrap_response(resp: dict) -> tuple[Any, str | None]: + """Parse a response. Returns (result, error) — one will be None. + + Accepts flat format and Rust actor JSON ({"result": ...} / {"error": ...}). + """ + if "__error__" in resp: + return (None, resp["__error__"]) + if "__result__" in resp: + return (resp["__result__"], None) + if "error" in resp: + return (None, resp["error"]) + if "result" in resp: + return (resp["result"], None) + return (None, None) + + +def _check_response(resp, ref) -> Any: + """Unwrap response, raise PulsingActorError on errors, return result. 
+ + Handles dict responses from Python actors and Message responses (streaming) + from the Rust runtime transparently. + """ + if isinstance(resp, Message): + if resp.is_stream: + return resp + resp = resp.to_json() + if isinstance(resp, dict): + result, error = _unwrap_response(resp) + if error: + raise PulsingActorError(error, actor_name=str(ref.actor_id.id)) + return result + return resp + + +def _normalize_actor_name(cls_name: str, name: str | None) -> str: + """Build actor path from optional name and class name.""" + if name and "/" in name: + return name + if name: + return f"actors/{name}" + return f"actors/{cls_name}_{uuid.uuid4().hex[:8]}" diff --git a/python/pulsing/core/proxy.py b/python/pulsing/core/proxy.py new file mode 100644 index 000000000..b25878ef0 --- /dev/null +++ b/python/pulsing/core/proxy.py @@ -0,0 +1,193 @@ +"""Caller-side proxy classes for actor method invocation.""" + +import asyncio +from typing import Any + +from pulsing._core import ActorRef, Message +from pulsing.exceptions import PulsingActorError + +from .protocol import _check_response, _wrap_call + + +class ActorProxy: + """Actor proxy.""" + + def __init__( + self, + actor_ref: ActorRef, + method_names: list[str] | None = None, + async_methods: set[str] | None = None, + ): + self._ref = actor_ref + self._method_names = set(method_names) if method_names else None + self._async_methods = async_methods + + def __getattr__(self, name: str): + if name.startswith("_"): + raise AttributeError(f"Cannot access private attribute: {name}") + if self._method_names is not None and name not in self._method_names: + raise AttributeError(f"No method '{name}'") + is_async = self._async_methods is None or name in self._async_methods + return _MethodCaller(self._ref, name, is_async=is_async) + + def as_any(self) -> "ActorProxy": + """Return an untyped proxy that forwards any method call to the remote actor.""" + return ActorProxy(self._ref, method_names=None, async_methods=None) + + @property + def 
ref(self) -> ActorRef: + """Get underlying ActorRef.""" + return self._ref + + +class _MethodCaller: + """Method caller. Supports two usage patterns: + - await proxy.method(args) — method call + - await proxy.attr — attribute access (no args) + """ + + def __init__(self, actor_ref: ActorRef, method_name: str, is_async: bool = False): + self._ref = actor_ref + self._method = method_name + self._is_async = is_async + + def __call__(self, *args, **kwargs): + if self._is_async: + return _AsyncMethodCall(self._ref, self._method, args, kwargs) + else: + return self._sync_call(*args, **kwargs) + + def __await__(self): + """Support await proxy.attr for direct attribute access""" + return self().__await__() + + async def _sync_call(self, *args, **kwargs) -> Any: + """Synchronous method call.""" + call_msg = _wrap_call(self._method, args, kwargs, False) + resp = await self._ref.ask(call_msg) + result = _check_response(resp, self._ref) + if isinstance(result, Message) and result.is_stream: + return _AsyncMethodCall.from_message(self._ref, result) + return result + + +class _AsyncMethodCall: + """Async method call — supports await (final result) and async for (stream). 
+ + Usage: + result = await actor.generate("hello") # get final result + async for chunk in actor.generate("hello"): # stream chunks + print(chunk) + """ + + def __init__( + self, actor_ref: ActorRef, method_name: str, args: tuple, kwargs: dict + ): + self._ref = actor_ref + self._method = method_name + self._args = args + self._kwargs = kwargs + self._stream_reader = None + self._final_result = None + self._got_result = False + + @classmethod + def from_message(cls, ref: ActorRef, message: Message) -> "_AsyncMethodCall": + """Build from a pre-acquired streaming Message (sync generator return path).""" + obj = cls.__new__(cls) + obj._ref = ref + obj._method = "" + obj._args = () + obj._kwargs = {} + obj._stream_reader = message.stream_reader() + obj._final_result = None + obj._got_result = False + return obj + + async def _ensure_stream(self) -> None: + """Send RPC and resolve the response. + + For streaming responses, initialises _stream_reader. + For direct responses (non-streaming), resolves _final_result immediately + so __anext__ can stop without an extra iterator allocation. 
+ """ + if self._stream_reader is not None or self._got_result: + return + + call_msg = _wrap_call(self._method, self._args, self._kwargs, True) + resp = await self._ref.ask(call_msg) + result = _check_response(resp, self._ref) + + if isinstance(result, Message) and result.is_stream: + self._stream_reader = result.stream_reader() + else: + self._final_result = result + self._got_result = True + + def __aiter__(self): + return self + + async def __anext__(self): + await self._ensure_stream() + if self._got_result: + raise StopAsyncIteration + try: + item = await self._stream_reader.__anext__() + if isinstance(item, dict): + if "__error__" in item: + raise PulsingActorError( + item["__error__"], actor_name=str(self._ref.actor_id.id) + ) + if item.get("__final__"): + self._final_result = item.get("__result__") + self._got_result = True + raise StopAsyncIteration + if "__yield__" in item: + return item["__yield__"] + return item + except StopAsyncIteration: + raise + + def __await__(self): + return self._await_result().__await__() + + async def _await_result(self): + async for _ in self: + pass + if self._got_result: + return self._final_result + return None + + +class _DelayedCallProxy: + """Proxy returned by ``self.delayed(sec)`` — any method call becomes a delayed message to self. + + Usage inside a @remote class:: + + task = self.delayed(5.0).some_method(arg1, arg2) + task.cancel() # cancel if needed + + Returns an ``asyncio.Task`` that fires after the delay. 
+ """ + + __slots__ = ("_ref", "_delay_sec") + + def __init__(self, ref: ActorRef, delay_sec: float): + self._ref = ref + self._delay_sec = delay_sec + + def __getattr__(self, name: str): + if name.startswith("_"): + raise AttributeError(name) + + def caller(*args, **kwargs): + msg = _wrap_call(name, args, kwargs, is_async=True) + delay = max(0.0, self._delay_sec) + + async def _send(): + await asyncio.sleep(delay) + await self._ref.tell(msg) + + return asyncio.create_task(_send()) + + return caller diff --git a/python/pulsing/core/remote.py b/python/pulsing/core/remote.py index 891b7b1fd..dc228e43b 100644 --- a/python/pulsing/core/remote.py +++ b/python/pulsing/core/remote.py @@ -1,4 +1,4 @@ -"""Ray-like distributed object wrapper.""" +"""@remote decorator, ActorClass, and actor lifecycle management.""" import asyncio import inspect @@ -11,133 +11,22 @@ from pulsing._core import ActorRef, ActorSystem, Message, StreamMessage from pulsing.exceptions import PulsingActorError, PulsingRuntimeError +from .protocol import ( + _consume_task_exception, + _normalize_actor_name, + _unwrap_call, + _wrap_call, + _wrap_response, +) +from .proxy import ActorProxy, _DelayedCallProxy -def _consume_task_exception(task: asyncio.Task) -> None: - """Consume exception from background task to avoid 'Task exception was never retrieved'.""" - try: - task.result() - except asyncio.CancelledError: - pass - except (RuntimeError, OSError, ConnectionError) as e: - if "closed" in str(e).lower() or "stream" in str(e).lower(): - logging.getLogger(__name__).debug("Stream closed before response: %s", e) - else: - logging.getLogger(__name__).exception("Stream task failed: %s", e) - except Exception: - logging.getLogger(__name__).exception("Stream task failed") - - -# Wire format version (single protocol) -_PULSING_WIRE_VERSION = "1" - - -def _wrap_call(method: str, args: tuple, kwargs: dict, is_async: bool) -> dict: - """Wrap method call for wire format (namespace isolation). 
- - Format: - { - "__pulsing_proto__": version, - "__pulsing__": { "call": method_name, "async": is_async }, - "user_data": { "args": args, "kwargs": kwargs } - } - """ - return { - "__pulsing_proto__": _PULSING_WIRE_VERSION, - "__pulsing__": { - "call": method, - "async": is_async, - }, - "user_data": { - "args": args, - "kwargs": kwargs, - }, - } - - -def _unwrap_call(msg: dict) -> tuple[str, tuple, dict, bool]: - """Unwrap call message. Returns (method_name, args, kwargs, is_async).""" - pulsing = msg.get("__pulsing__", {}) - user_data = msg.get("user_data", {}) - return ( - pulsing.get("call", ""), - tuple(user_data.get("args", ())), - dict(user_data.get("kwargs", {})), - pulsing.get("async", False), - ) - - -def _wrap_response(result: Any = None, error: str | None = None) -> dict: - """Wrap response for wire format.""" - if error: - return { - "__pulsing_proto__": _PULSING_WIRE_VERSION, - "__pulsing__": {"error": error}, - "user_data": {}, - } - return { - "__pulsing_proto__": _PULSING_WIRE_VERSION, - "__pulsing__": {"result": result}, - "user_data": {}, - } - - -def _unwrap_response(resp: dict) -> tuple[Any, str | None]: - """Unwrap response. Returns (result, error) - one of them will be None. 
- - Accepts: - - Wire format: {"__pulsing__": {"result": ..., "error": ...}} - - Message JSON: {"result": ..., "error": ...} (Rust actor responses) - """ - pulsing = resp.get("__pulsing__", {}) - if isinstance(pulsing, dict): - if "error" in pulsing: - return (None, pulsing["error"]) - if "result" in pulsing: - return (pulsing["result"], None) - if "error" in resp: - return (None, resp["error"]) - if "result" in resp: - return (resp["result"], None) - return (None, None) - - -def _check_response(resp, ref) -> Any: - """Unwrap response dict/Message, raise PulsingActorError on errors, return result.""" - if isinstance(resp, dict): - result, error = _unwrap_response(resp) - if error: - raise PulsingActorError(error, actor_name=str(ref.actor_id.id)) - return result - if isinstance(resp, Message): - if resp.is_stream: - return resp - data = resp.to_json() - if resp.msg_type == "Error": - raise PulsingActorError( - data.get("error", "Remote call failed"), - actor_name=str(ref.actor_id.id), - ) - if isinstance(data, dict): - result, error = _unwrap_response(data) - if error: - raise PulsingActorError(error, actor_name=str(ref.actor_id.id)) - if result is not None: - return result - return data.get("result") - return resp - return resp - - -def _normalize_actor_name(cls_name: str, name: str | None) -> str: - """Build actor path from optional name and class name.""" - if name and "/" in name: - return name - if name: - return f"actors/{name}" - return f"actors/{cls_name}_{uuid.uuid4().hex[:8]}" +logger = logging.getLogger(__name__) +T = TypeVar("T") -logger = logging.getLogger(__name__) +# ============================================================================ +# Actor base class +# ============================================================================ class Actor(ABC): @@ -158,7 +47,9 @@ async def receive(self, msg) -> Any: pass -T = TypeVar("T") +# ============================================================================ +# Actor class registry +# 
============================================================================ _actor_class_registry: dict[str, type] = {} @@ -167,8 +58,6 @@ async def receive(self, msg) -> Any: def _register_actor_metadata(name: str, cls: type): """Register actor metadata for later retrieval.""" - import inspect - metadata = { "python_class": f"{cls.__module__}.{cls.__name__}", "python_module": cls.__module__, @@ -188,16 +77,10 @@ def get_actor_metadata(name: str) -> dict[str, str] | None: return _actor_metadata_registry.get(name) -def _extract_methods(cls: type) -> tuple[list[str], set[str]]: - """Extract public method names and async method set from a class. - - Handles @pul.remote ActorClass and Ray-wrapped classes by unwrapping first. - """ - # If it's an ActorClass (@pul.remote decorated), extract the original class +def _unwrap_class(cls) -> type: + """Unwrap ActorClass / Ray ActorClass to get the original user class.""" if isinstance(cls, ActorClass): - cls = cls._cls - - # If it's a Ray ActorClass, extract the original class + return cls._cls try: from ray.actor import ActorClass as RayActorClass @@ -205,10 +88,18 @@ def _extract_methods(cls: type) -> tuple[list[str], set[str]]: if hasattr(cls, "__ray_metadata__"): meta = cls.__ray_metadata__ if hasattr(meta, "modified_class"): - cls = meta.modified_class + return meta.modified_class except ImportError: pass + return cls + + +def _extract_methods(cls: type) -> tuple[list[str], set[str]]: + """Extract public method names and async method set from a class. + Handles @pul.remote ActorClass and Ray-wrapped classes by unwrapping first. 
+ """ + cls = _unwrap_class(cls) methods = [] async_methods = set() for name, method in inspect.getmembers(cls, predicate=inspect.isfunction): @@ -220,195 +111,9 @@ def _extract_methods(cls: type) -> tuple[list[str], set[str]]: return methods, async_methods -PYTHON_ACTOR_SERVICE_NAME = "system/python_actor_service" - - -class ActorProxy: - """Actor proxy.""" - - def __init__( - self, - actor_ref: ActorRef, - method_names: list[str] | None = None, - async_methods: set[str] | None = None, - ): - self._ref = actor_ref - self._method_names = set(method_names) if method_names else None - # None means "any proxy": allow any method, treat all as async (streaming support) - self._async_methods = async_methods - - def __getattr__(self, name: str): - if name.startswith("_"): - raise AttributeError(f"Cannot access private attribute: {name}") - if self._method_names is not None and name not in self._method_names: - raise AttributeError(f"No method '{name}'") - # When _async_methods is None (any proxy), treat all methods as async - is_async = self._async_methods is None or name in self._async_methods - return _MethodCaller(self._ref, name, is_async=is_async) - - def as_any(self) -> "ActorProxy": - """Return an untyped proxy that forwards any method call to the remote actor.""" - return ActorProxy(self._ref, method_names=None, async_methods=None) - - @property - def ref(self) -> ActorRef: - """Get underlying ActorRef.""" - return self._ref - - -class _MethodCaller: - """Method caller. 
Supports two usage patterns: - - await proxy.method(args) — method call - - await proxy.attr — attribute access (no args) - """ - - def __init__(self, actor_ref: ActorRef, method_name: str, is_async: bool = False): - self._ref = actor_ref - self._method = method_name - self._is_async = is_async - - def __call__(self, *args, **kwargs): - if self._is_async: - return _AsyncMethodCall(self._ref, self._method, args, kwargs) - else: - return self._sync_call(*args, **kwargs) - - def __await__(self): - """Support await proxy.attr for direct attribute access""" - return self().__await__() - - async def _sync_call(self, *args, **kwargs) -> Any: - """Synchronous method call.""" - call_msg = _wrap_call(self._method, args, kwargs, False) - resp = await self._ref.ask(call_msg) - result = _check_response(resp, self._ref) - if isinstance(result, Message) and result.is_stream: - return _AsyncMethodCall.from_message(self._ref, result) - return result - - -class _AsyncMethodCall: - """Async method call — supports await (final result) and async for (stream). - - Usage: - result = await actor.generate("hello") # get final result - async for chunk in actor.generate("hello"): # stream chunks - print(chunk) - """ - - def __init__( - self, actor_ref: ActorRef, method_name: str, args: tuple, kwargs: dict - ): - self._ref = actor_ref - self._method = method_name - self._args = args - self._kwargs = kwargs - self._stream_reader = None - self._final_result = None - self._got_result = False - - @classmethod - def from_message(cls, ref: ActorRef, message: Message) -> "_AsyncMethodCall": - """Build from a pre-acquired streaming Message (sync generator return path).""" - obj = cls.__new__(cls) - obj._ref = ref - obj._method = "" - obj._args = () - obj._kwargs = {} - obj._stream_reader = message.stream_reader() - obj._final_result = None - obj._got_result = False - return obj - - async def _ensure_stream(self) -> None: - """Send RPC and resolve the response. 
- - For streaming responses, initialises _stream_reader. - For direct responses (non-streaming), resolves _final_result immediately - so __anext__ can stop without an extra iterator allocation. - """ - if self._stream_reader is not None or self._got_result: - return - - call_msg = _wrap_call(self._method, self._args, self._kwargs, True) - resp = await self._ref.ask(call_msg) - result = _check_response(resp, self._ref) - - if isinstance(result, Message) and result.is_stream: - self._stream_reader = result.stream_reader() - else: - self._final_result = result - self._got_result = True - - def __aiter__(self): - return self - - async def __anext__(self): - await self._ensure_stream() - if self._got_result: - raise StopAsyncIteration - try: - item = await self._stream_reader.__anext__() - if isinstance(item, dict): - pulsing = item.get("__pulsing__", {}) - if isinstance(pulsing, dict): - if "error" in pulsing: - raise PulsingActorError( - pulsing["error"], actor_name=str(self._ref.actor_id.id) - ) - if pulsing.get("final"): - self._final_result = pulsing.get("result") - self._got_result = True - raise StopAsyncIteration - if "__yield__" in item: - return item["__yield__"] - return item - except StopAsyncIteration: - raise - - def __await__(self): - return self._await_result().__await__() - - async def _await_result(self): - async for _ in self: - pass - if self._got_result: - return self._final_result - return None - - -class _DelayedCallProxy: - """Proxy returned by ``self.delayed(sec)`` — any method call becomes a delayed message to self. - - Usage inside a @remote class:: - - task = self.delayed(5.0).some_method(arg1, arg2) - task.cancel() # cancel if needed - - Returns an ``asyncio.Task`` that fires after the delay. 
- """ - - __slots__ = ("_ref", "_delay_sec") - - def __init__(self, ref: ActorRef, delay_sec: float): - self._ref = ref - self._delay_sec = delay_sec - - def __getattr__(self, name: str): - if name.startswith("_"): - raise AttributeError(name) - - def caller(*args, **kwargs): - msg = _wrap_call(name, args, kwargs, is_async=True) - delay = max(0.0, self._delay_sec) - - async def _send(): - await asyncio.sleep(delay) - await self._ref.tell(msg) - - return asyncio.create_task(_send()) - - return caller +# ============================================================================ +# _WrappedActor — wraps a user class instance as an Actor +# ============================================================================ class _WrappedActor(Actor): @@ -416,22 +121,18 @@ class _WrappedActor(Actor): def __init__(self, instance: Any): self._instance = instance - # Store original class info for metadata extraction self._original_class = instance.__class__ @property def __original_module__(self): - """Return original class module for Rust metadata extraction""" return self._original_class.__module__ @property def __original_qualname__(self): - """Return original class qualified name for Rust metadata extraction""" return self._original_class.__qualname__ @property def __original_file__(self): - """Return original class file path for Rust metadata extraction""" try: return inspect.getfile(self._original_class) except (TypeError, OSError): @@ -444,7 +145,6 @@ def _inject_delayed(self, actor_ref: ActorRef) -> None: ) def on_start(self, actor_id): - """调用用户 on_start;若为 async 则返回 coroutine 供 Rust 端 run_coroutine_threadsafe 执行。""" if hasattr(self._instance, "on_start"): r = self._instance.on_start(actor_id) if asyncio.iscoroutine(r): @@ -452,7 +152,6 @@ def on_start(self, actor_id): return None def on_stop(self): - """调用用户 on_stop;若为 async 则返回 coroutine 供 Rust 端执行。""" if hasattr(self._instance, "on_stop"): r = self._instance.on_stop() if asyncio.iscoroutine(r): @@ -465,7 +164,6 @@ 
def metadata(self) -> dict[str, str]: return {} async def receive(self, msg) -> Any: - # Handle dict-based call format if isinstance(msg, dict): method, args, kwargs, is_async_call = _unwrap_call(msg) @@ -482,7 +180,6 @@ async def receive(self, msg) -> Any: func = attr - # Detect if it's an async method (including async generators) is_async_method = ( inspect.iscoroutinefunction(func) or inspect.isasyncgenfunction(func) @@ -508,38 +205,10 @@ async def receive(self, msg) -> Any: except Exception as e: return _wrap_response(error=str(e)) - # Handle legacy Message-based call format (for Rust actor compatibility) - if isinstance(msg, Message): - if msg.msg_type != "Call": - return Message.from_json("Error", {"error": f"Unknown: {msg.msg_type}"}) - - data = msg.to_json() - method = data.get("method") - args = data.get("args", []) - kwargs = data.get("kwargs", {}) - - if not method or method.startswith("_"): - return Message.from_json( - "Error", {"error": f"Invalid method: {method}"} - ) - - func = getattr(self._instance, method, None) - if func is None or not callable(func): - return Message.from_json("Error", {"error": f"Not found: {method}"}) - - try: - result = func(*args, **kwargs) - if asyncio.iscoroutine(result): - result = await result - return Message.from_json("Result", {"result": result}) - except Exception as e: - return Message.from_json("Error", {"error": str(e)}) - return _wrap_response(error=f"Unknown message type: {type(msg)}") @staticmethod async def _safe_stream_write(writer, obj: dict) -> bool: - """Write to stream; return False if stream already closed (e.g. 
caller cancelled).""" try: await writer.write(obj) return True @@ -550,7 +219,6 @@ async def _safe_stream_write(writer, obj: dict) -> bool: @staticmethod async def _safe_stream_close(writer) -> None: - """Close stream; ignore if already closed.""" try: await writer.close() except (RuntimeError, OSError, ConnectionError): @@ -581,12 +249,10 @@ async def execute(): else: final = r await self._safe_stream_write( - writer, {"__pulsing__": {"final": True, "result": final}} + writer, {"__final__": True, "__result__": final} ) except Exception as e: - await self._safe_stream_write( - writer, {"__pulsing__": {"error": str(e)}} - ) + await self._safe_stream_write(writer, {"__error__": str(e)}) finally: await self._safe_stream_close(writer) @@ -595,93 +261,11 @@ async def execute(): return stream_msg -class PythonActorService(Actor): - """Python Actor creation service - one per node, handles Python actor creation requests. - - Note: Rust SystemActor (path "system/core") handles system-level operations, - this service specifically handles Python actor creation. 
- """ - - def __init__(self, system: ActorSystem): - self.system = system - - async def receive(self, msg: Message) -> Message | None: - data = msg.to_json() - - if msg.msg_type == "CreateActor": - return await self._create_actor(data) - elif msg.msg_type == "ListRegistry": - # List registered actor classes - return Message.from_json( - "Registry", - {"classes": list(_actor_class_registry.keys())}, - ) - return Message.from_json("Error", {"error": f"Unknown: {msg.msg_type}"}) - - async def _create_actor(self, data: dict) -> Message: - class_name = data.get("class_name") - actor_name = data.get("actor_name") - args = data.get("args", []) - kwargs = data.get("kwargs", {}) - public = data.get("public", True) - - # Supervision config - restart_policy = data.get("restart_policy", "never") - max_restarts = data.get("max_restarts", 3) - min_backoff = data.get("min_backoff", 0.1) - max_backoff = data.get("max_backoff", 30.0) - - cls = _actor_class_registry.get(class_name) - if cls is None: - return Message.from_json( - "Error", {"error": f"Class '{class_name}' not found"} - ) - - try: - if restart_policy != "never": - # For supervision, we must provide a factory - def factory(): - instance = cls(*args, **kwargs) - return _WrappedActor(instance) - - actor_ref = await self.system.spawn( - factory, - name=actor_name, - public=public, - restart_policy=restart_policy, - max_restarts=max_restarts, - min_backoff=min_backoff, - max_backoff=max_backoff, - ) - else: - # Standard spawn - instance = cls(*args, **kwargs) - actor = _WrappedActor(instance) - actor_ref = await self.system.spawn( - actor, name=actor_name, public=public - ) - - # Register actor metadata - _register_actor_metadata(actor_name, cls) - - method_names = [ - n - for n, _ in inspect.getmembers(cls, predicate=inspect.isfunction) - if not n.startswith("_") - ] +# ============================================================================ +# ActorClass & @remote decorator +# 
============================================================================ - return Message.from_json( - "Created", - { - # actor_id is now a UUID (u128), transmit as string for JSON - "actor_id": str(actor_ref.actor_id.id), - "node_id": str(self.system.node_id.id), - "methods": method_names, - }, - ) - except Exception as e: - logger.exception(f"Create actor failed: {e}") - return Message.from_json("Error", {"error": str(e)}) +from .service import PYTHON_ACTOR_SERVICE_NAME class ActorClass: @@ -696,19 +280,6 @@ class ActorClass: counter = await Counter.spawn(placement=node_id) # specific node """ - @staticmethod - def _unwrap_ray_class(cls): - """Extract original user class if cls is a Ray ActorClass.""" - try: - from ray.actor import ActorClass as RayActorClass - except ImportError: - return cls - if isinstance(cls, RayActorClass): - for base in type(cls).__bases__: - if base is not RayActorClass and base.__name__ != "Generic": - return base - return cls - def __init__( self, cls: type, @@ -717,8 +288,7 @@ def __init__( min_backoff: float = 0.1, max_backoff: float = 30.0, ): - unwrapped = self._unwrap_ray_class(cls) - # Keep Ray handle so .remote() remains available for Ray-wrapped classes + unwrapped = _unwrap_class(cls) self._ray_cls = cls if unwrapped is not cls else None cls = unwrapped self._cls = cls @@ -728,21 +298,10 @@ def __init__( self._min_backoff = min_backoff self._max_backoff = max_backoff - self._methods = [] - self._async_methods = set() - - for name, method in inspect.getmembers(cls, predicate=inspect.isfunction): - if name.startswith("_"): - continue - self._methods.append(name) - if inspect.iscoroutinefunction(method) or inspect.isasyncgenfunction( - method - ): - self._async_methods.add(name) + self._methods, self._async_methods = _extract_methods(cls) _actor_class_registry[self._class_name] = cls - # If original class was decorated with @ray.remote, expose Ray's .remote() if self._ray_cls is not None: self.remote = self._ray_cls.remote @@ 
-755,34 +314,7 @@ async def spawn( placement: "str | int" = "local", **kwargs, ) -> ActorProxy: - """Create an actor and return its proxy. - - Args: - *args: Positional arguments forwarded to the class constructor. - system: ActorSystem to use. Defaults to the global system - (requires ``await init()`` to have been called first). - name: Optional actor name. When given, ``public`` defaults to True. - public: Whether the actor is cluster-discoverable. - Defaults to True when *name* is set, False otherwise. - placement: Where to place the actor. - - ``"local"`` *(default)*: spawn on the current node. - - ``"remote"``: spawn on a randomly-chosen remote node; - falls back to local if no remote nodes are available. - - ``int``: spawn on the node with that specific node_id. - **kwargs: Keyword arguments forwarded to the class constructor. - - Example:: - - await init() - - @remote - class Counter: - def __init__(self, init=0): self.value = init - def incr(self): self.value += 1; return self.value - - counter = await Counter.spawn(init=10) - result = await counter.incr() - """ + """Create an actor and return its proxy.""" if system is None: from . import get_system @@ -917,37 +449,7 @@ async def resolve( node_id: int | None = None, timeout: float | None = None, ) -> ActorProxy: - """Resolve actor by name, return typed ActorProxy - - Args: - name: Actor name - system: ActorSystem instance, uses global system if not provided - node_id: Target node ID, searches in cluster if not provided - timeout: Seconds to wait for the name to appear (gossip convergence). - None means no wait (error immediately if not found). - - Returns: - ActorProxy: Proxy with method type information - - Example: - @remote - class Counter: - def __init__(self, init=0): self.value = init - async def generate(self, prompt): ... 
# async method, streaming response - - # Node A creates actor - counter = await Counter.spawn(name="my_counter") - - # Node B resolves and calls - counter = await Counter.resolve("my_counter") - - # Call async method, can stream results - result = counter.generate("hello") - async for chunk in result: - print(chunk) - # Or directly await to get final result - final = await counter.generate("hello") - """ + """Resolve actor by name, return typed ActorProxy.""" if system is None: from . import get_system @@ -965,21 +467,7 @@ def remote( min_backoff: float = 0.1, max_backoff: float = 30.0, ) -> ActorClass: - """@remote decorator - - Converts a regular class into a distributed deployable Actor. - - Supports supervision configuration: - - restart_policy: "never" (default), "always", "on-failure" - - max_restarts: maximum number of restarts (default: 3) - - min_backoff: minimum backoff in seconds (default: 0.1) - - max_backoff: maximum backoff in seconds (default: 30.0) - - Example: - @remote(restart_policy="on-failure", max_restarts=5) - class Counter: - ... - """ + """@remote decorator — converts a regular class into a distributed Actor.""" def wrapper(cls): return ActorClass( @@ -997,185 +485,10 @@ def wrapper(cls): # ============================================================================ -# System operation helper functions (calls Rust SystemActor) +# resolve() — top-level name resolution # ============================================================================ -class SystemActorProxy: - """Proxy for SystemActor with direct method calls. 
- - Example: - system_proxy = await get_system_actor(system) - actors = await system_proxy.list_actors() - metrics = await system_proxy.get_metrics() - await system_proxy.ping() - """ - - def __init__(self, actor_ref: ActorRef): - self._ref = actor_ref - - @property - def ref(self) -> ActorRef: - """Get underlying ActorRef.""" - return self._ref - - async def _ask(self, msg_type: str) -> dict: - """Send SystemMessage and return response.""" - resp = await self._ref.ask( - Message.from_json("SystemMessage", {"type": msg_type}), - ) - return resp.to_json() - - async def list_actors(self) -> list[dict]: - """List all actors on this node.""" - data = await self._ask("ListActors") - if data.get("type") == "Error": - # System error: system message failed - raise PulsingRuntimeError(data.get("message")) - return data.get("actors", []) - - async def get_metrics(self) -> dict: - """Get system metrics.""" - return await self._ask("GetMetrics") - - async def get_node_info(self) -> dict: - """Get node info.""" - return await self._ask("GetNodeInfo") - - async def health_check(self) -> dict: - """Health check.""" - return await self._ask("HealthCheck") - - async def ping(self) -> dict: - """Ping this node.""" - return await self._ask("Ping") - - -async def get_system_actor( - system: ActorSystem, node_id: int | None = None -) -> SystemActorProxy: - """Get SystemActorProxy for direct method calls. - - Args: - system: ActorSystem instance - node_id: Target node ID (None means local node) - - Returns: - SystemActorProxy with methods: list_actors(), get_metrics(), etc. - - Example: - sys = await get_system_actor(system) - actors = await sys.list_actors() - await sys.ping() - """ - if node_id is None: - actor_ref = await system.system() - else: - actor_ref = await system.remote_system(node_id) - return SystemActorProxy(actor_ref) - - -class PythonActorServiceProxy: - """Proxy for PythonActorService with direct method calls. 
- - Example: - service = await get_python_actor_service(system) - classes = await service.list_registry() - actor_ref = await service.create_actor("MyClass", name="my_actor") - """ - - def __init__(self, actor_ref: ActorRef): - self._ref = actor_ref - - @property - def ref(self) -> ActorRef: - """Get underlying ActorRef.""" - return self._ref - - async def list_registry(self) -> list[str]: - """List registered actor classes. - - Returns: - List of registered class names - """ - resp = await self._ref.ask(Message.from_json("ListRegistry", {})) - data = resp.to_json() - return data.get("classes", []) - - async def create_actor( - self, - class_name: str, - *args, - name: str | None = None, - public: bool = True, - restart_policy: str = "never", - max_restarts: int = 3, - min_backoff: float = 0.1, - max_backoff: float = 30.0, - **kwargs, - ) -> dict: - """Create a Python actor. - - Args: - class_name: Name of the registered actor class - *args: Positional arguments for the class constructor - name: Optional actor name - public: Whether the actor should be publicly resolvable - restart_policy: "never", "always", or "on_failure" - max_restarts: Maximum restart attempts - min_backoff: Minimum backoff time in seconds - max_backoff: Maximum backoff time in seconds - **kwargs: Keyword arguments for the class constructor - - Returns: - {"actor_id": "...", "node_id": "...", "actor_name": "..."} - - Raises: - RuntimeError: If creation fails - """ - resp = await self._ref.ask( - Message.from_json( - "CreateActor", - { - "class_name": class_name, - "actor_name": name, - "args": args, - "kwargs": kwargs, - "public": public, - "restart_policy": restart_policy, - "max_restarts": max_restarts, - "min_backoff": min_backoff, - "max_backoff": max_backoff, - }, - ), - ) - data = resp.to_json() - if resp.msg_type == "Error" or data.get("error"): - # System error: actor creation failed - raise PulsingRuntimeError(data.get("error", "Unknown error")) - return data - - -async def 
get_python_actor_service( - system: ActorSystem, node_id: int | None = None -) -> PythonActorServiceProxy: - """Get PythonActorServiceProxy for direct method calls. - - Args: - system: ActorSystem instance - node_id: Target node ID (None means local node) - - Returns: - PythonActorServiceProxy with methods: list_registry(), create_actor() - - Example: - service = await get_python_actor_service(system) - classes = await service.list_registry() - """ - service_ref = await system.resolve_named(PYTHON_ACTOR_SERVICE_NAME, node_id=node_id) - return PythonActorServiceProxy(service_ref) - - async def resolve( name: str, *, @@ -1185,37 +498,26 @@ async def resolve( """Resolve a named actor by name. Returns an ActorRef that supports .ask(), .tell(), .as_any(), and .as_type(). - Use .as_any() to get an untyped proxy that forwards any method call. - Use .as_type(Counter) to get a typed proxy with method validation. - - For typed ActorProxy with method calls, use Counter.resolve(name) instead. - - Args: - name: Actor name - node_id: Target node ID, searches in cluster if not provided - timeout: Seconds to wait for the name to appear (gossip convergence). - None means no wait (error immediately if not found). - - Returns: - ActorRef: Actor reference with .as_any() / .as_type() for proxy generation. - - Example: - from pulsing.core import init, remote, resolve - - await init() - - # By name only (no type needed) - ref = await resolve("channel.discord") - proxy = ref.as_any() - await proxy.send_text(chat_id, content) - - # Wait for name to appear (gossip convergence) - ref = await resolve("peer_node", timeout=30) - - # Low-level ask - ref = await resolve("my_counter") - result = await ref.ask({"__call__": "increment", "args": [], "kwargs": {}}) """ from . 
import get_system return await get_system().resolve(name, node_id=node_id, timeout=timeout) + + +# ============================================================================ +# Backward-compatible re-exports +# ============================================================================ +# Many modules do `from pulsing.core.remote import X`; keep them working. + +from .protocol import ( # noqa: E402, F401 + _check_response, + _unwrap_response, +) +from .proxy import _AsyncMethodCall, _MethodCaller # noqa: E402, F401 +from .service import ( # noqa: E402, F401 + PythonActorService, + PythonActorServiceProxy, + SystemActorProxy, + get_python_actor_service, + get_system_actor, +) diff --git a/python/pulsing/core/service.py b/python/pulsing/core/service.py new file mode 100644 index 000000000..2506265df --- /dev/null +++ b/python/pulsing/core/service.py @@ -0,0 +1,222 @@ +"""System-level actor services that communicate with the Rust runtime via Message protocol. + +- PythonActorService: per-node service for remote Python actor creation +- SystemActorProxy / PythonActorServiceProxy: typed proxies for system actors +""" + +import inspect +import logging + +from pulsing._core import ActorRef, ActorSystem, Message +from pulsing.exceptions import PulsingRuntimeError + +from .remote import ( + Actor, + _WrappedActor, + _actor_class_registry, + _extract_methods, + _register_actor_metadata, +) + +logger = logging.getLogger(__name__) + +PYTHON_ACTOR_SERVICE_NAME = "system/python_actor_service" + + +class PythonActorService(Actor): + """Python Actor creation service - one per node, handles Python actor creation requests. + + Note: Rust SystemActor (path "system/core") handles system-level operations, + this service specifically handles Python actor creation. 
+ """ + + def __init__(self, system: ActorSystem): + self.system = system + + async def receive(self, msg: Message) -> Message | None: + data = msg.to_json() + + if msg.msg_type == "CreateActor": + return await self._create_actor(data) + elif msg.msg_type == "ListRegistry": + return Message.from_json( + "Registry", + {"classes": list(_actor_class_registry.keys())}, + ) + return Message.from_json("Error", {"error": f"Unknown: {msg.msg_type}"}) + + async def _create_actor(self, data: dict) -> Message: + class_name = data.get("class_name") + actor_name = data.get("actor_name") + args = data.get("args", []) + kwargs = data.get("kwargs", {}) + public = data.get("public", True) + + restart_policy = data.get("restart_policy", "never") + max_restarts = data.get("max_restarts", 3) + min_backoff = data.get("min_backoff", 0.1) + max_backoff = data.get("max_backoff", 30.0) + + cls = _actor_class_registry.get(class_name) + if cls is None: + return Message.from_json( + "Error", {"error": f"Class '{class_name}' not found"} + ) + + try: + if restart_policy != "never": + + def factory(): + instance = cls(*args, **kwargs) + return _WrappedActor(instance) + + actor_ref = await self.system.spawn( + factory, + name=actor_name, + public=public, + restart_policy=restart_policy, + max_restarts=max_restarts, + min_backoff=min_backoff, + max_backoff=max_backoff, + ) + else: + instance = cls(*args, **kwargs) + actor = _WrappedActor(instance) + actor_ref = await self.system.spawn( + actor, name=actor_name, public=public + ) + + _register_actor_metadata(actor_name, cls) + methods, _ = _extract_methods(cls) + + return Message.from_json( + "Created", + { + "actor_id": str(actor_ref.actor_id.id), + "node_id": str(self.system.node_id.id), + "methods": methods, + }, + ) + except Exception as e: + logger.exception(f"Create actor failed: {e}") + return Message.from_json("Error", {"error": str(e)}) + + +class SystemActorProxy: + """Proxy for SystemActor with direct method calls. 
+ + Example: + system_proxy = await get_system_actor(system) + actors = await system_proxy.list_actors() + metrics = await system_proxy.get_metrics() + await system_proxy.ping() + """ + + def __init__(self, actor_ref: ActorRef): + self._ref = actor_ref + + @property + def ref(self) -> ActorRef: + """Get underlying ActorRef.""" + return self._ref + + async def _ask(self, msg_type: str) -> dict: + """Send SystemMessage and return response.""" + resp = await self._ref.ask( + Message.from_json("SystemMessage", {"type": msg_type}), + ) + return resp.to_json() + + async def list_actors(self) -> list[dict]: + """List all actors on this node.""" + data = await self._ask("ListActors") + if data.get("type") == "Error": + raise PulsingRuntimeError(data.get("message")) + return data.get("actors", []) + + async def get_metrics(self) -> dict: + """Get system metrics.""" + return await self._ask("GetMetrics") + + async def get_node_info(self) -> dict: + """Get node info.""" + return await self._ask("GetNodeInfo") + + async def health_check(self) -> dict: + """Health check.""" + return await self._ask("HealthCheck") + + async def ping(self) -> dict: + """Ping this node.""" + return await self._ask("Ping") + + +async def get_system_actor( + system: ActorSystem, node_id: int | None = None +) -> SystemActorProxy: + """Get SystemActorProxy for direct method calls.""" + if node_id is None: + actor_ref = await system.system() + else: + actor_ref = await system.remote_system(node_id) + return SystemActorProxy(actor_ref) + + +class PythonActorServiceProxy: + """Proxy for PythonActorService with direct method calls.""" + + def __init__(self, actor_ref: ActorRef): + self._ref = actor_ref + + @property + def ref(self) -> ActorRef: + """Get underlying ActorRef.""" + return self._ref + + async def list_registry(self) -> list[str]: + """List registered actor classes.""" + resp = await self._ref.ask(Message.from_json("ListRegistry", {})) + data = resp.to_json() + return data.get("classes", []) + 
+ async def create_actor( + self, + class_name: str, + *args, + name: str | None = None, + public: bool = True, + restart_policy: str = "never", + max_restarts: int = 3, + min_backoff: float = 0.1, + max_backoff: float = 30.0, + **kwargs, + ) -> dict: + """Create a Python actor on a remote node.""" + resp = await self._ref.ask( + Message.from_json( + "CreateActor", + { + "class_name": class_name, + "actor_name": name, + "args": args, + "kwargs": kwargs, + "public": public, + "restart_policy": restart_policy, + "max_restarts": max_restarts, + "min_backoff": min_backoff, + "max_backoff": max_backoff, + }, + ), + ) + data = resp.to_json() + if resp.msg_type == "Error" or data.get("error"): + raise PulsingRuntimeError(data.get("error", "Unknown error")) + return data + + +async def get_python_actor_service( + system: ActorSystem, node_id: int | None = None +) -> PythonActorServiceProxy: + """Get PythonActorServiceProxy for direct method calls.""" + service_ref = await system.resolve_named(PYTHON_ACTOR_SERVICE_NAME, node_id=node_id) + return PythonActorServiceProxy(service_ref) diff --git a/python/pulsing/integrations/__init__.py b/python/pulsing/integrations/__init__.py index 3560c582b..e653774d4 100644 --- a/python/pulsing/integrations/__init__.py +++ b/python/pulsing/integrations/__init__.py @@ -1,5 +1 @@ """Third-party framework integrations.""" - -from . 
import ray_compat - -__all__ = ["ray_compat"] diff --git a/python/pulsing/integrations/autogen/runtime.py b/python/pulsing/integrations/autogen/runtime.py index d345d2dd4..225437240 100644 --- a/python/pulsing/integrations/autogen/runtime.py +++ b/python/pulsing/integrations/autogen/runtime.py @@ -37,12 +37,19 @@ Message, SystemConfig, ) -from pulsing.core.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService +from pulsing.core.service import PYTHON_ACTOR_SERVICE_NAME, PythonActorService logger = logging.getLogger("pulsing.autogen") T = TypeVar("T") +def _parse_agent_id(id_like: Any) -> tuple[str, str]: + """Extract (agent_type, agent_key) from an AutoGen AgentId-like object or string.""" + agent_type = id_like.type if hasattr(id_like, "type") else str(id_like) + agent_key = id_like.key if hasattr(id_like, "key") else "default" + return agent_type, agent_key + + class PulsingRuntime: """AutoGen Compatible Runtime Based on Pulsing @@ -183,9 +190,7 @@ async def send_message( if not self._running: raise RuntimeError("Runtime is not running") - # Parse recipient - agent_type = recipient.type if hasattr(recipient, "type") else str(recipient) - agent_key = recipient.key if hasattr(recipient, "key") else "default" + agent_type, agent_key = _parse_agent_id(recipient) full_key = f"{agent_type}/{agent_key}" # Ensure Agent is created @@ -340,7 +345,7 @@ async def register_factory( expected_class: Expected Agent type (for validation) eager: Whether to immediately create Agent instance (True required for distributed mode) """ - agent_type = type.type if hasattr(type, "type") else str(type) + agent_type, _ = _parse_agent_id(type) if agent_type in self._agent_factories: raise ValueError(f"Agent type '{agent_type}' already registered") @@ -366,8 +371,7 @@ async def register_agent_instance( agent_id: Any, # AgentId ) -> Any: # AgentId """Register Agent instance""" - agent_type = agent_id.type if hasattr(agent_id, "type") else str(agent_id) - agent_key = agent_id.key if 
hasattr(agent_id, "key") else "default" + agent_type, agent_key = _parse_agent_id(agent_id) full_key = f"{agent_type}/{agent_key}" if full_key in self._instantiated_agents: @@ -425,10 +429,9 @@ async def get( ) -> Any: # AgentId """Get Agent ID""" if hasattr(id_or_type, "type") and hasattr(id_or_type, "key"): - # Already an AgentId return id_or_type - agent_type = id_or_type.type if hasattr(id_or_type, "type") else str(id_or_type) + agent_type, _ = _parse_agent_id(id_or_type) if not lazy: await self._ensure_agent(agent_type, key) @@ -446,8 +449,7 @@ async def try_get_underlying_agent_instance( type: Type[T] = object, # type: ignore ) -> T: """Get underlying Agent instance""" - agent_type = id.type if hasattr(id, "type") else str(id) - agent_key = id.key if hasattr(id, "key") else "default" + agent_type, agent_key = _parse_agent_id(id) full_key = f"{agent_type}/{agent_key}" if full_key not in self._instantiated_agents: diff --git a/python/pulsing/integrations/langgraph/executor.py b/python/pulsing/integrations/langgraph/executor.py index 36b26689e..0341aa344 100644 --- a/python/pulsing/integrations/langgraph/executor.py +++ b/python/pulsing/integrations/langgraph/executor.py @@ -16,7 +16,7 @@ from typing import Any, Callable, Dict from pulsing.core import Actor, ActorId, ActorRef, ActorSystem, SystemConfig -from pulsing.core.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService +from pulsing.core.service import PYTHON_ACTOR_SERVICE_NAME, PythonActorService logger = logging.getLogger("pulsing.langgraph") diff --git a/python/pulsing/integrations/langgraph/wrapper.py b/python/pulsing/integrations/langgraph/wrapper.py index faac82954..e3c3f1625 100644 --- a/python/pulsing/integrations/langgraph/wrapper.py +++ b/python/pulsing/integrations/langgraph/wrapper.py @@ -9,7 +9,7 @@ from typing import Any, AsyncIterator, Dict, Optional, Union from pulsing.core import ActorSystem, SystemConfig -from pulsing.core.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService 
+from pulsing.core.service import PYTHON_ACTOR_SERVICE_NAME, PythonActorService from .executor import NodeExecutorPool logger = logging.getLogger("pulsing.langgraph") diff --git a/python/pulsing/integrations/ray_compat.py b/python/pulsing/integrations/ray_compat.py deleted file mode 100644 index 7d1d0ea17..000000000 --- a/python/pulsing/integrations/ray_compat.py +++ /dev/null @@ -1,372 +0,0 @@ -""" -Ray-compatible API for Pulsing - -This module provides a Ray-like synchronous API for easy migration. -For new projects, we recommend using the native async API in pulsing.core. - -Migration from Ray: - # Before (Ray) - import ray - ray.init() - - @ray.remote - class Counter: - def __init__(self, init=0): self.value = init - def incr(self): self.value += 1; return self.value - - counter = Counter.remote(init=10) - result = ray.get(counter.incr.remote()) - ray.shutdown() - - # After (Pulsing compat) - from pulsing.integrations.ray_compat import ray # Only change this line! - - ray.init() - - @ray.remote - class Counter: - def __init__(self, init=0): self.value = init - def incr(self): self.value += 1; return self.value - - counter = Counter.remote(init=10) - result = ray.get(counter.incr.remote()) - ray.shutdown() - -Note: This is a synchronous wrapper around async Pulsing. -For better performance in async environments, use pulsing.core directly. -""" - -import asyncio -import concurrent.futures -import inspect -import threading -from typing import Any, TypeVar - -T = TypeVar("T") - -# Global state -_system = None -_loop = None -_thread = None -_loop_ready = None - - -def _ensure_not_initialized(ignore_reinit_error: bool) -> None: - global _system - if _system is not None: - if ignore_reinit_error: - return - raise RuntimeError("Already initialized. Call ray.shutdown() first.") - - -def _start_background_loop() -> None: - """Start a dedicated event loop in a background thread. - - This is required when the caller is already inside a running event loop. 
- """ - global _thread, _loop, _loop_ready - if _thread is not None: - return - - ready = threading.Event() - _loop_ready = ready - - def _thread_main(): - global _loop - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - _loop = loop - ready.set() - loop.run_forever() - try: - pending = asyncio.all_tasks(loop) - for task in pending: - task.cancel() - if pending: - loop.run_until_complete( - asyncio.gather(*pending, return_exceptions=True) - ) - finally: - loop.close() - - t = threading.Thread( - target=_thread_main, name="pulsing-compat-ray-loop", daemon=True - ) - _thread = t - t.start() - ready.wait() - - -def _run_coro_sync(coro: Any, timeout=None) -> Any: - """Run a coroutine to completion and return its result. - - - If we have a background loop thread: schedule via run_coroutine_threadsafe(). - - Otherwise: run on the local event loop with run_until_complete(). - """ - if _loop is None: - raise RuntimeError("Not initialized. Call ray.init() first.") - - if _thread is not None: - fut = asyncio.run_coroutine_threadsafe(coro, _loop) - try: - return fut.result(timeout=timeout) - except concurrent.futures.TimeoutError as e: - raise TimeoutError("Timed out waiting for result") from e - else: - # Local (non-running) loop only - return _loop.run_until_complete(coro) - - -class ObjectRef: - """Ray-compatible ObjectRef (wraps async coroutine)""" - - def __init__(self, coro_or_result: Any, is_ready: bool = False): - self._coro = coro_or_result - self._result = coro_or_result if is_ready else None - self._is_ready = is_ready - - def _get_sync(self, timeout: float = None) -> Any: - """Get result synchronously""" - if self._is_ready: - return self._result - - async def _get(): - return await self._coro - - if timeout is not None: - coro = asyncio.wait_for(_get(), timeout) - else: - coro = _get() - - self._result = _run_coro_sync(coro, timeout=timeout) - self._is_ready = True - return self._result - - -class _MethodCaller: - """Method caller that returns 
ObjectRef""" - - def __init__(self, proxy, method_name: str): - self._proxy = proxy - self._method = method_name - - def remote(self, *args, **kwargs) -> ObjectRef: - """Call method remotely (Ray-style)""" - method = getattr(self._proxy, self._method) - coro = method(*args, **kwargs) - return ObjectRef(coro) - - -class _ActorHandle: - """Ray-compatible actor handle""" - - def __init__(self, proxy, methods: list[str]): - self._proxy = proxy - self._methods = set(methods) - - def __getattr__(self, name: str) -> _MethodCaller: - if name.startswith("_"): - raise AttributeError(name) - if name not in self._methods: - raise AttributeError(f"No method '{name}'") - return _MethodCaller(self._proxy, name) - - -class _ActorClass: - """Ray-compatible actor class wrapper""" - - def __init__(self, cls: type): - self._cls = cls - self._pulsing_class = None - self._methods = [ - n - for n, _ in inspect.getmembers(cls, predicate=inspect.isfunction) - if not n.startswith("_") - ] - - def _ensure_wrapped(self): - if self._pulsing_class is None: - from pulsing.core import remote - - self._pulsing_class = remote(self._cls) - - def remote(self, *args, **kwargs) -> _ActorHandle: - """Create actor (Ray-style, synchronous)""" - if _system is None: - raise RuntimeError("Not initialized. Call ray.init() first.") - - self._ensure_wrapped() - - async def create(): - proxy = await self._pulsing_class.spawn(*args, system=_system, **kwargs) - return _ActorHandle(proxy, self._methods) - - return _run_coro_sync(create()) - - def options(self, **kwargs) -> "_ActorClass": - """Set actor options (Ray compatibility, limited support)""" - # TODO: Support num_cpus, num_gpus, etc. 
- return self - - def __call__(self, *args, **kwargs): - """Direct instantiation (not as actor)""" - return self._cls(*args, **kwargs) - - -def init( - address: str = None, - *, - ignore_reinit_error: bool = False, - **kwargs, -) -> None: - """Initialize Pulsing (Ray-compatible) - - Args: - address: Ignored (use SystemConfig for Pulsing configuration) - ignore_reinit_error: If True, ignore if already initialized - - Example: - from pulsing.integrations.ray_compat import ray - ray.init() - """ - global _system, _loop - - _ensure_not_initialized(ignore_reinit_error) - - from pulsing.core import ActorSystem, SystemConfig - from pulsing.core.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService - - # If we're already inside a running event loop (e.g., Jupyter/pytest-asyncio), - # we must not call run_until_complete() on it. Use a dedicated background loop. - in_running_loop = True - try: - asyncio.get_running_loop() - except RuntimeError: - in_running_loop = False - - if in_running_loop: - _start_background_loop() - else: - _loop = asyncio.new_event_loop() - asyncio.set_event_loop(_loop) - - async def _create_system(): - system = await ActorSystem.create(SystemConfig.standalone(), _loop) - service = PythonActorService(system) - await system.spawn(service, name=PYTHON_ACTOR_SERVICE_NAME, public=True) - return system - - _system = _run_coro_sync(_create_system()) - - -def shutdown() -> None: - """Shutdown Pulsing (Ray-compatible)""" - global _system, _loop, _thread, _loop_ready - - if _system is not None: - try: - _run_coro_sync(_system.shutdown()) - except Exception: - pass - _system = None - if _thread is not None and _loop is not None: - try: - _loop.call_soon_threadsafe(_loop.stop) - except Exception: - pass - try: - _thread.join(timeout=2.0) - except Exception: - pass - _thread = None - _loop_ready = None - _loop = None - else: - _loop = None - - -def is_initialized() -> bool: - """Check if initialized""" - return _system is not None - - -def remote(cls: 
type[T]) -> _ActorClass: - """@ray.remote decorator (Ray-compatible) - - Example: - @ray.remote - class Counter: - def __init__(self, init=0): self.value = init - def incr(self): self.value += 1; return self.value - - counter = Counter.remote(init=10) - """ - return _ActorClass(cls) - - -def get(refs: Any, *, timeout: float = None) -> Any: - """Get results from ObjectRefs (Ray-compatible) - - Args: - refs: Single ObjectRef or list of ObjectRefs - timeout: Timeout in seconds - - Example: - result = ray.get(counter.incr.remote()) - results = ray.get([ref1, ref2, ref3]) - """ - if _system is None: - raise RuntimeError("Not initialized. Call ray.init() first.") - - if isinstance(refs, list): - return [r._get_sync(timeout) for r in refs] - return refs._get_sync(timeout) - - -def put(value: Any) -> ObjectRef: - """Put value (Ray-compatible) - - Note: Pulsing doesn't have distributed object store. - This just wraps the value for API compatibility. - """ - return ObjectRef(value, is_ready=True) - - -def wait( - refs: list, - *, - num_returns: int = 1, - timeout: float = None, -) -> tuple[list, list]: - """Wait for ObjectRefs (Ray-compatible) - - Returns: - (ready, remaining) tuple - """ - ready, remaining = [], list(refs) - for ref in refs[:num_returns]: - try: - get(ref, timeout=timeout) - ready.append(ref) - remaining.remove(ref) - except Exception: - break - return ready, remaining - - -import sys - -# Self-reference so that "from pulsing.integrations.ray_compat import ray" works -ray = sys.modules[__name__] - -__all__ = [ - "init", - "shutdown", - "is_initialized", - "remote", - "get", - "put", - "wait", - "ObjectRef", - "ray", -] diff --git a/python/pulsing/serving/router.py b/python/pulsing/serving/router.py index 0399069a7..c046a422a 100644 --- a/python/pulsing/serving/router.py +++ b/python/pulsing/serving/router.py @@ -314,28 +314,27 @@ async def _send(data: dict): return stream_response +def _create_app(system: ActorSystem, model_name: str, scheduler) -> 
web.Application: + """Build the aiohttp Application with OpenAI-compatible routes.""" + handler = _OpenAIHandler(system, model_name, scheduler) + app = web.Application() + app.router.add_get("/", handler.index) + app.router.add_get("/health", handler.health_check) + app.router.add_get("/v1/models", handler.list_models) + app.router.add_post("/v1/chat/completions", handler.chat_completions) + app.router.add_post("/v1/completions", handler.completions) + app["scheduler"] = scheduler + return app + + def _build_scheduler(system: ActorSystem, worker_name: str, scheduler_type: str): """Create a Scheduler instance from scheduler_type string.""" from .load_stream import StreamLoadScheduler - from .scheduler import ( - RUST_POLICIES_AVAILABLE, - RandomScheduler, - RoundRobinScheduler, - RustCacheAwareScheduler, - RustPowerOfTwoScheduler, - ) - - scheduler_map = { - "stream_load": StreamLoadScheduler, - "random": RandomScheduler, - "round_robin": RoundRobinScheduler, - } - if RUST_POLICIES_AVAILABLE: - scheduler_map["power_of_two"] = RustPowerOfTwoScheduler - scheduler_map["cache_aware"] = RustCacheAwareScheduler - - cls = scheduler_map.get(scheduler_type, StreamLoadScheduler) - return cls(system, worker_name) + from .scheduler import get_scheduler + + if scheduler_type == "stream_load": + return StreamLoadScheduler(system, worker_name) + return get_scheduler(scheduler_type, system, worker_name) @remote @@ -383,14 +382,7 @@ async def on_start(self, actor_id: ActorId) -> None: ) await self._scheduler.start() - handler = _OpenAIHandler(system, self.model_name, self._scheduler) - app = web.Application() - app.router.add_get("/", handler.index) - app.router.add_get("/health", handler.health_check) - app.router.add_get("/v1/models", handler.list_models) - app.router.add_post("/v1/chat/completions", handler.chat_completions) - app.router.add_post("/v1/completions", handler.completions) - + app = _create_app(system, self.model_name, self._scheduler) self._runner = 
web.AppRunner(app) await self._runner.setup() await web.TCPSite(self._runner, self.http_host, self.http_port).start() @@ -448,7 +440,6 @@ async def start_router( worker_name: str = "worker", scheduler_type: str = "stream_load", scheduler=None, - scheduler_class=None, ) -> web.AppRunner: """Start an OpenAI-compatible HTTP server without creating a Router actor. @@ -458,22 +449,12 @@ async def start_router( Returns: ``web.AppRunner`` — pass to ``stop_router()`` for cleanup. """ - if scheduler_class is not None and scheduler is None: - scheduler = scheduler_class(system, worker_name) if scheduler is None: scheduler = _build_scheduler(system, worker_name, scheduler_type) await scheduler.start() - handler = _OpenAIHandler(system, model_name, scheduler) - app = web.Application() - app.router.add_get("/", handler.index) - app.router.add_get("/health", handler.health_check) - app.router.add_get("/v1/models", handler.list_models) - app.router.add_post("/v1/chat/completions", handler.chat_completions) - app.router.add_post("/v1/completions", handler.completions) - app["scheduler"] = scheduler - + app = _create_app(system, model_name, scheduler) runner = web.AppRunner(app) await runner.setup() await web.TCPSite(runner, http_host, http_port).start() diff --git a/python/pulsing/serving/scheduler.py b/python/pulsing/serving/scheduler.py index a7bb760e9..7d528b779 100644 --- a/python/pulsing/serving/scheduler.py +++ b/python/pulsing/serving/scheduler.py @@ -1,16 +1,14 @@ """Worker scheduler - Load balancing strategies -Supports the following scheduling strategies: -- RandomScheduler: Random selection (Python implementation) -- RoundRobinScheduler: Round-robin selection (Python implementation) -- LeastConnectionScheduler: Least connections (Python implementation) -- RustRandomScheduler: Random selection (Rust implementation, high performance) -- RustRoundRobinScheduler: Round-robin selection (Rust implementation) -- RustPowerOfTwoScheduler: Power-of-Two Choices (Rust 
implementation) -- RustConsistentHashScheduler: Consistent hashing (Rust implementation, supports session affinity) -- RustCacheAwareScheduler: Cache-aware routing (Rust implementation, supports Radix Tree prefix matching) - -For load-aware scheduling, recommend using StreamLoadScheduler from load_stream module +Supports the following scheduling strategies (all Rust-implemented): +- RustRandomScheduler: Random selection +- RustRoundRobinScheduler: Round-robin selection +- RustPowerOfTwoScheduler: Power-of-Two Choices +- RustConsistentHashScheduler: Consistent hashing (session affinity) +- RustCacheAwareScheduler: Cache-aware routing (Radix Tree prefix matching) +- LeastConnectionScheduler: Least connections (Python) + +For load-aware scheduling, use StreamLoadScheduler from load_stream module. """ import asyncio @@ -101,46 +99,6 @@ async def select_worker( # ============================================================================ -class RoundRobinScheduler(Scheduler): - """Round-robin scheduler (Python implementation)""" - - def __init__(self, actor_system, worker_name: str = "worker"): - super().__init__(actor_system, worker_name) - self._index = 0 - - async def select_worker( - self, - request_text: str | None = None, - headers: dict[str, str] | None = None, - ): - workers = await self.get_available_workers() - if not workers: - return None - - async with self._lock: - self._index = (self._index + 1) % len(workers) - selected_worker = workers[self._index] - return await pulsing.refer(selected_worker.get("actor_id")) - - -class RandomScheduler(Scheduler): - """Random scheduler (Python implementation)""" - - async def select_worker( - self, - request_text: str | None = None, - headers: dict[str, str] | None = None, - ): - import random - - workers = await self.get_available_workers() - if not workers: - return None - - selected_worker = random.choice(workers) - return await pulsing.refer(selected_worker.get("actor_id")) - - class 
LeastConnectionScheduler(Scheduler): """Least connections scheduler (Python implementation)""" @@ -320,14 +278,12 @@ def get_scheduler( Args: policy_name: Policy name, supports: - - "random": Random (Rust implementation) - - "round_robin": Round robin (Rust implementation) - - "power_of_two": Power-of-Two Choices (Rust implementation) - - "consistent_hash": Consistent hash (Rust implementation) - - "cache_aware": Cache-aware (Rust implementation) - - "py_random": Random (Python implementation) - - "py_round_robin": Round robin (Python implementation) - - "least_connection": Least connections (Python implementation) + - "random": Random + - "round_robin": Round robin + - "power_of_two": Power-of-Two Choices + - "consistent_hash": Consistent hash + - "cache_aware": Cache-aware + - "least_connection": Least connections (Python) actor_system: Actor system instance worker_name: Worker actor name **kwargs: Policy-specific parameters (e.g., cache_threshold, etc.) @@ -347,17 +303,11 @@ def get_scheduler( scheduler = get_scheduler("round_robin", actor_system, "worker") """ policy_map = { - # Rust implementation (recommended) - "random": RustRandomScheduler if RUST_POLICIES_AVAILABLE else RandomScheduler, - "round_robin": ( - RustRoundRobinScheduler if RUST_POLICIES_AVAILABLE else RoundRobinScheduler - ), + "random": RustRandomScheduler, + "round_robin": RustRoundRobinScheduler, "power_of_two": RustPowerOfTwoScheduler, "consistent_hash": RustConsistentHashScheduler, "cache_aware": RustCacheAwareScheduler, - # Python implementation - "py_random": RandomScheduler, - "py_round_robin": RoundRobinScheduler, "least_connection": LeastConnectionScheduler, } @@ -376,20 +326,13 @@ def get_scheduler( # Exports __all__ = [ - # Base class "Scheduler", - # Python schedulers - "RandomScheduler", - "RoundRobinScheduler", "LeastConnectionScheduler", - # Rust schedulers "RustRandomScheduler", "RustRoundRobinScheduler", "RustPowerOfTwoScheduler", "RustConsistentHashScheduler", 
"RustCacheAwareScheduler", - # Factory "get_scheduler", - # Constants "RUST_POLICIES_AVAILABLE", ] diff --git a/python/pulsing/streaming/backend.py b/python/pulsing/streaming/backend.py index c4cfa59f6..771eb21a9 100644 --- a/python/pulsing/streaming/backend.py +++ b/python/pulsing/streaming/backend.py @@ -28,6 +28,31 @@ logger = logging.getLogger(__name__) +def build_batch_meta( + sampled: list[int], fields: list[str], partition_id: str = "default" +) -> dict: + """Build the standard batch-meta dict used by get_meta implementations.""" + return { + "samples": [ + { + "partition_id": partition_id, + "global_index": idx, + "fields": { + f: { + "name": f, + "dtype": None, + "shape": None, + "production_status": "ready", + } + for f in fields + }, + } + for idx in sampled + ], + "global_indexes": sampled, + } + + @runtime_checkable class StorageBackend(Protocol): """Core Storage Backend Protocol. @@ -260,25 +285,9 @@ async def get_meta( sampled, _ = sampler.sample(ready, batch_size, **sampling_kwargs) else: sampled = ready[:batch_size] - return { - "samples": [ - { - "partition_id": sampling_kwargs.get("partition_id", "default"), - "global_index": idx, - "fields": { - field: { - "name": field, - "dtype": None, - "shape": None, - "production_status": "ready", - } - for field in fields - }, - } - for idx in sampled - ], - "global_indexes": sampled, - } + return build_batch_meta( + sampled, fields, sampling_kwargs.get("partition_id", "default") + ) # ---- ConsumptionBackend methods ---- diff --git a/python/pulsing/streaming/manager.py b/python/pulsing/streaming/manager.py index ff1cabdfb..bbe1d3130 100644 --- a/python/pulsing/streaming/manager.py +++ b/python/pulsing/streaming/manager.py @@ -375,6 +375,45 @@ async def _get_remote_manager( ) from last_exc +async def _follow_redirects( + system: ActorSystem, + fetch_fn, + resolve_fn, + resource_name: str, + max_redirects: int = 3, +) -> "ActorProxy": + """Follow redirects from StorageManager until the resource is ready. 
+ + Args: + fetch_fn: async (manager_proxy) -> resp_data dict + resolve_fn: async () -> ActorProxy (called when resource is ready) + resource_name: human-readable name for error messages + """ + manager = await get_storage_manager(system) + local_id = str(system.node_id.id) + + for attempt in range(max_redirects + 1): + resp = await fetch_fn(manager) + msg_type = resp.get("_type", "") + + if msg_type in ("BucketReady", "TopicReady"): + return await resolve_fn() + + if msg_type == "Redirect": + owner = str(resp.get("owner_node_id")) + if attempt >= max_redirects: + raise RuntimeError(f"Too many redirects for {resource_name}") + if owner == local_id: + raise RuntimeError(f"Redirect loop for {resource_name}") + logger.debug(f"Redirecting {resource_name} to node {owner}") + manager = await _get_remote_manager(system, owner) + continue + + raise RuntimeError(f"Unexpected response type: {msg_type}") + + raise RuntimeError(f"Failed to get {resource_name}") + + async def get_bucket_ref( system: ActorSystem, topic: str, @@ -386,44 +425,23 @@ async def get_bucket_ref( max_redirects: int = 3, ) -> "ActorProxy": """Get ActorProxy for the specified bucket, following redirects automatically.""" - manager = await get_storage_manager(system) backend_name = ( (backend if isinstance(backend, str) else backend.__name__) if backend else None ) - - for redirect_count in range(max_redirects + 1): - resp_data = await manager.get_bucket( + return await _follow_redirects( + system, + lambda mgr: mgr.get_bucket( topic=topic, bucket_id=bucket_id, batch_size=batch_size, storage_path=storage_path, backend=backend_name, backend_options=backend_options, - ) - msg_type = resp_data.get("_type", "") - - if msg_type == "BucketReady": - return await BucketStorage.resolve( - f"bucket_{topic}_{bucket_id}", system=system - ) - - if msg_type == "Redirect": - owner_node_id_str = str(resp_data.get("owner_node_id")) - if redirect_count >= max_redirects: - raise RuntimeError(f"Too many redirects for bucket 
{topic}:{bucket_id}") - if owner_node_id_str == str(system.node_id.id): - raise RuntimeError( - f"Redirect loop detected for bucket {topic}:{bucket_id}" - ) - logger.debug( - f"Redirecting bucket {topic}:{bucket_id} to node {owner_node_id_str}" - ) - manager = await _get_remote_manager(system, owner_node_id_str) - continue - - raise RuntimeError(f"Unexpected response type: {msg_type}") - - raise RuntimeError(f"Failed to get bucket {topic}:{bucket_id}") + ), + lambda: BucketStorage.resolve(f"bucket_{topic}_{bucket_id}", system=system), + f"bucket {topic}:{bucket_id}", + max_redirects, + ) async def get_topic_broker( @@ -434,25 +452,10 @@ async def get_topic_broker( """Get broker ActorProxy for the specified topic, following redirects automatically.""" from pulsing.streaming.broker import TopicBroker - manager = await get_storage_manager(system) - - for redirect_count in range(max_redirects + 1): - resp_data = await manager.get_topic(topic=topic) - msg_type = resp_data.get("_type", "") - - if msg_type == "TopicReady": - return await TopicBroker.resolve(f"_topic_broker_{topic}", system=system) - - if msg_type == "Redirect": - owner_node_id_str = str(resp_data["owner_node_id"]) - if redirect_count >= max_redirects: - raise RuntimeError(f"Too many redirects for topic: {topic}") - if owner_node_id_str == str(system.node_id.id): - raise RuntimeError(f"Redirect loop for topic: {topic}") - logger.debug(f"Redirecting topic {topic} to node {owner_node_id_str}") - manager = await _get_remote_manager(system, owner_node_id_str) - continue - - raise RuntimeError(f"Unexpected response type: {msg_type}") - - raise RuntimeError(f"Failed to get topic broker: {topic}") + return await _follow_redirects( + system, + lambda mgr: mgr.get_topic(topic=topic), + lambda: TopicBroker.resolve(f"_topic_broker_{topic}", system=system), + f"topic {topic}", + max_redirects, + ) diff --git a/python/pulsing/streaming/storage.py b/python/pulsing/streaming/storage.py index ac73206df..fcf5964db 100644 
--- a/python/pulsing/streaming/storage.py +++ b/python/pulsing/streaming/storage.py @@ -10,6 +10,7 @@ ConsumptionBackend, StorageBackend, TensorBackend, + build_batch_meta, get_backend_class, ) @@ -215,25 +216,9 @@ async def get_meta( sampled = ready[:batch_size] marked = sampled consumed.update(marked) - return { - "samples": [ - { - "partition_id": sampling_kwargs.get("partition_id", "default"), - "global_index": idx, - "fields": { - f: { - "name": f, - "dtype": None, - "shape": None, - "production_status": "ready", - } - for f in fields - }, - } - for idx in sampled - ], - "global_indexes": sampled, - } + return build_batch_meta( + sampled, fields, sampling_kwargs.get("partition_id", "default") + ) async def get_data(self, batch_meta: dict, fields: list[str] | None = None) -> Any: if self._tensor_backend is not None: diff --git a/tests/python/apis/ray_compat/__init__.py b/tests/python/apis/ray_compat/__init__.py deleted file mode 100644 index 10fe11708..000000000 --- a/tests/python/apis/ray_compat/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Ray Compatible API Tests diff --git a/tests/python/apis/ray_compat/test_ray_compat_api.py b/tests/python/apis/ray_compat/test_ray_compat_api.py deleted file mode 100644 index b467bd849..000000000 --- a/tests/python/apis/ray_compat/test_ray_compat_api.py +++ /dev/null @@ -1,284 +0,0 @@ -""" -Tests for Ray Compatible API (llms.binding.md) - -Covers: -- ray.init() and ray.shutdown() -- ray.is_initialized() -- @ray.remote decorator -- MyActor.remote() -> _ActorHandle -- actor_handle.method.remote() -> ObjectRef -- ray.get() single and list -- ray.put() -- ray.wait() -""" - -import pytest -import time - -from pulsing.integrations.ray_compat import ray - - -# ============================================================================ -# Fixtures -# ============================================================================ - - -@pytest.fixture -def initialized_ray(): - """Initialize ray-compatible system for testing.""" - 
ray.init() - yield - ray.shutdown() - - -# ============================================================================ -# Test: ray.init() and ray.shutdown() -# ============================================================================ - - -def test_init_and_shutdown(): - """Test ray.init() and ray.shutdown().""" - ray.init() - assert ray.is_initialized() - ray.shutdown() - assert not ray.is_initialized() - - -def test_init_ignore_reinit_error(): - """Test ray.init(ignore_reinit_error=True).""" - ray.init() - # Should not raise - ray.init(ignore_reinit_error=True) - ray.shutdown() - - -def test_init_raises_on_reinit(): - """Test ray.init() raises if already initialized.""" - ray.init() - try: - with pytest.raises(RuntimeError): - ray.init() - finally: - ray.shutdown() - - -# ============================================================================ -# Test: ray.is_initialized() -# ============================================================================ - - -def test_is_initialized_false(): - """Test ray.is_initialized() returns False when not initialized.""" - assert not ray.is_initialized() - - -def test_is_initialized_true(initialized_ray): - """Test ray.is_initialized() returns True when initialized.""" - assert ray.is_initialized() - - -# ============================================================================ -# Test: @ray.remote decorator -# ============================================================================ - - -@ray.remote -class Counter: - """Counter actor for testing.""" - - def __init__(self, init=0): - self.value = init - - def incr(self): - self.value += 1 - return self.value - - def decr(self): - self.value -= 1 - return self.value - - def get(self): - return self.value - - def add(self, n): - self.value += n - return self.value - - -def test_remote_decorator_class(initialized_ray): - """Test @ray.remote decorator creates actor class wrapper.""" - # Counter should have .remote() method - assert hasattr(Counter, "remote") - - -def 
test_remote_actor_creation(initialized_ray): - """Test MyActor.remote() creates actor handle.""" - handle = Counter.remote(init=10) - assert handle is not None - - -# ============================================================================ -# Test: actor_handle.method.remote() -> ObjectRef -# ============================================================================ - - -def test_method_remote_returns_objectref(initialized_ray): - """Test actor_handle.method.remote() returns ObjectRef.""" - handle = Counter.remote(init=0) - ref = handle.incr.remote() - assert ref is not None - # ObjectRef should have _get_sync method - assert hasattr(ref, "_get_sync") - - -def test_method_with_args(initialized_ray): - """Test calling method with arguments.""" - handle = Counter.remote(init=0) - ref = handle.add.remote(5) - result = ray.get(ref) - assert result == 5 - - -# ============================================================================ -# Test: ray.get() - single and list -# ============================================================================ - - -def test_get_single(initialized_ray): - """Test ray.get() with single ObjectRef.""" - handle = Counter.remote(init=100) - ref = handle.get.remote() - result = ray.get(ref) - assert result == 100 - - -def test_get_list(initialized_ray): - """Test ray.get() with list of ObjectRefs.""" - handle = Counter.remote(init=0) - refs = [handle.incr.remote() for _ in range(5)] - results = ray.get(refs) - assert len(results) == 5 - # Last result should be 5 (incremented 5 times) - assert results[-1] == 5 - - -def test_get_with_timeout(initialized_ray): - """Test ray.get() with timeout parameter.""" - handle = Counter.remote(init=0) - ref = handle.get.remote() - result = ray.get(ref, timeout=5.0) - assert result == 0 - - -def test_get_multiple_actors(initialized_ray): - """Test ray.get() with refs from multiple actors.""" - h1 = Counter.remote(init=10) - h2 = Counter.remote(init=20) - h3 = Counter.remote(init=30) - - refs = 
[h1.get.remote(), h2.get.remote(), h3.get.remote()] - results = ray.get(refs) - assert results == [10, 20, 30] - - -# ============================================================================ -# Test: ray.put() -# ============================================================================ - - -def test_put_value(initialized_ray): - """Test ray.put() wraps value as ObjectRef.""" - ref = ray.put(42) - assert ref is not None - result = ray.get(ref) - assert result == 42 - - -def test_put_complex_value(initialized_ray): - """Test ray.put() with complex value.""" - data = {"key": "value", "numbers": [1, 2, 3]} - ref = ray.put(data) - result = ray.get(ref) - assert result == data - - -def test_put_list_of_refs(initialized_ray): - """Test ray.put() and ray.get() with list.""" - refs = [ray.put(i) for i in range(5)] - results = ray.get(refs) - assert results == [0, 1, 2, 3, 4] - - -# ============================================================================ -# Test: ray.wait() -# ============================================================================ - - -def test_wait_basic(initialized_ray): - """Test ray.wait() returns ready and remaining.""" - handle = Counter.remote(init=0) - refs = [handle.incr.remote() for _ in range(3)] - - ready, remaining = ray.wait(refs, num_returns=1) - assert len(ready) >= 1 - assert len(ready) + len(remaining) == 3 - - -def test_wait_num_returns(initialized_ray): - """Test ray.wait() with num_returns parameter.""" - handle = Counter.remote(init=0) - refs = [handle.incr.remote() for _ in range(5)] - - ready, remaining = ray.wait(refs, num_returns=3) - # At most 3 ready (depends on timing) - assert len(ready) <= 3 - - -def test_wait_with_put_refs(initialized_ray): - """Test ray.wait() with ray.put() refs (immediately ready).""" - refs = [ray.put(i) for i in range(5)] - - ready, remaining = ray.wait(refs, num_returns=5) - # put() refs are immediately ready - assert len(ready) == 5 - assert len(remaining) == 0 - - -# 
============================================================================ -# Test: Full workflow -# ============================================================================ - - -def test_full_workflow(initialized_ray): - """Test complete Ray-compatible workflow.""" - # Create actors - c1 = Counter.remote(init=0) - c2 = Counter.remote(init=100) - - # Call methods - refs = [ - c1.incr.remote(), - c1.incr.remote(), - c2.decr.remote(), - c2.get.remote(), - ] - - # Get results - results = ray.get(refs) - assert results[0] == 1 # c1.incr() -> 1 - assert results[1] == 2 # c1.incr() -> 2 - assert results[2] == 99 # c2.decr() -> 99 - assert results[3] == 99 # c2.get() -> 99 (after decr) - - -def test_actor_state_persistence(initialized_ray): - """Test actor maintains state across calls.""" - handle = Counter.remote(init=0) - - for i in range(10): - ref = handle.incr.remote() - result = ray.get(ref) - assert result == i + 1 - - final = ray.get(handle.get.remote()) - assert final == 10 diff --git a/tests/python/core/test_init_coverage.py b/tests/python/core/test_init_coverage.py index ccfd6c56d..16925e700 100644 --- a/tests/python/core/test_init_coverage.py +++ b/tests/python/core/test_init_coverage.py @@ -112,27 +112,3 @@ class BadActor(Actor): pass BadActor() - - -# ============================================================================ -# Module exports -# ============================================================================ - - -class TestModuleExports: - def test_core_exports(self): - from pulsing.core import ( - ActorClass, - ActorProxy, - PulsingError, - PulsingRuntimeError, - PulsingActorError, - remote, - resolve, - mount, - unmount, - ) - - assert ActorClass is not None - assert ActorProxy is not None - assert remote is not None diff --git a/tests/python/core/test_remote_edge_cases.py b/tests/python/core/test_remote_edge_cases.py index e74329e68..6f0fec30d 100644 --- a/tests/python/core/test_remote_edge_cases.py +++ 
b/tests/python/core/test_remote_edge_cases.py @@ -265,10 +265,9 @@ async def test_protocol_call_format(): ) msg = _wrap_call("test_method", (1, 2), {"key": "value"}, True) - assert msg["__pulsing_proto__"] == "1" - assert msg["__pulsing__"]["call"] == "test_method" - assert msg["__pulsing__"]["async"] is True - assert msg["user_data"]["args"] == (1, 2) + assert msg["__call__"] == "test_method" + assert msg["__async__"] is True + assert msg["args"] == (1, 2) method, args, kwargs, is_async = _unwrap_call(msg) assert method == "test_method" @@ -277,14 +276,13 @@ async def test_protocol_call_format(): assert is_async is True resp = _wrap_response(result="success") - assert resp["__pulsing_proto__"] == "1" - assert resp["__pulsing__"]["result"] == "success" + assert resp["__result__"] == "success" result, error = _unwrap_response(resp) assert result == "success" assert error is None err_resp = _wrap_response(error="failed") - assert err_resp["__pulsing__"]["error"] == "failed" + assert err_resp["__error__"] == "failed" result, error = _unwrap_response(err_resp) assert result is None assert error == "failed" diff --git a/tests/python/core/test_remote_system_ops.py b/tests/python/core/test_remote_system_ops.py index d16350b6d..577dab990 100644 --- a/tests/python/core/test_remote_system_ops.py +++ b/tests/python/core/test_remote_system_ops.py @@ -11,68 +11,67 @@ from pulsing.core import init, shutdown, get_system +# ============================================================================ +# Shared fixture — replaces per-test init()/shutdown() boilerplate +# ============================================================================ + + +@pytest.fixture +async def system(): + sys = await init() + yield sys + await shutdown() + + # ============================================================================ # SystemActorProxy operations # ============================================================================ @pytest.mark.asyncio -async def 
test_system_actor_proxy_legacy_ops(): +async def test_system_actor_proxy_legacy_ops(system): """Test system operations via SystemActorProxy (replaces legacy helper functions).""" from pulsing.core.remote import get_system_actor - system = await init() - try: - proxy = await get_system_actor(system) - - actors = await proxy.list_actors() - assert isinstance(actors, list) - - metrics = await proxy.get_metrics() - assert isinstance(metrics, dict) + proxy = await get_system_actor(system) - info = await proxy.get_node_info() - assert isinstance(info, dict) + actors = await proxy.list_actors() + assert isinstance(actors, list) - result = await proxy.health_check() - assert isinstance(result, dict) + metrics = await proxy.get_metrics() + assert isinstance(metrics, dict) - pong = await proxy.ping() - assert isinstance(pong, dict) - finally: - await shutdown() + info = await proxy.get_node_info() + assert isinstance(info, dict) + result = await proxy.health_check() + assert isinstance(result, dict) -# ============================================================================ -# SystemActorProxy -# ============================================================================ + pong = await proxy.ping() + assert isinstance(pong, dict) @pytest.mark.asyncio -async def test_system_actor_proxy_all_methods(): +async def test_system_actor_proxy_all_methods(system): from pulsing.core.remote import get_system_actor - system = await init() - try: - proxy = await get_system_actor(system) - assert proxy.ref is not None + proxy = await get_system_actor(system) + assert proxy.ref is not None - actors = await proxy.list_actors() - assert isinstance(actors, list) + actors = await proxy.list_actors() + assert isinstance(actors, list) - metrics = await proxy.get_metrics() - assert isinstance(metrics, dict) + metrics = await proxy.get_metrics() + assert isinstance(metrics, dict) - node_info = await proxy.get_node_info() - assert isinstance(node_info, dict) + node_info = await 
proxy.get_node_info() + assert isinstance(node_info, dict) - health = await proxy.health_check() - assert isinstance(health, dict) + health = await proxy.health_check() + assert isinstance(health, dict) - pong = await proxy.ping() - assert isinstance(pong, dict) - finally: - await shutdown() + pong = await proxy.ping() + assert isinstance(pong, dict) # ============================================================================ @@ -81,7 +80,7 @@ async def test_system_actor_proxy_all_methods(): @pytest.mark.asyncio -async def test_python_actor_service_proxy_list_registry(): +async def test_python_actor_service_proxy_list_registry(system): from pulsing.core.remote import get_python_actor_service, remote @remote @@ -89,20 +88,16 @@ class RegisteredActor: def hello(self): return "hi" - system = await init() - try: - service = await get_python_actor_service(system) - assert service.ref is not None + service = await get_python_actor_service(system) + assert service.ref is not None - classes = await service.list_registry() - assert isinstance(classes, list) - assert any("RegisteredActor" in c for c in classes) - finally: - await shutdown() + classes = await service.list_registry() + assert isinstance(classes, list) + assert any("RegisteredActor" in c for c in classes) @pytest.mark.asyncio -async def test_python_actor_service_proxy_create_actor(): +async def test_python_actor_service_proxy_create_actor(system): from pulsing.core.remote import get_python_actor_service, remote @remote @@ -113,31 +108,21 @@ def __init__(self, val=0): def get_val(self): return self.val - system = await init() - try: - service = await get_python_actor_service(system) - class_name = f"{CreatableActor._cls.__module__}.{CreatableActor._cls.__name__}" - result = await service.create_actor(class_name, name="created_test", val=42) - assert "actor_id" in result - assert "node_id" in result - finally: - await shutdown() + service = await get_python_actor_service(system) + class_name = 
f"{CreatableActor._cls.__module__}.{CreatableActor._cls.__name__}" + result = await service.create_actor(class_name, name="created_test", val=42) + assert "actor_id" in result + assert "node_id" in result @pytest.mark.asyncio -async def test_python_actor_service_proxy_create_unknown_class(): +async def test_python_actor_service_proxy_create_unknown_class(system): from pulsing.core.remote import get_python_actor_service from pulsing.exceptions import PulsingRuntimeError - system = await init() - try: - service = await get_python_actor_service(system) - with pytest.raises(PulsingRuntimeError): - await service.create_actor( - "nonexistent.module.FakeClass", name="should_fail" - ) - finally: - await shutdown() + service = await get_python_actor_service(system) + with pytest.raises(PulsingRuntimeError): + await service.create_actor("nonexistent.module.FakeClass", name="should_fail") # ============================================================================ @@ -146,7 +131,7 @@ async def test_python_actor_service_proxy_create_unknown_class(): @pytest.mark.asyncio -async def test_resolve_function(): +async def test_resolve_function(system): from pulsing.core import remote from pulsing.core.remote import resolve @@ -155,13 +140,9 @@ class ResolveTarget: def echo(self, msg): return msg - system = await init() - try: - await ResolveTarget.spawn(name="resolve_target_test", public=True) - ref = await resolve("resolve_target_test") - assert ref is not None - finally: - await shutdown() + await ResolveTarget.spawn(name="resolve_target_test", public=True) + ref = await resolve("resolve_target_test") + assert ref is not None @pytest.mark.asyncio @@ -179,8 +160,7 @@ async def test_resolve_without_init(): @pytest.mark.asyncio -async def test_async_on_start(): - """Test that async on_start is properly handled.""" +async def test_async_on_start(system): from pulsing.core import remote on_start_called = [] @@ -193,19 +173,14 @@ async def on_start(self, actor_id): def ping(self): 
return "pong" - system = await init() - try: - actor = await AsyncOnStartActor.spawn() - assert await actor.ping() == "pong" - await asyncio.sleep(0.05) - assert len(on_start_called) >= 1 - finally: - await shutdown() + actor = await AsyncOnStartActor.spawn() + assert await actor.ping() == "pong" + await asyncio.sleep(0.05) + assert len(on_start_called) >= 1 @pytest.mark.asyncio -async def test_async_on_stop(): - """Test that async on_stop is properly handled.""" +async def test_async_on_stop(system): from pulsing.core import remote on_stop_called = [] @@ -218,15 +193,11 @@ async def on_stop(self): def ping(self): return "pong" - system = await init() - try: - actor = await AsyncOnStopActor.spawn(name="async_stop_test") - assert await actor.ping() == "pong" - await get_system().stop("async_stop_test") - await asyncio.sleep(0.1) - assert "stopped" in on_stop_called - finally: - await shutdown() + actor = await AsyncOnStopActor.spawn(name="async_stop_test") + assert await actor.ping() == "pong" + await get_system().stop("async_stop_test") + await asyncio.sleep(0.1) + assert "stopped" in on_stop_called # ============================================================================ @@ -235,8 +206,7 @@ def ping(self): @pytest.mark.asyncio -async def test_receive_empty_method_name(): - """Empty method name in call should return error response.""" +async def test_receive_empty_method_name(system): from pulsing.core import remote from pulsing.core.remote import _wrap_call @@ -245,20 +215,14 @@ class RawActor: def ping(self): return "pong" - system = await init() - try: - actor = await RawActor.spawn() - msg = _wrap_call("", (), {}, False) - resp = await actor.ref.ask(msg) - assert isinstance(resp, dict) - # Should contain error about invalid method - finally: - await shutdown() + actor = await RawActor.spawn() + msg = _wrap_call("", (), {}, False) + resp = await actor.ref.ask(msg) + assert isinstance(resp, dict) @pytest.mark.asyncio -async def 
test_receive_private_method_via_raw(): - """Private method call via raw ask should return error.""" +async def test_receive_private_method_via_raw(system): from pulsing.core import remote from pulsing.core.remote import _wrap_call @@ -267,19 +231,14 @@ class RawActor2: def ping(self): return "pong" - system = await init() - try: - actor = await RawActor2.spawn() - msg = _wrap_call("_secret", (), {}, False) - resp = await actor.ref.ask(msg) - assert isinstance(resp, dict) - finally: - await shutdown() + actor = await RawActor2.spawn() + msg = _wrap_call("_secret", (), {}, False) + resp = await actor.ref.ask(msg) + assert isinstance(resp, dict) @pytest.mark.asyncio -async def test_receive_nonexistent_method_via_raw(): - """Nonexistent method call via raw ask should return error.""" +async def test_receive_nonexistent_method_via_raw(system): from pulsing.core import remote from pulsing.core.remote import _wrap_call @@ -288,11 +247,7 @@ class RawActor3: def ping(self): return "pong" - system = await init() - try: - actor = await RawActor3.spawn() - msg = _wrap_call("does_not_exist", (), {}, False) - resp = await actor.ref.ask(msg) - assert isinstance(resp, dict) - finally: - await shutdown() + actor = await RawActor3.spawn() + msg = _wrap_call("does_not_exist", (), {}, False) + resp = await actor.ref.ask(msg) + assert isinstance(resp, dict) diff --git a/tests/python/core/test_remote_unit.py b/tests/python/core/test_remote_unit.py index d2eb0ea37..439cd0810 100644 --- a/tests/python/core/test_remote_unit.py +++ b/tests/python/core/test_remote_unit.py @@ -10,7 +10,6 @@ import pytest from pulsing.core.remote import ( - _PULSING_WIRE_VERSION, _extract_methods, _register_actor_metadata, _unwrap_call, @@ -49,20 +48,19 @@ async def __anext__(self): class TestWrapCall: def test_basic(self): msg = _wrap_call("greet", ("hello",), {"lang": "en"}, False) - assert msg["__pulsing_proto__"] == _PULSING_WIRE_VERSION - assert msg["__pulsing__"]["call"] == "greet" - assert 
msg["__pulsing__"]["async"] is False - assert msg["user_data"]["args"] == ("hello",) - assert msg["user_data"]["kwargs"] == {"lang": "en"} + assert msg["__call__"] == "greet" + assert msg["__async__"] is False + assert msg["args"] == ("hello",) + assert msg["kwargs"] == {"lang": "en"} def test_async_flag(self): msg = _wrap_call("stream", (), {}, True) - assert msg["__pulsing__"]["async"] is True + assert msg["__async__"] is True def test_empty_args(self): msg = _wrap_call("no_args", (), {}, False) - assert msg["user_data"]["args"] == () - assert msg["user_data"]["kwargs"] == {} + assert msg["args"] == () + assert msg["kwargs"] == {} class TestUnwrapCall: @@ -82,7 +80,7 @@ def test_missing_fields(self): assert is_async is False def test_partial_message(self): - msg = {"__pulsing__": {"call": "foo"}, "user_data": {}} + msg = {"__call__": "foo"} method, args, kwargs, is_async = _unwrap_call(msg) assert method == "foo" assert args == () @@ -98,37 +96,36 @@ def test_partial_message(self): class TestWrapResponse: def test_success(self): resp = _wrap_response(result=42) - assert resp["__pulsing__"]["result"] == 42 - assert resp["__pulsing_proto__"] == _PULSING_WIRE_VERSION + assert resp["__result__"] == 42 def test_error(self): resp = _wrap_response(error="something broke") - assert resp["__pulsing__"]["error"] == "something broke" + assert resp["__error__"] == "something broke" def test_none_result(self): resp = _wrap_response(result=None) - assert resp["__pulsing__"]["result"] is None + assert resp["__result__"] is None class TestUnwrapResponse: - def test_wire_format_result(self): + def test_flat_result(self): resp = _wrap_response(result="ok") result, error = _unwrap_response(resp) assert result == "ok" assert error is None - def test_wire_format_error(self): + def test_flat_error(self): resp = _wrap_response(error="fail") result, error = _unwrap_response(resp) assert result is None assert error == "fail" - def test_message_json_result(self): + def 
test_rust_json_result(self): result, error = _unwrap_response({"result": "top"}) assert result == "top" assert error is None - def test_message_json_error(self): + def test_rust_json_error(self): result, error = _unwrap_response({"error": "top_err"}) assert result is None assert error == "top_err" @@ -138,26 +135,20 @@ def test_empty_dict(self): assert result is None assert error is None - def test_wire_takes_precedence_over_message_json(self): - resp = {"__pulsing__": {"error": "wire"}, "result": "message_json"} + def test_flat_takes_precedence_over_rust_json(self): + resp = {"__error__": "flat", "result": "rust_json"} result, error = _unwrap_response(resp) - assert error == "wire" + assert error == "flat" assert result is None - def test_non_dict_pulsing_field_falls_back_to_message_json(self): - resp = {"__pulsing__": "not a dict", "result": "fallback"} - result, error = _unwrap_response(resp) - assert result == "fallback" - def test_stream_frame_final(self): - # Stream frames now use __pulsing__ namespace - frame = {"__pulsing__": {"final": True, "result": 42}} + frame = {"__final__": True, "__result__": 42} result, error = _unwrap_response(frame) assert result == 42 assert error is None def test_stream_frame_error(self): - frame = {"__pulsing__": {"error": "stream failed"}} + frame = {"__error__": "stream failed"} result, error = _unwrap_response(frame) assert result is None assert error == "stream failed" diff --git a/tests/python/integrations/test_ray_compat_running_loop.py b/tests/python/integrations/test_ray_compat_running_loop.py deleted file mode 100644 index 2af79e5c9..000000000 --- a/tests/python/integrations/test_ray_compat_running_loop.py +++ /dev/null @@ -1,31 +0,0 @@ -import asyncio - - -def test_ray_compat_init_inside_running_loop(): - """ray.init() should work even when called from within a running event loop. - - This covers environments like Jupyter or pytest-asyncio where an event loop - is already running on the main thread. 
- """ - from pulsing.integrations.ray_compat import ray - - async def main(): - ray.init() - try: - - @ray.remote - class Counter: - def __init__(self, value=0): - self.value = value - - def inc(self, n=1): - self.value += n - return self.value - - c = Counter.remote(value=1) - assert ray.get(c.inc.remote()) == 2 - assert ray.get(c.inc.remote(10)) == 12 - finally: - ray.shutdown() - - asyncio.run(main()) diff --git a/tests/python/streaming/test_queue.py b/tests/python/streaming/test_queue.py index 22b70a151..65311de22 100644 --- a/tests/python/streaming/test_queue.py +++ b/tests/python/streaming/test_queue.py @@ -7,19 +7,15 @@ - Memory-based storage (default backend) - Streaming and blocking - Distributed consumption (rank/world_size) -- Stress tests (high concurrency, large data) -Note: Persistence tests live in plugin packages (e.g. persisting). +Stress tests are in test_queue_stress.py. """ import asyncio import hashlib -import random import shutil -import string import tempfile import time -from pathlib import Path import pytest @@ -564,379 +560,6 @@ async def test_blocking_read_wakes_on_data(actor_system, temp_storage_path): assert "new_data" in ids -# ============================================================================ -# Stress Tests -# ============================================================================ - - -@pytest.mark.asyncio -async def test_high_concurrency_writes(actor_system, temp_storage_path): - """Stress test: many concurrent writes.""" - writer = await write_queue( - actor_system, - topic="stress_write", - bucket_column="id", - num_buckets=8, - batch_size=100, - storage_path=temp_storage_path, - ) - - num_writers = 10 - records_per_writer = 100 - - async def write_batch(writer_id: int): - results = [] - for i in range(records_per_writer): - result = await writer.put( - { - "id": f"writer_{writer_id}_record_{i}", - "writer_id": writer_id, - "seq": i, - } - ) - results.append(result) - return results - - # Concurrent writes - start = 
time.time() - tasks = [write_batch(i) for i in range(num_writers)] - all_results = await asyncio.gather(*tasks) - elapsed = time.time() - start - - # Verify all writes succeeded - total_writes = sum(len(r) for r in all_results) - assert total_writes == num_writers * records_per_writer - - for results in all_results: - for result in results: - assert result["status"] == "ok" - - print( - f"\nHigh concurrency writes: {total_writes} records in {elapsed:.2f}s " - f"({total_writes / elapsed:.0f} records/s)" - ) - - -@pytest.mark.asyncio -async def test_high_concurrency_reads(actor_system, temp_storage_path): - """Stress test: many concurrent reads.""" - writer = await write_queue( - actor_system, - topic="stress_read", - bucket_column="id", - num_buckets=4, - batch_size=50, - storage_path=temp_storage_path, - ) - - # Write test data - for i in range(500): - await writer.put({"id": f"record_{i}", "value": i}) - await writer.flush() - - num_readers = 10 - - async def read_all(reader_id: int): - reader = await read_queue( - actor_system, - topic="stress_read", - num_buckets=4, - storage_path=temp_storage_path, - ) - records = await reader.get(limit=500) - return reader_id, len(records) - - # Concurrent reads - start = time.time() - tasks = [read_all(i) for i in range(num_readers)] - results = await asyncio.gather(*tasks) - elapsed = time.time() - start - - # All readers should get data - for reader_id, count in results: - assert count > 0, f"Reader {reader_id} got no data" - - total_records = sum(count for _, count in results) - print( - f"\nHigh concurrency reads: {num_readers} readers, {total_records} total records " - f"in {elapsed:.2f}s" - ) - - -@pytest.mark.asyncio -async def test_large_records(actor_system, temp_storage_path): - """Stress test: large record payloads.""" - writer = await write_queue( - actor_system, - topic="large_records", - bucket_column="id", - num_buckets=4, - batch_size=10, - storage_path=temp_storage_path, - ) - - # Generate large records (1KB 
each) - def generate_large_record(i: int) -> dict: - return { - "id": f"large_{i}", - "data": "".join(random.choices(string.ascii_letters, k=1000)), - "seq": i, - } - - num_records = 100 - - start = time.time() - for i in range(num_records): - await writer.put(generate_large_record(i)) - await writer.flush() - elapsed = time.time() - start - - print(f"\nLarge records: {num_records} x 1KB records in {elapsed:.2f}s") - - # Verify read - reader = await read_queue( - actor_system, - topic="large_records", - num_buckets=4, - storage_path=temp_storage_path, - ) - - records = await reader.get(limit=num_records) - assert len(records) == num_records - - -@pytest.mark.asyncio -async def test_producer_consumer_stress(actor_system, temp_storage_path): - """Stress test: concurrent producers and consumers.""" - topic = "producer_consumer_stress" - num_buckets = 4 - - writer = await write_queue( - actor_system, - topic=topic, - bucket_column="id", - num_buckets=num_buckets, - batch_size=50, - storage_path=temp_storage_path, - ) - - num_producers = 5 - records_per_producer = 100 - num_consumers = 3 - - produced_ids = set() - consumed_ids = set() - produce_done = asyncio.Event() - lock = asyncio.Lock() - - async def producer(producer_id: int): - nonlocal produced_ids - for i in range(records_per_producer): - record_id = f"p{producer_id}_r{i}" - await writer.put({"id": record_id, "producer": producer_id, "seq": i}) - async with lock: - produced_ids.add(record_id) - await asyncio.sleep(0.001) # Small delay - if producer_id == num_producers - 1: - await writer.flush() - produce_done.set() - - async def consumer(consumer_id: int): - nonlocal consumed_ids - reader = await read_queue( - actor_system, - topic=topic, - rank=consumer_id, - world_size=num_consumers, - num_buckets=num_buckets, - storage_path=temp_storage_path, - ) - - while True: - records = await reader.get(limit=50, wait=True, timeout=0.5) - if records: - async with lock: - for r in records: - consumed_ids.add(r["id"]) - 
elif produce_done.is_set(): - # One more read after producers done - records = await reader.get(limit=100) - async with lock: - for r in records: - consumed_ids.add(r["id"]) - break - - # Start producers and consumers - start = time.time() - - producer_tasks = [asyncio.create_task(producer(i)) for i in range(num_producers)] - consumer_tasks = [asyncio.create_task(consumer(i)) for i in range(num_consumers)] - - # Wait for producers - await asyncio.gather(*producer_tasks) - - # Give consumers time to finish - await asyncio.sleep(1.0) - - # Cancel remaining consumer tasks - for task in consumer_tasks: - task.cancel() - - elapsed = time.time() - start - - total_produced = num_producers * records_per_producer - - print("\nProducer-Consumer stress test:") - print(f" Produced: {len(produced_ids)} records") - print(f" Consumed: {len(consumed_ids)} records") - print(f" Elapsed: {elapsed:.2f}s") - print(f" Throughput: {len(produced_ids) / elapsed:.0f} records/s") - - assert len(produced_ids) == total_produced - - -@pytest.mark.asyncio -async def test_many_buckets(actor_system, temp_storage_path): - """Stress test: many buckets.""" - num_buckets = 32 - - writer = await write_queue( - actor_system, - topic="many_buckets", - bucket_column="id", - num_buckets=num_buckets, - batch_size=20, - storage_path=temp_storage_path, - ) - - # Write to fill all buckets - num_records = 500 - for i in range(num_records): - await writer.put({"id": f"record_{i}", "value": i}) - await writer.flush() - - # Get stats - stats = await writer.stats() - - # Count non-empty buckets - non_empty = sum(1 for b in stats["buckets"].values() if b.get("total_count", 0) > 0) - - print(f"\nMany buckets test: {num_buckets} buckets, {non_empty} non-empty") - - # Most buckets should have data (probabilistic) - assert non_empty >= num_buckets // 2 - - -@pytest.mark.asyncio -async def test_rapid_flush_cycles(actor_system, temp_storage_path): - """Stress test: rapid write-flush cycles.""" - writer = await 
write_queue( - actor_system, - topic="rapid_flush", - bucket_column="id", - num_buckets=4, - batch_size=5, # Small batch size for frequent auto-flush - storage_path=temp_storage_path, - ) - - num_cycles = 50 - records_per_cycle = 10 - - start = time.time() - for cycle in range(num_cycles): - for i in range(records_per_cycle): - await writer.put({"id": f"c{cycle}_r{i}", "cycle": cycle, "seq": i}) - await writer.flush() - elapsed = time.time() - start - - total_records = num_cycles * records_per_cycle - - print( - f"\nRapid flush: {num_cycles} cycles, {total_records} records in {elapsed:.2f}s" - ) - - # Verify all data readable - reader = await read_queue( - actor_system, - topic="rapid_flush", - num_buckets=4, - storage_path=temp_storage_path, - ) - - all_records = [] - while True: - records = await reader.get(limit=100) - if not records: - break - all_records.extend(records) - - assert len(all_records) == total_records - - -@pytest.mark.asyncio -async def test_data_integrity_under_stress(actor_system, temp_storage_path): - """Stress test: verify data integrity under concurrent load.""" - writer = await write_queue( - actor_system, - topic="integrity_test", - bucket_column="id", - num_buckets=4, - batch_size=20, - storage_path=temp_storage_path, - ) - - # Write records with unique checksums - num_records = 200 - expected_data = {} - - for i in range(num_records): - record_id = f"integrity_{i}" - value = random.randint(0, 1000000) - checksum = hashlib.md5(f"{record_id}:{value}".encode()).hexdigest() - - await writer.put( - { - "id": record_id, - "value": value, - "checksum": checksum, - } - ) - expected_data[record_id] = (value, checksum) - - await writer.flush() - - # Read and verify - reader = await read_queue( - actor_system, - topic="integrity_test", - num_buckets=4, - storage_path=temp_storage_path, - ) - - all_records = [] - while True: - records = await reader.get(limit=100) - if not records: - break - all_records.extend(records) - - # Verify integrity - 
assert len(all_records) == num_records - - for record in all_records: - record_id = record["id"] - expected_value, expected_checksum = expected_data[record_id] - - # Verify checksum - actual_checksum = hashlib.md5( - f"{record_id}:{record['value']}".encode() - ).hexdigest() - assert ( - record["checksum"] == expected_checksum - ), f"Checksum mismatch for {record_id}" - assert actual_checksum == expected_checksum, f"Value corruption for {record_id}" - - # ============================================================================ # BucketStorage Direct Tests # ============================================================================ @@ -1037,7 +660,6 @@ def test_sync_queue_standalone(): import tempfile import shutil import threading - import asyncio temp_dir = tempfile.mkdtemp(prefix="sync_test_") @@ -1105,7 +727,6 @@ def test_sync_writer_reader_standalone(): import tempfile import shutil import threading - import asyncio temp_dir = tempfile.mkdtemp(prefix="sync_wr_test_") @@ -1172,7 +793,6 @@ def test_sync_reader_offset_standalone(): import tempfile import shutil import threading - import asyncio temp_dir = tempfile.mkdtemp(prefix="sync_offset_test_") diff --git a/tests/python/streaming/test_queue_stress.py b/tests/python/streaming/test_queue_stress.py new file mode 100644 index 000000000..cb040f0f4 --- /dev/null +++ b/tests/python/streaming/test_queue_stress.py @@ -0,0 +1,387 @@ +""" +Stress tests for the Pulsing Distributed Memory Queue. 
+ +Separated from test_queue.py for faster CI — run with: + pytest tests/python/streaming/test_queue_stress.py -v +""" + +import asyncio +import hashlib +import random +import shutil +import string +import tempfile +import time + +import pytest + +import pulsing as pul +from pulsing.streaming import read_queue, write_queue + + +# ============================================================================ +# Fixtures +# ============================================================================ + + +@pytest.fixture +async def actor_system(): + system = await pul.actor_system() + yield system + await system.shutdown() + + +@pytest.fixture +def temp_storage_path(): + path = tempfile.mkdtemp(prefix="queue_stress_") + yield path + shutil.rmtree(path, ignore_errors=True) + + +# ============================================================================ +# Stress Tests +# ============================================================================ + + +@pytest.mark.asyncio +async def test_high_concurrency_writes(actor_system, temp_storage_path): + """Stress test: many concurrent writes.""" + writer = await write_queue( + actor_system, + topic="stress_write", + bucket_column="id", + num_buckets=8, + batch_size=100, + storage_path=temp_storage_path, + ) + + num_writers = 10 + records_per_writer = 100 + + async def write_batch(writer_id: int): + results = [] + for i in range(records_per_writer): + result = await writer.put( + { + "id": f"writer_{writer_id}_record_{i}", + "writer_id": writer_id, + "seq": i, + } + ) + results.append(result) + return results + + start = time.time() + tasks = [write_batch(i) for i in range(num_writers)] + all_results = await asyncio.gather(*tasks) + elapsed = time.time() - start + + total_writes = sum(len(r) for r in all_results) + assert total_writes == num_writers * records_per_writer + + for results in all_results: + for result in results: + assert result["status"] == "ok" + + print( + f"\nHigh concurrency writes: {total_writes} records in 
{elapsed:.2f}s " + f"({total_writes / elapsed:.0f} records/s)" + ) + + +@pytest.mark.asyncio +async def test_high_concurrency_reads(actor_system, temp_storage_path): + """Stress test: many concurrent reads.""" + writer = await write_queue( + actor_system, + topic="stress_read", + bucket_column="id", + num_buckets=4, + batch_size=50, + storage_path=temp_storage_path, + ) + + for i in range(500): + await writer.put({"id": f"record_{i}", "value": i}) + await writer.flush() + + num_readers = 10 + + async def read_all(reader_id: int): + reader = await read_queue( + actor_system, + topic="stress_read", + num_buckets=4, + storage_path=temp_storage_path, + ) + records = await reader.get(limit=500) + return reader_id, len(records) + + start = time.time() + tasks = [read_all(i) for i in range(num_readers)] + results = await asyncio.gather(*tasks) + elapsed = time.time() - start + + for reader_id, count in results: + assert count > 0, f"Reader {reader_id} got no data" + + total_records = sum(count for _, count in results) + print( + f"\nHigh concurrency reads: {num_readers} readers, {total_records} total records " + f"in {elapsed:.2f}s" + ) + + +@pytest.mark.asyncio +async def test_large_records(actor_system, temp_storage_path): + """Stress test: large record payloads.""" + writer = await write_queue( + actor_system, + topic="large_records", + bucket_column="id", + num_buckets=4, + batch_size=10, + storage_path=temp_storage_path, + ) + + def generate_large_record(i: int) -> dict: + return { + "id": f"large_{i}", + "data": "".join(random.choices(string.ascii_letters, k=1000)), + "seq": i, + } + + num_records = 100 + + start = time.time() + for i in range(num_records): + await writer.put(generate_large_record(i)) + await writer.flush() + elapsed = time.time() - start + + print(f"\nLarge records: {num_records} x 1KB records in {elapsed:.2f}s") + + reader = await read_queue( + actor_system, + topic="large_records", + num_buckets=4, + storage_path=temp_storage_path, + ) + + 
records = await reader.get(limit=num_records) + assert len(records) == num_records + + +@pytest.mark.asyncio +async def test_producer_consumer_stress(actor_system, temp_storage_path): + """Stress test: concurrent producers and consumers.""" + topic = "producer_consumer_stress" + num_buckets = 4 + + writer = await write_queue( + actor_system, + topic=topic, + bucket_column="id", + num_buckets=num_buckets, + batch_size=50, + storage_path=temp_storage_path, + ) + + num_producers = 5 + records_per_producer = 100 + num_consumers = 3 + + produced_ids = set() + consumed_ids = set() + produce_done = asyncio.Event() + lock = asyncio.Lock() + + async def producer(producer_id: int): + nonlocal produced_ids + for i in range(records_per_producer): + record_id = f"p{producer_id}_r{i}" + await writer.put({"id": record_id, "producer": producer_id, "seq": i}) + async with lock: + produced_ids.add(record_id) + await asyncio.sleep(0.001) + if producer_id == num_producers - 1: + await writer.flush() + produce_done.set() + + async def consumer(consumer_id: int): + nonlocal consumed_ids + reader = await read_queue( + actor_system, + topic=topic, + rank=consumer_id, + world_size=num_consumers, + num_buckets=num_buckets, + storage_path=temp_storage_path, + ) + + while True: + records = await reader.get(limit=50, wait=True, timeout=0.5) + if records: + async with lock: + for r in records: + consumed_ids.add(r["id"]) + elif produce_done.is_set(): + records = await reader.get(limit=100) + async with lock: + for r in records: + consumed_ids.add(r["id"]) + break + + start = time.time() + + producer_tasks = [asyncio.create_task(producer(i)) for i in range(num_producers)] + consumer_tasks = [asyncio.create_task(consumer(i)) for i in range(num_consumers)] + + await asyncio.gather(*producer_tasks) + await asyncio.sleep(1.0) + + for task in consumer_tasks: + task.cancel() + + elapsed = time.time() - start + + total_produced = num_producers * records_per_producer + + print("\nProducer-Consumer 
stress test:") + print(f" Produced: {len(produced_ids)} records") + print(f" Consumed: {len(consumed_ids)} records") + print(f" Elapsed: {elapsed:.2f}s") + print(f" Throughput: {len(produced_ids) / elapsed:.0f} records/s") + + assert len(produced_ids) == total_produced + + +@pytest.mark.asyncio +async def test_many_buckets(actor_system, temp_storage_path): + """Stress test: many buckets.""" + num_buckets = 32 + + writer = await write_queue( + actor_system, + topic="many_buckets", + bucket_column="id", + num_buckets=num_buckets, + batch_size=20, + storage_path=temp_storage_path, + ) + + num_records = 500 + for i in range(num_records): + await writer.put({"id": f"record_{i}", "value": i}) + await writer.flush() + + stats = await writer.stats() + non_empty = sum(1 for b in stats["buckets"].values() if b.get("total_count", 0) > 0) + + print(f"\nMany buckets test: {num_buckets} buckets, {non_empty} non-empty") + assert non_empty >= num_buckets // 2 + + +@pytest.mark.asyncio +async def test_rapid_flush_cycles(actor_system, temp_storage_path): + """Stress test: rapid write-flush cycles.""" + writer = await write_queue( + actor_system, + topic="rapid_flush", + bucket_column="id", + num_buckets=4, + batch_size=5, + storage_path=temp_storage_path, + ) + + num_cycles = 50 + records_per_cycle = 10 + + start = time.time() + for cycle in range(num_cycles): + for i in range(records_per_cycle): + await writer.put({"id": f"c{cycle}_r{i}", "cycle": cycle, "seq": i}) + await writer.flush() + elapsed = time.time() - start + + total_records = num_cycles * records_per_cycle + + print( + f"\nRapid flush: {num_cycles} cycles, {total_records} records in {elapsed:.2f}s" + ) + + reader = await read_queue( + actor_system, + topic="rapid_flush", + num_buckets=4, + storage_path=temp_storage_path, + ) + + all_records = [] + while True: + records = await reader.get(limit=100) + if not records: + break + all_records.extend(records) + + assert len(all_records) == total_records + + 
+@pytest.mark.asyncio +async def test_data_integrity_under_stress(actor_system, temp_storage_path): + """Stress test: verify data integrity under concurrent load.""" + writer = await write_queue( + actor_system, + topic="integrity_test", + bucket_column="id", + num_buckets=4, + batch_size=20, + storage_path=temp_storage_path, + ) + + num_records = 200 + expected_data = {} + + for i in range(num_records): + record_id = f"integrity_{i}" + value = random.randint(0, 1000000) + checksum = hashlib.md5(f"{record_id}:{value}".encode()).hexdigest() + + await writer.put( + { + "id": record_id, + "value": value, + "checksum": checksum, + } + ) + expected_data[record_id] = (value, checksum) + + await writer.flush() + + reader = await read_queue( + actor_system, + topic="integrity_test", + num_buckets=4, + storage_path=temp_storage_path, + ) + + all_records = [] + while True: + records = await reader.get(limit=100) + if not records: + break + all_records.extend(records) + + assert len(all_records) == num_records + + for record in all_records: + record_id = record["id"] + expected_value, expected_checksum = expected_data[record_id] + + actual_checksum = hashlib.md5( + f"{record_id}:{record['value']}".encode() + ).hexdigest() + assert ( + record["checksum"] == expected_checksum + ), f"Checksum mismatch for {record_id}" + assert actual_checksum == expected_checksum, f"Value corruption for {record_id}" From f5a3463dd0f191c627f0dc41cfd3aca72d3f6d77 Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 1 Mar 2026 15:50:03 +0800 Subject: [PATCH 4/5] Refactor Python API documentation for Pulsing framework - Streamlined the Python API documentation by reorganizing sections for clarity, including lifecycle management, actor definition, and usage examples. - Enhanced actor management details, including initialization, spawning, and resolution methods, to improve user understanding. - Updated examples to reflect new API structures and best practices, ensuring consistency and clarity in usage. 
- Removed outdated sections and consolidated information to provide a more cohesive and user-friendly documentation experience. --- llms.binding.md | 578 +++++++++++++----------------------------------- 1 file changed, 151 insertions(+), 427 deletions(-) diff --git a/llms.binding.md b/llms.binding.md index f74c36560..768ea27df 100644 --- a/llms.binding.md +++ b/llms.binding.md @@ -31,429 +31,209 @@ await pul.shutdown() ## Python API -You must call `await pul.init()` before using `spawn`, `resolve`, or other APIs. +### 1. Init & Lifecycle ```python import pulsing as pul -# ── Lifecycle ── - await pul.init( - addr: str | None = None, + addr: str | None = None, # Bind address; None = standalone *, - seeds: list[str] | None = None, - passphrase: str | None = None + seeds: list[str] | None = None, # Seed nodes (gossip mode) + passphrase: str | None = None, # TLS passphrase + head_addr: str | None = None, # Worker mode: head node address + is_head_node: bool = False, # Head mode (mutually exclusive with head_addr) ) - await pul.shutdown() -# ── Define actor with @pul.remote ── +pul.get_system() -> ActorSystem # Get global system (raises if not init) +pul.is_initialized() -> bool +``` +### 2. 
Define & Use Actors + +```python @pul.remote class Counter: def __init__(self, init=0): self.value = init - def incr(self): # sync method - self.value += 1 - return self.value + def incr(self): # sync: blocks actor, sequential + self.value += 1; return self.value - async def fetch_and_add(self, url): # async method + async def fetch_and_add(self, url): # async: non-blocking during await data = await http_get(url) - self.value += data - return self.value + self.value += data; return self.value -# ── Create and call ── + async def stream(self, n): # generator → auto streaming response + for i in range(n): yield f"chunk_{i}" -counter = await Counter.spawn(name="counter") # create actor, returns typed proxy -result = await counter.incr() # call method directly - -# ── Resolve existing actor (e.g. from another process / node) ── -# Prefer typed proxy via Counter.resolve() when you know the actor type. -# Fall back to ref.as_any() when the remote type is unknown. - -# 1. Typed proxy (recommended) -proxy = await Counter.resolve("counter") -result = await proxy.incr() - -# 2. Typed proxy — manual bind -ref = await pul.resolve("counter", timeout=30) -proxy = ref.as_type(Counter) -result = await proxy.incr() - -# 3. 
Untyped proxy — when remote type is unknown -ref = await pul.resolve("service_name") -proxy = ref.as_any() -result = await proxy.any_method(args) +# ── Spawn ── +counter = await Counter.spawn(name="counter") +counter = await Counter.spawn(name="c2", placement="remote") # remote node +counter = await Counter.spawn(name="c3", placement=3) # specific node_id +counter = await Counter.spawn(system=system, name="c4") # explicit ActorSystem +# ── Call ── +result = await counter.incr() +async for chunk in counter.stream(10): print(chunk) + +# ── Resolve (cross-process / cross-node) ── +proxy = await Counter.resolve("counter") # typed proxy (recommended) +proxy = await Counter.resolve("counter", node_id=2, timeout=30) # with options +ref = await pul.resolve("counter", timeout=30) # untyped ActorRef +proxy = ref.as_type(Counter) # bind type +proxy = ref.as_any() # or use as_any() ``` -### Ray Integration - -`pul.mount` registers any Python object as a Pulsing actor, enabling tight integration between Ray actors and Pulsing. - -**Running Pulsing in a Ray cluster:** Use `pulsing.bootstrap(ray=True, torchrun=False, wait_timeout=...)` in the driver; set `ray.init(runtime_env={"worker_process_setup_hook": init_in_ray})` so every worker runs `init_in_ray` on startup. See [Cluster Networking](../docs/src/quickstart/cluster_networking.md) and `pulsing.bootstrap` for details. - -```python -import pulsing as pul - -# Mount object onto Pulsing network (sync, can be called in __init__) -pul.mount( - instance: Any, # Object to mount - *, - name: str, # Pulsing name, used for resolve discovery - public: bool = True, # Whether discoverable by other cluster nodes -) -> None -# Internally: -# 1. Initialize Pulsing (if not yet initialized in this process) -# 2. Wrap instance as a Pulsing actor -# 3. 
Register on Pulsing network, gossip broadcasts the name - -# Unmount (call when actor is destroyed) -pul.unmount(name: str) -> None - -# Cleanup Pulsing state in Ray environment (call before ray.shutdown()) -pul.cleanup_ray() -> None -``` -Example: Ray handles process scheduling, Pulsing handles inter-actor communication. +### 3. Supervision & Restart ```python -import ray, pulsing as pul - -@ray.remote -class Worker: - def __init__(self, name): - pul.mount(self, name=name) # One line to join Pulsing - - async def call_peer(self, peer_name, msg): - proxy = (await pul.resolve(peer_name, timeout=30)).as_any() - return await proxy.greet(msg) # Cross-process Pulsing call - - async def greet(self, msg): - return f"hello from {self.name}: {msg}" - -ray.init() -workers = [Worker.remote(f"w{i}") for i in range(3)] -ray.get(workers[0].call_peer.remote("w1", "hi")) # => "hello from w1: hi" -pul.cleanup_ray() +@pul.remote( + restart_policy="on_failure", # "never" | "on_failure" / "on-failure" | "always" + max_restarts=3, min_backoff=0.1, max_backoff=30.0, +) +class ResilientWorker: + def process(self, data): return heavy_computation(data) ``` -### Under the Hood: Actor System & Low-level APIs - -The global API is backed by an `ActorSystem` instance. You can create one explicitly when you need multiple systems or finer control. The low-level `spawn`/`refer`/`resolve` APIs operate on `ActorRef` (not typed proxy) and require actors to implement a `receive(self, msg)` method. +### 4. 
Queue (distributed data pipeline) ```python -import pulsing as pul - -# ── Explicit ActorSystem ── - -system = await pul.actor_system( - addr: str | None = None, - *, - seeds: list[str] | None = None, - passphrase: str | None = None -) -> ActorSystem - -await system.shutdown() - -# ── Low-level spawn (actor must have receive method) ── - -actorref = await pul.spawn( # global system - actor: Actor, - *, - name: str | None = None, - public: bool = False, - restart_policy: str = "never", - max_restarts: int = 3, - min_backoff: float = 0.1, - max_backoff: float = 30.0 -) -> ActorRef - -actorref = await system.spawn( # explicit system, same signature - actor: Actor, ... -) -> ActorRef - -# ── Low-level resolve / refer ── - -actorref = await pul.refer(actorid: ActorId | str) -> ActorRef -actorref = await pul.resolve(name: str, *, node_id=None, timeout=None) -> ActorRef -actorref = await system.resolve(name: str, *, node_id=None) -> ActorRef - -# ── ActorRef message passing ── - -response = await actorref.ask(request: Any) -> Any -await actorref.tell(msg: Any) -> None - -# ── @pul.remote with explicit system ── - -counter = await Counter.local(system, name="counter") # spawn on explicit system -result = await counter.incr() +writer = await pul.queue.write("my_queue", bucket_column="id", num_buckets=4, + batch_size=100, backend="memory") # -> Queue +await writer.put({"id": "u1", "data": "hello"}) +await writer.flush() -# Queue / Topic on explicit system (same API as pul.queue / pul.topic) -writer = await system.queue.write("my_queue") -reader = await system.queue.read("my_queue") -writer = await system.topic.write("events") -reader = await system.topic.read("events") +reader = await pul.queue.read("my_queue", rank=0, world_size=4) # -> QueueReader +records = await reader.get(limit=100, wait=False) ``` -### Actor Behavior - -#### Basic Actor (using `receive` method) +### 5. 
Topic (pub/sub) ```python -from pulsing.actor import Actor - -class EchoActor(Actor): - """receive method - sync or async, framework auto-detects""" +writer = await pul.topic.write("events") +await writer.publish({"type": "login", "user": "alice"}) - # Option 1: Synchronous - def receive(self, msg): - return msg +reader = await pul.topic.read("events") - # Option 2: Asynchronous (use when you need await) - async def receive(self, msg): - result = await some_async_operation() - return result +@reader.on_message +async def handle(msg): print(msg) -class FireAndForget(Actor): - """No return value (suitable for tell calls)""" - def receive(self, msg): - print(f"Received: {msg}") - # No return value +await reader.start() ``` -**Note:** `receive` can be `def` or `async def`, Pulsing auto-detects and handles both correctly. -Only use `async def` when the method body needs to `await` other coroutines. - -#### @pul.remote Decorator (Recommended) +### 6. Cluster & Integrations ```python -import pulsing as pul - -@pul.remote -class Counter: - def __init__(self, init=0): - self.value = init - - # Sync method - blocks actor, requests execute sequentially - # Best for: fast computation, state mutation - def incr(self): - self.value += 1 - return self.value - - # Async method - non-blocking, can handle other requests during await - # Best for: IO-bound operations (network, database) - async def fetch_and_add(self, url): - data = await http_get(url) # Other requests served during await - self.value += data - return self.value - - # No return value - suitable for tell() calls - def reset(self): - self.value = 0 - -# Sync vs async concurrency behavior: -# - def method(): Blocks actor, requests queued sequentially -# - async def method(): Non-blocking, concurrent during await - -# Usage -counter = await Counter.spawn(name="counter") -result = await counter.incr() # ask mode, waits for return -await counter.reset() # No return value, but still waits for completion +# ── Auto cluster 
formation (Ray / torchrun) ── +pul.bootstrap(*, ray=True, torchrun=True, on_ready=None, wait_timeout=None) -> bool | None + +# ── Per-worker init ── +pul.init_inside_ray() # join cluster from Ray worker +pul.init_inside_torchrun() # join cluster from torchrun worker +pul.cleanup_ray() # call before ray.shutdown() + +# Typical Ray driver: +from pulsing.integrations.ray import init_in_ray +ray.init(runtime_env={"worker_process_setup_hook": init_in_ray}) +pul.bootstrap(ray=True, wait_timeout=30) ``` -#### Message Passing Patterns +**mount / unmount** — register any object as a Pulsing actor (useful in Ray actors): ```python -# ask - send message and wait for response -response = await actorref.ask({"action": "get"}) +pul.mount(instance, *, name: str, public: bool = True) # sync, can be called in __init__ +pul.unmount(name: str) -# tell - send message, don't wait (fire-and-forget) -await actorref.tell({"action": "log", "data": "hello"}) -``` +# Example +@ray.remote +class Worker: + def __init__(self, name): + pul.mount(self, name=name) -#### Optional Zerocopy Descriptor Protocol + async def call_peer(self, peer, msg): + return await (await pul.resolve(peer, timeout=30)).as_any().greet(msg) +``` -Pulsing supports an optional zerocopy fast path to bypass pickle serialization for eligible -Python objects. If the object does not provide the protocol, Pulsing falls back to existing -pickle-based transport automatically. +### 7. Error Handling ```python -from pulsing.core import ZeroCopyDescriptor - -class MyTensorLike: - def __zerocopy__(self, ctx): - return ZeroCopyDescriptor( - buffers=[memoryview(self.buffer)], - dtype="float32", - shape=[1024], - strides=[4], - transport="inline", # e.g. 
inline/shm - checksum=None, # optional - version=1, - ) +from pulsing import ( + PulsingError, # Base + PulsingRuntimeError, # Framework-level (actor not found, transport, cluster) + PulsingActorError, # User actor execution errors + PulsingBusinessError, # Business logic (code, message, details) + PulsingSystemError, # Internal (error, recoverable) + PulsingTimeoutError, # Timeout (operation, duration_ms) + PulsingUnsupportedError, # Unsupported operation +) ``` -Rules: - -- `__zerocopy__(ctx)` is optional; missing protocol means fallback to pickle. -- Descriptor is the single source of truth (no separate `__metadata__`). -- Zerocopy is an optimization path for reduced serialization and buffer copies. -- `buffers` should provide contiguous Python buffer views (e.g. `memoryview`, tensor buffer, `bytearray`) to avoid extra Python-side copy. -- Payload validation failure or unsupported descriptor always falls back to pickle unless explicitly forced by runtime config. - -**Automatic stream transfer for large payloads:** +### 8. Advanced: Low-level API & Actor System -When the total buffer size exceeds a threshold (default 64 KB), Pulsing automatically uses a descriptor-first stream transfer instead of packing everything into a single message: - -1. A lightweight descriptor header (dtype, shape, strides, buffer lengths) is sent as the first stream frame. -2. Buffer data follows as a sequence of raw chunk frames, each up to `PULSING_ZEROCOPY_CHUNK_BYTES` (default 1 MB). -3. The receiver pre-allocates buffers based on the descriptor and fills them incrementally as chunks arrive. - -Small payloads below the threshold are still sent as a single message with descriptor + data packed together. This is transparent to the user — `actor.receive()` always gets a `ZeroCopyDescriptor` regardless of the transfer mode. 
- -Environment variables: -- `PULSING_ZEROCOPY`: `auto` (default) / `off` / `force` -- `PULSING_ZEROCOPY_STREAM_THRESHOLD`: minimum buffer size in bytes to trigger stream transfer (default 65536) -- `PULSING_ZEROCOPY_CHUNK_BYTES`: chunk size in bytes for stream transfer (default 1048576, minimum 4096) - -#### Actor Lifecycle +The global API is backed by an `ActorSystem`. Create one explicitly for finer control. Low-level APIs operate on `ActorRef` and require a `receive(self, msg)` method. ```python -from pulsing.actor import Actor, ActorId +system = await pul.actor_system(addr=..., seeds=..., passphrase=...) -class MyActor(Actor): - def on_start(self, actor_id: ActorId): - """Called when actor starts""" - print(f"Started: {actor_id}") +# Low-level spawn (actor must have receive method) +ref = await pul.spawn(actor, *, name=None, public=False, + restart_policy="never", max_restarts=3, + min_backoff=0.1, max_backoff=30.0) -> ActorRef - def on_stop(self): - """Called when actor stops""" - print("Stopping...") +# Message passing on ActorRef +response = await ref.ask(request) # request-response +await ref.tell(msg) # fire-and-forget - def metadata(self) -> dict[str, str]: - """Return actor metadata (for diagnostics)""" - return {"type": "worker", "version": "1.0"} +# Resolve / refer +ref = await pul.refer(actor_id) # by ActorId +ref = await pul.resolve(name, *, node_id=None, timeout=None) - async def receive(self, msg): - return msg +# Queue / Topic on explicit system +writer = await system.queue.write("q"); reader = await system.queue.read("q") +writer = await system.topic.write("t"); reader = await system.topic.read("t") ``` -#### Supervision and Restart Policies +**Actor base class** (for low-level use): ```python -@pul.remote( - restart_policy="on_failure", # "never" | "on_failure" | "always" - max_restarts=3, # Maximum restart attempts - min_backoff=0.1, # Minimum backoff time (seconds) - max_backoff=30.0, # Maximum backoff time (seconds) -) -class 
ResilientWorker: - def process(self, data): - # Actor auto-restarts on exception - return heavy_computation(data) -``` - -#### Streaming Responses +from pulsing.core import Actor, ActorId -```python -@pul.remote -class StreamingService: - # Return a generator, Pulsing auto-handles as streaming response - async def generate_stream(self, n): - for i in range(n): - yield f"chunk_{i}" - - # Sync generators also supported - def sync_stream(self, n): - for i in range(n): - yield f"item_{i}" - -# Usage -service = await StreamingService.spawn() - -# Client consumes stream -async for chunk in service.generate_stream(10): - print(chunk) # chunk_0, chunk_1, ... +class MyActor(Actor): + def on_start(self, actor_id: ActorId): ... # lifecycle hook + def on_stop(self): ... + def metadata(self) -> dict[str, str]: ... # diagnostics + async def receive(self, msg): return msg # sync or async, auto-detected ``` -**Note:** For `@pul.remote` classes, simply return a generator (sync or async) and Pulsing auto-detects and handles it as a streaming response. 
- -### Queue API - -Distributed queue with bucket-based partitioning, for data pipelines: +**Zerocopy** — optional fast path bypassing pickle for buffer objects: ```python -import pulsing as pul - -await pul.init() - -# ── Write ── -writer = await pul.queue.write( - "my_queue", - *, - bucket_column: str = "id", # Column for partitioning - num_buckets: int = 4, - batch_size: int = 100, - storage_path: str | None = None, - backend: str = "memory", # Pluggable: "memory" or custom -) -> QueueWriter - -await writer.put({"id": "u1", "data": "hello"}) -await writer.put([{"id": "u1", "data": "a"}, {"id": "u2", "data": "b"}]) -await writer.flush() - -# ── Read ── -reader = await pul.queue.read( - "my_queue", - *, - bucket_id: int | None = None, - bucket_ids: list[int] | None = None, - rank: int | None = None, # For distributed consumption - world_size: int | None = None, - num_buckets: int = 4, -) -> QueueReader +from pulsing.core import ZeroCopyDescriptor -records = await reader.get(limit=100, wait=False) +class MyTensor: + def __zerocopy__(self, ctx): + return ZeroCopyDescriptor( + buffers=[memoryview(self.buf)], dtype="float32", + shape=[1024], strides=[4], transport="inline", + ) +# Missing __zerocopy__ → automatic pickle fallback +# Large buffers (>64KB) auto-use stream transfer (descriptor + chunked data) ``` -### Topic API - -Lightweight pub/sub for real-time message distribution: - -```python -import pulsing as pul - -await pul.init() - -# ── Publish ── -writer = await pul.topic.write("events") -await writer.publish({"type": "user_login", "user": "alice"}) - -# ── Subscribe ── -reader = await pul.topic.read("events") - -@reader.on_message -async def handle(msg): - print(f"Received: {msg}") +Env vars: `PULSING_ZEROCOPY` (`auto`/`off`/`force`), `PULSING_ZEROCOPY_STREAM_THRESHOLD` (default 65536), `PULSING_ZEROCOPY_CHUNK_BYTES` (default 1048576). 
-await reader.start() -``` +--- ## Rust API -Rust API defines contracts via traits, organized in three layers: - ### Quick Start ```rust use pulsing_actor::prelude::*; -#[derive(Serialize, Deserialize)] -struct Ping(i32); - -#[derive(Serialize, Deserialize)] -struct Pong(i32); - +#[derive(Serialize, Deserialize)] struct Ping(i32); +#[derive(Serialize, Deserialize)] struct Pong(i32); struct Echo; #[async_trait] @@ -464,109 +244,53 @@ impl Actor for Echo { } } -#[tokio::main] -async fn main() -> anyhow::Result<()> { - let system = ActorSystem::builder().build().await?; - - // Named actor (discoverable via resolve, uses namespace/name format) - let actor = system.spawn_named("services/echo", Echo).await?; - let Pong(x): Pong = actor.ask(Ping(1)).await?; - - // Anonymous actor (accessible only via ActorRef) - let worker = system.spawn(Worker::new()).await?; - - system.shutdown().await?; - Ok(()) -} +let system = ActorSystem::builder().build().await?; +let echo = system.spawn_named("services/echo", Echo).await?; +let Pong(x): Pong = echo.ask(Ping(1)).await?; +system.shutdown().await?; ``` -### Trait Layers - -#### ActorSystemCoreExt (Main path, auto-imported via prelude) - -Core spawn and resolve capabilities: +### Spawn & Resolve ```rust -// Spawn - Simple API -system.spawn(actor).await?; // Anonymous actor (not resolvable) -system.spawn_named(name, actor).await?; // Named actor (resolvable) +// Simple +system.spawn(actor).await?; // anonymous +system.spawn_named(name, actor).await?; // named (resolvable) -// Spawn - Builder pattern (advanced configuration) +// Builder pattern system.spawning() - .name("services/counter") // Optional: named = resolvable + .name("services/counter") .supervision(SupervisionSpec::on_failure().max_restarts(3)) .mailbox_capacity(256) .spawn(actor).await?; -// Resolve - Simple -system.actor_ref(&actor_id).await?; // By ActorId -system.resolve(name).await?; // By name - -// Resolve - Builder pattern (advanced configuration) 
-system.resolving() - .node(node_id) // Optional: target node - .policy(RoundRobinPolicy::new()) // Optional: load balancing - .filter_alive(true) // Optional: alive nodes only - .resolve(name).await?; // Resolve single - -system.resolving() - .list(name).await?; // Get all instances +// Named + factory (restartable supervision) +system.spawn_named_factory(name, || Ok(Service::new()), options).await?; -system.resolving() - .lazy(name)?; // Lazy resolve +// Resolve +system.resolve(name).await?; // one-shot +system.resolving().lazy(name)?; // lazy + auto-refresh (~5s TTL) +system.resolving().node(id).policy(RoundRobinPolicy::new()).resolve(name).await?; ``` -#### ActorSystemAdvancedExt (Advanced: restartable supervision) - -Factory-pattern spawn with supervision restart (named actors only): +### Operations ```rust -// Named actor + factory (restartable + resolvable) -// Note: anonymous actors don't support supervision (cannot re-resolve) -system.spawn_named_factory(name, || Ok(Service::new()), options).await?; +system.node_id(); system.addr(); system.members().await; +system.all_named_actors().await; system.stop(name).await?; system.shutdown().await?; ``` -#### ActorSystemOpsExt (Operations / Diagnostics / Lifecycle) - -System info, cluster membership, stop/shutdown: +### Behavior (Type-safe, Akka Typed style) ```rust -system.node_id(); -system.addr(); -system.members().await; -system.all_named_actors().await; -system.stop(name).await?; -system.shutdown().await?; +fn counter(init: i32) -> Behavior { + stateful(init, |count, n, _ctx| { *count += n; BehaviorAction::Same }) +} +let c = system.spawn_named("actors/counter", counter(0)).await?; ``` ### Key Conventions -- **Message encoding**: `Message::pack(&T)` uses bincode + `type_name::()`; for cross-version protocols use `Message::single("TypeV1", bytes)`. 
-- **Optional zerocopy**: when payload objects implement `__zerocopy__(ctx)`, Pulsing may bypass pickle and send descriptor + buffers directly; otherwise it uses normal pickle/bytes paths. -- **Naming and resolution**: - - `spawn_named(name, actor)`: Creates a discoverable actor, name is the resolution path - - `resolve(name)`: One-shot resolve (may become stale after migration) - - `resolve_lazy(name)`: Lazy resolve + auto-refresh (~5s TTL) +- **Message encoding**: `Message::pack(&T)` (bincode); cross-version: `Message::single("TypeV1", bytes)`. - **Streaming**: Return `Message::Stream`, cancellation is best-effort. -- **Supervision**: Only `spawn_named_factory` supports failure restart; anonymous actors do not support supervision. - -### Behavior (Type-safe, Akka Typed style) - -- **Core**: `Behavior` + `TypedRef` + `BehaviorAction (Same/Become/Stop)` -- **Constraint**: `TypedRef` requires `M: Serialize + DeserializeOwned + Send + 'static` - -Defined using function syntax, otherwise identical to Actor: - -```rust -fn counter(init: i32) -> Behavior { - stateful(init, |count, n, _ctx| { - *count += n; - BehaviorAction::Same - }) -} - -// Behavior implements IntoActor, can be passed directly to spawn/spawn_named -// No manual wrapping needed, system converts automatically -let counter = system.spawn(counter(0)).await?; -let counter = system.spawn_named("actors/counter", counter(0)).await?; -``` +- **Supervision**: Only `spawn_named_factory` supports restart; anonymous actors do not. From 7b7a54eab96408f8de396f66c8248c166b589548 Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 1 Mar 2026 20:58:52 +0800 Subject: [PATCH 5/5] Enhance Pulsing framework's actor resolution and documentation - Updated the `resolve` function to support typed proxies, allowing for direct method calls without needing to use `.as_type()` or `.as_any()`. 
- Improved documentation across multiple files to clarify the usage of `pul.resolve()` for both typed and untyped proxies, enhancing user understanding. - Refactored examples to demonstrate the new resolution capabilities and best practices for actor management. - Consolidated references to actor resolution methods in the API documentation, ensuring consistency and clarity for users transitioning from previous versions. --- docs/src/api/overview.md | 13 +- docs/src/api/overview.zh.md | 6 +- docs/src/api_reference.md | 19 +- docs/src/api_reference.zh.md | 20 +- docs/src/design/load_sync.md | 2 +- docs/src/design/load_sync.zh.md | 2 +- docs/src/guide/queue.md | 12 +- docs/src/guide/reliability.md | 6 +- docs/src/guide/reliability.zh.md | 6 +- docs/src/guide/remote_actors.md | 38 ++-- docs/src/guide/remote_actors.zh.md | 40 ++-- docs/src/guide/semantics.md | 2 + docs/src/guide/semantics.zh.md | 2 + docs/src/quickstart/migrate_from_ray.md | 6 +- docs/src/quickstart/migrate_from_ray.zh.md | 6 +- examples/agent/README.md | 4 +- examples/agent/autogen/README.md | 2 +- examples/agent/langgraph/README.md | 2 +- examples/python/README.md | 35 ++-- llms.binding.md | 22 +- python/pulsing/core/remote.py | 26 ++- python/pulsing/examples/counting_game.py | 2 +- .../python/apis/ray_like/test_ray_like_api.py | 7 +- tests/python/test_resolve_as_any.py | 190 +++++++++--------- 24 files changed, 266 insertions(+), 204 deletions(-) diff --git a/docs/src/api/overview.md b/docs/src/api/overview.md index 05aedf0e5..630a2a46c 100644 --- a/docs/src/api/overview.md +++ b/docs/src/api/overview.md @@ -89,16 +89,15 @@ result = await counter.incr() # direct method call ```python # Typed proxy — when you know the class -proxy = await Counter.resolve("counter") +proxy = await pul.resolve("counter", cls=Counter, timeout=30) result = await proxy.incr() -# Typed proxy — manual bind -ref = await pul.resolve("counter", timeout=30) -proxy = ref.as_type(Counter) +# Or via ActorClass (same result) +proxy 
= await Counter.resolve("counter") +result = await proxy.incr() -# Untyped proxy — when remote type is unknown -ref = await pul.resolve("service_name") -proxy = ref.as_any() +# Untyped proxy — when remote type is unknown (any method call) +proxy = await pul.resolve("service_name") result = await proxy.any_method(args) ``` diff --git a/docs/src/api/overview.zh.md b/docs/src/api/overview.zh.md index 697756b41..e32e8f0a0 100644 --- a/docs/src/api/overview.zh.md +++ b/docs/src/api/overview.zh.md @@ -93,12 +93,10 @@ proxy = await Counter.resolve("counter") result = await proxy.incr() # 类型化代理 — 手动绑定 -ref = await pul.resolve("counter", timeout=30) -proxy = ref.as_type(Counter) +proxy = await pul.resolve("counter", cls=Counter, timeout=30) # 无类型代理 — 远端类型未知时 -ref = await pul.resolve("service_name") -proxy = ref.as_any() +proxy = await pul.resolve("service_name") result = await proxy.any_method(args) ``` diff --git a/docs/src/api_reference.md b/docs/src/api_reference.md index 8a20e0248..3b8b11cb7 100644 --- a/docs/src/api_reference.md +++ b/docs/src/api_reference.md @@ -203,8 +203,8 @@ class ActorSystem: """Get ActorRef by ActorId.""" pass - async def resolve(self, name, *, node_id=None): - """Resolve actor by name.""" + async def resolve(self, name, *, node_id=None, timeout=None): + """Resolve actor by name. Returns ActorRef (low-level). For a ready-to-use proxy, use top-level pul.resolve(name, cls=...).""" pass async def shutdown(self): @@ -212,9 +212,22 @@ class ActorSystem: pass ``` +### Top-level resolve (recommended) + +`pul.resolve()` returns an **ActorProxy** directly (no need for `.as_type()` / `.as_any()`): + +```python +# Typed proxy +proxy = await pul.resolve("counter", cls=Counter, timeout=30) + +# Untyped proxy (any method) +proxy = await pul.resolve("service_name", timeout=30) +# Access underlying ActorRef if needed: proxy.ref +``` + ### ActorRef -Low-level reference to an actor. Use `ask()` and `tell()` to communicate. +Low-level reference (e.g. 
from `system.resolve()`). Use `ask()` and `tell()` to communicate, or `.as_any()` / `.as_type(cls)` to get an ActorProxy. ```python class ActorRef: diff --git a/docs/src/api_reference.zh.md b/docs/src/api_reference.zh.md index a4da617bb..4ccf19c17 100644 --- a/docs/src/api_reference.zh.md +++ b/docs/src/api_reference.zh.md @@ -210,13 +210,14 @@ class ActorSystem: """ pass - async def resolve(self, name, *, node_id=None): + async def resolve(self, name, *, node_id=None, timeout=None): """ - 通过名称解析 actor。 + 通过名称解析 actor。返回 ActorRef(低层 API)。若需即用型 proxy,请使用顶层 `pul.resolve(name, cls=...)`。 **参数:** - `name`: Actor 名称(str) - `node_id`: 目标节点 ID(int 或 None) + - `timeout`: 重试超时(秒,可选) **返回:** 对应 actor 的 ActorRef """ @@ -227,9 +228,22 @@ class ActorSystem: pass ``` +### 顶层 resolve(推荐) + +`pul.resolve()` 直接返回 **ActorProxy**,无需再调用 `.as_type()` / `.as_any()`: + +```python +# 有类型 proxy +proxy = await pul.resolve("counter", cls=Counter, timeout=30) + +# 无类型 proxy(任意方法) +proxy = await pul.resolve("service_name", timeout=30) +# 需要底层 ActorRef 时:proxy.ref +``` + ### ActorRef -Actor 的底层引用。使用 `ask()` 和 `tell()` 进行通信。 +低层引用(例如来自 `system.resolve()`)。使用 `ask()` / `tell()` 通信,或通过 `.as_any()` / `.as_type(cls)` 得到 ActorProxy。 ```python class ActorRef: diff --git a/docs/src/design/load_sync.md b/docs/src/design/load_sync.md index f0bbb2f00..a8596f654 100644 --- a/docs/src/design/load_sync.md +++ b/docs/src/design/load_sync.md @@ -64,7 +64,7 @@ runner = await start_router( system, http_port=8080, model_name="my-model", - scheduler_type="stream_load", # 默认值 + scheduler_type="stream_load", # 默认值;也支持 scheduler=... 
实例。不支持 scheduler_class。 ) ``` diff --git a/docs/src/design/load_sync.zh.md b/docs/src/design/load_sync.zh.md index f0bbb2f00..a8596f654 100644 --- a/docs/src/design/load_sync.zh.md +++ b/docs/src/design/load_sync.zh.md @@ -64,7 +64,7 @@ runner = await start_router( system, http_port=8080, model_name="my-model", - scheduler_type="stream_load", # 默认值 + scheduler_type="stream_load", # 默认值;也支持 scheduler=... 实例。不支持 scheduler_class。 ) ``` diff --git a/docs/src/guide/queue.md b/docs/src/guide/queue.md index 9a02cb08a..c5fd7d765 100644 --- a/docs/src/guide/queue.md +++ b/docs/src/guide/queue.md @@ -251,12 +251,12 @@ Recommended patterns: ## Where to look in code -- `python/pulsing/queue/queue.py`: high-level `Queue`, `write_queue`, `read_queue` -- `python/pulsing/queue/manager.py`: `StorageManager` and bucket routing / redirects -- `python/pulsing/queue/storage.py`: `BucketStorage` (delegates to `StorageBackend`) -- `python/pulsing/queue/backend.py`: `StorageBackend` protocol and `MemoryBackend` -- `examples/python/distributed_queue.py`: end-to-end example -- `tests/python/test_queue.py`: behavior + stress tests +- `python/pulsing/streaming/queue.py`: high-level `Queue`, `write_queue`, `read_queue` +- `python/pulsing/streaming/manager.py`: `StorageManager` and bucket routing / redirects +- `python/pulsing/streaming/storage.py`: `BucketStorage` (delegates to `StorageBackend`) +- `python/pulsing/streaming/backend.py`: `StorageBackend` protocol and `MemoryBackend` +- `examples/python/distributed_queue.py`: end-to-end example (if present) +- `tests/python/streaming/test_queue*.py`: behavior + stress tests ## Related Projects diff --git a/docs/src/guide/reliability.md b/docs/src/guide/reliability.md index c30808094..c65d4d15b 100644 --- a/docs/src/guide/reliability.md +++ b/docs/src/guide/reliability.md @@ -13,11 +13,11 @@ This page collects **practical reliability rules** for building production syste Prefer explicit timeouts on `ask`: ```python -from pulsing.core import
ask_with_timeout - -result = await ask_with_timeout(ref, {"op": "compute"}, timeout=10.0) +result = await asyncio.wait_for(ref.ask({"op": "compute"}), timeout=10.0) ``` +For proxy method calls: `await asyncio.wait_for(proxy.compute(), timeout=10.0)`. + ## Retries (application-level) Pulsing does not hide retries for you. If you retry, assume duplicates are possible. diff --git a/docs/src/guide/reliability.zh.md b/docs/src/guide/reliability.zh.md index 5104314f4..6f41fa847 100644 --- a/docs/src/guide/reliability.zh.md +++ b/docs/src/guide/reliability.zh.md @@ -13,11 +13,11 @@ 对 `ask` 建议显式加超时: ```python -from pulsing.core import ask_with_timeout - -result = await ask_with_timeout(ref, {"op": "compute"}, timeout=10.0) +result = await asyncio.wait_for(ref.ask({"op": "compute"}), timeout=10.0) ``` +对 proxy 方法调用:`await asyncio.wait_for(proxy.compute(), timeout=10.0)`。 + ## 重试(放在业务层) Pulsing 不会替你“隐式重试”。一旦你做重试,就要默认可能出现重复处理。 diff --git a/docs/src/guide/remote_actors.md b/docs/src/guide/remote_actors.md index 400c6973f..2aabb65f7 100644 --- a/docs/src/guide/remote_actors.md +++ b/docs/src/guide/remote_actors.md @@ -45,16 +45,18 @@ await asyncio.sleep(1.0) ## Finding Remote Actors -### Using system.resolve() +### Using pul.resolve() (recommended) + +`pul.resolve()` returns an **ActorProxy** directly — no need to call `.as_type()` or `.as_any()`: ```python -# Find actor by name (searches entire cluster) -remote_ref = await system.resolve("worker") -response = await remote_ref.ask({"action": "process", "data": "hello"}) +# Typed proxy — when you know the class +proxy = await pul.resolve("worker", cls=Worker, timeout=30) +result = await proxy.process("hello") -# Convert ActorRef to proxy -any_proxy = remote_ref.as_any() # Unspecified/unknown type -typed_proxy = remote_ref.as_type(Worker) # Typed proxy when class is known +# Untyped proxy — when remote type is unknown (any method) +proxy = await pul.resolve("worker", timeout=30) +result = await proxy.process("hello") ``` ### 
Using @remote Class.resolve() @@ -64,13 +66,23 @@ typed_proxy = remote_ref.as_type(Worker) # Typed proxy when class is known class Worker: def process(self, data): return f"processed: {data}" -# Resolve with type info - returns ActorProxy with methods +# Same as pul.resolve("worker", cls=Worker) worker = await Worker.resolve("worker") result = await worker.process("hello") # Direct method call ``` +### Using system.resolve() (low-level) + +When you need the raw **ActorRef** (e.g. for `.ask()` / `.tell()` or to pass to other APIs): + +```python +remote_ref = await system.resolve("worker") +response = await remote_ref.ask({"action": "process", "data": "hello"}) +# Get proxy from ref if needed: remote_ref.as_any() or remote_ref.as_type(Worker) +``` + !!! note - For new code, prefer `Class.resolve(name)` (typed proxy). Use `system.resolve(name)` when you only have a runtime name and then call `.as_type()` / `.as_any()` on the returned `ActorRef`. + Prefer `pul.resolve(name, cls=...)` or `Class.resolve(name)` for a ready-to-use proxy. Use `system.resolve(name)` only when you need the low-level `ActorRef`. ## Named vs Anonymous Actors @@ -172,22 +184,22 @@ except PulsingRuntimeError as e: Use timeouts for remote calls to avoid indefinite waits: ```python -from pulsing.core import ask_with_timeout - try: - response = await ask_with_timeout(remote_ref, msg, timeout=10.0) + response = await asyncio.wait_for(remote_ref.ask(msg), timeout=10.0) except asyncio.TimeoutError: print("Request timed out") except PulsingRuntimeError as e: print(f"Remote call failed: {e}") ``` +For proxy method calls: `await asyncio.wait_for(proxy.some_method(), timeout=10.0)`. + ## Best Practices 1. **Wait for cluster sync**: Add a small delay after joining a cluster 2. **Handle errors gracefully**: Wrap remote calls in try-except blocks 3. **Use named actors**: Actors that need remote access must have a `name` -4. **Use @remote with resolve()**: Get typed proxies for better API experience +4. 
**Use pul.resolve(name, cls=...) or Class.resolve(name)**: Get typed proxies for better API experience 5. **Use timeouts**: Consider adding timeouts for remote calls ## Example: Distributed Counter diff --git a/docs/src/guide/remote_actors.zh.md b/docs/src/guide/remote_actors.zh.md index 3fbba4584..1035419a6 100644 --- a/docs/src/guide/remote_actors.zh.md +++ b/docs/src/guide/remote_actors.zh.md @@ -45,16 +45,18 @@ await asyncio.sleep(1.0) ## 查找远程 Actor -### 使用 system.resolve() +### 使用 pul.resolve()(推荐) + +`pul.resolve()` 直接返回 **ActorProxy**,无需再调用 `.as_type()` 或 `.as_any()`: ```python -# 按名称查找 actor(搜索整个集群) -remote_ref = await system.resolve("worker") -response = await remote_ref.ask({"action": "process", "data": "hello"}) +# 类型化代理 — 已知 actor 类型时 +proxy = await pul.resolve("worker", cls=Worker, timeout=30) +result = await proxy.process("hello") -# 将 ActorRef 转换为代理 -any_proxy = remote_ref.as_any() # 未知类型时使用 -typed_proxy = remote_ref.as_type(Worker) # 已知类型时使用 +# 无类型代理 — 远端类型未知时(任意方法) +proxy = await pul.resolve("worker", timeout=30) +result = await proxy.process("hello") ``` ### 使用 @remote 类的 resolve() @@ -64,13 +66,23 @@ typed_proxy = remote_ref.as_type(Worker) # 已知类型时使用 class Worker: def process(self, data): return f"processed: {data}" -# 带类型信息解析 - 返回带方法的 ActorProxy +# 等价于 pul.resolve("worker", cls=Worker) worker = await Worker.resolve("worker") -result = await worker.process("hello") # 直接调用方法 +result = await worker.process("hello") # 直接方法调用 +``` + +### 使用 system.resolve()(低层) + +需要原始 **ActorRef** 时(例如使用 `.ask()` / `.tell()` 或传入其他 API): + +```python +remote_ref = await system.resolve("worker") +response = await remote_ref.ask({"action": "process", "data": "hello"}) +# 需要 proxy 时:remote_ref.as_any() 或 remote_ref.as_type(Worker) ``` !!! 
note - 新代码优先使用 `Class.resolve(name)`(typed proxy)。仅在只有运行时名称时使用 `system.resolve(name)`,随后对返回的 `ActorRef` 调用 `.as_type()` / `.as_any()`。 + 推荐使用 `pul.resolve(name, cls=...)` 或 `Class.resolve(name)` 获得即用型 proxy。仅在需要低层 `ActorRef` 时使用 `system.resolve(name)`。 ## 命名 vs 匿名 Actor @@ -172,22 +184,22 @@ except PulsingRuntimeError as e: 为远程调用使用超时,避免无限等待: ```python -from pulsing.core import ask_with_timeout - try: - response = await ask_with_timeout(remote_ref, msg, timeout=10.0) + response = await asyncio.wait_for(remote_ref.ask(msg), timeout=10.0) except asyncio.TimeoutError: print("请求超时") except PulsingRuntimeError as e: print(f"远程调用失败: {e}") ``` +对 proxy 方法调用可使用:`await asyncio.wait_for(proxy.some_method(), timeout=10.0)`。 + ## 最佳实践 1. **等待集群同步**:加入集群后添加短暂延迟 2. **优雅处理错误**:在 try-except 块中包装远程调用 3. **使用命名 actor**:需要远程访问的 actor 必须有 `name` -4. **使用 @remote 与 resolve()**:获取有类型的代理以获得更好的 API 体验 +4. **使用 pul.resolve(name, cls=...) 或 Class.resolve(name)**:获取有类型代理以获得更好的 API 体验 5. **使用超时**:考虑为远程调用添加超时 ## 示例:分布式计数器 diff --git a/docs/src/guide/semantics.md b/docs/src/guide/semantics.md index 2be48b8b4..24e4580f2 100644 --- a/docs/src/guide/semantics.md +++ b/docs/src/guide/semantics.md @@ -72,6 +72,8 @@ Practical implication: ## Streaming semantics (`StreamMessage`) +Use `from pulsing.core.messaging import Message, StreamMessage` when you need these types (they are not exported from top-level `pulsing`). + `StreamMessage.create(msg_type, buffer_size=32)` returns `(stream_msg, writer)`. 
### Stream composition diff --git a/docs/src/guide/semantics.zh.md b/docs/src/guide/semantics.zh.md index 378893920..528b04beb 100644 --- a/docs/src/guide/semantics.zh.md +++ b/docs/src/guide/semantics.zh.md @@ -69,6 +69,8 @@ ## 流式语义(`StreamMessage`) +需要这些类型时请使用 `from pulsing.core.messaging import Message, StreamMessage`(顶层 `pulsing` 不导出)。 + `StreamMessage.create(msg_type, buffer_size=32)` 返回 `(stream_msg, writer)`。 ### 流的组成 diff --git a/docs/src/quickstart/migrate_from_ray.md b/docs/src/quickstart/migrate_from_ray.md index 0a13092ed..2911a3a12 100644 --- a/docs/src/quickstart/migrate_from_ray.md +++ b/docs/src/quickstart/migrate_from_ray.md @@ -25,7 +25,7 @@ class Worker: pul.mount(self, name=name) # One line: join the Pulsing network async def call_peer(self, peer_name, msg): - proxy = (await pul.resolve(peer_name, timeout=30)).as_any() + proxy = await pul.resolve(peer_name, timeout=30) return await proxy.greet(msg) # Cross-process Pulsing call async def greet(self, msg): @@ -142,8 +142,8 @@ result = await worker.process("hello") ## Notes -- Prefer typed proxy: `await Class.resolve(name)`. -- If only a runtime name is available: `ref = await pul.resolve(name)` then `ref.as_type(Class)` / `ref.as_any()`. +- Prefer typed proxy: `await pul.resolve(name, cls=Class)` or `await Class.resolve(name)`. +- If only a runtime name is available: `proxy = await pul.resolve(name)` — you can call any method directly (untyped). 
--- diff --git a/docs/src/quickstart/migrate_from_ray.zh.md b/docs/src/quickstart/migrate_from_ray.zh.md index 5eea3bfa8..1eec423fb 100644 --- a/docs/src/quickstart/migrate_from_ray.zh.md +++ b/docs/src/quickstart/migrate_from_ray.zh.md @@ -25,7 +25,7 @@ class Worker: pul.mount(self, name=name) # 一行代码:接入 Pulsing 网络 async def call_peer(self, peer_name, msg): - proxy = (await pul.resolve(peer_name, timeout=30)).as_any() + proxy = await pul.resolve(peer_name, timeout=30) return await proxy.greet(msg) # 跨进程 Pulsing 调用 async def greet(self, msg): @@ -142,8 +142,8 @@ result = await worker.process("hello") ## 说明 -- 优先使用 typed proxy:`await Class.resolve(name)`。 -- 若只有运行时名称:`ref = await pul.resolve(name)`,再使用 `ref.as_type(Class)` / `ref.as_any()`。 +- 优先使用 typed proxy:`await pul.resolve(name, cls=Class)` 或 `await Class.resolve(name)`。 +- 若只有运行时名称:`proxy = await pul.resolve(name)` 即可直接调用方法(无类型约束)。 --- diff --git a/examples/agent/README.md b/examples/agent/README.md index 9f6e925e4..3b921b18f 100644 --- a/examples/agent/README.md +++ b/examples/agent/README.md @@ -49,7 +49,7 @@ Pulsing 为主流 Agent 框架提供分布式运行时支持,让您的 Agent ### AutoGen ```python -from pulsing.autogen import PulsingRuntime +from pulsing.integrations.autogen import PulsingRuntime # 替代 SingleThreadedAgentRuntime runtime = PulsingRuntime(addr="0.0.0.0:8000") @@ -63,7 +63,7 @@ await runtime.send_message("Hello", AgentId("agent", "default")) ### LangGraph ```python -from pulsing.langgraph import with_pulsing +from pulsing.integrations.langgraph import with_pulsing app = graph.compile() diff --git a/examples/agent/autogen/README.md b/examples/agent/autogen/README.md index 366cdd3eb..9c40648b9 100644 --- a/examples/agent/autogen/README.md +++ b/examples/agent/autogen/README.md @@ -26,7 +26,7 @@ python simple.py ## 使用方式 ```python -from pulsing.autogen import PulsingRuntime +from pulsing.integrations.autogen import PulsingRuntime # 单机 runtime = PulsingRuntime() diff --git a/examples/agent/langgraph/README.md 
b/examples/agent/langgraph/README.md index ecec0c6c5..169bcb963 100644 --- a/examples/agent/langgraph/README.md +++ b/examples/agent/langgraph/README.md @@ -5,7 +5,7 @@ ## 核心用法 ```python -from pulsing.langgraph import with_pulsing +from pulsing.integrations.langgraph import with_pulsing app = graph.compile() diff --git a/examples/python/README.md b/examples/python/README.md index 7b4656ad4..95b968ca1 100644 --- a/examples/python/README.md +++ b/examples/python/README.md @@ -20,14 +20,9 @@ python examples/python/remote_actor_example.py python examples/python/native_async_example.py ``` -### Ray 兼容 API (`pulsing.compat.ray`) +### 与 Ray 一起使用 -一行代码从 Ray 迁移到 Pulsing: - -```bash -# Ray 风格 API,同步接口 -python examples/python/ray_compat_example.py -``` +在 Ray 中希望使用 Pulsing 做通信时,请用 **Bridge 模式**(保留 Ray 调度,用 `pul.mount()` 将现有对象挂到 Pulsing 网络),或参考文档 [Tutorial: Ray + Pulsing](../../docs/src/quickstart/migrate_from_ray.md)。Ray 风格兼容层(`pulsing.compat.ray`)已移除,推荐使用原生 `await pul.init()` + `@pul.remote` 或 Bridge 模式。 ### 基础示例 @@ -42,8 +37,7 @@ python examples/python/cluster.py # Multi-node (see --help) | API | 风格 | 适用场景 | |-----|------|----------| -| `import pulsing as pul` | 异步 (`async/await`) | 新项目,高性能需求 | -| `from pulsing.compat import ray` | 同步 (Ray 风格) | Ray 迁移,快速上手 | +| `import pulsing as pul` | 异步 (`async/await`) | 新项目、Ray Bridge、高性能需求 | ### 原生 API 示例 @@ -65,22 +59,19 @@ async def main(): await pul.shutdown() ``` -### Ray 兼容 API 示例 +### Ray Bridge 示例(在 Ray worker 内挂载 Pulsing) ```python -from pulsing.compat import ray - -ray.init() +import ray +import pulsing as pul @ray.remote -class Counter: - def __init__(self, value=0): - self.value = value - def inc(self): - self.value += 1 - return self.value +class Worker: + def __init__(self, name): + pul.mount(self, name=name) + + async def greet(self, msg): + return f"hello: {msg}" -counter = Counter.remote(value=0) -print(ray.get(counter.inc.remote())) # 1 -ray.shutdown() +# 见 docs/src/quickstart/migrate_from_ray.md 完整示例 ``` diff --git 
a/llms.binding.md b/llms.binding.md index 768ea27df..42818bd50 100644 --- a/llms.binding.md +++ b/llms.binding.md @@ -78,11 +78,9 @@ result = await counter.incr() async for chunk in counter.stream(10): print(chunk) # ── Resolve (cross-process / cross-node) ── -proxy = await Counter.resolve("counter") # typed proxy (recommended) -proxy = await Counter.resolve("counter", node_id=2, timeout=30) # with options -ref = await pul.resolve("counter", timeout=30) # untyped ActorRef -proxy = ref.as_type(Counter) # bind type -proxy = ref.as_any() # or use as_any() +proxy = await pul.resolve("counter", cls=Counter, timeout=30) # typed proxy +proxy = await pul.resolve("counter", timeout=30) # untyped proxy (any method) +proxy = await Counter.resolve("counter") # via ActorClass (also typed) ``` ### 3. Supervision & Restart @@ -152,7 +150,7 @@ class Worker: pul.mount(self, name=name) async def call_peer(self, peer, msg): - return await (await pul.resolve(peer, timeout=30)).as_any().greet(msg) + return await (await pul.resolve(peer, timeout=30)).greet(msg) ``` ### 7. Error Handling @@ -173,6 +171,14 @@ from pulsing import ( The global API is backed by an `ActorSystem`. Create one explicitly for finer control. Low-level APIs operate on `ActorRef` and require a `receive(self, msg)` method. +**Message / StreamMessage** are not exported from top-level `pulsing`. For low-level `receive()` or streaming, use: + +```python +from pulsing.core.messaging import Message, StreamMessage +``` + +**Wire protocol:** Python–runtime call/response uses a flat format: `__call__` / `__async__`, `args`, `kwargs` for requests; `__result__` / `__error__` for responses. The legacy namespaced format (`__pulsing_proto__` / `user_data`) is no longer used. + ```python system = await pul.actor_system(addr=..., seeds=..., passphrase=...) 
@@ -187,7 +193,7 @@ await ref.tell(msg) # fire-and-forget # Resolve / refer ref = await pul.refer(actor_id) # by ActorId -ref = await pul.resolve(name, *, node_id=None, timeout=None) +proxy = await pul.resolve(name, *, cls=None, node_id=None, timeout=None) # Queue / Topic on explicit system writer = await system.queue.write("q"); reader = await system.queue.read("q") @@ -206,7 +212,7 @@ class MyActor(Actor): async def receive(self, msg): return msg # sync or async, auto-detected ``` -**Zerocopy** — optional fast path bypassing pickle for buffer objects: +**Zerocopy** — optional fast path bypassing pickle for buffer objects (from `pulsing.core` or `pulsing._core`): ```python from pulsing.core import ZeroCopyDescriptor diff --git a/python/pulsing/core/remote.py b/python/pulsing/core/remote.py index dc228e43b..98551a015 100644 --- a/python/pulsing/core/remote.py +++ b/python/pulsing/core/remote.py @@ -492,16 +492,34 @@ def wrapper(cls): async def resolve( name: str, *, + cls: type | None = None, node_id: int | None = None, timeout: float | None = None, -): - """Resolve a named actor by name. +) -> ActorProxy: + """Resolve a named actor and return a ready-to-use proxy. - Returns an ActorRef that supports .ask(), .tell(), .as_any(), and .as_type(). + Args: + name: Actor name to resolve. + cls: Optional class for typed proxy (validates method names). + If omitted, returns an untyped proxy that accepts any method call. + node_id: Target node ID (None = any node via load balancing). + timeout: Retry timeout in seconds for waiting on gossip propagation. + + Examples:: + + proxy = await pul.resolve("counter", cls=Counter, timeout=30) + result = await proxy.incr() + + proxy = await pul.resolve("service") + result = await proxy.some_method() """ from . 
import get_system - return await get_system().resolve(name, node_id=node_id, timeout=timeout) + ref = await get_system().resolve(name, node_id=node_id, timeout=timeout) + if cls is not None: + methods, async_methods = _extract_methods(cls) + return ActorProxy(ref, methods, async_methods) + return ActorProxy(ref) # ============================================================================ diff --git a/python/pulsing/examples/counting_game.py b/python/pulsing/examples/counting_game.py index 2a33ea138..b8a92fcfb 100644 --- a/python/pulsing/examples/counting_game.py +++ b/python/pulsing/examples/counting_game.py @@ -33,7 +33,7 @@ async def yield_number(self): """Yield number: broadcast own number to all nodes""" num = self.peers.index(self.name) + 1 for peer in self.peers: - proxy = (await pul.resolve(peer, timeout=30)).as_type(Counter) + proxy = await pul.resolve(peer, cls=Counter, timeout=30) await proxy.on_number(num, self.name) async def on_number(self, num, from_who): diff --git a/tests/python/apis/ray_like/test_ray_like_api.py b/tests/python/apis/ray_like/test_ray_like_api.py index 998c9ea31..ffe28ddde 100644 --- a/tests/python/apis/ray_like/test_ray_like_api.py +++ b/tests/python/apis/ray_like/test_ray_like_api.py @@ -137,9 +137,10 @@ async def test_resolve_public_actor(initialized_pul): """Test pul.resolve() for public actor.""" await pul.spawn(SimpleActor(), name="resolve_public_test", public=True) - ref = await pul.resolve("resolve_public_test") - assert ref is not None - result = await ref.ask("resolved_msg") + proxy = await pul.resolve("resolve_public_test") + assert proxy is not None + assert isinstance(proxy, pul.ActorProxy) + result = await proxy.ref.ask("resolved_msg") assert result == "resolved_msg" diff --git a/tests/python/test_resolve_as_any.py b/tests/python/test_resolve_as_any.py index d5fd74a70..354b8d429 100644 --- a/tests/python/test_resolve_as_any.py +++ b/tests/python/test_resolve_as_any.py @@ -1,12 +1,11 @@ """ -Tests for 
resolve().as_any() / .as_type(): proxy generation on ActorRef. +Tests for resolve() proxy behavior. Covers: -- resolve(name) returns ActorRef with .as_any() and .as_type() -- ref.as_any() returns an untyped proxy -- ref.as_type(cls) returns a typed proxy +- resolve(name) returns untyped ActorProxy +- resolve(name, cls=X) returns typed ActorProxy - typed_proxy.as_any() returns an any proxy with the same underlying ref -- ref.ask() / ref.tell() still work (backward compatibility) +- proxy.ref gives underlying ActorRef for low-level .ask()/.tell() """ import asyncio @@ -16,7 +15,7 @@ from pulsing.exceptions import PulsingRuntimeError import pulsing as pul -from pulsing.core import Actor, ActorRef, remote +from pulsing.core import Actor, ActorRef, ActorProxy, remote # ============================================================================ @@ -33,42 +32,38 @@ async def initialized_pul(): # ============================================================================ -# Test: resolve() returns object with .as_any() +# Test: resolve() returns ActorProxy # ============================================================================ @pytest.mark.asyncio -async def test_resolve_returns_ref_view_with_as_any(initialized_pul): - """resolve(name) returns an object that has .as_any() method.""" +async def test_resolve_returns_untyped_proxy(initialized_pul): + """resolve(name) returns an untyped ActorProxy.""" await pul.spawn( _EchoActor(), - name="as_any_echo", + name="resolve_proxy_echo", public=True, ) - ref = await pul.resolve("as_any_echo") - assert ref is not None - assert hasattr(ref, "as_any") - assert callable(getattr(ref, "as_any")) - - proxy = ref.as_any() - assert proxy is not None + proxy = await pul.resolve("resolve_proxy_echo") + assert isinstance(proxy, ActorProxy) assert hasattr(proxy, "ref") @pytest.mark.asyncio -async def test_resolve_returns_actor_ref(initialized_pul): - """resolve(name) returns ActorRef with .as_any() and .as_type().""" - await 
pul.spawn(_EchoActor(), name="ref_view_echo", public=True) +async def test_resolve_with_cls_returns_typed_proxy(initialized_pul): + """resolve(name, cls=X) returns a typed ActorProxy.""" + await _ServiceWithMethods.spawn(name="resolve_typed_svc", public=True) - ref = await pul.resolve("ref_view_echo") - assert isinstance(ref, ActorRef) - assert hasattr(ref, "as_any") - assert hasattr(ref, "as_type") + proxy = await pul.resolve("resolve_typed_svc", cls=_ServiceWithMethods) + assert isinstance(proxy, ActorProxy) + + result = await proxy.get_value() + assert result == 0 # ============================================================================ -# Test: ref.as_any() proxy forwards any method call +# Test: untyped proxy forwards any method call # ============================================================================ @@ -83,7 +78,7 @@ async def receive(self, msg): @pul.remote class _ServiceWithMethods: - """Remote service with sync and async methods for as_any tests.""" + """Remote service with sync and async methods.""" def __init__(self): self.value = 0 @@ -104,12 +99,11 @@ def echo(self, text: str): @pytest.mark.asyncio -async def test_as_any_proxy_calls_sync_method(initialized_pul): - """ref.as_any() returns a proxy; await proxy.sync_method() works.""" - await _ServiceWithMethods.spawn(name="as_any_svc", public=True) +async def test_untyped_proxy_calls_sync_method(initialized_pul): + """Untyped proxy from resolve() forwards sync method calls.""" + await _ServiceWithMethods.spawn(name="untyped_svc", public=True) - ref = await pul.resolve("as_any_svc") - proxy = ref.as_any() + proxy = await pul.resolve("untyped_svc") result = await proxy.get_value() assert result == 0 @@ -122,12 +116,11 @@ async def test_as_any_proxy_calls_sync_method(initialized_pul): @pytest.mark.asyncio -async def test_as_any_proxy_calls_async_method(initialized_pul): - """await proxy.async_method() works through as_any() proxy.""" - await _ServiceWithMethods.spawn(name="as_any_async_svc", 
public=True) +async def test_untyped_proxy_calls_async_method(initialized_pul): + """Untyped proxy from resolve() forwards async method calls.""" + await _ServiceWithMethods.spawn(name="untyped_async_svc", public=True) - ref = await pul.resolve("as_any_async_svc") - proxy = ref.as_any() + proxy = await pul.resolve("untyped_async_svc") result = await proxy.async_incr() assert result == 1 @@ -136,47 +129,60 @@ async def test_as_any_proxy_calls_async_method(initialized_pul): @pytest.mark.asyncio -async def test_as_any_proxy_method_with_args(initialized_pul): - """proxy.method(args, kwargs) forwards correctly.""" - await _ServiceWithMethods.spawn(name="as_any_echo_svc", public=True) +async def test_untyped_proxy_method_with_args(initialized_pul): + """Untyped proxy forwards args and kwargs correctly.""" + await _ServiceWithMethods.spawn(name="untyped_echo_svc", public=True) - ref = await pul.resolve("as_any_echo_svc") - proxy = ref.as_any() + proxy = await pul.resolve("untyped_echo_svc") result = await proxy.echo("hello") assert result == "hello" # ============================================================================ -# Test: ref.as_any() instance method +# Test: typed proxy validates methods # ============================================================================ @pytest.mark.asyncio -async def test_as_any_with_ref_from_resolve(initialized_pul): - """ref.as_any() works when ref is from pul.resolve().""" - await _ServiceWithMethods.spawn(name="as_any_fn_svc", public=True) +async def test_typed_proxy_calls_method(initialized_pul): + """resolve(name, cls=X) proxy calls methods correctly.""" + await _ServiceWithMethods.spawn(name="typed_svc", public=True) - ref = await pul.resolve("as_any_fn_svc") - proxy = ref.as_any() + proxy = await pul.resolve("typed_svc", cls=_ServiceWithMethods) result = await proxy.get_value() assert result == 0 + result = await proxy.set_value(99) + assert result == 99 + + result = await proxy.get_value() + assert result == 99 + 
@pytest.mark.asyncio -async def test_as_any_with_raw_ref(initialized_pul): - """ref.as_any() works when ref is raw ActorRef from system.resolve().""" - from pulsing.core import get_system +async def test_typed_proxy_rejects_invalid_method(initialized_pul): + """Typed proxy rejects methods not on the class.""" + await _ServiceWithMethods.spawn(name="typed_reject_svc", public=True) - await _ServiceWithMethods.spawn(name="as_any_raw_svc", public=True) + proxy = await pul.resolve("typed_reject_svc", cls=_ServiceWithMethods) - system = get_system() - raw_ref = await system.resolve("as_any_raw_svc") - proxy = raw_ref.as_any() + with pytest.raises(AttributeError, match="No method"): + proxy.nonexistent_method - result = await proxy.get_value() - assert result == 0 + +@pytest.mark.asyncio +async def test_typed_proxy_async_method(initialized_pul): + """Typed proxy correctly handles async methods.""" + await _ServiceWithMethods.spawn(name="typed_async_svc", public=True) + + proxy = await pul.resolve("typed_async_svc", cls=_ServiceWithMethods) + + result = await proxy.async_incr() + assert result == 1 + result = await proxy.async_incr() + assert result == 2 # ============================================================================ @@ -186,10 +192,10 @@ async def test_as_any_with_raw_ref(initialized_pul): @pytest.mark.asyncio async def test_typed_proxy_as_any(initialized_pul): - """typed_proxy.as_any() returns a proxy that can call the same methods.""" + """typed_proxy.as_any() returns an untyped proxy with same underlying ref.""" await _ServiceWithMethods.spawn(name="typed_any_svc", public=True) - typed = await _ServiceWithMethods.resolve("typed_any_svc") + typed = await pul.resolve("typed_any_svc", cls=_ServiceWithMethods) result_typed = await typed.get_value() assert result_typed == 0 @@ -202,23 +208,25 @@ async def test_typed_proxy_as_any(initialized_pul): # ============================================================================ -# Test: backward compatibility — 
ref.ask() / ref.tell() still work +# Test: proxy.ref for low-level access # ============================================================================ @pytest.mark.asyncio -async def test_resolve_ref_ask_still_works(initialized_pul): - """After resolve(), ref.ask(msg) still works.""" +async def test_proxy_ref_ask_still_works(initialized_pul): + """proxy.ref.ask(msg) still works for low-level messaging.""" await pul.spawn(_EchoActor(), name="compat_ask_echo", public=True) - ref = await pul.resolve("compat_ask_echo") + proxy = await pul.resolve("compat_ask_echo") + ref = proxy.ref + assert isinstance(ref, ActorRef) result = await ref.ask({"echo": "hello"}) assert result == "hello" @pytest.mark.asyncio -async def test_resolve_ref_tell_still_works(initialized_pul): - """After resolve(), ref.tell(msg) still works.""" +async def test_proxy_ref_tell_still_works(initialized_pul): + """proxy.ref.tell(msg) still works for fire-and-forget.""" class _CountTell(Actor): def __init__(self): @@ -232,7 +240,8 @@ async def receive(self, msg): await pul.spawn(_CountTell(), name="compat_tell_count", public=True) - ref = await pul.resolve("compat_tell_count") + proxy = await pul.resolve("compat_tell_count") + ref = proxy.ref await ref.tell(None) await ref.tell(None) await asyncio.sleep(0.05) @@ -241,52 +250,38 @@ async def receive(self, msg): # ============================================================================ -# Test: ref.as_type(cls) — typed proxy from ActorRef +# Test: ActorRef.as_any() / .as_type() still work (low-level API) # ============================================================================ @pytest.mark.asyncio -async def test_as_type_on_actor_ref(initialized_pul): - """ref.as_type(cls) returns a typed proxy with method validation.""" - await _ServiceWithMethods.spawn(name="as_type_svc", public=True) - - ref = await pul.resolve("as_type_svc") - proxy = ref.as_type(_ServiceWithMethods) +async def test_raw_ref_as_any(initialized_pul): + """ActorRef.as_any() 
still works via system.resolve().""" + from pulsing.core import get_system - result = await proxy.get_value() - assert result == 0 + await _ServiceWithMethods.spawn(name="raw_ref_svc", public=True) - result = await proxy.set_value(99) - assert result == 99 + system = get_system() + raw_ref = await system.resolve("raw_ref_svc") + proxy = raw_ref.as_any() result = await proxy.get_value() - assert result == 99 + assert result == 0 @pytest.mark.asyncio -async def test_as_type_rejects_invalid_method(initialized_pul): - """Typed proxy from as_type() rejects methods not on the class.""" - await _ServiceWithMethods.spawn(name="as_type_reject_svc", public=True) - - ref = await pul.resolve("as_type_reject_svc") - proxy = ref.as_type(_ServiceWithMethods) - - with pytest.raises(AttributeError, match="No method"): - proxy.nonexistent_method # Access triggers __getattr__ validation - +async def test_raw_ref_as_type(initialized_pul): + """ActorRef.as_type(cls) still works via system.resolve().""" + from pulsing.core import get_system -@pytest.mark.asyncio -async def test_as_type_async_method(initialized_pul): - """as_type() proxy correctly handles async methods.""" - await _ServiceWithMethods.spawn(name="as_type_async_svc", public=True) + await _ServiceWithMethods.spawn(name="raw_ref_type_svc", public=True) - ref = await pul.resolve("as_type_async_svc") - proxy = ref.as_type(_ServiceWithMethods) + system = get_system() + raw_ref = await system.resolve("raw_ref_type_svc") + proxy = raw_ref.as_type(_ServiceWithMethods) - result = await proxy.async_incr() - assert result == 1 - result = await proxy.async_incr() - assert result == 2 + result = await proxy.get_value() + assert result == 0 # ============================================================================ @@ -299,7 +294,6 @@ async def test_counter_resolve_with_timeout(initialized_pul): """Counter.resolve(name, timeout=...) 
passes timeout to underlying resolve.""" await _ServiceWithMethods.spawn(name="timeout_svc", public=True) - # Should succeed with timeout (actor already exists) proxy = await _ServiceWithMethods.resolve("timeout_svc", timeout=5) result = await proxy.get_value() assert result == 0