Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,25 @@ Feature: Tool call lifecycle
| status | marker | tool | input |
| completed | ✓ | list_dir | . |
And the public output should include "The directory contains README.md, notes.txt, and src/."

Scenario: Unknown tool returns an error result
Given an offline CodeWhale workspace containing:
| path | kind |
| README.md | file |
And the mocked LLM will request the "missing_tool" tool with:
| path |
| . |
And the mocked LLM will answer after the tool result:
| content |
| I could not run the requested missing tool. |
When the user asks "try a missing tool"
Then CodeWhale should send the user request to the mocked LLM
And the public tool lifecycle should show a running tool:
| status | marker | tool | input |
| running | [~] | missing_tool | . |
And the public tool result should report an error for "missing_tool"
And CodeWhale should send the tool error back to the mocked LLM
And the public tool lifecycle should show a failed tool:
| status | marker | tool | input |
| error | [!] | missing_tool | . |
And the public output should include "I could not run the requested missing tool."
88 changes: 82 additions & 6 deletions crates/tui/tests/tool_lifecycle_acceptance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ use wiremock::{Mock, MockServer, Request, ResponseTemplate};
const FEATURE_NAME: &str = "Tool call lifecycle";
const FEATURE_PATH: &str = concat!(
env!("CARGO_MANIFEST_DIR"),
"/tests/features/tool_lifecycle_happy_path.feature"
"/tests/features/tool_lifecycle.feature"
);
const HAPPY_PATH_SCENARIO: &str = "Happy path lists the current directory through a tool";
const TOOL_CALL_ID: &str = "call_list_dir";
const UNKNOWN_TOOL_SCENARIO: &str = "Unknown tool returns an error result";
const TOOL_CALL_ID: &str = "call_tool";
const TEST_MODEL: &str = "acceptance-model";

#[derive(Debug, Default, cucumber::World)]
Expand All @@ -29,6 +30,7 @@ struct ToolLifecycleWorld {
tool_name: Option<String>,
tool_input: Option<Value>,
final_answer: Option<String>,
prompt: Option<String>,
stdout: String,
stderr: String,
events: Vec<Value>,
Expand Down Expand Up @@ -87,6 +89,7 @@ async fn user_asks(world: &mut ToolLifecycleWorld, prompt: String) {
let server = start_mock_llm(world).await;
let output = run_codewhale_exec(world, &server, &prompt);

world.prompt = Some(prompt);
world.stdout = String::from_utf8_lossy(&output.stdout).into_owned();
world.stderr = String::from_utf8_lossy(&output.stderr).into_owned();
assert!(
Expand Down Expand Up @@ -120,7 +123,13 @@ fn codewhale_should_send_user_request_to_mocked_llm(world: &mut ToolLifecycleWor
.expect("expected an initial chat request");

assert!(
request_contains_user_text(first_request, "list the current directory"),
request_contains_user_text(
first_request,
world
.prompt
.as_deref()
.expect("scenario prompt should be set")
),
"initial request should include the user prompt:\n{first_request:#}"
);
assert!(
Expand Down Expand Up @@ -192,6 +201,48 @@ fn codewhale_should_send_tool_result_back_to_mocked_llm(world: &mut ToolLifecycl
}
}

#[then(regex = r#"^the public tool result should report an error for "([^"]+)"$"#)]
fn public_tool_result_should_report_error_for(world: &mut ToolLifecycleWorld, tool_name: String) {
let _ = tool_use_event(world, &tool_name);
let event = tool_result_event(world);

assert_eq!(event.get("status").and_then(Value::as_str), Some("error"));
let output = event
.get("output")
.and_then(Value::as_str)
.expect("tool_result error output");
assert!(
output.contains(&tool_name) && output.contains("not available"),
"tool_result error should name the unavailable tool:\n{output}"
);
}

#[then("CodeWhale should send the tool error back to the mocked LLM")]
fn codewhale_should_send_tool_error_back_to_mocked_llm(world: &mut ToolLifecycleWorld) {
let request = world
.requests
.iter()
.find(|request| request_contains_tool_result(request))
.expect("expected a follow-up chat request containing the tool error");
let tool_result = tool_result_message(request).expect("tool result message");
assert_eq!(
tool_result
.get("tool_call_id")
.and_then(serde_json::Value::as_str),
Some(TOOL_CALL_ID)
);

let content = tool_result
.get("content")
.and_then(serde_json::Value::as_str)
.expect("tool result content");
let tool_name = world.tool_name.as_deref().expect("tool name");
assert!(
content.contains(tool_name) && content.contains("not available"),
"tool error sent to LLM should describe the unavailable tool:\n{content}"
);
}

#[then("the public tool lifecycle should show a completed tool:")]
fn public_tool_lifecycle_should_show_completed_tool(world: &mut ToolLifecycleWorld, step: &Step) {
let expected = one_table_row(step);
Expand All @@ -208,6 +259,22 @@ fn public_tool_lifecycle_should_show_completed_tool(world: &mut ToolLifecycleWor
);
}

#[then("the public tool lifecycle should show a failed tool:")]
fn public_tool_lifecycle_should_show_failed_tool(world: &mut ToolLifecycleWorld, step: &Step) {
let expected = one_table_row(step);
assert_eq!(row_value(&expected, "status"), "error");
assert_eq!(row_value(&expected, "marker"), "[!]");

let event = tool_result_event(world);
assert_eq!(event.get("status").and_then(Value::as_str), Some("error"));

let tool_use = tool_use_event(world, &row_value(&expected, "tool"));
assert_eq!(
tool_use.get("input").and_then(|input| input.get("path")),
Some(&json!(row_value(&expected, "input")))
);
}

#[then(regex = r#"^the public output should include "([^"]+)"$"#)]
fn public_output_should_include(world: &mut ToolLifecycleWorld, expected: String) {
let content = world
Expand All @@ -226,10 +293,15 @@ fn public_output_should_include(world: &mut ToolLifecycleWorld, expected: String

#[tokio::test(flavor = "current_thread")]
async fn happy_path_lists_current_directory_through_tool() {
run_scenario(HAPPY_PATH_SCENARIO).await;
run_scenario(HAPPY_PATH_SCENARIO, 10).await;
}

async fn run_scenario(name: &'static str) {
#[tokio::test(flavor = "current_thread")]
async fn unknown_tool_returns_error_result() {
run_scenario(UNKNOWN_TOOL_SCENARIO, 10).await;
}

async fn run_scenario(name: &'static str, expected_steps: usize) {
let writer = ToolLifecycleWorld::cucumber()
.fail_on_skipped()
.with_default_cli()
Expand All @@ -239,7 +311,11 @@ async fn run_scenario(name: &'static str) {
.await;
assert_eq!(writer.failed_steps(), 0, "scenario failed: {name}");
assert_eq!(writer.skipped_steps(), 0, "scenario skipped steps: {name}");
assert_eq!(writer.passed_steps(), 10, "scenario did not run: {name}");
assert_eq!(
writer.passed_steps(),
expected_steps,
"scenario did not run: {name}"
);
}

async fn start_mock_llm(world: &ToolLifecycleWorld) -> MockServer {
Expand Down
Loading