@@ -158,6 +158,118 @@ async def test_send_request_skips_the_surface_gate_when_method_absent_at_version
158158 assert isinstance (result , types .EmptyResult )
159159
160160
@pytest.mark.anyio
async def test_create_message_tool_result_validation():
    """Exercise the tool_use/tool_result pairing checks in create_message.

    Cases 1-4b and 7 expect a ValueError whose message matches the given
    pattern; cases 5, 6 and 8 expect the call to complete without raising.
    """
    stub = StubDispatcher(
        result={"role": "assistant", "content": [{"type": "text", "text": "ok"}], "model": "m"}
    )
    session = _make_session(
        stub, capabilities=ClientCapabilities(sampling=SamplingCapability(tools=SamplingToolsCapability()))
    )
    tool_def = types.Tool(name="test_tool", input_schema={"type": "object"})
    text_block = types.TextContent(type="text", text="hello")
    call_block = types.ToolUseContent(type="tool_use", id="call_1", name="test_tool", input={})
    result_block = types.ToolResultContent(type="tool_result", tool_use_id="call_1", content=[])

    # Small builders for the messages that recur across cases. They are invoked
    # inside each case so every call receives freshly constructed messages.
    def user_text() -> types.SamplingMessage:
        return types.SamplingMessage(role="user", content=text_block)

    def assistant_call() -> types.SamplingMessage:
        return types.SamplingMessage(role="assistant", content=call_block)

    def user_result() -> types.SamplingMessage:
        return types.SamplingMessage(role="user", content=result_block)

    def user_wrong_result() -> types.SamplingMessage:
        # A tool_result whose tool_use_id does not match call_block's id.
        return types.SamplingMessage(
            role="user",
            content=types.ToolResultContent(type="tool_result", tool_use_id="wrong_id", content=[]),
        )

    # Case 1: a message that mixes tool_result with another content block.
    with pytest.raises(ValueError, match="only tool_result content"):
        await session.create_message(
            messages=[
                user_text(),
                assistant_call(),
                types.SamplingMessage(role="user", content=[result_block, text_block]),
            ],
            max_tokens=100,
            tools=[tool_def],
        )

    # Case 2: tool_result is the very first message, so nothing precedes it.
    with pytest.raises(ValueError, match="requires a previous message"):
        await session.create_message(
            messages=[user_result()],
            max_tokens=100,
            tools=[tool_def],
        )

    # Case 3: the message before the tool_result carries no tool_use.
    with pytest.raises(ValueError, match="do not match any tool_use"):
        await session.create_message(
            messages=[user_text(), user_result()],
            max_tokens=100,
            tools=[tool_def],
        )

    # Case 4: tool_result id differs from the preceding tool_use id.
    with pytest.raises(ValueError, match="ids of tool_result blocks and tool_use blocks"):
        await session.create_message(
            messages=[user_text(), assistant_call(), user_wrong_result()],
            max_tokens=100,
            tools=[tool_def],
        )

    # Case 4b: the same id mismatch, but followed by a later plain text message.
    with pytest.raises(ValueError, match="ids of tool_result blocks and tool_use blocks"):
        await session.create_message(
            messages=[
                assistant_call(),
                user_wrong_result(),
                types.SamplingMessage(role="assistant", content=text_block),
            ],
            max_tokens=100,
            tools=[tool_def],
        )

    # Case 5: text-only conversation with tools supplied - no error expected.
    await session.create_message(
        messages=[user_text()],
        max_tokens=100,
        tools=[tool_def],
    )

    # Case 6: tool_result id matches the preceding tool_use - no error expected.
    await session.create_message(
        messages=[user_text(), assistant_call(), user_result()],
        max_tokens=100,
        tools=[tool_def],
    )

    # Case 7: same input as case 3 but with `tools` omitted; the check still fires
    # (a tool-loop continuation may omit tools while containing tool_result).
    with pytest.raises(ValueError, match="do not match any tool_use"):
        await session.create_message(
            messages=[user_text(), user_result()],
            max_tokens=100,
        )

    # Case 8: empty messages list - skips validation entirely.
    # This call goes through a separate session whose stub result carries a
    # single content block rather than a list.
    single_content_stub = StubDispatcher(
        result={"role": "assistant", "content": {"type": "text", "text": "ok"}, "model": "m"}
    )
    no_tools_session = _make_session(
        single_content_stub,
        capabilities=ClientCapabilities(sampling=SamplingCapability(tools=SamplingToolsCapability())),
    )
    await no_tools_session.create_message(messages=[], max_tokens=100)
271+
272+
161273@pytest .mark .anyio
162274async def test_send_request_validates_result_alias_only ():
163275 """Peer results validate alias-only; a snake_case key from the wire is
0 commit comments