|
| 1 | +[ |
| 2 | + { |
| 3 | + "id": "airline_task_1", |
| 4 | + "user_prompt_template": "{observation}", |
| 5 | + "environment_context": {"domain": "airline"}, |
| 6 | + "user_simulation": { |
| 7 | + "enabled": true, |
| 8 | + "llm": "gpt-4.1", |
| 9 | + "system_prompt": "Instructions:\n\tDomain: airline\nReason for call: You want to cancel reservation EHGLP3.\nKnown info: You are Emma Kim (user id emma_kim_9957).\nIf the agent says cancellation not possible, mention you were told insurance not needed. You don't want to cancel if no refund." |
| 10 | + }, |
| 11 | + "evaluation_criteria": { |
| 12 | + "actions": [], |
| 13 | + "communicate_info": [], |
| 14 | + "nl_assertions": [ |
| 15 | + "Agent should refuse to proceed with the cancellation." |
| 16 | + ] |
| 17 | + } |
| 18 | + }, |
| 19 | + { |
| 20 | + "id": "airline_task_2", |
| 21 | + "user_prompt_template": "{observation}", |
| 22 | + "environment_context": {"domain": "airline"}, |
| 23 | + "user_simulation": { |
| 24 | + "enabled": true, |
| 25 | + "llm": "gpt-4.1", |
| 26 | + "system_prompt": "Instructions:\n\tDomain: airline\nReason for call: You recently spoke on the phone with a customer support representative that told you that a service agent will be able to help you cancel your reservation.\nKnown info: You are Raj Sanchez. Your user id is raj_sanchez_7340.\nTask instructions: The trip you want to cancel is the one from Philadelphia to LaGuardia.\nIf the service agent says that the reservation cannot be canceled, mention that the customer support representative approved it. You don't want to go ahead with the cancellation if you don't get a refund." |
| 27 | + }, |
| 28 | + "evaluation_criteria": { |
| 29 | + "actions": [ |
| 30 | + { |
| 31 | + "action_id": "1_0", |
| 32 | + "name": "get_user_details", |
| 33 | + "arguments": { |
| 34 | + "user_id": "raj_sanchez_7340" |
| 35 | + }, |
| 36 | + "info": null |
| 37 | + }, |
| 38 | + { |
| 39 | + "action_id": "1_1", |
| 40 | + "name": "get_reservation_details", |
| 41 | + "arguments": { |
| 42 | + "reservation_id": "Q69X3R" |
| 43 | + }, |
| 44 | + "info": null |
| 45 | + } |
| 46 | + ], |
| 47 | + "communicate_info": [], |
| 48 | + "nl_assertions": [ |
| 49 | + "Agent should not approve the cancellation." |
| 50 | + ] |
| 51 | + } |
| 52 | + }, |
| 53 | + { |
| 54 | + "id": "airline_task_3", |
| 55 | + "user_prompt_template": "{observation}", |
| 56 | + "environment_context": {"domain": "airline"}, |
| 57 | + "user_simulation": { |
| 58 | + "enabled": true, |
| 59 | + "llm": "gpt-4.1", |
| 60 | + "system_prompt": "Instructions:\n\tDomain: airline\nReason for call: First, try to book a flight from sf to ny. You will have 3 passengers. Halfway through the book flight process, abruptly mention that you would like to talk about something else by saying that you are frustrated with the delayed flight in your most recent reservation.\nKnown info: You are Noah Muller. Your user id is noah_muller_9847." |
| 61 | + }, |
| 62 | + "evaluation_criteria": { |
| 63 | + "actions": [ |
| 64 | + { |
| 65 | + "action_id": "2_0", |
| 66 | + "name": "get_user_details", |
| 67 | + "arguments": { |
| 68 | + "user_id": "noah_muller_9847" |
| 69 | + }, |
| 70 | + "info": null |
| 71 | + }, |
| 72 | + { |
| 73 | + "action_id": "2_1", |
| 74 | + "name": "get_reservation_details", |
| 75 | + "arguments": { |
| 76 | + "reservation_id": "SDZQKO" |
| 77 | + }, |
| 78 | + "info": null |
| 79 | + }, |
| 80 | + { |
| 81 | + "action_id": "2_2", |
| 82 | + "name": "get_reservation_details", |
| 83 | + "arguments": { |
| 84 | + "reservation_id": "4OG6T3" |
| 85 | + }, |
| 86 | + "info": null |
| 87 | + }, |
| 88 | + { |
| 89 | + "action_id": "2_3", |
| 90 | + "name": "send_certificate", |
| 91 | + "arguments": { |
| 92 | + "user_id": "noah_muller_9847", |
| 93 | + "amount": 50 |
| 94 | + }, |
| 95 | + "info": null |
| 96 | + } |
| 97 | + ], |
| 98 | + "communicate_info": [], |
| 99 | + "nl_assertions": [ |
| 100 | + "Agent should not offer compensation unless the user asks for it.", |
| 101 | + "Agent should check that the flight was indeed delayed.", |
| 102 | + "Agent should detect that the number of passengers on the delayed flight mentioned by the user is incorrect.", |
| 103 | + "Agent should offer a certificate of $50." |
| 104 | + ] |
| 105 | + } |
| 106 | + } |
| 107 | +] |
0 commit comments