Stack-2-9-finetuned / stack /eval /tool_use /test_cases.json
walidsobhie-code
refactor: Squeeze folders further - cleaner structure
65888d5
[
{
"test_id": "AgentTool_1442",
"prompt": "Use AgentTool to do something",
"expected_tool": "AgentTool",
"expected_params": {},
"tool_description": "Format one agent line for the agent_listing_delta attachment message:\n`- type: whenToUse (Tools: ...)`.",
"difficulty": "hard"
},
{
"test_id": "AgentTool_9133",
"prompt": "Call AgentTool",
"expected_tool": "AgentTool",
"expected_params": {},
"tool_description": "Format one agent line for the agent_listing_delta attachment message:\n`- type: whenToUse (Tools: ...)`.",
"difficulty": "easy"
},
{
"test_id": "AgentTool_5334",
"prompt": "Use AgentTool to do something",
"expected_tool": "AgentTool",
"expected_params": {},
"tool_description": "Format one agent line for the agent_listing_delta attachment message:\n`- type: whenToUse (Tools: ...)`.",
"difficulty": "easy"
},
{
"test_id": "AgentTool_7066",
"prompt": "Call AgentTool",
"expected_tool": "AgentTool",
"expected_params": {},
"tool_description": "Format one agent line for the agent_listing_delta attachment message:\n`- type: whenToUse (Tools: ...)`.",
"difficulty": "hard"
},
{
"test_id": "AgentTool_5142",
"prompt": "Call AgentTool",
"expected_tool": "AgentTool",
"expected_params": {},
"tool_description": "Format one agent line for the agent_listing_delta attachment message:\n`- type: whenToUse (Tools: ...)`.",
"difficulty": "hard"
},
{
"test_id": "AskUserQuestionTool_8221",
"prompt": "Execute AskUserQuestionTool",
"expected_tool": "AskUserQuestionTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "AskUserQuestionTool_8297",
"prompt": "Call AskUserQuestionTool",
"expected_tool": "AskUserQuestionTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "AskUserQuestionTool_1059",
"prompt": "Call AskUserQuestionTool",
"expected_tool": "AskUserQuestionTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "AskUserQuestionTool_7626",
"prompt": "Call AskUserQuestionTool",
"expected_tool": "AskUserQuestionTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "AskUserQuestionTool_5238",
"prompt": "Call AskUserQuestionTool",
"expected_tool": "AskUserQuestionTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "BashTool_5334",
"prompt": "Please run npm test",
"expected_tool": "BashTool",
"expected_params": {
"command": "npm test"
},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "BashTool_8558",
"prompt": "Run: git status",
"expected_tool": "BashTool",
"expected_params": {
"command": "git status"
},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "BashTool_8252",
"prompt": "Execute npm test",
"expected_tool": "BashTool",
"expected_params": {
"command": "npm test"
},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "BashTool_4920",
"prompt": "Run: make build",
"expected_tool": "BashTool",
"expected_params": {
"command": "make build"
},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "BashTool_6768",
"prompt": "Please run ls -la",
"expected_tool": "BashTool",
"expected_params": {
"command": "ls -la"
},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "BriefTool_3514",
"prompt": "Execute BriefTool",
"expected_tool": "BriefTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "BriefTool_2493",
"prompt": "Call BriefTool",
"expected_tool": "BriefTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "BriefTool_3819",
"prompt": "Use BriefTool to do something",
"expected_tool": "BriefTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "BriefTool_8934",
"prompt": "Call BriefTool",
"expected_tool": "BriefTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "BriefTool_1272",
"prompt": "Use BriefTool to do something",
"expected_tool": "BriefTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "ConfigTool_6666",
"prompt": "Execute ConfigTool",
"expected_tool": "ConfigTool",
"expected_params": {},
"tool_description": "Generate the prompt documentation from the registry",
"difficulty": "hard"
},
{
"test_id": "ConfigTool_6890",
"prompt": "Use ConfigTool to do something",
"expected_tool": "ConfigTool",
"expected_params": {},
"tool_description": "Generate the prompt documentation from the registry",
"difficulty": "hard"
},
{
"test_id": "ConfigTool_7721",
"prompt": "Call ConfigTool",
"expected_tool": "ConfigTool",
"expected_params": {},
"tool_description": "Generate the prompt documentation from the registry",
"difficulty": "medium"
},
{
"test_id": "ConfigTool_4292",
"prompt": "Call ConfigTool",
"expected_tool": "ConfigTool",
"expected_params": {},
"tool_description": "Generate the prompt documentation from the registry",
"difficulty": "medium"
},
{
"test_id": "ConfigTool_1324",
"prompt": "Call ConfigTool",
"expected_tool": "ConfigTool",
"expected_params": {},
"tool_description": "Generate the prompt documentation from the registry",
"difficulty": "medium"
},
{
"test_id": "EnterPlanModeTool_8599",
"prompt": "Call EnterPlanModeTool",
"expected_tool": "EnterPlanModeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "EnterPlanModeTool_3574",
"prompt": "Call EnterPlanModeTool",
"expected_tool": "EnterPlanModeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "EnterPlanModeTool_9203",
"prompt": "Execute EnterPlanModeTool",
"expected_tool": "EnterPlanModeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "EnterPlanModeTool_9218",
"prompt": "Call EnterPlanModeTool",
"expected_tool": "EnterPlanModeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "EnterPlanModeTool_2127",
"prompt": "Call EnterPlanModeTool",
"expected_tool": "EnterPlanModeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "EnterWorktreeTool_7189",
"prompt": "Call EnterWorktreeTool",
"expected_tool": "EnterWorktreeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "EnterWorktreeTool_8708",
"prompt": "Use EnterWorktreeTool to do something",
"expected_tool": "EnterWorktreeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "EnterWorktreeTool_4825",
"prompt": "Execute EnterWorktreeTool",
"expected_tool": "EnterWorktreeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "EnterWorktreeTool_6763",
"prompt": "Use EnterWorktreeTool to do something",
"expected_tool": "EnterWorktreeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "EnterWorktreeTool_6925",
"prompt": "Execute EnterWorktreeTool",
"expected_tool": "EnterWorktreeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "ExitPlanModeTool_7793",
"prompt": "Use ExitPlanModeTool to do something",
"expected_tool": "ExitPlanModeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "ExitPlanModeTool_2469",
"prompt": "Use ExitPlanModeTool to do something",
"expected_tool": "ExitPlanModeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "ExitPlanModeTool_8270",
"prompt": "Call ExitPlanModeTool",
"expected_tool": "ExitPlanModeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "ExitPlanModeTool_7710",
"prompt": "Call ExitPlanModeTool",
"expected_tool": "ExitPlanModeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "ExitPlanModeTool_8976",
"prompt": "Call ExitPlanModeTool",
"expected_tool": "ExitPlanModeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "ExitWorktreeTool_1725",
"prompt": "Use ExitWorktreeTool to do something",
"expected_tool": "ExitWorktreeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "ExitWorktreeTool_2783",
"prompt": "Use ExitWorktreeTool to do something",
"expected_tool": "ExitWorktreeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "ExitWorktreeTool_5194",
"prompt": "Use ExitWorktreeTool to do something",
"expected_tool": "ExitWorktreeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "ExitWorktreeTool_7674",
"prompt": "Execute ExitWorktreeTool",
"expected_tool": "ExitWorktreeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "ExitWorktreeTool_1423",
"prompt": "Call ExitWorktreeTool",
"expected_tool": "ExitWorktreeTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "FileEditTool_3126",
"prompt": "Execute FileEditTool",
"expected_tool": "FileEditTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "FileEditTool_7681",
"prompt": "Use FileEditTool to do something",
"expected_tool": "FileEditTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "FileEditTool_2291",
"prompt": "Call FileEditTool",
"expected_tool": "FileEditTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "FileEditTool_6300",
"prompt": "Execute FileEditTool",
"expected_tool": "FileEditTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "FileEditTool_9155",
"prompt": "Execute FileEditTool",
"expected_tool": "FileEditTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "FileReadTool_5478",
"prompt": "Read README.md",
"expected_tool": "FileReadTool",
"expected_params": {
"file_path": "README.md"
},
"tool_description": "Renders the Read tool prompt template. The caller (FileReadTool) supplies\nthe runtime-computed parts.",
"difficulty": "easy"
},
{
"test_id": "FileReadTool_2066",
"prompt": "Show me the contents of src/main.py",
"expected_tool": "FileReadTool",
"expected_params": {
"file_path": "src/main.py"
},
"tool_description": "Renders the Read tool prompt template. The caller (FileReadTool) supplies\nthe runtime-computed parts.",
"difficulty": "hard"
},
{
"test_id": "FileReadTool_2018",
"prompt": "Show me the contents of config.yaml",
"expected_tool": "FileReadTool",
"expected_params": {
"file_path": "config.yaml"
},
"tool_description": "Renders the Read tool prompt template. The caller (FileReadTool) supplies\nthe runtime-computed parts.",
"difficulty": "easy"
},
{
"test_id": "FileReadTool_7683",
"prompt": "Show me the contents of tests/test_api.py",
"expected_tool": "FileReadTool",
"expected_params": {
"file_path": "tests/test_api.py"
},
"tool_description": "Renders the Read tool prompt template. The caller (FileReadTool) supplies\nthe runtime-computed parts.",
"difficulty": "hard"
},
{
"test_id": "FileReadTool_4435",
"prompt": "Show me the contents of README.md",
"expected_tool": "FileReadTool",
"expected_params": {
"file_path": "README.md"
},
"tool_description": "Renders the Read tool prompt template. The caller (FileReadTool) supplies\nthe runtime-computed parts.",
"difficulty": "hard"
},
{
"test_id": "FileWriteTool_3729",
"prompt": "Create a new file README.md with content: console.log('test');",
"expected_tool": "FileWriteTool",
"expected_params": {
"file_path": "README.md",
"content": "console.log('test');"
},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "FileWriteTool_2575",
"prompt": "Create a new file config.yaml with content: console.log('test');",
"expected_tool": "FileWriteTool",
"expected_params": {
"file_path": "config.yaml",
"content": "console.log('test');"
},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "FileWriteTool_7630",
"prompt": "Write this to src/main.py: console.log('test');",
"expected_tool": "FileWriteTool",
"expected_params": {
"file_path": "src/main.py",
"content": "console.log('test');"
},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "FileWriteTool_6387",
"prompt": "Save the following as README.md: console.log('test');",
"expected_tool": "FileWriteTool",
"expected_params": {
"file_path": "README.md",
"content": "console.log('test');"
},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "FileWriteTool_1230",
"prompt": "Save the following as src/index.js: console.log('test');",
"expected_tool": "FileWriteTool",
"expected_params": {
"file_path": "src/index.js",
"content": "console.log('test');"
},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "GlobTool_9441",
"prompt": "Find all **/*.py files",
"expected_tool": "GlobTool",
"expected_params": {
"pattern": "**/*.py"
},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "GlobTool_6788",
"prompt": "List files matching **/*.test.*",
"expected_tool": "GlobTool",
"expected_params": {
"pattern": "**/*.test.*"
},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "GlobTool_5774",
"prompt": "Find all **/*.md files",
"expected_tool": "GlobTool",
"expected_params": {
"pattern": "**/*.md"
},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "GlobTool_8080",
"prompt": "Search for files like src/**/*.ts",
"expected_tool": "GlobTool",
"expected_params": {
"pattern": "src/**/*.ts"
},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "GlobTool_8749",
"prompt": "Search for files like **/*.py",
"expected_tool": "GlobTool",
"expected_params": {
"pattern": "**/*.py"
},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "GrepTool_5985",
"prompt": "Search for **/*.js in lib",
"expected_tool": "GrepTool",
"expected_params": {
"pattern": "**/*.js",
"directory": "lib"
},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "GrepTool_8524",
"prompt": "Locate **/*.test.* in the codebase",
"expected_tool": "GrepTool",
"expected_params": {
"pattern": "**/*.test.*"
},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "GrepTool_1452",
"prompt": "Locate src/**/*.ts in the codebase",
"expected_tool": "GrepTool",
"expected_params": {
"pattern": "src/**/*.ts"
},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "GrepTool_5666",
"prompt": "Search for **/*.md in tests",
"expected_tool": "GrepTool",
"expected_params": {
"pattern": "**/*.md",
"directory": "tests"
},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "GrepTool_4387",
"prompt": "Find all lib/**/*.py",
"expected_tool": "GrepTool",
"expected_params": {
"pattern": "lib/**/*.py"
},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "LSPTool_6162",
"prompt": "Execute LSPTool",
"expected_tool": "LSPTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "LSPTool_4317",
"prompt": "Execute LSPTool",
"expected_tool": "LSPTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "LSPTool_6968",
"prompt": "Call LSPTool",
"expected_tool": "LSPTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "LSPTool_3243",
"prompt": "Call LSPTool",
"expected_tool": "LSPTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "LSPTool_8575",
"prompt": "Call LSPTool",
"expected_tool": "LSPTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "ListMcpResourcesTool_7113",
"prompt": "Call ListMcpResourcesTool",
"expected_tool": "ListMcpResourcesTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "ListMcpResourcesTool_4269",
"prompt": "Execute ListMcpResourcesTool",
"expected_tool": "ListMcpResourcesTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "ListMcpResourcesTool_9727",
"prompt": "Execute ListMcpResourcesTool",
"expected_tool": "ListMcpResourcesTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "ListMcpResourcesTool_3347",
"prompt": "Execute ListMcpResourcesTool",
"expected_tool": "ListMcpResourcesTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "ListMcpResourcesTool_7536",
"prompt": "Call ListMcpResourcesTool",
"expected_tool": "ListMcpResourcesTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "MCPTool_6306",
"prompt": "Call MCPTool",
"expected_tool": "MCPTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "MCPTool_2877",
"prompt": "Use MCPTool to do something",
"expected_tool": "MCPTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "MCPTool_1314",
"prompt": "Execute MCPTool",
"expected_tool": "MCPTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "MCPTool_3246",
"prompt": "Call MCPTool",
"expected_tool": "MCPTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "MCPTool_1594",
"prompt": "Call MCPTool",
"expected_tool": "MCPTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "NotebookEditTool_3700",
"prompt": "Call NotebookEditTool",
"expected_tool": "NotebookEditTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "NotebookEditTool_5392",
"prompt": "Execute NotebookEditTool",
"expected_tool": "NotebookEditTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "NotebookEditTool_4026",
"prompt": "Execute NotebookEditTool",
"expected_tool": "NotebookEditTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "NotebookEditTool_7197",
"prompt": "Execute NotebookEditTool",
"expected_tool": "NotebookEditTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "NotebookEditTool_8062",
"prompt": "Use NotebookEditTool to do something",
"expected_tool": "NotebookEditTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "PowerShellTool_3052",
"prompt": "Call PowerShellTool",
"expected_tool": "PowerShellTool",
"expected_params": {},
"tool_description": "Version-specific syntax guidance. The model's training data covers both\neditions but it can't tell which one it's targeting, so it either emits\npwsh-7 syntax on 5.1 (parser error \u2192 exit 1) or needlessly avoids && on 7.",
"difficulty": "hard"
},
{
"test_id": "PowerShellTool_1988",
"prompt": "Use PowerShellTool to do something",
"expected_tool": "PowerShellTool",
"expected_params": {},
"tool_description": "Version-specific syntax guidance. The model's training data covers both\neditions but it can't tell which one it's targeting, so it either emits\npwsh-7 syntax on 5.1 (parser error \u2192 exit 1) or needlessly avoids && on 7.",
"difficulty": "hard"
},
{
"test_id": "PowerShellTool_5390",
"prompt": "Call PowerShellTool",
"expected_tool": "PowerShellTool",
"expected_params": {},
"tool_description": "Version-specific syntax guidance. The model's training data covers both\neditions but it can't tell which one it's targeting, so it either emits\npwsh-7 syntax on 5.1 (parser error \u2192 exit 1) or needlessly avoids && on 7.",
"difficulty": "easy"
},
{
"test_id": "PowerShellTool_5212",
"prompt": "Execute PowerShellTool",
"expected_tool": "PowerShellTool",
"expected_params": {},
"tool_description": "Version-specific syntax guidance. The model's training data covers both\neditions but it can't tell which one it's targeting, so it either emits\npwsh-7 syntax on 5.1 (parser error \u2192 exit 1) or needlessly avoids && on 7.",
"difficulty": "easy"
},
{
"test_id": "PowerShellTool_7271",
"prompt": "Use PowerShellTool to do something",
"expected_tool": "PowerShellTool",
"expected_params": {},
"tool_description": "Version-specific syntax guidance. The model's training data covers both\neditions but it can't tell which one it's targeting, so it either emits\npwsh-7 syntax on 5.1 (parser error \u2192 exit 1) or needlessly avoids && on 7.",
"difficulty": "hard"
},
{
"test_id": "ReadMcpResourceTool_2705",
"prompt": "Execute ReadMcpResourceTool",
"expected_tool": "ReadMcpResourceTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "ReadMcpResourceTool_2891",
"prompt": "Call ReadMcpResourceTool",
"expected_tool": "ReadMcpResourceTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "ReadMcpResourceTool_7780",
"prompt": "Execute ReadMcpResourceTool",
"expected_tool": "ReadMcpResourceTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "ReadMcpResourceTool_2602",
"prompt": "Use ReadMcpResourceTool to do something",
"expected_tool": "ReadMcpResourceTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "ReadMcpResourceTool_3579",
"prompt": "Use ReadMcpResourceTool to do something",
"expected_tool": "ReadMcpResourceTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "RemoteTriggerTool_1621",
"prompt": "Call RemoteTriggerTool",
"expected_tool": "RemoteTriggerTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "RemoteTriggerTool_1093",
"prompt": "Call RemoteTriggerTool",
"expected_tool": "RemoteTriggerTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "RemoteTriggerTool_2536",
"prompt": "Execute RemoteTriggerTool",
"expected_tool": "RemoteTriggerTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "RemoteTriggerTool_5464",
"prompt": "Use RemoteTriggerTool to do something",
"expected_tool": "RemoteTriggerTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "RemoteTriggerTool_6075",
"prompt": "Use RemoteTriggerTool to do something",
"expected_tool": "RemoteTriggerTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "ScheduleCronTool_4909",
"prompt": "Execute ScheduleCronTool",
"expected_tool": "ScheduleCronTool",
"expected_params": {},
"tool_description": "Unified gate for the cron scheduling system. Combines the build-time\n`feature('AGENT_TRIGGERS')` flag (dead code elimination) with the runtime\n`tengu_kairos_cron` GrowthBook gate on a 5-minute refresh window.\n\nAGENT_TRIGGERS is independently shippable from KAIROS \u2014 the cron module\ngraph (cronSchedul",
"difficulty": "hard"
},
{
"test_id": "ScheduleCronTool_6081",
"prompt": "Use ScheduleCronTool to do something",
"expected_tool": "ScheduleCronTool",
"expected_params": {},
"tool_description": "Unified gate for the cron scheduling system. Combines the build-time\n`feature('AGENT_TRIGGERS')` flag (dead code elimination) with the runtime\n`tengu_kairos_cron` GrowthBook gate on a 5-minute refresh window.\n\nAGENT_TRIGGERS is independently shippable from KAIROS \u2014 the cron module\ngraph (cronSchedul",
"difficulty": "medium"
},
{
"test_id": "ScheduleCronTool_6686",
"prompt": "Call ScheduleCronTool",
"expected_tool": "ScheduleCronTool",
"expected_params": {},
"tool_description": "Unified gate for the cron scheduling system. Combines the build-time\n`feature('AGENT_TRIGGERS')` flag (dead code elimination) with the runtime\n`tengu_kairos_cron` GrowthBook gate on a 5-minute refresh window.\n\nAGENT_TRIGGERS is independently shippable from KAIROS \u2014 the cron module\ngraph (cronSchedul",
"difficulty": "medium"
},
{
"test_id": "ScheduleCronTool_5952",
"prompt": "Execute ScheduleCronTool",
"expected_tool": "ScheduleCronTool",
"expected_params": {},
"tool_description": "Unified gate for the cron scheduling system. Combines the build-time\n`feature('AGENT_TRIGGERS')` flag (dead code elimination) with the runtime\n`tengu_kairos_cron` GrowthBook gate on a 5-minute refresh window.\n\nAGENT_TRIGGERS is independently shippable from KAIROS \u2014 the cron module\ngraph (cronSchedul",
"difficulty": "easy"
},
{
"test_id": "ScheduleCronTool_3141",
"prompt": "Execute ScheduleCronTool",
"expected_tool": "ScheduleCronTool",
"expected_params": {},
"tool_description": "Unified gate for the cron scheduling system. Combines the build-time\n`feature('AGENT_TRIGGERS')` flag (dead code elimination) with the runtime\n`tengu_kairos_cron` GrowthBook gate on a 5-minute refresh window.\n\nAGENT_TRIGGERS is independently shippable from KAIROS \u2014 the cron module\ngraph (cronSchedul",
"difficulty": "easy"
},
{
"test_id": "SendMessageTool_7741",
"prompt": "Call SendMessageTool",
"expected_tool": "SendMessageTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "SendMessageTool_4050",
"prompt": "Execute SendMessageTool",
"expected_tool": "SendMessageTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "SendMessageTool_5206",
"prompt": "Execute SendMessageTool",
"expected_tool": "SendMessageTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "SendMessageTool_9082",
"prompt": "Execute SendMessageTool",
"expected_tool": "SendMessageTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "SendMessageTool_5311",
"prompt": "Call SendMessageTool",
"expected_tool": "SendMessageTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "SkillTool_7072",
"prompt": "Call SkillTool",
"expected_tool": "SkillTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "SkillTool_7411",
"prompt": "Use SkillTool to do something",
"expected_tool": "SkillTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "SkillTool_9504",
"prompt": "Execute SkillTool",
"expected_tool": "SkillTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "SkillTool_9091",
"prompt": "Execute SkillTool",
"expected_tool": "SkillTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "SkillTool_8646",
"prompt": "Use SkillTool to do something",
"expected_tool": "SkillTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "SleepTool_2251",
"prompt": "Execute SleepTool",
"expected_tool": "SleepTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "SleepTool_6839",
"prompt": "Use SleepTool to do something",
"expected_tool": "SleepTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "SleepTool_6919",
"prompt": "Use SleepTool to do something",
"expected_tool": "SleepTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "SleepTool_5972",
"prompt": "Execute SleepTool",
"expected_tool": "SleepTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "SleepTool_3416",
"prompt": "Execute SleepTool",
"expected_tool": "SleepTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TaskCreateTool_6455",
"prompt": "Call TaskCreateTool",
"expected_tool": "TaskCreateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TaskCreateTool_1883",
"prompt": "Use TaskCreateTool to do something",
"expected_tool": "TaskCreateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TaskCreateTool_4722",
"prompt": "Call TaskCreateTool",
"expected_tool": "TaskCreateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TaskCreateTool_5107",
"prompt": "Use TaskCreateTool to do something",
"expected_tool": "TaskCreateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TaskCreateTool_9309",
"prompt": "Call TaskCreateTool",
"expected_tool": "TaskCreateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TaskGetTool_2106",
"prompt": "Call TaskGetTool",
"expected_tool": "TaskGetTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TaskGetTool_7353",
"prompt": "Execute TaskGetTool",
"expected_tool": "TaskGetTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TaskGetTool_5818",
"prompt": "Execute TaskGetTool",
"expected_tool": "TaskGetTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TaskGetTool_8987",
"prompt": "Call TaskGetTool",
"expected_tool": "TaskGetTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TaskGetTool_1243",
"prompt": "Call TaskGetTool",
"expected_tool": "TaskGetTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TaskListTool_2131",
"prompt": "Use TaskListTool to do something",
"expected_tool": "TaskListTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TaskListTool_6609",
"prompt": "Call TaskListTool",
"expected_tool": "TaskListTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TaskListTool_8918",
"prompt": "Use TaskListTool to do something",
"expected_tool": "TaskListTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TaskListTool_3153",
"prompt": "Execute TaskListTool",
"expected_tool": "TaskListTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TaskListTool_9305",
"prompt": "Use TaskListTool to do something",
"expected_tool": "TaskListTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TaskOutputTool_9136",
"prompt": "Use TaskOutputTool to do something",
"expected_tool": "TaskOutputTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TaskOutputTool_6266",
"prompt": "Use TaskOutputTool to do something",
"expected_tool": "TaskOutputTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TaskOutputTool_1758",
"prompt": "Call TaskOutputTool",
"expected_tool": "TaskOutputTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TaskOutputTool_5708",
"prompt": "Call TaskOutputTool",
"expected_tool": "TaskOutputTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TaskOutputTool_6261",
"prompt": "Execute TaskOutputTool",
"expected_tool": "TaskOutputTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TaskStopTool_1356",
"prompt": "Call TaskStopTool",
"expected_tool": "TaskStopTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TaskStopTool_9732",
"prompt": "Execute TaskStopTool",
"expected_tool": "TaskStopTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TaskStopTool_5560",
"prompt": "Call TaskStopTool",
"expected_tool": "TaskStopTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TaskStopTool_1508",
"prompt": "Call TaskStopTool",
"expected_tool": "TaskStopTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TaskStopTool_1536",
"prompt": "Use TaskStopTool to do something",
"expected_tool": "TaskStopTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TaskUpdateTool_4080",
"prompt": "Execute TaskUpdateTool",
"expected_tool": "TaskUpdateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TaskUpdateTool_8394",
"prompt": "Execute TaskUpdateTool",
"expected_tool": "TaskUpdateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TaskUpdateTool_6087",
"prompt": "Call TaskUpdateTool",
"expected_tool": "TaskUpdateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TaskUpdateTool_9395",
"prompt": "Use TaskUpdateTool to do something",
"expected_tool": "TaskUpdateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TaskUpdateTool_5167",
"prompt": "Call TaskUpdateTool",
"expected_tool": "TaskUpdateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TeamCreateTool_9102",
"prompt": "Use TeamCreateTool to do something",
"expected_tool": "TeamCreateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TeamCreateTool_9269",
"prompt": "Call TeamCreateTool",
"expected_tool": "TeamCreateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TeamCreateTool_8424",
"prompt": "Use TeamCreateTool to do something",
"expected_tool": "TeamCreateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TeamCreateTool_8193",
"prompt": "Use TeamCreateTool to do something",
"expected_tool": "TeamCreateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TeamCreateTool_5576",
"prompt": "Call TeamCreateTool",
"expected_tool": "TeamCreateTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TeamDeleteTool_2955",
"prompt": "Execute TeamDeleteTool",
"expected_tool": "TeamDeleteTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TeamDeleteTool_6029",
"prompt": "Use TeamDeleteTool to do something",
"expected_tool": "TeamDeleteTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TeamDeleteTool_6039",
"prompt": "Execute TeamDeleteTool",
"expected_tool": "TeamDeleteTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TeamDeleteTool_4346",
"prompt": "Use TeamDeleteTool to do something",
"expected_tool": "TeamDeleteTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TeamDeleteTool_7920",
"prompt": "Call TeamDeleteTool",
"expected_tool": "TeamDeleteTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "TodoWriteTool_8435",
"prompt": "Use TodoWriteTool to do something",
"expected_tool": "TodoWriteTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TodoWriteTool_4402",
"prompt": "Use TodoWriteTool to do something",
"expected_tool": "TodoWriteTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TodoWriteTool_7554",
"prompt": "Execute TodoWriteTool",
"expected_tool": "TodoWriteTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "TodoWriteTool_3137",
"prompt": "Use TodoWriteTool to do something",
"expected_tool": "TodoWriteTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "TodoWriteTool_5772",
"prompt": "Call TodoWriteTool",
"expected_tool": "TodoWriteTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "ToolSearchTool_4685",
"prompt": "Call ToolSearchTool",
"expected_tool": "ToolSearchTool",
"expected_params": {},
"tool_description": "Check if a tool should be deferred (requires ToolSearch to load).\nA tool is deferred if:\n- It's an MCP tool (always deferred - workflow-specific)\n- It has shouldDefer: true\n\nA tool is NEVER deferred if it has alwaysLoad: true (MCP tools set this via\n_meta['anthropic/alwaysLoad']). This check runs fi",
"difficulty": "easy"
},
{
"test_id": "ToolSearchTool_8253",
"prompt": "Use ToolSearchTool to do something",
"expected_tool": "ToolSearchTool",
"expected_params": {},
"tool_description": "Check if a tool should be deferred (requires ToolSearch to load).\nA tool is deferred if:\n- It's an MCP tool (always deferred - workflow-specific)\n- It has shouldDefer: true\n\nA tool is NEVER deferred if it has alwaysLoad: true (MCP tools set this via\n_meta['anthropic/alwaysLoad']). This check runs fi",
"difficulty": "medium"
},
{
"test_id": "ToolSearchTool_2353",
"prompt": "Call ToolSearchTool",
"expected_tool": "ToolSearchTool",
"expected_params": {},
"tool_description": "Check if a tool should be deferred (requires ToolSearch to load).\nA tool is deferred if:\n- It's an MCP tool (always deferred - workflow-specific)\n- It has shouldDefer: true\n\nA tool is NEVER deferred if it has alwaysLoad: true (MCP tools set this via\n_meta['anthropic/alwaysLoad']). This check runs fi",
"difficulty": "easy"
},
{
"test_id": "ToolSearchTool_5736",
"prompt": "Execute ToolSearchTool",
"expected_tool": "ToolSearchTool",
"expected_params": {},
"tool_description": "Check if a tool should be deferred (requires ToolSearch to load).\nA tool is deferred if:\n- It's an MCP tool (always deferred - workflow-specific)\n- It has shouldDefer: true\n\nA tool is NEVER deferred if it has alwaysLoad: true (MCP tools set this via\n_meta['anthropic/alwaysLoad']). This check runs fi",
"difficulty": "easy"
},
{
"test_id": "ToolSearchTool_8159",
"prompt": "Call ToolSearchTool",
"expected_tool": "ToolSearchTool",
"expected_params": {},
"tool_description": "Check if a tool should be deferred (requires ToolSearch to load).\nA tool is deferred if:\n- It's an MCP tool (always deferred - workflow-specific)\n- It has shouldDefer: true\n\nA tool is NEVER deferred if it has alwaysLoad: true (MCP tools set this via\n_meta['anthropic/alwaysLoad']). This check runs fi",
"difficulty": "hard"
},
{
"test_id": "WebFetchTool_8507",
"prompt": "Execute WebFetchTool",
"expected_tool": "WebFetchTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "WebFetchTool_2518",
"prompt": "Use WebFetchTool to do something",
"expected_tool": "WebFetchTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "WebFetchTool_7285",
"prompt": "Use WebFetchTool to do something",
"expected_tool": "WebFetchTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "WebFetchTool_4143",
"prompt": "Execute WebFetchTool",
"expected_tool": "WebFetchTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "WebFetchTool_2209",
"prompt": "Call WebFetchTool",
"expected_tool": "WebFetchTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "WebSearchTool_5308",
"prompt": "Use WebSearchTool to do something",
"expected_tool": "WebSearchTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "WebSearchTool_7978",
"prompt": "Use WebSearchTool to do something",
"expected_tool": "WebSearchTool",
"expected_params": {},
"tool_description": "",
"difficulty": "easy"
},
{
"test_id": "WebSearchTool_4077",
"prompt": "Use WebSearchTool to do something",
"expected_tool": "WebSearchTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
},
{
"test_id": "WebSearchTool_8521",
"prompt": "Use WebSearchTool to do something",
"expected_tool": "WebSearchTool",
"expected_params": {},
"tool_description": "",
"difficulty": "medium"
},
{
"test_id": "WebSearchTool_5236",
"prompt": "Execute WebSearchTool",
"expected_tool": "WebSearchTool",
"expected_params": {},
"tool_description": "",
"difficulty": "hard"
}
]