# Spaces: Running  (status banner captured from the hosting page view; not part of the configuration)
---
# Environment manifest for `financial_task_env`.
# Declares a FastAPI application (`server.app:app`) served on port 8000 and
# the ten tasks the environment exposes. Every task allows at most 15 steps
# and uses a programmatic grader; the grader `description` explains scoring.
# NOTE(review): nesting of `type`/`description` under `grader` was
# reconstructed from a flattened copy of this file — confirm against the
# consumer's schema.
spec_version: 1
name: financial_task_env
type: space
runtime: fastapi
app: server.app:app
port: 8000

tasks:
  # --- Easy: question-answering tasks, graded on the numbers in the answer ---
  - id: task_1
    name: Count Plants in Spreadsheet
    difficulty: easy
    max_steps: 15
    grader:
      type: programmatic
      description: "QA grading — extracts numbers from agent answer, compares against reference (85). Score 0.001–0.999 based on numeric match with 5% tolerance."

  - id: task_2
    name: Retrieve TW EOL Charge
    difficulty: easy
    max_steps: 15
    grader:
      type: programmatic
      description: "QA grading — extracts numbers from agent answer, compares against reference (113291). Score 0.001–0.999 based on numeric match with 5% tolerance."

  - id: task_3
    name: Portfolio Mark-to-Market Change
    difficulty: easy
    max_steps: 15
    grader:
      type: programmatic
      description: "QA grading — extracts numbers from agent answer, compares against reference ($1,989,600 and 27.9%). Score 0.001–0.999 based on numeric match + keyword overlap."

  # --- Medium: workbook-modification tasks, graded against a reference xlsx ---
  - id: task_4
    name: Summarize Pipeline Imbalances
    difficulty: medium
    max_steps: 15
    grader:
      type: programmatic
      description: "MODIFY grading — compares agent output xlsx cell-by-cell against reference workbook. 30% sheet-name match + 70% cell-level match (2% numeric tolerance). Score 0.001–0.999."

  - id: task_5
    name: Audit and Correct Formula Errors
    difficulty: medium
    max_steps: 15
    grader:
      type: programmatic
      description: "MODIFY grading — compares agent output xlsx cell-by-cell against reference workbook. 30% sheet-name match + 70% cell-level match (2% numeric tolerance). Score 0.001–0.999."

  - id: task_6
    name: Create Table and Apply Filter
    difficulty: medium
    max_steps: 15
    grader:
      type: programmatic
      description: "MODIFY grading — compares agent output xlsx cell-by-cell against reference workbook. 30% sheet-name match + 70% cell-level match (2% numeric tolerance). Score 0.001–0.999."

  - id: task_7
    name: Add Weekday Row and Data Entry
    difficulty: medium
    max_steps: 15
    grader:
      type: programmatic
      description: "MODIFY grading — compares agent output xlsx cell-by-cell against reference workbook. 30% sheet-name match + 70% cell-level match (2% numeric tolerance). Score 0.001–0.999."

  # --- Hard: workbook-modification tasks, same grading scheme as medium ---
  - id: task_8
    name: Balance Sheet Validation and Indicators
    difficulty: hard
    max_steps: 15
    grader:
      type: programmatic
      description: "MODIFY grading — compares agent output xlsx cell-by-cell against reference workbook. 30% sheet-name match + 70% cell-level match (2% numeric tolerance). Score 0.001–0.999."

  - id: task_9
    name: Create Scenario3 Worksheet
    difficulty: hard
    max_steps: 15
    grader:
      type: programmatic
      description: "MODIFY grading — compares agent output xlsx cell-by-cell against reference workbook. 30% sheet-name match + 70% cell-level match (2% numeric tolerance). Score 0.001–0.999."

  - id: task_10
    name: Consolidate by Type and Area
    difficulty: hard
    max_steps: 15
    grader:
      type: programmatic
      description: "MODIFY grading — compares agent output xlsx cell-by-cell against reference workbook. 30% sheet-name match + 70% cell-level match (2% numeric tolerance). Score 0.001–0.999."