Upload report_generation.jsonl
Browse files- report_generation.jsonl +1 -1
report_generation.jsonl
CHANGED
|
@@ -61,4 +61,4 @@
|
|
| 61 |
{"Model": "MiniMax/MiniMax-M2.5 (Thinking)", "Category": "Open-weight Reasoning", "Overall": 42.2, "Physics": 29.6, "Chemistry": 50.5, "Finance": 24.8, "Consulting": 63.8, "Extraction": 30.2, "Reasoning": 41.5, "Style": 62.6, "Response Characters": 15835, "Input Tokens": 484, "Output Tokens": 20003, "Cost": 3.86}
|
| 62 |
{"Model": "MiniMax/MiniMax-M2.5", "Category": "Open-weight Instruct", "Overall": 40.0, "Physics": 29.6, "Chemistry": 43.9, "Finance": 22.8, "Consulting": 63.5, "Extraction": 28.9, "Reasoning": 39.8, "Style": 53.6, "Response Characters": 5547, "Input Tokens": 484, "Output Tokens": 18356, "Cost": 3.55}
|
| 63 |
{"Model": "Anthropic/claude-sonnet-4.6 (Thinking)", "Category": "Closed-source Reasoning", "Overall": 55.6, "Physics": 41.8, "Chemistry": 65.4, "Finance": 48.1, "Consulting": 67.3, "Extraction": 49.6, "Reasoning": 56.4, "Style": 55.0, "Response Characters": 19002, "Input Tokens": 531, "Output Tokens": 7205, "Cost": 17.55}
|
| 64 |
-
{"Model": "Google/Gemini-3.1-Pro-Preview", "Category": "Closed-source Reasoning", "Overall":
|
|
|
|
| 61 |
{"Model": "MiniMax/MiniMax-M2.5 (Thinking)", "Category": "Open-weight Reasoning", "Overall": 42.2, "Physics": 29.6, "Chemistry": 50.5, "Finance": 24.8, "Consulting": 63.8, "Extraction": 30.2, "Reasoning": 41.5, "Style": 62.6, "Response Characters": 15835, "Input Tokens": 484, "Output Tokens": 20003, "Cost": 3.86}
|
| 62 |
{"Model": "MiniMax/MiniMax-M2.5", "Category": "Open-weight Instruct", "Overall": 40.0, "Physics": 29.6, "Chemistry": 43.9, "Finance": 22.8, "Consulting": 63.5, "Extraction": 28.9, "Reasoning": 39.8, "Style": 53.6, "Response Characters": 5547, "Input Tokens": 484, "Output Tokens": 18356, "Cost": 3.55}
|
| 63 |
{"Model": "Anthropic/claude-sonnet-4.6 (Thinking)", "Category": "Closed-source Reasoning", "Overall": 55.6, "Physics": 41.8, "Chemistry": 65.4, "Finance": 48.1, "Consulting": 67.3, "Extraction": 49.6, "Reasoning": 56.4, "Style": 55.0, "Response Characters": 19002, "Input Tokens": 531, "Output Tokens": 7205, "Cost": 17.55}
|
| 64 |
+
{"Model": "Google/Gemini-3.1-Pro-Preview", "Category": "Closed-source Reasoning", "Overall": 55.2, "Physics": 41.3, "Chemistry": 71.5, "Finance": 36.3, "Consulting": 71.7, "Extraction": 49.9, "Reasoning": 55.3, "Style": 66.8, "Response Characters": 5358, "Input Tokens": 479, "Output Tokens": 12724, "Cost": 24.58}
|