{ "time": "2025-01-09 17:13:45", "results": { "IO": { "META": { "Algorithm": "IO", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 37.83, "Cost($)": 0.3328 }, "AQuA": { "Score": 38.98, "Cost($)": 0.0380 } }, "COT": { "META": { "Algorithm": "COT", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 78.70, "Cost($)": 0.6788 }, "AQuA": { "Score": 61.02, "Cost($)": 0.0957 } }, "SC-COT": { "META": { "Algorithm": "SC-COT", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 80.06, "Cost($)": 5.0227 }, "AQuA": { "Score": 67.32, "Cost($)": 0.6491 } }, "POT": { "META": { "Algorithm": "POT", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 76.88, "Cost($)": 0.6902 }, "AQuA": { "Score": 51.97, "Cost($)": 0.1557 } }, "ReAct-Pro*": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 74.91, "Cost($)": 3.4633 }, "AQuA": { "Score": 64.57, "Cost($)": 0.4928 } }, "IO-Doubao": { "META": { "Algorithm": "IO", "LLM": "Doubao-lite-32k", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 72.02, "Cost($)": 0.0354 }, "AQuA": { "Score": 79.13, "Cost($)": 0.0058 } }, "COT-Doubao": { "META": { "Algorithm": "COT", "LLM": "Doubao-lite-32k", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 89.31, "Cost($)": 0.0557 }, "AQuA": { "Score": 82.68, "Cost($)": 0.0066 } }, "SC-COT-Doubao": { "META": { "Algorithm": "SC-COT", "LLM": "Doubao-lite-32k", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 88.63, "Cost($)": 0.1533 }, "AQuA": { "Score": 83.46, "Cost($)": 0.0409 } }, "POT-Doubao": { "META": { "Algorithm": "POT", "LLM": "Doubao-lite-32k", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 79.15, "Cost($)": 0.0575 }, "AQuA": { "Score": 52.36, "Cost($)": 0.0142 } }, "ReAct-Pro-Doubao": { "META": { "Algorithm": "ReAct-Pro", "LLM": "Doubao-lite-32k", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 85.60, "Cost($)": 0.2513 }, "AQuA": { "Score": 77.56, "Cost($)": 0.0446 } } } }