Spaces:
Runtime error
Runtime error
import os | |
# from benchmark.run_benchmark import run_benchmark | |
# Tools that use RAG; these additionally require an OpenAI API key.
_RAG_TOOLS = frozenset({"prediction-request-reasoning", "prediction-request-rag"})


def _clear_results_dir(results_dir="results"):
    """Best-effort removal of every non-hidden entry inside *results_dir*.

    Equivalent to ``rm -rf results/*`` without spawning a shell: hidden
    entries are left alone, the directory itself is kept, a missing
    directory is a no-op, and individual failures are ignored.
    """
    import glob
    import shutil

    # glob's "*" skips dot-files, exactly like the shell pattern it replaces.
    for entry in glob.glob(os.path.join(results_dir, "*")):
        if os.path.isdir(entry) and not os.path.islink(entry):
            shutil.rmtree(entry, ignore_errors=True)
        else:
            # Plain files and symlinks (including symlinks to directories)
            # are unlinked rather than followed.
            try:
                os.remove(entry)
            except OSError:
                # A leftover file must not prevent the benchmark from starting.
                pass


def run_benchmark_main(
    tool_name,
    model_name,
    num_questions,
    openai_api_key,
    anthropic_api_key,
    openrouter_api_key,
) -> str:
    """Assemble the benchmark settings and report the outcome as a string.

    Args:
        tool_name: Name of the tool to benchmark; stored as a one-element list.
        model_name: Model identifier. May be empty or ``None``, in which case
            no model is recorded and the provider falls back to "openrouter".
        num_questions: Number of questions to run; any falsy value means 10.
        openai_api_key: OpenAI key. Required when *tool_name* is one of the
            RAG tools.
        anthropic_api_key: Anthropic key, recorded only when provided.
        openrouter_api_key: OpenRouter key, recorded only when provided.

    Returns:
        "completed" on success, otherwise a message starting with "Error".

    Side effects:
        Prints progress messages and empties the ``results`` directory
        (relative to the current working directory) before anything else.
    """
    print("Running benchmark for the provided api keys")
    # Empty the results directory
    _clear_results_dir()

    # Set the benchmark parameters
    kwargs = {
        "num_questions": num_questions or 10,
        "tools": [tool_name],
    }
    if model_name:
        kwargs["model"] = [model_name]

    # Only keys that were actually supplied are recorded.
    supplied_keys = {
        "openai": openai_api_key,
        "anthropic": anthropic_api_key,
        "openrouter": openrouter_api_key,
    }
    kwargs["api_keys"] = {
        provider: key for provider, key in supplied_keys.items() if key
    }

    # The provider is inferred from the model name. A missing model is treated
    # like an empty name so the substring checks cannot raise on None.
    model = model_name or ""
    if "gpt" in model:
        kwargs["llm_provider"] = "openai"
    elif "claude" in model:
        kwargs["llm_provider"] = "anthropic"
    else:
        kwargs["llm_provider"] = "openrouter"

    if tool_name in _RAG_TOOLS and not openai_api_key:
        return "Error: Tools that use RAG also require an OpenAI API Key"

    kwargs["num_urls"] = 3
    kwargs["num_words"] = 300
    kwargs["provide_source_links"] = True

    print("Running benchmark")

    # Run the benchmark
    # NOTE: the run_benchmark call below is commented out, so nothing in the
    # try body can raise and this currently always reports success. The
    # handler is kept so re-enabling the call is a one-line change.
    try:
        # run_benchmark(kwargs=kwargs)
        return "completed"
    except Exception as e:
        return f"Error running benchmark: {e}"