# leaderboard-docker/tabs/run_benchmark.py
import os
import importlib
import sys
# from pathlib import Path
from benchmark.run_benchmark import run_benchmark
# from dotenv import load_dotenv
# file_path = Path(__file__).resolve()
# parent_path = file_path.parent.parent
# load_dotenv(parent_path / ".env")
def run_benchmark_main(tool_name, model_name, openai_api_key, anthropic_api_key):
    """Run the benchmark for the given tool/model using the provided API keys."""
    # Empty the results directory before starting a fresh run
    os.system("rm -rf results/*")

    # Set the benchmark parameters
    kwargs = {}
    kwargs["num_questions"] = 2
    kwargs["tools"] = [tool_name]
    if model_name:
        kwargs["model"] = [model_name]
    kwargs["api_keys"] = {}
    if openai_api_key:
        kwargs["api_keys"]["openai"] = openai_api_key
    if anthropic_api_key:
        kwargs["api_keys"]["anthropic"] = anthropic_api_key
    kwargs["num_urls"] = 3
    kwargs["num_words"] = 300
    kwargs["provide_source_links"] = True

    print(f"Running benchmark with the following parameters: {kwargs}")

    # Run the benchmark and report the outcome
    try:
        run_benchmark(kwargs=kwargs)
        return "completed"
    except Exception as e:
        return f"Error running benchmark: {e}"