Spaces:
Runtime error
Runtime error
| # file: 03_run_evaluation.py | |
| import os | |
| from dotenv import load_dotenv | |
| from langsmith import Client | |
| from langchain_openai import ChatOpenAI | |
| from agent_graph_factory import create_graph_app | |
| OPENROUTER_API_URL = "https://openrouter.ai/api/v1" | |
def main():
    """Run an A/B evaluation of the agent graph over a LangSmith dataset.

    For every model in the comparison list this builds a ``ChatOpenAI``
    client pointed at the OpenRouter endpoint, wraps it with
    ``create_graph_app``, and evaluates the resulting app on the
    ``word-count-golden-set`` dataset through ``Client.run_on_dataset``.

    Each model's results go to their own project. Project names carry a
    short per-run suffix so the script can be executed repeatedly without
    reusing the name of a project created by an earlier run.
    """
    # Local import: only needed here, keeps the file's import block untouched.
    import uuid

    load_dotenv()
    client = Client()

    # Define the models we want to A/B test: Premium vs. Free
    models_to_test = [
        "anthropic/claude-sonnet-4",
        "qwen/qwen3-coder",
    ]
    dataset_name = "word-count-golden-set"

    # One suffix per script run: both models' projects share it, so the two
    # sides of a single A/B run are easy to pair up, while separate runs
    # never collide on project name.
    run_suffix = uuid.uuid4().hex[:8]

    print(f"--- Running Evaluations on Dataset: {dataset_name} ---")
    for model_name in models_to_test:
        print(f"--- Testing Model: {model_name} ---")
        llm = ChatOpenAI(
            model=model_name,
            temperature=0,
            openai_api_key=os.getenv("OPENROUTER_API_KEY"),
            openai_api_base=OPENROUTER_API_URL,
        )
        app = create_graph_app(llm)

        # Sanitize the model name for use as a project name
        sanitized_model_name = model_name.replace('/', '-').replace(':', '_')
        project_name = f"test-{sanitized_model_name}-{run_suffix}"

        client.run_on_dataset(
            dataset_name=dataset_name,
            # Bind the current app as a default so the factory cannot pick up
            # a later iteration's app through late-binding closure lookup.
            llm_or_chain_factory=lambda app=app: app,
            project_name=project_name,
        )
        print(f"--- Test complete. Results in project: {project_name} ---")
# Script entry point: run the evaluation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()