Spaces:
Runtime error
Runtime error
| import os | |
| import json | |
| import argparse | |
| from openai import OpenAI | |
| from core.environment import EmailOpsEnv | |
| from core.models import Action | |
# Runtime configuration is read from the environment (OpenEnv spec).
# The endpoint and model fall back to defaults; the token deliberately does not.
API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
HF_TOKEN = os.environ.get("HF_TOKEN")  # None when unset; checked at startup
def _build_system_prompt(goal) -> str:
    """Return the system prompt stating the goal and the available actions.

    ``goal`` is interpolated into the prompt as-is (presumably the task's
    description string).
    """
    return (
        "You are an intelligent email operations agent. "
        f"Your current goal is: {goal}\n"
        "You must perform actions to achieve this goal. Once you are finished, output the 'submit' action.\n"
        "Available action types:\n"
        "  - open_email (requires email_id)\n"
        "  - close_email\n"
        "  - move_email (requires email_id, folder_name)\n"
        "  - reply (requires email_id, reply_body)\n"
        "  - delete_email (requires email_id)\n"
        "  - flag_email (requires email_id)\n"
        "  - submit"
    )


def run_baseline(
    api_key: str,
    model_name: str,
    base_url: str,
    *,
    max_steps: int = 15,
) -> None:
    """Run the LLM agent on the "easy", "medium" and "hard" tasks.

    For each task the environment is reset, then the model is asked for one
    structured ``Action`` per step until the environment reports completion,
    the agent submits, the model returns no parsed action, an error occurs,
    or ``max_steps`` executed steps are reached. Progress is printed to
    stdout as ``START:`` / ``STEP:`` / ``END:`` lines.

    Args:
        api_key: Credential passed to the OpenAI-compatible client.
        model_name: Model identifier sent with every completion request.
        base_url: Base URL of the OpenAI-compatible endpoint.
        max_steps: Upper bound on executed environment steps per task.
    """
    client = OpenAI(api_key=api_key, base_url=base_url)
    env = EmailOpsEnv()
    tasks = ["easy", "medium", "hard"]

    print(f"Running baseline on model: {model_name}")
    print("=" * 40)

    for task_name in tasks:
        # START: Structured logging for OpenEnv automated grading
        print(f"START: {task_name}")

        obs = env.reset(task_name)
        step_count = 0
        is_done = False
        total_reward = 0.0

        while not is_done and step_count < max_steps:
            # Rebuilt every step, exactly as before, in case the task changes.
            system_prompt = _build_system_prompt(env.task.description)

            try:
                response = client.beta.chat.completions.parse(
                    model=model_name,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": f"Current Observation:\n{obs.model_dump_json(indent=2)}\nWhat is your next action?"},
                    ],
                    response_format=Action,
                    temperature=0.1,
                )
                action = response.choices[0].message.parsed
                if not action:
                    # No parsed action (e.g. a refusal): stop this task.
                    break

                # STEP: Structured logging for OpenEnv automated grading
                print(f"STEP: {action.model_dump_json()}")

                obs, reward, is_done, _ = env.step(action)
                # Count the step as soon as the environment has executed it,
                # so a final "submit" step is included in the reported total.
                step_count += 1
                # NOTE(review): keeps only the latest reward rather than
                # summing; correct only if env.step returns a cumulative
                # score - confirm against EmailOpsEnv.
                total_reward = reward

                if action.action_type == "submit":
                    break
            except Exception as e:
                # Top-level boundary: report the failure and close out the
                # task so the END line is still emitted.
                print(f"Error during inference: {e}")
                break

        # END: Structured logging for OpenEnv automated grading
        result = {
            "task": task_name,
            "steps": step_count,
            "reward": total_reward,
            "metrics": env.metrics,
        }
        print(f"END: {json.dumps(result)}")
        print("-" * 40)
if __name__ == "__main__":
    # Command-line flags override the environment-derived defaults.
    parser = argparse.ArgumentParser()
    parser.add_argument("--api-key", type=str, default=HF_TOKEN)
    parser.add_argument("--model", type=str, default=MODEL_NAME)
    parser.add_argument("--base-url", type=str, default=API_BASE_URL)
    args = parser.parse_args()

    # A credential is mandatory: it comes from HF_TOKEN unless --api-key is given.
    if not args.api_key:
        print("Please set HF_TOKEN environment variable.")
        # Raise SystemExit directly: the bare `exit` helper is injected by the
        # `site` module for interactive use and is absent under `python -S`.
        raise SystemExit(1)

    run_baseline(args.api_key, args.model, args.base_url)