AI-Trainer / ai_server_admin /server /ai_server_admin_environment.py
V1vex's picture
first-commit
625b444
import json
import logging
import math
import os
import random
from uuid import uuid4

import requests
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

try:
    from ..models import AiServerAdminAction, AiServerAdminObservation
except ImportError:
    from models import AiServerAdminAction, AiServerAdminObservation
_LOGGER = logging.getLogger(__name__)


class AiServerAdminEnvironment(Environment):
    """Single-step environment whose reward comes from an LLM judge.

    ``reset`` picks a random task from ``tasks.json`` and returns its prompt;
    ``step`` sends the agent's message, the task prompt and the task rubric to
    the OpenAI chat-completions endpoint and uses the returned number,
    clamped to ``[0.0, 1.0]``, as the reward. Every ``step`` ends the episode.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Judge configuration, kept as class attributes so a subclass or a test
    # can override them without touching the request logic.
    JUDGE_URL: str = "https://api.openai.com/v1/chat/completions"
    JUDGE_MODEL: str = "gpt-4o-mini"
    # Upper bound in seconds for the judge request; without a timeout
    # ``requests`` would wait indefinitely on a stalled connection.
    JUDGE_TIMEOUT_SECONDS: float = 30.0
    # Reward returned when OPENAI_API_KEY is not set and no judging happens.
    NO_KEY_REWARD: float = 0.5

    def __init__(self):
        """Create a fresh episode state and load the task list.

        Raises:
            OSError: If neither candidate ``tasks.json`` can be opened.
            ValueError: If the fallback ``tasks.json`` is not valid JSON.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_task = None
        self.tasks = self._load_tasks()

    @staticmethod
    def _load_tasks():
        """Return the parsed contents of ``tasks.json``.

        The file one directory above this module is tried first; if it cannot
        be read or parsed, ``tasks.json`` in the current working directory is
        used instead.
        """
        packaged_path = os.path.join(os.path.dirname(__file__), "..", "tasks.json")
        try:
            with open(packaged_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except (OSError, ValueError):
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            with open("tasks.json", "r", encoding="utf-8") as f:
                return json.load(f)

    def reset(self) -> AiServerAdminObservation:
        """Start a new episode with a randomly chosen task.

        Returns:
            An observation whose ``echoed_message`` carries the task prompt,
            with ``done=False`` and ``reward=0.0``.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_task = random.choice(self.tasks)
        return AiServerAdminObservation(
            echoed_message=f"[NEW TASK]: {self.current_task['prompt']}",
            message_length=0,
            done=False,
            reward=0.0,
        )

    def step(self, action: AiServerAdminAction) -> AiServerAdminObservation:
        """Score the agent's answer and finish the episode.

        Args:
            action: The agent's action; its ``message`` is the answer judged.

        Returns:
            An observation with ``done=True`` and the judge's reward.
        """
        self._state.step_count += 1
        agent_answer = action.message
        reward = self._judge_code(agent_answer)
        return AiServerAdminObservation(
            echoed_message="Evaluation Complete.",
            message_length=len(agent_answer),
            done=True,
            reward=reward,
        )

    def _judge_code(self, agent_answer: str) -> float:
        """Ask the LLM judge to grade ``agent_answer`` for the current task.

        Args:
            agent_answer: The text submitted by the agent.

        Returns:
            ``NO_KEY_REWARD`` when ``OPENAI_API_KEY`` is unset or empty;
            otherwise the judge's score clamped to ``[0.0, 1.0]``, or ``0.0``
            when the request fails or the reply is not a usable number.

        Raises:
            RuntimeError: If an API key is set but no task is active, i.e.
                ``reset()`` has not been called on this instance.
        """
        openai_key = os.environ.get("OPENAI_API_KEY", "")
        if not openai_key:
            return self.NO_KEY_REWARD

        task = self.current_task
        if task is None:
            # Previously surfaced as an opaque TypeError on None['prompt'].
            raise RuntimeError(
                "No active task: reset() must be called before step()."
            )

        headers = {
            "Authorization": f"Bearer {openai_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.JUDGE_MODEL,
            "messages": [
                {"role": "system", "content": "You are a strict AI Judge. Evaluate the answer based on the rubric. Output ONLY a single float number between 0.0 and 1.0. No extra text."},
                {"role": "user", "content": f"Task: {task['prompt']}\nRubric: {task['rubric']}\nAgent Answer: {agent_answer}"},
            ],
        }

        # Any failure talking to the judge or reading its reply is treated as
        # a zero score, so an outage never crashes the environment; it is
        # logged so that such zeros can be told apart from genuine ones.
        try:
            resp = requests.post(
                self.JUDGE_URL,
                json=payload,
                headers=headers,
                timeout=self.JUDGE_TIMEOUT_SECONDS,
            )
            resp.raise_for_status()
            score_str = resp.json()["choices"][0]["message"]["content"].strip()
            score = float(score_str)
        except Exception:
            _LOGGER.warning(
                "LLM judge call failed; scoring the answer 0.0", exc_info=True
            )
            return 0.0

        # float() accepts "nan", and NaN passes through min()/max() unchanged,
        # which would leak a NaN reward to the caller.
        if math.isnan(score):
            _LOGGER.warning("LLM judge returned NaN; scoring the answer 0.0")
            return 0.0
        return min(max(score, 0.0), 1.0)

    @property
    def state(self) -> State:
        """Return the current episode state (episode id and step count)."""
        return self._state