# EvoPlatformV3 / inference.py
# HemanM's picture
# Update inference.py
# 0a6b186 verified
# inference.py
import os
import csv
import random
import torch
import pandas as pd
import psutil
import platform
import GPUtil
import torch.nn.functional as F
from transformers import AutoTokenizer
from evo_model import EvoTransformerV22
from evo_architecture import (
build_model_from_config,
mutate_genome,
log_genome,
save_best_genome,
load_best_genome
)
import openai
# πŸ” API Key
# Hand the OpenAI key from the environment to the `openai` module.
# NOTE(review): the "sk-..." fallback is a placeholder, not a usable key.
openai.api_key = os.getenv("OPENAI_API_KEY", "sk-...")
# Setup: the tokenizer used by both prediction and fine-tuning, and the compute
# device (CUDA when torch reports it as available, otherwise CPU).
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Current genome state: at import time, fetch the config returned by
# load_best_genome(), build a model from it on `device`, and put it in eval mode.
current_config = load_best_genome()
model = build_model_from_config(current_config).to(device)
model.eval()
# Path of the CSV that retrain_from_feedback_csv() reads votes from.
FEEDBACK_LOG = "feedback_log.csv"
# 🧠 Evo prediction
def evo_chat_predict(history, question, options):
    """Score every candidate option with the Evo model and pick the best one.

    Args:
        history: Accepted for interface compatibility; not used here.
        question: Question text, prepended to each option before scoring.
        options: Non-empty sequence of candidate answer strings.

    Returns:
        dict with the keys
            "answer": the option with the highest score,
            "confidence": that option's sigmoid score rounded to 3 decimals,
            "reasoning": every option with its score, joined by " vs ",
            "context_used": the question text.

    Raises:
        ValueError: if ``options`` is empty.
    """
    if not options:
        raise ValueError("options must contain at least one candidate answer")

    inputs = [f"{question} {opt}" for opt in options]
    enc = tokenizer(
        inputs,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    ).to(device)
    with torch.no_grad():
        logits = model(enc["input_ids"])

    # Flatten rather than squeeze(): squeeze() turns a single-option batch into
    # a 0-d tensor whose tolist() is a bare float that cannot be indexed.
    # NOTE(review): assumes the model emits one logit per input row -- confirm.
    scores = torch.sigmoid(logits).reshape(-1)
    probs = scores.tolist()
    best_idx = int(torch.argmax(scores))

    # List all options; for exactly two this matches the former "A vs B" text.
    reasoning = " vs ".join(
        f"{opt}: {prob:.3f}" for opt, prob in zip(options, probs)
    )
    return {
        "answer": options[best_idx],
        "confidence": round(probs[best_idx], 3),
        "reasoning": reasoning,
        "context_used": question,
    }
# GPT response
def get_gpt_response(prompt):
    """Ask gpt-3.5-turbo to answer ``prompt`` and return the stripped reply text.

    The prompt is sent as a single user message. Any exception raised while
    creating the client, calling the API, or reading the reply is caught and
    returned as a "(GPT Error) ..." string instead of propagating.
    """
    try:
        user_turn = {"role": "user", "content": prompt}
        completion = openai.OpenAI().chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[user_turn],
        )
        reply = completion.choices[0].message.content
        return reply.strip()
    except Exception as err:
        return f"(GPT Error) {err}"
# Genome stats
def get_model_config():
    """Return a display-friendly summary of the active genome config.

    Architecture fields missing from ``current_config`` are reported as "?";
    a missing "accuracy" is reported as "N/A".
    """
    architecture_keys = ("num_layers", "num_heads", "ffn_dim", "memory_enabled")
    summary = {key: current_config.get(key, "?") for key in architecture_keys}
    summary["accuracy"] = current_config.get("accuracy", "N/A")
    return summary
# System info
def get_system_stats():
    """Collect a snapshot of CPU, RAM and first-GPU usage for display.

    Returns:
        dict with the keys "device", "cpu_usage_percent", "memory_used_gb",
        "memory_total_gb", "gpu_name", "gpu_memory_used_gb",
        "gpu_memory_total_gb" and "platform". When no GPU is listed, or the
        GPU query fails, the GPU fields fall back to "N/A", 0 and 0.
    """
    mem = psutil.virtual_memory()
    cpu = psutil.cpu_percent()

    gpu_name, gpu_mem_used, gpu_mem_total = "N/A", 0, 0
    try:
        gpus = GPUtil.getGPUs()
        if gpus:
            gpu = gpus[0]  # only the first listed GPU is reported
            gpu_name = gpu.name
            # NOTE(review): dividing by 1024 presumably converts MB to GB -- confirm.
            gpu_mem_used = round(gpu.memoryUsed / 1024, 2)
            gpu_mem_total = round(gpu.memoryTotal / 1024, 2)
    except Exception:
        # Best effort: a failed GPU query must not break the stats panel.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        gpu_name, gpu_mem_used, gpu_mem_total = "N/A", 0, 0

    return {
        "device": device.type,
        "cpu_usage_percent": cpu,
        "memory_used_gb": round(mem.used / 1024**3, 2),
        "memory_total_gb": round(mem.total / 1024**3, 2),
        "gpu_name": gpu_name,
        "gpu_memory_used_gb": gpu_mem_used,
        "gpu_memory_total_gb": gpu_mem_total,
        "platform": platform.platform(),
    }
# πŸ” Evo retrain logic
def retrain_from_feedback_csv():
    """Mutate the current genome and fine-tune the new model on logged votes.

    Reads FEEDBACK_LOG, keeps the rows whose ``vote`` is "Evo" or "GPT", builds
    a model from a mutated copy of the current genome, and trains it for three
    epochs with one example per optimizer step. Only after training succeeds
    are the module-level ``model`` and ``current_config`` replaced and the
    genome saved, so a failure mid-training leaves the serving model untouched.

    Returns:
        str: a status message -- a warning when the log is missing or holds no
        usable rows, otherwise a confirmation with the number of entries used.
    """
    global current_config, model

    if not os.path.exists(FEEDBACK_LOG):
        return "⚠️ No feedback log found."

    try:
        df = pd.read_csv(FEEDBACK_LOG)
    except pd.errors.EmptyDataError:
        # A zero-byte log has no header row for pandas to parse.
        return "⚠️ No usable feedback data. Please vote on Evo or GPT."

    if df.empty or "vote" not in df.columns:
        return "⚠️ No usable feedback data. Please vote on Evo or GPT."

    df = df[df["vote"].isin(["Evo", "GPT"])]
    if df.empty:
        return "⚠️ No usable feedback data. Please vote on Evo or GPT."

    # Without the text columns there is nothing to train on.
    if any(col not in df.columns for col in ("question", "option1", "option2")):
        return "⚠️ No usable feedback data."

    # Label 1 = the user preferred Evo, 0 = the user preferred GPT.
    examples = [
        (f"{question} {option1} {option2}", 1 if vote == "Evo" else 0)
        for vote, question, option1, option2 in zip(
            df["vote"], df["question"], df["option1"], df["option2"]
        )
    ]

    new_config = mutate_genome(current_config)
    candidate = build_model_from_config(new_config).to(device)
    log_genome(new_config)

    # Tokenize once up front; the encodings do not change between epochs.
    batches = []
    for text, label in examples:
        enc = tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=128,
            return_tensors="pt",
        ).to(device)
        target = torch.tensor([label], dtype=torch.float32).to(device)
        batches.append((enc["input_ids"], target))

    # Fine-tune the candidate; the globals are not touched until this succeeds.
    candidate.train()
    optimizer = torch.optim.Adam(candidate.parameters(), lr=1e-4)
    for _ in range(3):
        random.shuffle(batches)
        for input_ids, target in batches:
            # NOTE(review): assumes one logit per input row -- confirm.
            logits = candidate(input_ids).reshape(-1)
            loss = F.binary_cross_entropy_with_logits(logits, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    candidate.eval()

    # Training finished: promote the candidate and persist its genome.
    model = candidate
    current_config = new_config
    save_best_genome({**new_config, "accuracy": "Live-Finetuned"})
    return f"✅ Evo retrained on {len(examples)} feedback entries."
# Reload trigger
def load_model(force_reload=False):
    """Put the module-level ``model`` into evaluation mode.

    NOTE(review): ``force_reload`` is not read in the visible body and nothing
    is reloaded here -- confirm whether that is intended.
    """
    # `model` is only read below, never rebound, in the visible code.
    global model
    model.eval()