Spaces:
Sleeping
Sleeping
# inference.py | |
import os | |
import csv | |
import random | |
import torch | |
import pandas as pd | |
import psutil | |
import platform | |
import GPUtil | |
import torch.nn.functional as F | |
from transformers import AutoTokenizer | |
from evo_model import EvoTransformerV22 | |
from evo_architecture import ( | |
build_model_from_config, | |
mutate_genome, | |
log_genome, | |
save_best_genome, | |
load_best_genome | |
) | |
import openai | |
# API key for the openai package, read from the environment.
# NOTE(review): when OPENAI_API_KEY is unset this falls back to the literal
# placeholder "sk-..." -- presumably not a usable key; confirm this is intended.
openai.api_key = os.getenv("OPENAI_API_KEY", "sk-...")

# 📦 Setup: tokenizer and compute device shared by inference and fine-tuning.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Current genome state: build the model described by whatever
# load_best_genome() returns, move it to `device`, and put it in eval mode.
current_config = load_best_genome()
model = build_model_from_config(current_config).to(device)
model.eval()

# CSV file that retrain_from_feedback_csv() reads feedback votes from.
FEEDBACK_LOG = "feedback_log.csv"
# 🧠 Evo prediction
def evo_chat_predict(history, question, options):
    """Score every candidate option with the Evo model and pick the best one.

    Each option is appended to ``question``; the resulting strings are
    tokenized and scored in one batch. The model output goes through a
    sigmoid and the option with the highest score wins.

    Args:
        history: Accepted for interface compatibility; not used here.
        question: Prompt text prepended to every option.
        options: Non-empty sequence of candidate answer strings.

    Returns:
        dict with keys ``answer`` (the winning option), ``confidence`` (its
        sigmoid score rounded to 3 decimals), ``reasoning`` (every option
        with its score, joined by " vs ") and ``context_used`` (the question).

    Raises:
        ValueError: If ``options`` is empty.
    """
    if not options:
        raise ValueError("options must contain at least one candidate answer")

    inputs = [f"{question} {opt}" for opt in options]
    enc = tokenizer(
        inputs,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    ).to(device)
    with torch.no_grad():
        logits = model(enc["input_ids"])

    # reshape(-1) instead of squeeze(): squeeze() drops *every* size-1 dim, so
    # a single-option batch would collapse to a 0-d tensor and tolist() would
    # return a bare float that cannot be indexed.
    # Assumes the model emits one logit per input row -- TODO confirm.
    scores = torch.sigmoid(logits).reshape(-1)
    probs = scores.tolist()
    best_idx = int(torch.argmax(scores))

    # For two options this yields exactly "A: 0.123 vs B: 0.456"; it also
    # covers one option or more than two instead of hard-coding indexes 0/1.
    reasoning = " vs ".join(
        f"{opt}: {prob:.3f}" for opt, prob in zip(options, probs)
    )
    return {
        "answer": options[best_idx],
        "confidence": round(probs[best_idx], 3),
        "reasoning": reasoning,
        "context_used": question,
    }
# 🤖 GPT response
def get_gpt_response(prompt):
    """Ask gpt-3.5-turbo for a reply to ``prompt`` sent as one user message.

    Args:
        prompt: Text used as the content of the single user message.

    Returns:
        The stripped text of the first returned choice, or the string
        "(GPT Error) <exception>" when anything in the call raises.
    """
    user_message = {"role": "user", "content": prompt}
    try:
        completion = openai.OpenAI().chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[user_message],
        )
        reply = completion.choices[0].message.content
        return reply.strip()
    except Exception as err:
        # Best-effort boundary: the failure is reported as text, not raised.
        return f"(GPT Error) {err}"
# Genome stats
def get_model_config():
    """Summarize the active genome held in ``current_config``.

    Returns:
        dict with ``num_layers``, ``num_heads``, ``ffn_dim`` and
        ``memory_enabled`` (each "?" when absent from the config) followed by
        ``accuracy`` ("N/A" when absent).
    """
    architecture_fields = ("num_layers", "num_heads", "ffn_dim", "memory_enabled")
    summary = {name: current_config.get(name, "?") for name in architecture_fields}
    summary["accuracy"] = current_config.get("accuracy", "N/A")
    return summary
# 🖥️ System info
def get_system_stats():
    """Collect a snapshot of host, memory and GPU usage.

    Returns:
        dict with ``device`` (type of the module-level torch device),
        ``cpu_usage_percent``, ``memory_used_gb``, ``memory_total_gb``,
        ``gpu_name``, ``gpu_memory_used_gb``, ``gpu_memory_total_gb`` and
        ``platform``. GPU fields fall back to "N/A" / 0 when no GPU is
        reported or the GPU query fails.
    """
    mem = psutil.virtual_memory()
    cpu = psutil.cpu_percent()

    gpu_name, gpu_mem_used, gpu_mem_total = "N/A", 0, 0
    try:
        gpus = GPUtil.getGPUs()
        if gpus:
            first_gpu = gpus[0]
            gpu_name = first_gpu.name
            # Divided by 1024 to report GB (GPUtil values presumably in MB --
            # confirm against the GPUtil docs).
            gpu_mem_used = round(first_gpu.memoryUsed / 1024, 2)
            gpu_mem_total = round(first_gpu.memoryTotal / 1024, 2)
    except Exception:
        # The GPU query is best-effort, but a bare ``except:`` would also
        # swallow KeyboardInterrupt/SystemExit; only ordinary errors are
        # absorbed. Reset in case the failure happened part-way through.
        gpu_name, gpu_mem_used, gpu_mem_total = "N/A", 0, 0

    return {
        "device": device.type,
        "cpu_usage_percent": cpu,
        "memory_used_gb": round(mem.used / 1024**3, 2),
        "memory_total_gb": round(mem.total / 1024**3, 2),
        "gpu_name": gpu_name,
        "gpu_memory_used_gb": gpu_mem_used,
        "gpu_memory_total_gb": gpu_mem_total,
        "platform": platform.platform(),
    }
# Evo retrain logic
def retrain_from_feedback_csv():
    """Mutate the current genome and fine-tune a fresh model on feedback votes.

    Reads ``FEEDBACK_LOG``, keeps the rows whose ``vote`` is "Evo" or "GPT",
    and turns each into a training pair: the text is
    "<question> <option1> <option2>" and the label is 1 for an "Evo" vote,
    0 for a "GPT" vote. A model built from a mutated copy of
    ``current_config`` is trained for 3 epochs (Adam, lr=1e-4, one sample per
    step, binary cross-entropy on the logit).

    Side effects:
        Logs the mutated genome, and on success replaces the module-level
        ``model`` and ``current_config`` and saves the genome with
        ``accuracy`` set to "Live-Finetuned".

    Returns:
        A human-readable status string (warning or success).
    """
    global current_config, model

    if not os.path.exists(FEEDBACK_LOG):
        return "⚠️ No feedback log found."

    no_data_msg = "⚠️ No usable feedback data. Please vote on Evo or GPT."
    try:
        df = pd.read_csv(FEEDBACK_LOG)
    except pd.errors.EmptyDataError:
        # A zero-byte log exists but has nothing to parse.
        return no_data_msg

    # Every column read below must exist; a missing one used to surface as a
    # KeyError part-way through the row loop.
    required_columns = {"vote", "question", "option1", "option2"}
    if df.empty or not required_columns.issubset(df.columns):
        return no_data_msg

    df = df[df["vote"].isin(["Evo", "GPT"])]
    if df.empty:
        return no_data_msg

    data = [
        (f"{question} {option1} {option2}", 1 if vote == "Evo" else 0)
        for question, option1, option2, vote in zip(
            df["question"], df["option1"], df["option2"], df["vote"]
        )
    ]

    new_config = mutate_genome(current_config)
    # Train a local candidate and publish it only after training succeeds, so
    # a failure mid-way cannot leave the shared model half-trained and stuck
    # in train mode. Both models are alive while training runs.
    candidate = build_model_from_config(new_config).to(device)
    log_genome(new_config)

    # Fine-tune
    candidate.train()
    optimizer = torch.optim.Adam(candidate.parameters(), lr=1e-4)
    for _ in range(3):
        random.shuffle(data)
        for text, label in data:
            enc = tokenizer(
                text,
                padding="max_length",
                truncation=True,
                max_length=128,
                return_tensors="pt",
            ).to(device)
            target = torch.tensor([label], dtype=torch.float32, device=device)
            logits = candidate(enc["input_ids"]).squeeze(1)
            loss = F.binary_cross_entropy_with_logits(logits, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    candidate.eval()

    model = candidate
    current_config = new_config
    save_best_genome({**new_config, "accuracy": "Live-Finetuned"})
    return f"✅ Evo retrained on {len(data)} feedback entries."
# Reload trigger
def load_model(force_reload=False):
    """Put the module-level ``model`` into evaluation mode.

    Args:
        force_reload: NOTE(review): not consulted in the visible body --
            nothing is rebuilt or re-read from disk here; confirm whether a
            real reload was intended.
    """
    global model
    model.eval()