Keeby-smilyai committed (verified)
Commit 05a2a2b · 1 Parent(s): 016533a

Update backend.py

Files changed (1):
  1. backend.py  +29 -44
backend.py CHANGED
@@ -1,4 +1,4 @@
-# backend.py — ENHANCED WITH AGENT CONVERSATION LOGS + RAM/VRAM MONITORING
+# backend.py — FINAL VERSION PHASE-3 MINI INSTRUCT + CACHING + LOGGING
 import sqlite3
 import os
 import json
@@ -152,18 +152,18 @@ def get_vram_usage_gb():
     except:
         return 0.0
 
-# ------------------------------ MODEL LOADING ------------------------------
+# ------------------------------ MODEL LOADING (PER ROLE) ------------------------------
 MODEL_REGISTRY = {
     "understander": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
     "architect": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
-    "ceo": "google/gemma-2-2b-it",
+    "ceo": "microsoft/Phi-3-mini-4k-instruct",
     "manager": "microsoft/Phi-3-mini-4k-instruct",
     "worker": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
     "reviewer": "microsoft/Phi-3-mini-4k-instruct",
     "editor": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
     "tester": "microsoft/Phi-3-mini-4k-instruct",
     "publisher": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
-    "final_ceo": "google/gemma-2-2b-it",
+    "final_ceo": "microsoft/Phi-3-mini-4k-instruct",
 }
 
 _MODEL_CACHE = {}
@@ -177,7 +177,8 @@ def load_model(model_name):
         model_name,
         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
         device_map="auto",
-        trust_remote_code=True
+        trust_remote_code=True,
+        attn_implementation="eager"  # ✅ Avoids flash-attn requirement — safe on all hardware
     )
     _MODEL_CACHE[model_name] = (tokenizer, model)
     return tokenizer, model
@@ -418,51 +419,35 @@ def zip_project(user_id, project_name: str):
             zf.write(full_path, arcname)
     return zip_path
 
-# ------------------------------ LLM GENERATOR (WITH LOGGING) ------------------------------
+# ------------------------------ LLM GENERATOR (WITH CHAT TEMPLATE FOR PHI-3) ------------------------------
 def generate_with_model(role: str, prompt: str, context: dict = {}) -> str:
     try:
         model_name = MODEL_REGISTRY[role]
         tokenizer, model = load_model(model_name)
 
-        # Format message based on role
-        if role == "understander":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["understander"]},
-                        {"role": "user", "content": prompt}]
-        elif role == "architect":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["architect"]},
-                        {"role": "user", "content": prompt}]
-        elif role == "ceo":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["ceo"]},
-                        {"role": "user", "content": prompt}]
-        elif role == "manager":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["manager"]},
-                        {"role": "user", "content": prompt}]
-        elif role == "worker":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["worker"].format(file=context.get("file"), instructions=context.get("instructions"))},
-                        {"role": "user", "content": ""}]
-        elif role == "reviewer":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["reviewer"]},
-                        {"role": "user", "content": prompt}]
-        elif role == "editor":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["editor"]},
-                        {"role": "user", "content": prompt}]
-        elif role == "tester":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["tester"]},
-                        {"role": "user", "content": prompt}]
-        elif role == "publisher":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["publisher"]},
-                        {"role": "user", "content": prompt}]
-        elif role == "final_ceo":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["final_ceo"]},
-                        {"role": "user", "content": prompt}]
+        # Format message using Phi-3 or Qwen chat template
+        if "Phi-3" in model_name:
+            # Phi-3 format
+            messages = [
+                {"role": "system", "content": ROLE_PROMPTS[role]},
+                {"role": "user", "content": prompt}
+            ]
+            text = tokenizer.apply_chat_template(
+                messages,
+                tokenize=False,
+                add_generation_prompt=True
+            )
         else:
-            messages = [{"role": "user", "content": prompt}]
-
-        text = tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True
-        )
+            # Qwen format
+            messages = [
+                {"role": "system", "content": ROLE_PROMPTS[role]},
+                {"role": "user", "content": prompt}
+            ]
+            text = tokenizer.apply_chat_template(
+                messages,
+                tokenize=False,
+                add_generation_prompt=True
+            )
 
         inputs = tokenizer(text, return_tensors="pt").to(model.device)
 
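After this commit the registry maps every role onto just two checkpoints (Qwen/Qwen2.5-Coder-0.6B-Instruct and microsoft/Phi-3-mini-4k-instruct), so a cache keyed by model name holds at most two models in memory. The parts of load_model outside the changed hunk are not shown in the diff; the following is a minimal sketch of how the new kwargs plausibly fit into it, assuming the standard transformers AutoTokenizer / AutoModelForCausalLM API (the cache check and tokenizer call are assumptions, not part of this commit).

# Sketch only: reconstructed around the hunk above; everything outside the
# changed lines (imports, cache check, tokenizer call) is an assumption.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

_MODEL_CACHE = {}

def load_model(model_name):
    # Reuse an already-loaded checkpoint so each model is loaded once per process.
    if model_name in _MODEL_CACHE:
        return _MODEL_CACHE[model_name]
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        trust_remote_code=True,
        attn_implementation="eager",  # avoid the flash-attn dependency
    )
    _MODEL_CACHE[model_name] = (tokenizer, model)
    return tokenizer, model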
 
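The diff is truncated right after the tokenization step. A typical continuation inside generate_with_model (not part of this commit; the generation settings below are illustrative assumptions) would generate, strip the prompt tokens, and decode only the completion.

# Assumed continuation of generate_with_model after the inputs line above.
outputs = model.generate(
    **inputs,
    max_new_tokens=1024,              # illustrative budget, not from the commit
    do_sample=False,                  # deterministic output for agent pipelines
    pad_token_id=tokenizer.eos_token_id,
)
# Keep only the newly generated tokens, then decode them to text.
new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
return tokenizer.decode(new_tokens, skip_special_tokens=True)

A call such as generate_with_model("ceo", some_prompt) would then return the decoded reply as a plain string, matching the function's -> str annotation.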