Nexari-G1.1 / coder_model.py
Nexari-Research's picture
Update coder_model.py
38160c8 verified
import os
import logging
import asyncio
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Module-level logger for the coder model, under the "nexari.coder" name.
logger = logging.getLogger("nexari.coder")
# Default directory where the GGUF weights are stored / downloaded to.
BASE_DIR = "./models/coder"
# Holds the loaded Llama instance; None until load_model() succeeds
# (and reset to None again if a load attempt fails).
model = None
# === OPTIMIZED FOR 2 vCPU: Qwen 2.5 Coder 3B (Q4_K_M) ===
# Technique: use the Q4 quantization (fast) rather than Q6 (heavy).
# Per the original author's note, the quality drop is negligible while
# speed improves by roughly 40% (figure not verified here).
# Hugging Face repository that hosts the GGUF builds of the model.
REPO_ID = "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF"
# File within REPO_ID to fetch: the Q4_K_M quantized weights.
FILENAME = "qwen2.5-coder-3b-instruct-q4_k_m.gguf"
def load_model(local_dir: str = None):
    """Fetch the GGUF weights if needed and load them into a ``Llama`` instance.

    The loaded instance is also stored in the module-level ``model`` global so
    other code in this module can reuse it.

    Args:
        local_dir: Directory that holds (or will receive) the GGUF file.
            Falls back to ``BASE_DIR`` when falsy.

    Returns:
        The ``Llama`` instance on success, or ``None`` when any step
        (directory creation, download, model construction) fails. On failure
        the error is logged with its traceback and ``model`` is reset to
        ``None``; no exception propagates to the caller.
    """
    global model
    local_dir = local_dir or BASE_DIR
    try:
        os.makedirs(local_dir, exist_ok=True)
        path = os.path.join(local_dir, FILENAME)
        # Only hit the network when the weights are not already on disk
        # (original author's estimate: ~1.7 GB download instead of 2.8 GB).
        if not os.path.exists(path):
            logger.info("⬇️ Downloading Qwen 3B Coder (Fast Q4)...")
            # Trust the path reported by the hub client rather than assuming
            # where the file was written.
            path = hf_hub_download(
                repo_id=REPO_ID,
                filename=FILENAME,
                local_dir=local_dir,
            )
        model = Llama(
            model_path=path,
            n_ctx=8192,
            n_threads=2,
            n_batch=512,  # Batch size increased to ingest prompts faster
            verbose=False,
        )
        logger.info("✅ Coder Model Ready (Qwen 3B - Turbo Mode)")
        return model
    except Exception as e:
        # Deliberate best-effort boundary: callers receive None instead of an
        # exception. logger.exception keeps the traceback so the root cause
        # (network, disk, corrupt file, ...) is still diagnosable.
        logger.exception("Coder Load Error: %s", e)
        model = None
        return None
async def load_model_async():
    """Run ``load_model`` in a worker thread and return its result.

    The blocking load is handed to ``asyncio.to_thread`` so the event loop is
    not blocked while it runs. ``load_model`` is called with no arguments.
    """
    loaded = await asyncio.to_thread(load_model)
    return loaded