# --- HF model lists (single, light model for HF Spaces CPU) ---
# We standardize on a small, fast model that runs reliably via HF Inference
# and is suitable for free CPU Spaces constraints.
THINKING_MODELS = ["Qwen/Qwen2.5-3B-Instruct"]
INSTRUCT_MODELS = ["Qwen/Qwen2.5-3B-Instruct"]


def _current_models():
    return THINKING_MODELS if STATE.get("mode") == "thinking" else INSTRUCT_MODELS


# app.py
import os
import json
import hashlib
import logging
import threading
from pathlib import Path
from typing import List, Dict, Any, Tuple

# --- Model mode state (thinking/instruct) with simple persistence ---
APP_DIR = Path(__file__).parent
DATA_DIR = APP_DIR / "data"
DATA_DIR.mkdir(parents=True, exist_ok=True)
STATE_PATH = DATA_DIR / "state.json"


def _load_state():
    if STATE_PATH.exists():
        try:
            return json.loads(STATE_PATH.read_text(encoding="utf-8"))
        except Exception:
            pass
    return {"mode": "instruct"}


def _save_state(s: dict):
    STATE_PATH.write_text(json.dumps(s, ensure_ascii=False, indent=2), encoding="utf-8")


STATE = _load_state()

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from huggingface_hub import HfApi, hf_hub_download

from monitor import get_current_metrics, start_monitoring_thread
from memory import get_history, save_history

# =========================
# Logging setup
# =========================
logging.basicConfig(
    level=logging.INFO,
    format="🪵 [%(asctime)s] [%(levelname)s] %(message)s"
)
logger = logging.getLogger("app")

# =========================
# Constants and paths
# =========================
# NOTE: re-binds DATA_DIR relative to the working directory (assumed to be the app dir).
DATA_DIR = Path("data")
CACHE_DIR = DATA_DIR / "cache"
INDEX_DIR = DATA_DIR / "index"
FILES_DIR = DATA_DIR / "files"  # stores the full text of each file
REPORT_FILE = DATA_DIR / "analysis_report.md"
GRAPH_FILE = DATA_DIR / "code_graph.json"
EMB_FILE = INDEX_DIR / "embeddings.faiss"
META_FILE = INDEX_DIR / "chunks.pkl"
HASH_MAP_FILE = INDEX_DIR / "hash_map.json"

for p in [DATA_DIR, CACHE_DIR, INDEX_DIR, FILES_DIR]:
    p.mkdir(parents=True, exist_ok=True)

# Env
HF_TOKEN = os.getenv("HF_TOKEN", "")
# Use a single, smaller model by default for faster responses and fewer 5xx
MODEL_REPO = os.getenv("MODEL_REPO", "Qwen/Qwen2.5-3B-Instruct")
# No fallbacks by default (can be provided via env if desired)
FALLBACK_MODELS = [
    m.strip() for m in os.getenv("FALLBACK_MODELS", "").split(",") if m.strip()
]

# Local GGUF model (if available)
LOCAL_GGUF_REPO = os.getenv("LOCAL_GGUF_REPO", "Qwen/Qwen2.5-3B-Instruct-GGUF")
LOCAL_GGUF_FILE = os.getenv("LOCAL_GGUF_FILE", "qwen2.5-3b-instruct-q4_k_m.gguf")
LOCAL_GGUF_PATH = CACHE_DIR / LOCAL_GGUF_FILE

# Code chunking (hard cap when reading a file into memory)
MAX_FILE_BYTES = int(os.getenv("MAX_FILE_BYTES", str(10 * 1024 * 1024)))  # 10MB safety margin

# Plain text only; build_chat_prompt() below wraps it in ChatML tags exactly once.
SYSTEM_PROMPT = """You are a senior AI code analyst. Analyze projects with hybrid indexing (code graph + retrieval). Return structured, accurate, concise answers. Use Arabic + English labels in the final report."""

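# Illustrative only: with the plain-text SYSTEM_PROMPT above, build_chat_prompt()
# (defined further down) yields a ChatML prompt of this shape for a single-turn
# chat; the user message here is a made-up placeholder:
#
#   <|im_start|>system
#   You are a senior AI code analyst. ...
#   <|im_end|>
#   <|im_start|>user
#   What does app.py do?
#   <|im_end|>
#   <|im_start|>assistant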
<|im_end|>""" # ========================= # الحالة العالمية والقفل # ========================= all_chunks: List[Tuple[str, str]] = [] code_graph: Dict[str, Any] = {"files": {}} hash_map: Dict[str, str] = {} index_lock = threading.RLock() # ✅ لتأمين الفهرسة/الاسترجاع # ========================= # LLM (محلي عبر GGUF) # ========================= try: from llama_cpp import Llama except Exception: Llama = None llm = None logger.info(f"HF_TOKEN length: {len(HF_TOKEN)}") # تحقق من طول الtoken def load_local_model_if_configured(): """تحميل نموذج GGUF من HuggingFace Hub مباشرة.""" global llm if Llama is None: logger.warning("⚠️ llama_cpp غير متاح. لن يعمل النموذج المحلي.") return try: logger.info(f"⬇️ تحميل نموذج GGUF: {LOCAL_GGUF_REPO}/{LOCAL_GGUF_FILE}") llm = Llama.from_pretrained( repo_id=LOCAL_GGUF_REPO, filename=LOCAL_GGUF_FILE, # Llama params n_ctx=int(os.getenv("N_CTX", "32768")), n_threads=int(os.getenv("N_THREADS", "2")), n_batch=int(os.getenv("N_BATCH", "64")), n_gpu_layers=int(os.getenv("N_GPU_LAYERS", "0")), use_mlock=False, verbose=False, ) logger.info("✅ تم تحميل نموذج GGUF المحلي بنجاح.") except Exception as e: llm = None logger.error(f"❌ فشل تحميل/تشغيل GGUF: {e}") def call_local_llm(prompt: str, max_tokens: int = 800) -> str: if llm is None: raise RuntimeError("النموذج المحلي غير محمل") try: res = llm( prompt, max_tokens=max_tokens, temperature=0.4, top_p=0.9, stop=["<|im_end|>", "<|im_start|>"], echo=False ) return (res.get("choices", [{}])[0].get("text") or "").strip() except Exception as e: logger.error(f"❌ خطأ في استدعاء النموذج المحلي: {e}") raise RuntimeError(f"فشل استدعاء النموذج المحلي: {e}") def _call_hf_single_model(model_repo: str, prompt: str, max_new_tokens: int = 900) -> str: import requests if not HF_TOKEN: logger.error("❌ HF_TOKEN غير معرف.") raise RuntimeError("التوكن HF_TOKEN غير مضبوط ولا يوجد نموذج محلي.") url = f"https://api-inference.huggingface.co/models/{model_repo}" headers = {"Authorization": f"Bearer {HF_TOKEN}"} payload = { "inputs": prompt, "parameters": { "max_new_tokens": max_new_tokens, "temperature": 0.4, "top_p": 0.9, "return_full_text": False } } r = requests.post(url, headers=headers, json=payload, timeout=120) if r.status_code == 503: data = {} try: data = r.json() except Exception: pass eta = data.get("estimated_time") raise RuntimeError("النموذج قيد التحميل من HF (503)." + (f" متوقع {eta:.0f}ث" if isinstance(eta, (int, float)) else "")) try: r.raise_for_status() except requests.exceptions.HTTPError as e: status = e.response.status_code if status == 401: raise RuntimeError("التوكن مفقود أو غير صالح (401). تأكد من HF_TOKEN.") if status == 403: msg = "" try: msg = (e.response.json().get("error") or "").lower() except Exception: pass if "gated" in msg or "accept" in msg: raise RuntimeError("النموذج مسيَّج (403). يجب دخول صفحة النموذج والضغط على Accept.") raise RuntimeError("صلاحية الوصول مرفوضة (403).") if status == 404: raise RuntimeError("النموذج غير موجود أو غير متاح عبر السيرفرلس (404).") if status == 429: raise RuntimeError("تم تجاوز الحد المسموح للطلبات (429). 
جرّب لاحقًا.") try: err = e.response.json() except Exception: err = {"error": e.response.text} raise RuntimeError(f"خطأ HF ({status}): {err.get('error') or err}") data = r.json() if isinstance(data, list) and data and "generated_text" in data[0]: return data[0]["generated_text"] if isinstance(data, dict) and "generated_text" in data: return data["generated_text"] if isinstance(data, dict) and "error" in data: raise RuntimeError(f"HF error: {data['error']}") return json.dumps(data) def call_hf_inference(prompt: str, max_new_tokens: int = 900) -> str: raise RuntimeError("تم تعطيل HF Inference. النموذج المحلي مستخدم فقط.") def call_llm(prompt: str, max_tokens: int = 900) -> str: return call_local_llm(prompt, max_tokens) # ========================= # بناء الـ Prompt للدردشة (نسخة مبسطة) # ========================= def build_chat_prompt(history: List[List[str]], message: str, extra: str = "") -> str: prompt = f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n" for user_msg, ai_msg in history: prompt += f"<|im_start|>user\n{user_msg}\n<|im_end|>\n" prompt += f"<|im_start|>assistant\n{ai_msg}\n<|im_end|>\n" prompt += f"<|im_start|>user\n{message}\n{extra}\n<|im_end|>\n" prompt += f"<|im_start|>assistant\n" return prompt # ========================= # FastAPI # ========================= # NOTE: Warm-up moved to startup_event after helper functions are defined app = FastAPI(title="AI Code Analyst") # --- Root endpoint for Hugging Face health checks and simple UI --- from fastapi.responses import PlainTextResponse, HTMLResponse, JSONResponse @app.get("/", response_class=HTMLResponse) def root(logs: str | None = None): """ Minimal root endpoint so HF / healthcheck returns 200 OK. Use `/?logs=container` to tail last lines from data/app.log. """ if logs == "container": log_file = Path(DATA_DIR) / "app.log" if log_file.exists(): tail = "".join(log_file.read_text(encoding="utf-8", errors="ignore").splitlines(True)[-200:]) return PlainTextResponse(tail) return PlainTextResponse("No logs yet.", status_code=200) # Minimal HTML with quick chat form html = """
Try /docs, /hf-check, or /metrics. | Logs: tail