Spaces:

AhmadA82
/

coder-demo

Running

AhmadA82 committed 3 days ago

Commit

17dca91

verified ·

1 Parent(s): ca02a50

8

Files changed (1) hide show

app.py CHANGED Viewed

@@ -121,12 +121,11 @@ def load_local_model_if_configured():
         logger.warning("⚠️ llama_cpp غير متاح. لن يعمل النموذج المحلي.")
         return
-        try:
         logger.info(f"⬇️ تحميل نموذج GGUF: {LOCAL_GGUF_REPO}/{LOCAL_GGUF_FILE}")
         llm = Llama.from_pretrained(
-                repo_id=LOCAL_GGUF_REPO,
-                filename=LOCAL_GGUF_FILE,
             # Llama params
             n_ctx=int(os.getenv("N_CTX", "32768")),
             n_threads=int(os.getenv("N_THREADS", "2")),
@@ -213,7 +212,7 @@ def call_hf_inference(prompt: str, max_new_tokens: int = 900) -> str:
     raise RuntimeError("تم تعطيل HF Inference. النموذج المحلي مستخدم فقط.")
 def call_llm(prompt: str, max_tokens: int = 900) -> str:
-        return call_local_llm(prompt, max_tokens)
 # =========================
 # بناء الـ Prompt للدردشة (نسخة مبسطة)

         logger.warning("⚠️ llama_cpp غير متاح. لن يعمل النموذج المحلي.")
         return
+    try:
         logger.info(f"⬇️ تحميل نموذج GGUF: {LOCAL_GGUF_REPO}/{LOCAL_GGUF_FILE}")
         llm = Llama.from_pretrained(
+            repo_id=LOCAL_GGUF_REPO,
+            filename=LOCAL_GGUF_FILE,
             # Llama params
             n_ctx=int(os.getenv("N_CTX", "32768")),
             n_threads=int(os.getenv("N_THREADS", "2")),
     raise RuntimeError("تم تعطيل HF Inference. النموذج المحلي مستخدم فقط.")
 def call_llm(prompt: str, max_tokens: int = 900) -> str:
+    return call_local_llm(prompt, max_tokens)
 # =========================
 # بناء الـ Prompt للدردشة (نسخة مبسطة)