drdudddd commited on
Commit
fbc54cd
·
verified ·
1 Parent(s): 66a650d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -16
app.py CHANGED
@@ -1,31 +1,49 @@
1
  import gradio as gr
2
- from ctransformers import AutoModelForCausalLM
 
 
3
 
4
- print("Lade Modell... bitte warten (CPU Modus)")
5
 
6
- # Wir nutzen 'llama' als Typ, da dies der stabilste CPU-Loader ist
7
- llm = AutoModelForCausalLM.from_pretrained(
8
- "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
9
- model_file="DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf",
10
- model_type="llama", # Das verhindert die Suche nach exllama/gpu Modulen
11
- threads=4, # Nutzt die volle Power der HF-CPU
12
- context_length=1024
 
 
 
 
 
13
  )
14
 
15
  def respond(message, history):
16
- # DeepSeek-R1 Prompt-Struktur
17
  prompt = f"User: {message}\nAssistant: <think>\n"
18
 
19
  response = ""
20
- # Generierung mit Streaming
21
- for token in llm(prompt, stream=True):
22
- response += token
23
- yield response
 
 
 
 
 
 
 
 
 
 
24
 
 
25
  demo = gr.ChatInterface(
26
  fn=respond,
27
- title="DeepSeek-R1 CPU (Stable)",
28
- description="Dieser Space nutzt ctransformers ohne GPU-Abhängigkeiten."
29
  )
30
 
31
  if __name__ == "__main__":
 
1
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import os

print("Lade DeepSeek-R1 GGUF...")

# Download the GGUF weights from the Hub (cached locally after the first run,
# so restarts are fast).
model_path = hf_hub_download(
    repo_id="unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
    filename="DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf"
)

# Initialise llama.cpp (context window capped at 1024 for CPU stability;
# n_threads=2 matches the small free-tier CPU — raise on bigger hardware).
llm = Llama(
    model_path=model_path,
    n_ctx=1024,
    n_threads=2,
    verbose=False
)
21
 
22
def respond(message, history):
    """Stream a chat completion for *message*, conditioned on *history*.

    Yields the accumulated response text after every generated token so
    Gradio's ChatInterface can render a live stream.

    Fix: the original ignored ``history`` entirely, so the model had no
    memory of prior turns. The prompt is now rebuilt from the conversation.
    """
    prompt = ""
    for turn in history or []:
        # Gradio ChatInterface passes history either as (user, bot) pairs
        # or, in "messages" mode, as dicts with role/content — handle both.
        if isinstance(turn, dict):
            role = "User" if turn.get("role") == "user" else "Assistant"
            prompt += f"{role}: {turn.get('content', '')}\n"
        else:
            user_msg, bot_msg = turn
            prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    # DeepSeek-R1 format: open the reasoning tag so the model starts thinking.
    # NOTE(review): long histories may exceed n_ctx=1024 — consider truncating
    # old turns if that becomes an issue.
    prompt += f"User: {message}\nAssistant: <think>\n"

    response = ""
    # Streaming generation (llama_cpp returns chunk dicts when stream=True)
    stream = llm(
        prompt,
        max_tokens=512,
        stop=["User:", "<|endoftext|>"],
        stream=True,
        temperature=0.7
    )
    for chunk in stream:
        choice = chunk["choices"][0]
        if "text" in choice:
            response += choice["text"]
            yield response
41
 
42
# Gradio chat UI: streams the partial responses yielded by `respond`.
demo = gr.ChatInterface(
    fn=respond,
    title="DeepSeek-R1 CPU (GGUF Safe Mode)",
    description="Läuft stabil auf Hugging Face CPU Hardware."
)
48
 
49
  if __name__ == "__main__":