Spaces:

TheMaisk
/

Mistral-7B-Instruct-v0.2-GGUF

Runtime error

App Files Files Community

TheMaisk committed on Jan 8

Commit

9ce2a9a

•

1 Parent(s): 90fe568

Create app.py

Browse files

Files changed (1) hide show

app.py +61 -0

app.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import json
+import subprocess
+import requests
+import gradio as gr
+# URL zum Herunterladen des Modells von Hugging Face
+url = "https://huggingface.co/TheBloke/dolphin-2.6-mistral-7B-GGUF/blob/main/dolphin-2.6-mistral-7b.Q6_K.gguf?download=true"
+response = requests.get(url)
+with open("./model.gguf", mode="wb") as file:
+    file.write(response.content)
+print("Modell heruntergeladen.")
+# Starten des Llama-Modellservers
+command = ["python3", "-m", "llama_cpp.server", "--model", "./model.gguf", "--host", "0.0.0.0", "--port", "2600", "--n_threads", "2"]
+subprocess.Popen(command)
+print("Modell bereit!")
+# Funktion zur Behandlung der Chat-Antwort
+def response(message, history):
+    # Lokale Server-URL
+    url = "http://0.0.0.0:2600/v1/completions"
+    body = {
+        "prompt": "[INST]" + message + "[/INST]",
+        "max_tokens": 500,
+        "echo": False,
+        "stream": True
+    }
+    response_text = ""
+    buffer = ""
+    for text in requests.post(url, json=body, stream=True):
+        if buffer is None:
+            buffer = ""
+        buffer = str("".join(buffer))
+        text = text.decode('utf-8')
+        if text.startswith(": ping -") is False and len(text.strip("\n\r")) > 0:
+            buffer += str(text)
+        buffer = buffer.split('"finish_reason": null}]}')
+        if len(buffer) == 1:
+            buffer = "".join(buffer)
+        if len(buffer) == 2:
+            part = buffer[0] + '"finish_reason": null}]}'
+            if part.lstrip('\n\r').startswith("data: "):
+                part = part.lstrip('\n\r').replace("data: ", "")
+            try:
+                part = str(json.loads(part)["choices"][0]["text"])
+                print(part, end="", flush=True)
+                response_text += part
+                buffer = ""  # Zurücksetzen des Buffers
+            except Exception as e:
+                print("Exception:" + str(e))
+    return response_text
+# Gradio-Schnittstelle mit spezifiziertem Theme
+gr_interface = gr.ChatInterface(
+    fn=response,
+    title="Mixtral_7Bx2_MoE-GGUF Chatbot",
+    theme='syddharth/gray-minimal'
+)
+# Starten des Gradio-Interfaces
+gr_interface.queue().launch(share=True)