TheMaisk commited on
Commit
9ce2a9a
1 Parent(s): 90fe568

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import subprocess
import requests
import gradio as gr

# --- Download the GGUF model from Hugging Face ---------------------------
# NOTE: the weights must be fetched via the /resolve/ endpoint; the original
# /blob/ URL returns the HTML file-viewer page, not the model file.
MODEL_URL = (
    "https://huggingface.co/TheBloke/dolphin-2.6-mistral-7B-GGUF/"
    "resolve/main/dolphin-2.6-mistral-7b.Q6_K.gguf?download=true"
)
MODEL_PATH = "./model.gguf"

# Stream the multi-GB file to disk in chunks instead of buffering the whole
# body in memory via response.content.
with requests.get(MODEL_URL, stream=True) as model_resp:
    model_resp.raise_for_status()  # fail loudly on a bad download
    with open(MODEL_PATH, mode="wb") as file:
        for chunk in model_resp.iter_content(chunk_size=1 << 20):
            file.write(chunk)
print("Modell heruntergeladen.")

# --- Start the llama.cpp OpenAI-compatible server in the background ------
command = [
    "python3", "-m", "llama_cpp.server",
    "--model", MODEL_PATH,
    "--host", "0.0.0.0",
    "--port", "2600",
    "--n_threads", "2",
]
subprocess.Popen(command)
print("Modell bereit!")
18
# Chat handler: forwards the user message to the local llama.cpp server and
# accumulates the streamed completion into a single reply string.
def response(message, history):
    """Query the local model server and return its completion text.

    Parameters:
        message: The current user chat message (str).
        history: Prior chat turns supplied by gr.ChatInterface; unused here
            because the prompt wraps only the current message.

    Returns:
        The concatenated completion text (str). On a malformed stream
        payload the error is printed and parsing continues (best effort),
        matching the original behaviour.
    """
    # Connect over loopback: 0.0.0.0 is a *bind* address and is not a valid
    # connect target on every platform.
    url = "http://127.0.0.1:2600/v1/completions"
    body = {
        "prompt": "[INST]" + message + "[/INST]",
        "max_tokens": 500,
        "echo": False,
        "stream": True,
    }
    response_text = ""
    with requests.post(url, json=body, stream=True) as resp:
        resp.raise_for_status()
        # The server replies with server-sent events: each payload line looks
        # like 'data: {...json...}'. iter_lines() yields whole lines, which
        # replaces the original hand-rolled chunk re-assembly that split on a
        # literal '"finish_reason": null}]}' marker and could break whenever
        # a JSON object straddled a transport chunk boundary.
        for raw_line in resp.iter_lines(decode_unicode=True):
            if not raw_line or raw_line.startswith(": ping -"):
                continue  # keep-alive / blank lines carry no data
            if not raw_line.startswith("data: "):
                continue
            payload = raw_line[len("data: "):]
            if payload.strip() == "[DONE]":
                break  # end-of-stream sentinel used by OpenAI-style servers
            try:
                part = str(json.loads(payload)["choices"][0]["text"])
            except Exception as e:
                # Best-effort: log and keep consuming, as the original did.
                print("Exception:" + str(e))
                continue
            print(part, end="", flush=True)
            response_text += part
    return response_text
52
+
53
# Gradio chat UI wired to the local model server via the `response` handler.
# The title now names the model actually downloaded and served above; the
# original said "Mixtral_7Bx2_MoE-GGUF", which did not match the weights.
gr_interface = gr.ChatInterface(
    fn=response,
    title="Dolphin 2.6 Mistral 7B GGUF Chatbot",
    theme='syddharth/gray-minimal'
)

# Start the Gradio interface; queue() enables request queuing so the single
# model process is not hit with unbounded concurrent generations.
gr_interface.queue().launch(share=True)