import json
import subprocess
import requests
import gradio as gr
import os
# Load the secret environment variable holding the system prompt
SYSTEM_PROMPT_SECRET = os.environ.get('HF_SYSTEM_PROMPT_SECRET')
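# Hedged fallback (an addition, not in the original): os.environ.get returns
# None when the secret is unset, and the prompt concatenation below would then
# raise a TypeError. Defaulting to an empty prompt is an assumption.
if SYSTEM_PROMPT_SECRET is None:
    SYSTEM_PROMPT_SECRET = ""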
# URL for downloading the GGUF model from Hugging Face
url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf?download=true"
# Stream the multi-gigabyte file to disk in chunks rather than holding the
# whole download in memory at once
with requests.get(url, stream=True) as r:
    r.raise_for_status()
    with open("./model.gguf", mode="wb") as file:
        for chunk in r.iter_content(chunk_size=1 << 20):
            file.write(chunk)
print("Model downloaded.")
# Start the llama_cpp OpenAI-compatible model server as a background process
command = ["python3", "-m", "llama_cpp.server", "--model", "./model.gguf", "--host", "0.0.0.0", "--port", "2600", "--n_threads", "2"]
subprocess.Popen(command)
print("Model server launched.")
# Chat handler: send the prompt to the local server and stream the reply back
def response(message, history):
    # Local llama_cpp.server completions endpoint
    url = "http://0.0.0.0:2600/v1/completions"
    body = {
        "prompt": SYSTEM_PROMPT_SECRET + message,  # prepend the system prompt
        "max_tokens": 1500,
        "echo": False,
        "stream": True
    }
    response_text = ""
    # The server replies with a server-sent-events stream: one "data: {...}"
    # JSON payload per line, ": ping" keep-alive lines in between, and a
    # final "data: [DONE]" marker. Parse it line by line.
    with requests.post(url, json=body, stream=True) as resp:
        for raw_line in resp.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode("utf-8")
            if line.startswith(": ping"):
                continue  # keep-alive line, carries no payload
            if not line.startswith("data: "):
                continue
            payload = line[len("data: "):]
            if payload.strip() == "[DONE]":
                break  # end-of-stream marker
            try:
                part = json.loads(payload)["choices"][0]["text"]
            except Exception as e:
                print("Exception: " + str(e))
                continue
            print(part, end="", flush=True)
            response_text += part
            # Yielding the accumulated text lets gr.ChatInterface render
            # the answer incrementally, matching the "stream": True request.
            yield response_text
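# Illustrative, commented-out smoke test (not part of the original app): a
# one-shot, non-streaming request against the same endpoint, handy for
# debugging the server without going through the Gradio UI.
# r = requests.post("http://0.0.0.0:2600/v1/completions",
#                   json={"prompt": "Hello", "max_tokens": 16, "stream": False})
# print(r.json()["choices"][0]["text"])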
# Gradio chat interface with the specified community theme
gr_interface = gr.ChatInterface(
    fn=response,
    title="Mistral-7B-Instruct-v0.2-GGUF Chatbot",
    theme='ParityError/Anime'
)
# Launch the Gradio interface
gr_interface.queue().launch(share=True)