Spaces:
Running
Running
AFischer1985
commited on
Commit
•
beaf90e
1
Parent(s):
7b16373
Update run.py
Browse files
run.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
# Title: German AI-Interface with advanced RAG
|
3 |
# Author: Andreas Fischer
|
4 |
# Date: January 31st, 2023
|
5 |
-
# Last update: February
|
6 |
##########################################################################################
|
7 |
|
8 |
#https://github.com/abetlen/llama-cpp-python/issues/306
|
@@ -30,7 +30,7 @@ dbPath = "/home/af/Schreibtisch/Code/gradio/Chroma/db"
|
|
30 |
onPrem = True if(os.path.exists(dbPath)) else False
|
31 |
if(onPrem==False): dbPath="/home/user/app/db"
|
32 |
|
33 |
-
onPrem=
|
34 |
print(dbPath)
|
35 |
|
36 |
#client = chromadb.Client()
|
@@ -164,12 +164,11 @@ else:
|
|
164 |
import os
|
165 |
import requests
|
166 |
import subprocess
|
167 |
-
modelPath="/home/af/gguf/models/Discolm_german_7b_v1.Q4_0.gguf"
|
|
|
168 |
if(os.path.exists(modelPath)==False):
|
169 |
-
#url="https://huggingface.co/TheBloke/WizardLM-13B-V1.2-GGUF/resolve/main/wizardlm-13b-v1.2.Q4_0.gguf"
|
170 |
-
url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
|
171 |
-
#url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf?download=true"
|
172 |
#url="https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q4_0.gguf?download=true"
|
|
|
173 |
response = requests.get(url)
|
174 |
with open("./Mixtral-8x7b-instruct.gguf", mode="wb") as file:
|
175 |
file.write(response.content)
|
@@ -183,10 +182,15 @@ else:
|
|
183 |
print("Server ready!")
|
184 |
|
185 |
|
|
|
|
|
|
|
|
|
|
|
186 |
# Gradio-GUI
|
187 |
#------------
|
188 |
-
|
189 |
-
def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4):
|
190 |
startOfString=""
|
191 |
if zeichenlimit is None: zeichenlimit=1000000000 # :-)
|
192 |
template0=" [INST]{system}\n [/INST] </s>"
|
@@ -229,13 +233,18 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
|
|
229 |
prompt += template0.format(system=system) #"<s>"
|
230 |
if history is not None:
|
231 |
for user_message, bot_response in history[-historylimit:]:
|
232 |
-
if user_message is
|
233 |
-
if bot_response is
|
234 |
-
|
|
|
|
|
|
|
|
|
235 |
if system2 is not None:
|
236 |
prompt += system2
|
237 |
return startOfString+prompt
|
238 |
|
|
|
239 |
import gradio as gr
|
240 |
import requests
|
241 |
import json
|
@@ -244,7 +253,8 @@ import os
|
|
244 |
import re
|
245 |
|
246 |
def response(message, history):
|
247 |
-
settings="
|
|
|
248 |
|
249 |
# Preprocessing to revent simple forms of prompt injection:
|
250 |
#----------------------------------------------------------
|
@@ -253,12 +263,12 @@ def response(message, history):
|
|
253 |
message=message.replace("[/INST]","")
|
254 |
message=re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
|
255 |
|
256 |
-
# Load Memory if
|
257 |
#-------------------------------------
|
258 |
-
if (settings=="
|
259 |
if((len(history)==0)&(os.path.isfile(filename))): history=json.load(open(filename,'r',encoding="utf-8")) # retrieve history (if available)
|
260 |
|
261 |
-
system="Du bist ein deutschsprachiges wortkarges KI-basiertes Assistenzsystem.
|
262 |
|
263 |
#RAG-layer 0: Intention-RAG
|
264 |
#---------------------------
|
@@ -354,7 +364,13 @@ def response(message, history):
|
|
354 |
rag, # RAG-component added to the system prompt
|
355 |
system2, # fictive first words of the AI (neither displayed nor stored)
|
356 |
historylimit=historylimit # number of past messages to consider for response to current message
|
|
|
357 |
)
|
|
|
|
|
|
|
|
|
|
|
358 |
print("\n\n*** Prompt:\n"+prompt+"\n***\n\n")
|
359 |
|
360 |
## Request response from model
|
@@ -383,13 +399,14 @@ def response(message, history):
|
|
383 |
part=text.token.text
|
384 |
#print(part, end="", flush=True)
|
385 |
response += part
|
|
|
386 |
yield response
|
387 |
if((myType=="1a")): #add RAG-results to chat-output if appropriate
|
388 |
-
|
389 |
-
yield
|
390 |
history.append((message, response)) # add current dialog to history
|
391 |
-
# Store current state in DB if
|
392 |
-
if (settings=="
|
393 |
x=collection.get(include=[])["ids"] # add current dialog to db
|
394 |
collection.add(
|
395 |
documents=[message,response],
|
@@ -405,7 +422,8 @@ def response(message, history):
|
|
405 |
# url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
|
406 |
url="http://0.0.0.0:2600/v1/completions"
|
407 |
body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
|
408 |
-
if("
|
|
|
409 |
response="" #+"("+myType+")\n"
|
410 |
buffer=""
|
411 |
#print("URL: "+url)
|
@@ -432,13 +450,13 @@ def response(message, history):
|
|
432 |
except Exception as e:
|
433 |
print("Exception:"+str(e))
|
434 |
pass
|
|
|
435 |
yield response
|
436 |
if((myType=="1a")): #add RAG-results to chat-output if appropriate
|
437 |
-
|
438 |
-
yield
|
439 |
-
|
440 |
-
|
441 |
-
if (settings=="Permanent"):
|
442 |
x=collection.get(include=[])["ids"] # add current dialog to db
|
443 |
collection.add(
|
444 |
documents=[message,response],
|
@@ -453,9 +471,11 @@ def response(message, history):
|
|
453 |
|
454 |
gr.ChatInterface(
|
455 |
response,
|
456 |
-
chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten geeigneten KI-Tools empfiehlt
|
457 |
-
title="German AI-Interface with advanced RAG",
|
458 |
-
#additional_inputs=[gr.Dropdown(["
|
459 |
).queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
|
460 |
print("Interface up and running!")
|
461 |
|
|
|
|
|
|
2 |
# Title: German AI-Interface with advanced RAG
|
3 |
# Author: Andreas Fischer
|
4 |
# Date: January 31st, 2023
|
5 |
+
# Last update: February 26st, 2024
|
6 |
##########################################################################################
|
7 |
|
8 |
#https://github.com/abetlen/llama-cpp-python/issues/306
|
|
|
30 |
onPrem = True if(os.path.exists(dbPath)) else False
|
31 |
if(onPrem==False): dbPath="/home/user/app/db"
|
32 |
|
33 |
+
#onPrem=True # uncomment to override automatic detection
|
34 |
print(dbPath)
|
35 |
|
36 |
#client = chromadb.Client()
|
|
|
164 |
import os
|
165 |
import requests
|
166 |
import subprocess
|
167 |
+
#modelPath="/home/af/gguf/models/Discolm_german_7b_v1.Q4_0.gguf"
|
168 |
+
modelPath="/home/af/gguf/models/Mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
|
169 |
if(os.path.exists(modelPath)==False):
|
|
|
|
|
|
|
170 |
#url="https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q4_0.gguf?download=true"
|
171 |
+
url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
|
172 |
response = requests.get(url)
|
173 |
with open("./Mixtral-8x7b-instruct.gguf", mode="wb") as file:
|
174 |
file.write(response.content)
|
|
|
182 |
print("Server ready!")
|
183 |
|
184 |
|
185 |
+
#import llama_cpp
|
186 |
+
#llama_cpp.llama_backend_init(numa=False)
|
187 |
+
#params=llama_cpp.llama_context_default_params()
|
188 |
+
#params.n_ctx
|
189 |
+
|
190 |
# Gradio-GUI
|
191 |
#------------
|
192 |
+
import re
|
193 |
+
def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4, removeHTML=True):
|
194 |
startOfString=""
|
195 |
if zeichenlimit is None: zeichenlimit=1000000000 # :-)
|
196 |
template0=" [INST]{system}\n [/INST] </s>"
|
|
|
233 |
prompt += template0.format(system=system) #"<s>"
|
234 |
if history is not None:
|
235 |
for user_message, bot_response in history[-historylimit:]:
|
236 |
+
if user_message is None: user_message = ""
|
237 |
+
if bot_response is None: bot_response = ""
|
238 |
+
bot_response = re.sub("\n\n<details>((.|\n)*?)</details>","", bot_response) # remove RAG-compontents
|
239 |
+
if removeHTML==True: bot_response = re.sub("<(.*?)>","\n", bot_response) # remove HTML-components in general (may cause bugs with markdown-rendering)
|
240 |
+
if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit])
|
241 |
+
if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit])
|
242 |
+
if message is not None: prompt += template1.format(message=message[:zeichenlimit])
|
243 |
if system2 is not None:
|
244 |
prompt += system2
|
245 |
return startOfString+prompt
|
246 |
|
247 |
+
|
248 |
import gradio as gr
|
249 |
import requests
|
250 |
import json
|
|
|
253 |
import re
|
254 |
|
255 |
def response(message, history):
|
256 |
+
settings="Memory Off"
|
257 |
+
removeHTML=True
|
258 |
|
259 |
# Preprocessing to revent simple forms of prompt injection:
|
260 |
#----------------------------------------------------------
|
|
|
263 |
message=message.replace("[/INST]","")
|
264 |
message=re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
|
265 |
|
266 |
+
# Load Memory if memory is turned on
|
267 |
#-------------------------------------
|
268 |
+
if (settings=="Memory On"):
|
269 |
if((len(history)==0)&(os.path.isfile(filename))): history=json.load(open(filename,'r',encoding="utf-8")) # retrieve history (if available)
|
270 |
|
271 |
+
system="Du bist ein deutschsprachiges wortkarges KI-basiertes Assistenzsystem. Antworte kurz, in deutsche Sprache und verzichte auf HTML und Code jeder Art."
|
272 |
|
273 |
#RAG-layer 0: Intention-RAG
|
274 |
#---------------------------
|
|
|
364 |
rag, # RAG-component added to the system prompt
|
365 |
system2, # fictive first words of the AI (neither displayed nor stored)
|
366 |
historylimit=historylimit # number of past messages to consider for response to current message
|
367 |
+
removeHTML=removeHTML # remove HTML-components from History (to prevent bugs with Markdown)
|
368 |
)
|
369 |
+
#print("\n\nMESSAGE:"+str(message))
|
370 |
+
#print("\n\nHISTORY:"+str(history))
|
371 |
+
#print("\n\nSYSTEM:"+str(system))
|
372 |
+
#print("\n\nRAG:"+str(rag))
|
373 |
+
#print("\n\nSYSTEM2:"+str(system2))
|
374 |
print("\n\n*** Prompt:\n"+prompt+"\n***\n\n")
|
375 |
|
376 |
## Request response from model
|
|
|
399 |
part=text.token.text
|
400 |
#print(part, end="", flush=True)
|
401 |
response += part
|
402 |
+
if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
|
403 |
yield response
|
404 |
if((myType=="1a")): #add RAG-results to chat-output if appropriate
|
405 |
+
response=response+"\n\n<details><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
|
406 |
+
yield response
|
407 |
history.append((message, response)) # add current dialog to history
|
408 |
+
# Store current state in DB if memory is turned on
|
409 |
+
if (settings=="Memory On"):
|
410 |
x=collection.get(include=[])["ids"] # add current dialog to db
|
411 |
collection.add(
|
412 |
documents=[message,response],
|
|
|
422 |
# url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
|
423 |
url="http://0.0.0.0:2600/v1/completions"
|
424 |
body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
|
425 |
+
if("Discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
|
426 |
+
if("Gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
|
427 |
response="" #+"("+myType+")\n"
|
428 |
buffer=""
|
429 |
#print("URL: "+url)
|
|
|
450 |
except Exception as e:
|
451 |
print("Exception:"+str(e))
|
452 |
pass
|
453 |
+
if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
|
454 |
yield response
|
455 |
if((myType=="1a")): #add RAG-results to chat-output if appropriate
|
456 |
+
response=response+"\n\n<details><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
|
457 |
+
yield response
|
458 |
+
# Store current state in DB if memory is turned on
|
459 |
+
if (settings=="Memory On"):
|
|
|
460 |
x=collection.get(include=[])["ids"] # add current dialog to db
|
461 |
collection.add(
|
462 |
documents=[message,response],
|
|
|
471 |
|
472 |
gr.ChatInterface(
|
473 |
response,
|
474 |
+
chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten geeigneten KI-Tools empfiehlt.\nAktuell bin ich wenig mehr als eine Tech-Demo und kenne nur 7 KI-Modelle - also sei bitte nicht zu streng mit mir.<ul><li>Wenn du ein KI-Modell suchst, antworte ich auf Basis der Liste</li><li>Wenn du Fragen zur Benutzung eines KI-Modells hast, verweise ich an andere Stellen</li><li>Wenn du andre Fragen hast, antworte ich frei und berücksichtige dabei Relevantes aus dem gesamten bisherigen Dialog.</li></ul>\nWas ist dein Anliegen?"]],render_markdown=True),
|
475 |
+
title="German AI-Interface with advanced RAG (on prem)" if onPrem else "German AI-Interface with advanced RAG (HFHub)",
|
476 |
+
#additional_inputs=[gr.Dropdown(["Memory On","Memory Off"],value="Memory Off",label="Memory")]
|
477 |
).queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
|
478 |
print("Interface up and running!")
|
479 |
|
480 |
+
|
481 |
+
|