AFischer1985
committed on
Commit
•
125389b
1
Parent(s):
8d8b439
Update run.py
Browse files
run.py
CHANGED
@@ -73,13 +73,60 @@ client = InferenceClient(
|
|
73 |
import gradio as gr
|
74 |
import json
|
75 |
|
76 |
-
def
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
def response(
|
85 |
prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
|
@@ -112,7 +159,7 @@ def response(
|
|
112 |
addon=" Bitte berücksichtige bei deiner Antwort ggf. folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n"+"\n".join(results)
|
113 |
system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete KI-Tools empfiehlt."+addon+"\n\nUser-Anliegen:"
|
114 |
#body={"prompt":system+"### Instruktion:\n"+message+"\n\n### Antwort:","max_tokens":500, "echo":"False","stream":"True"} #e.g. SauerkrautLM
|
115 |
-
formatted_prompt =
|
116 |
stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
117 |
output = ""
|
118 |
for response in stream:
|
|
|
73 |
import gradio as gr
|
74 |
import json
|
75 |
|
76 |
+
def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None, historylimit=4, removeHTML=False):
    """Build a model-specific chat prompt from message, history and system text.

    Picks the chat template matching the globally configured ``modelPath``
    (Mistral/Mixtral-style ``[INST]`` template by default), then concatenates
    the system prompt, the truncated conversation history and the current
    user message.

    Args:
        message: Current user message, appended as the final turn.
        history: Iterable of ``(user_message, bot_response)`` pairs; only the
            last ``historylimit`` turns are used. ``None`` entries are treated
            as empty strings.
        system: Optional system prompt.
        RAGAddon: Optional retrieval context appended to ``system`` (a system
            block is emitted even if ``system`` was ``None``).
        system2: Optional text appended verbatim after the final user turn.
        zeichenlimit: Per-field character limit; ``None`` means effectively
            unlimited.
        historylimit: Number of most recent history turns to include.
        removeHTML: If True, strip HTML tags from past bot responses.

    Returns:
        The formatted prompt string, prefixed with the model's BOS token
        where the template requires one.
    """
    startOfString = ""
    if zeichenlimit is None:
        zeichenlimit = 1000000000  # effectively unlimited :-)
    # Default: Mistral/Mixtral-style [INST] template.
    template0 = " [INST]{system}\n [/INST] </s>"
    template1 = " [INST] {message} [/INST]"
    template2 = " {response}</s>"
    if "Gemma-" in modelPath:  # https://ai.google.dev/gemma/docs/formatting
        # Fixed: Gemma's turn terminator is "<end_of_turn>"; the previous
        # "</end_of_turn>" is not a token Gemma knows.
        template0 = "<start_of_turn>user{system}<end_of_turn>"
        template1 = "<start_of_turn>user{message}<end_of_turn><start_of_turn>model"
        template2 = "{response}<end_of_turn>"
    if "Mixtral-8x7b-instruct" in modelPath:  # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
        startOfString = "<s>"
        template0 = " [INST]{system}\n [/INST] </s>"
        template1 = " [INST] {message} [/INST]"
        template2 = " {response}</s>"
    if "Mistral-7B-Instruct" in modelPath:  # https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
        startOfString = "<s>"
        template0 = "[INST]{system}\n [/INST]</s>"
        template1 = "[INST] {message} [/INST]"
        template2 = " {response}</s>"
    if "Openchat-3.5" in modelPath:  # https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF
        template0 = "GPT4 Correct User: {system}<|end_of_turn|>GPT4 Correct Assistant: Okay.<|end_of_turn|>"
        template1 = "GPT4 Correct User: {message}<|end_of_turn|>GPT4 Correct Assistant: "
        template2 = "{response}<|end_of_turn|>"
    if ("Discolm_german_7b" in modelPath) or ("SauerkrautLM-7b-HerO" in modelPath):  # https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
        template0 = "<|im_start|>system\n{system}<|im_end|>\n"
        template1 = "<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
        template2 = "{response}<|im_end|>\n"
    if "WizardLM-13B-V1.2" in modelPath:  # https://huggingface.co/WizardLM/WizardLM-13B-V1.2
        template0 = "{system} "  # <s>
        template1 = "USER: {message} ASSISTANT: "
        template2 = "{response}</s>"
    if "Phi-2" in modelPath:  # https://huggingface.co/TheBloke/phi-2-GGUF
        template0 = "Instruct: {system}\nOutput: Okay.\n"
        template1 = "Instruct: {message}\nOutput:"
        template2 = "{response}\n"
    prompt = ""
    if RAGAddon is not None:
        # Fixed: coerce None so a RAG addon without a system prompt no longer
        # raises TypeError on "None + str".
        system = (system if system is not None else "") + RAGAddon
    if system is not None:
        prompt += template0.format(system=system)  # "<s>"
    if history is not None:
        for user_message, bot_response in history[-historylimit:]:
            if user_message is None: user_message = ""
            if bot_response is None: bot_response = ""
            # Strip RAG <details> blocks folded into earlier answers.
            bot_response = re.sub("\n\n<details>((.|\n)*?)</details>", "", bot_response)
            if removeHTML:
                # Remove HTML components in general (may cause bugs with markdown rendering).
                bot_response = re.sub("<(.*?)>", "\n", bot_response)
            # (Removed dead "is not None" re-checks — both values were just coerced.)
            prompt += template1.format(message=user_message[:zeichenlimit])
            prompt += template2.format(response=bot_response[:zeichenlimit])
    if message is not None:
        prompt += template1.format(message=message[:zeichenlimit])
    if system2 is not None:
        prompt += system2
    return startOfString + prompt
|
129 |
+
|
130 |
|
131 |
def response(
|
132 |
prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
|
|
|
159 |
addon=" Bitte berücksichtige bei deiner Antwort ggf. folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n"+"\n".join(results)
|
160 |
system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete KI-Tools empfiehlt."+addon+"\n\nUser-Anliegen:"
|
161 |
#body={"prompt":system+"### Instruktion:\n"+message+"\n\n### Antwort:","max_tokens":500, "echo":"False","stream":"True"} #e.g. SauerkrautLM
|
162 |
+
formatted_prompt = extend_prompt(system+"\n"+prompt, history)
|
163 |
stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
164 |
output = ""
|
165 |
for response in stream:
|