LennardZuendorf committed on
Commit c4b5a8c
1 Parent(s): d19acd3

feat: switched model to Mistral AI 7B

Files changed (2)
  1. app.py +45 -6
  2. chatmodel.py +37 -50
app.py CHANGED
@@ -10,15 +10,54 @@ with gr.Blocks() as ui:
         # Thesis Demo - AI Chat Application with XAI
         ### Select between tabs below for the different views.
         """)
-    with gr.Tab("LlaMa 2 ChatBot"):
+    with gr.Tab("Mistral AI ChatBot"):
         with gr.Row():
             gr.Markdown(
                 """
                 ### ChatBot Demo
-                LlaMa 2 7B Model fine-tuned for chat and transformed to huggingface format (see at [HGF](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf))
+                Mistral AI 7B Model fine-tuned for instruction and fully open source (see at [HGF](https://huggingface.co/mistralai/Mistral-7B-v0.1))
                 """)
         with gr.Row():
-            gr.ChatInterface(chat.interference)
+            gr.ChatInterface(
+                chat.interference
+            )
+        with gr.Row():
+            gr.Slider(
+                label="Temperature",
+                value=0.7,
+                minimum=0.0,
+                maximum=1.0,
+                step=0.05,
+                interactive=True,
+                info="Higher values produce more diverse outputs",
+            ),
+            gr.Slider(
+                label="Max new tokens",
+                value=256,
+                minimum=0,
+                maximum=1024,
+                step=64,
+                interactive=True,
+                info="The maximum number of new tokens",
+            ),
+            gr.Slider(
+                label="Top-p (nucleus sampling)",
+                value=0.95,
+                minimum=0.0,
+                maximum=1,
+                step=0.05,
+                interactive=True,
+                info="Higher values sample more low-probability tokens",
+            ),
+            gr.Slider(
+                label="Repetition penalty",
+                value=1.1,
+                minimum=1.0,
+                maximum=2.0,
+                step=0.05,
+                interactive=True,
+                info="Penalize repeated tokens",
+            )

     with gr.Tab("SHAP Dashboard"):
         with gr.Row():
@@ -36,12 +75,12 @@ with gr.Blocks() as ui:
                 Visualization Dashboard adopted from [BERTViz](https://github.com/jessevig/bertviz)
                 """)

-    with gr.Tab("LlaMa 2 Model Overview"):
+    with gr.Tab("Mistral Model Overview"):
         with gr.Row():
             gr.Markdown(
                 """
-                ### LlaMa 2 Model & Data Overview for Transparency
-                Adopted from official [model paper](https://arxiv.org/abs/2307.09288) by Meta AI
+                ### Mistral 7B Model & Data Overview for Transparency
+                Adopted from official [model paper](https://arxiv.org/abs/2310.06825) by Mistral AI
                 """)

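Note on the hunk above: the four new sliders are created inside their own gr.Row() but are not passed to the ChatInterface in this commit, so their values would not yet reach chat.interference. A minimal sketch of how they could be wired up with Gradio's additional_inputs argument follows; the module alias `chat` and the launch lines are assumptions, not part of this commit.

```python
# Illustrative sketch only (not part of this commit): forwarding the slider values
# to chat.interference via gr.ChatInterface's additional_inputs mechanism.
import gradio as gr

import chatmodel as chat  # assumes app.py imports the chat module under this alias

# Same sliders as in the hunk above, kept unrendered until ChatInterface places them.
temperature = gr.Slider(label="Temperature", value=0.7, minimum=0.0, maximum=1.0, step=0.05)
max_new_tokens = gr.Slider(label="Max new tokens", value=256, minimum=0, maximum=1024, step=64)
top_p = gr.Slider(label="Top-p (nucleus sampling)", value=0.95, minimum=0.0, maximum=1.0, step=0.05)
repetition_penalty = gr.Slider(label="Repetition penalty", value=1.1, minimum=1.0, maximum=2.0, step=0.05)

with gr.Blocks() as ui:
    with gr.Tab("Mistral AI ChatBot"):
        # additional_inputs are appended, in order, after (message, history) when Gradio
        # calls the chat function, matching interference's keyword defaults.
        gr.ChatInterface(
            chat.interference,
            additional_inputs=[temperature, max_new_tokens, top_p, repetition_penalty],
        )

if __name__ == "__main__":
    ui.queue().launch()  # queue() enables streaming from the generator chat function
```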
chatmodel.py CHANGED
@@ -1,61 +1,48 @@
-from transformers import pipeline
-import torch
-from transformers import AutoTokenizer
+from huggingface_hub import InferenceClient
 import os
+import gradio as gr

 token = os.environ.get("HGFTOKEN")

-model = "meta-llama/Llama-2-7b-chat-hf"
-tokenizer = AutoTokenizer.from_pretrained(model, token=token)
-
-llama_pipeline = pipeline(
-    "text-generation",
-    model=model,
-    torch_dtype=torch.float32,
-    device_map="auto",
-    token = token
+client = InferenceClient(
+    "mistralai/Mistral-7B-Instruct-v0.1"
 )

-# Formatting function for message and history
-def format_message(message: str, history: list, system_prompt:str, memory_limit: int = 3) -> str:
-
-    if len(history) > memory_limit:
-        history = history[-memory_limit:]
-
-    system_prompt="<s>[INST] <<SYS>>\n"+system_prompt+"\n<</SYS>>"
-
-    if len(history) == 0:
-        return system_prompt + f"{message} [/INST]"
-
-    formatted_message = system_prompt + f"{history[0][0]} [/INST] {history[0][1]} </s>"
-
-    # Handle conversation history
-    for user_msg, model_answer in history[1:]:
-        formatted_message += f"<s>[INST] {user_msg} [/INST] {model_answer} </s>"
-
-    # Handle the current message
-    formatted_message += f"<s>[INST] {message} [/INST]"
-
-    return formatted_message
+def format_prompt(message, history):
+    prompt = "<s>"
+    for user_prompt, bot_response in history:
+        prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f" {bot_response}</s> "
+    prompt += f"[INST] {message} [/INST]"
+    return prompt

-# Generate a response from the Llama model
-def interference(message: str, history: list, ) -> str:
-    system_prompt="You are a helpful assistant providing reasonable answers."
+def interference(
+    prompt, history, temperature=0.7, max_new_tokens=256, top_p=0.95, repetition_penalty=1.1,
+):
+    temperature = float(temperature)
+    if temperature < 1e-2:
+        temperature = 1e-2
+    top_p = float(top_p)

-    query = format_message(message, history, system_prompt)
-    response = ""
+    generate_kwargs = dict(
+        temperature=temperature,
+        max_new_tokens=max_new_tokens,
+        top_p=top_p,
+        repetition_penalty=repetition_penalty,
+        do_sample=True,
+        seed=42,
+    )

-    sequences = llama_pipeline(
-        query,
-        do_sample=True,
-        top_k=10,
-        num_return_sequences=1,
-        eos_token_id=tokenizer.eos_token_id,
-        max_length=1024,
-    )
+    formatted_prompt = format_prompt(prompt, history)

-    generated_text = sequences[0]['generated_text']
-    response = generated_text[len(query):]  # Remove the prompt from the output
+    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    output = ""

-    print("Chatbot:", response.strip())
-    return response.strip()
+    for response in stream:
+        output += response.token.text
+        yield output
+    return output

+custom=[
+
+]
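For reference, a small usage sketch of the new chatmodel.py API after this change. It assumes a valid HGFTOKEN is configured and the hosted Mistral-7B-Instruct endpoint is reachable; the example conversation below is invented.

```python
# Hypothetical usage of the new chatmodel.py functions; the history content is made up.
from chatmodel import format_prompt, interference

history = [("What does XAI mean?", "XAI stands for explainable AI.")]

# format_prompt wraps the turns in Mistral's instruction format, e.g.:
# "<s>[INST] What does XAI mean? [/INST] XAI stands for explainable AI.</s> [INST] And SHAP? [/INST]"
print(format_prompt("And SHAP?", history))

# interference is a generator: each yielded value is the accumulated answer so far,
# which is the shape gr.ChatInterface expects from a streaming chat function.
final = ""
for partial in interference("And SHAP?", history, temperature=0.7, max_new_tokens=64):
    final = partial
print(final)
```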