test_mistral_7b_on_cpu

Sleeping

thobuiq committed on Jan 27

Commit

62c56f5

•

1 Parent(s): d15855c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,35 +1,34 @@
 import os
-import chainlit as cl
 from ctransformers import AutoModelForCausalLM
-# Runs when the chat starts
-@cl.on_chat_start
-def main():
-    """Load the Mistral 7B Instruct GGUF model and store it in the user session."""
-    # Use about half of the logical cores, but never fewer than one:
-    # os.cpu_count() may return None, and a single-core host would
-    # otherwise produce int(1 / 2) == 0 threads.
-    thread_count = max(1, (os.cpu_count() or 1) // 2)
-    # Create the llm (gpu_layers=0 offloads no layers to a GPU; stream=True
-    # makes calls to the model yield text pieces instead of one string).
-    llm = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
-                                               model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
-                                               model_type="mistral",
-                                               temperature=0.7,
-                                               gpu_layers=0,
-                                               stream=True,
-                                               threads=thread_count,
-                                               max_new_tokens=10000)
-    # Store the llm in the user session
-    cl.user_session.set("llm", llm)
-# Runs when a message is sent
-@cl.on_message
-async def main(message: cl.Message):
-    """Stream the model's answer to a single incoming chat message."""
-    # Fetch the llm object kept under the "llm" key of the user session.
-    llm = cl.user_session.get("llm")
-    # Begin with an empty reply; generated pieces are appended to it below.
-    reply = cl.Message(content="")
-    # Wrap the user's text in [INST] ... [/INST] instruction tags.
-    prompt = f"[INST]{message.content}[/INST]"
     for text in llm(prompt=prompt):
-        await reply.stream_token(text)
-    await reply.send()

+import gradio as gr
+from threading import Thread
 import os
 from ctransformers import AutoModelForCausalLM
+# Load the Mistral 7B Instruct GGUF model once, at import time, so every
+# chat request reuses the same instance. gpu_layers=0 offloads no layers to
+# a GPU, and stream=True makes calls to the model yield text pieces.
+llm = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
+                                            model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+                                            model_type="mistral",
+                                            temperature=0.7,
+                                            gpu_layers=0,
+                                            stream=True,
+                                            # About half of the logical cores, but at least one:
+                                            # os.cpu_count() may return None, and a single-core
+                                            # host would otherwise produce int(1 / 2) == 0.
+                                            threads=max(1, (os.cpu_count() or 1) // 2),
+                                            max_new_tokens=10000)
+# Function to generate model predictions.
+def predict(message, history):
+    """Stream the model's reply to ``message`` for the Gradio chat interface.
+
+    Args:
+        message: The text the user has just submitted.
+        history: Earlier turns, indexed below as ``item[0]`` (user text)
+            and ``item[1]`` (assistant text).
+
+    Yields:
+        The reply text accumulated so far. Gradio's ChatInterface replaces
+        the displayed reply with each yielded value, so yielding bare
+        pieces would leave only the latest fragment on screen.
+    """
+    history_transformer_format = history + [[message, ""]]
+    # Formatting the input for the model.
+    messages = "</s>".join(["</s>".join(["\n<|user|>:" + item[0], "\n<|assistant|>:" + item[1]])
+                        for item in history_transformer_format])
+    # `messages` is already a plain string, so it is interpolated directly
+    # (a str has no `.content` attribute).
+    prompt = f"[INST]{messages}[/INST]"
+    partial_reply = ""
     for text in llm(prompt=prompt):
+        partial_reply += text
+        yield partial_reply
+# Build the Gradio chat UI around `predict`, with a title, a short
+# description and two example prompts.
+demo = gr.ChatInterface(
+    predict,
+    title="Test Mistral 7B",
+    description="Ask Mistral any questions",
+    examples=['How to cook a fish?', 'Who is the president of US now?'],
+)
+# Start serving the web interface.
+demo.launch()