Spaces: Running on Zero
Locutusque committed
Commit • 3f2900f
Parent(s): 1b4ae3b
Upload app.py
app.py
ADDED
import gradio as gr
import torch
from transformers import pipeline, set_seed


# Generate a response conditioned on the entire conversation history
def generate_response(messages, model_name, sampling_temperature, max_tokens, top_p):
    generator = pipeline("text-generation", model=model_name, torch_dtype=torch.float16)
    set_seed(42)  # fixed seed for reproducibility; change it for varied outputs

    # Flatten the conversation history into a single ChatML-style prompt
    messages = messages or []
    conversation = ""
    for message in messages:
        conversation += f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>\n"
    conversation += "<|im_start|>assistant\n"  # cue the model to respond

    # Generate the response; do_sample=True so temperature/top_p take effect,
    # max_new_tokens bounds the completion, and return_full_text=False
    # drops the echoed prompt from the pipeline output
    response = generator(
        conversation,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=sampling_temperature,
        top_p=top_p,
        repetition_penalty=1.1,
        top_k=12,
        return_full_text=False,
    )

    return messages + [{"role": "assistant", "content": response[0]["generated_text"]}]


# Gradio chatbot interface with conversation history
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Chatbot(type="messages", label="Conversation"),
        gr.Dropdown(
            choices=[
                "Locutusque/TinyMistral-248M-v2.5-Instruct",
                "Locutusque/Hercules-1.0-Mistral-7B",
                "Locutusque/UltraQwen-1_8B",
            ],
            value="Locutusque/TinyMistral-248M-v2.5-Instruct",
            label="Select Model",
        ),
        gr.Slider(0.1, 2.0, value=1.0, step=0.1, label="Sampling Temperature"),
        gr.Slider(5, 200, value=50, step=5, label="Max Tokens"),
        gr.Slider(0.1, 1.0, value=0.75, step=0.1, label="Top P"),
    ],
    outputs=gr.Chatbot(type="messages", label="Chatbot"),
)

# Launch the Gradio chatbot interface
iface.launch()
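
For reference, a minimal sketch of the ChatML prompt that generate_response builds from a short history; the messages below are illustrative only, not part of the Space:

# Illustrative two-turn history (any list of {role, content} dicts works)
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
# generate_response flattens this into the prompt:
#   <|im_start|>system
#   You are a helpful assistant.<|im_end|>
#   <|im_start|>user
#   Hello!<|im_end|>
#   <|im_start|>assistant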