Spaces (Running on Zero)
artificialguybr committed: Update app.py

app.py CHANGED
@@ -1,6 +1,7 @@
+import spaces
 import os
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 import gradio as gr
 from threading import Thread
 
@@ -32,6 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
 model = model.eval()
 
+@spaces.GPU
 def stream_chat(
     message: str,
     history: list,
@@ -44,12 +46,10 @@ def stream_chat(
     print(f'message: {message}')
     print(f'history: {history}')
 
-    # Prepare the conversation history
     chat_history = []
     for prompt, answer in history:
         chat_history.append((prompt, answer))
 
-    # Generate the response
     for response, _ in model.stream_chat(
         tokenizer,
         message,
@@ -70,11 +70,12 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
         fn=stream_chat,
         chatbot=chatbot,
         fill_height=True,
-        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False),
+        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
         additional_inputs=[
             gr.Textbox(
                 value="You are a helpful assistant capable of generating long-form content.",
                 label="System Prompt",
+                render=False,
             ),
             gr.Slider(
                 minimum=0,
@@ -82,6 +83,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
                 step=0.1,
                 value=0.5,
                 label="Temperature",
+                render=False,
             ),
             gr.Slider(
                 minimum=1024,
@@ -89,6 +91,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
                 step=1024,
                 value=32768,
                 label="Max new tokens",
+                render=False,
             ),
             gr.Slider(
                 minimum=0.0,
@@ -96,6 +99,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
                 step=0.1,
                 value=1.0,
                 label="Top p",
+                render=False,
             ),
             gr.Slider(
                 minimum=1,
@@ -103,6 +107,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
                 step=1,
                 value=50,
                 label="Top k",
+                render=False,
             ),
         ],
         examples=[
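The commit adds the ZeroGPU decorator and imports TextIteratorStreamer, but the handler visible in the hunks still calls the model's own stream_chat helper. As a hedged sketch of what that import points toward, here is a generic transformers streaming handler under @spaces.GPU; the MODEL placeholder, the chat-template call, and the sampling kwargs are illustrative assumptions, not this Space's actual code.

    # Sketch only: a ZeroGPU-decorated handler that streams with
    # TextIteratorStreamer instead of the model-specific stream_chat helper.
    import torch
    import spaces
    from threading import Thread
    from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

    MODEL = "some/causal-lm"  # placeholder, not the Space's actual checkpoint

    tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto"
    ).eval()

    @spaces.GPU  # on ZeroGPU Spaces, a GPU is attached only while this function runs
    def stream_chat(message, history, system_prompt, temperature,
                    max_new_tokens, top_p, top_k):
        # Rebuild the conversation from the (user, assistant) pairs in history,
        # mirroring the loop in the diff.
        conversation = [{"role": "system", "content": system_prompt}]
        for prompt, answer in history:
            conversation += [{"role": "user", "content": prompt},
                             {"role": "assistant", "content": answer}]
        conversation.append({"role": "user", "content": message})

        # Assumes the tokenizer ships a chat template.
        input_ids = tokenizer.apply_chat_template(
            conversation, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)

        # generate() runs in a background thread; the streamer yields decoded
        # text chunks here as tokens are produced.
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True,
                                        skip_special_tokens=True)
        Thread(target=model.generate, kwargs=dict(
            input_ids=input_ids, streamer=streamer,
            do_sample=temperature > 0, temperature=temperature,
            max_new_tokens=max_new_tokens, top_p=top_p, top_k=top_k,
        )).start()

        partial = ""
        for chunk in streamer:
            partial += chunk
            yield partial

Because the handler yields growing partial strings, it plugs into gr.ChatInterface the same way the existing stream_chat-based generator does.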
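The other recurring change, render=False on the accordion and on every additional input, follows from how Gradio Blocks render: a component instantiated inside a gr.Blocks context draws itself at the point of creation, so without render=False these controls would appear twice, once where they are built and once inside the ChatInterface's Parameters accordion. A minimal sketch of the pattern (the echo handler and single slider are placeholders):

    import gradio as gr

    def echo(message, history, temperature):
        return f"(t={temperature}) {message}"  # placeholder handler

    with gr.Blocks(theme="soft") as demo:
        chatbot = gr.Chatbot(render=False)  # rendered later by ChatInterface
        gr.ChatInterface(
            fn=echo,
            chatbot=chatbot,
            fill_height=True,
            # render=False defers drawing so ChatInterface can place these
            # components inside its own layout rather than where they were created:
            additional_inputs_accordion=gr.Accordion(
                label="⚙️ Parameters", open=False, render=False
            ),
            additional_inputs=[
                gr.Slider(minimum=0, maximum=1, step=0.1, value=0.5,
                          label="Temperature", render=False),
            ],
        )

    demo.launch()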