artificialguybr committed on
Commit
f6fc9d2
·
verified ·
1 Parent(s): 4f7a72f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -1,6 +1,7 @@
 
1
  import os
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import gradio as gr
5
  from threading import Thread
6
 
@@ -32,6 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
32
  model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
33
  model = model.eval()
34
 
 
35
  def stream_chat(
36
  message: str,
37
  history: list,
@@ -44,12 +46,10 @@ def stream_chat(
44
  print(f'message: {message}')
45
  print(f'history: {history}')
46
 
47
- # Prepare the conversation history
48
  chat_history = []
49
  for prompt, answer in history:
50
  chat_history.append((prompt, answer))
51
 
52
- # Generate the response
53
  for response, _ in model.stream_chat(
54
  tokenizer,
55
  message,
@@ -70,11 +70,12 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
70
  fn=stream_chat,
71
  chatbot=chatbot,
72
  fill_height=True,
73
- additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False),
74
  additional_inputs=[
75
  gr.Textbox(
76
  value="You are a helpful assistant capable of generating long-form content.",
77
  label="System Prompt",
 
78
  ),
79
  gr.Slider(
80
  minimum=0,
@@ -82,6 +83,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
82
  step=0.1,
83
  value=0.5,
84
  label="Temperature",
 
85
  ),
86
  gr.Slider(
87
  minimum=1024,
@@ -89,6 +91,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
89
  step=1024,
90
  value=32768,
91
  label="Max new tokens",
 
92
  ),
93
  gr.Slider(
94
  minimum=0.0,
@@ -96,6 +99,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
96
  step=0.1,
97
  value=1.0,
98
  label="Top p",
 
99
  ),
100
  gr.Slider(
101
  minimum=1,
@@ -103,6 +107,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
103
  step=1,
104
  value=50,
105
  label="Top k",
 
106
  ),
107
  ],
108
  examples=[
 
1
+ import spaces
2
  import os
3
  import torch
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
5
  import gradio as gr
6
  from threading import Thread
7
 
 
33
  model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
34
  model = model.eval()
35
 
36
+ @spaces.GPU
37
  def stream_chat(
38
  message: str,
39
  history: list,
 
46
  print(f'message: {message}')
47
  print(f'history: {history}')
48
 
 
49
  chat_history = []
50
  for prompt, answer in history:
51
  chat_history.append((prompt, answer))
52
 
 
53
  for response, _ in model.stream_chat(
54
  tokenizer,
55
  message,
 
70
  fn=stream_chat,
71
  chatbot=chatbot,
72
  fill_height=True,
73
+ additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
74
  additional_inputs=[
75
  gr.Textbox(
76
  value="You are a helpful assistant capable of generating long-form content.",
77
  label="System Prompt",
78
+ render=False,
79
  ),
80
  gr.Slider(
81
  minimum=0,
 
83
  step=0.1,
84
  value=0.5,
85
  label="Temperature",
86
+ render=False,
87
  ),
88
  gr.Slider(
89
  minimum=1024,
 
91
  step=1024,
92
  value=32768,
93
  label="Max new tokens",
94
+ render=False,
95
  ),
96
  gr.Slider(
97
  minimum=0.0,
 
99
  step=0.1,
100
  value=1.0,
101
  label="Top p",
102
+ render=False,
103
  ),
104
  gr.Slider(
105
  minimum=1,
 
107
  step=1,
108
  value=50,
109
  label="Top k",
110
+ render=False,
111
  ),
112
  ],
113
  examples=[