ehristoforu committed on
Commit
c2dd1cb
•
1 Parent(s): bd77505

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -63
app.py CHANGED
@@ -39,10 +39,10 @@ model.config.sliding_window = 4096
39
  model.eval()
40
 
41
 
42
- @spaces.GPU(duration=50)
43
  def generate(
44
  message: str,
45
- system_prompt: str,
46
  max_new_tokens: int = 1024,
47
  temperature: float = 0.6,
48
  top_p: float = 0.9,
@@ -50,7 +50,13 @@ def generate(
50
  repetition_penalty: float = 1.2,
51
  ) -> Iterator[str]:
52
  conversation = []
53
- conversation.append({"role": "system", "content": system_prompt})
 
 
 
 
 
 
54
  conversation.append({"role": "user", "content": message})
55
 
56
  input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
@@ -79,68 +85,54 @@ def generate(
79
  outputs.append(text)
80
  yield "".join(outputs)
81
 
82
- message = gr.Textbox(
83
- label="Message",
84
- max_lines=5,
85
- lines=2,
86
- interactive=True,
87
- )
88
 
89
- system_prompt = gr.Textbox(
90
- label="System prompt",
91
- max_lines=5,
92
- lines=2,
93
- interactive=True,
94
- )
95
- max_tokens = gr.Slider(
96
- label="Max new tokens",
97
- minimum=1,
98
- maximum=MAX_MAX_NEW_TOKENS,
99
- step=1,
100
- value=DEFAULT_MAX_NEW_TOKENS,
101
- )
102
- temperature = gr.Slider(
103
- label="Temperature",
104
- minimum=0.1,
105
- maximum=4.0,
106
- step=0.1,
107
- value=0.6,
108
- )
109
- top_p = gr.Slider(
110
- label="Top-p (nucleus sampling)",
111
- minimum=0.05,
112
- maximum=1.0,
113
- step=0.05,
114
- value=0.9,
115
- )
116
- top_k = gr.Slider(
117
- label="Top-k",
118
- minimum=1,
119
- maximum=1000,
120
- step=1,
121
- value=50,
122
- )
123
- repeat_penalty = gr.Slider(
124
- label="Repetition penalty",
125
- minimum=1.0,
126
- maximum=2.0,
127
- step=0.05,
128
- value=1.2,
129
- )
130
-
131
- output = gr.Textbox(
132
- label="Output",
133
- max_lines=16,
134
- lines=10,
135
- interactive=True,
136
- )
137
-
138
- chat_interface = gr.Interface(
139
  fn=generate,
140
- inputs=[message, system_prompt, max_tokens, temperature, top_p, top_k, repeat_penalty],
141
- outputs=output,
142
- api_name="/run",
143
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  )
145
 
146
  with gr.Blocks(css="style.css", fill_height=True) as demo:
 
39
  model.eval()
40
 
41
 
42
+ @spaces.GPU(duration=90)
43
  def generate(
44
  message: str,
45
+ chat_history: list[tuple[str, str]],
46
  max_new_tokens: int = 1024,
47
  temperature: float = 0.6,
48
  top_p: float = 0.9,
 
50
  repetition_penalty: float = 1.2,
51
  ) -> Iterator[str]:
52
  conversation = []
53
+ for user, assistant in chat_history:
54
+ conversation.extend(
55
+ [
56
+ {"role": "user", "content": user},
57
+ {"role": "assistant", "content": assistant},
58
+ ]
59
+ )
60
  conversation.append({"role": "user", "content": message})
61
 
62
  input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
 
85
  outputs.append(text)
86
  yield "".join(outputs)
87
 
 
 
 
 
 
 
88
 
89
+ chat_interface = gr.ChatInterface(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  fn=generate,
91
+ additional_inputs=[
92
+ gr.Slider(
93
+ label="Max new tokens",
94
+ minimum=1,
95
+ maximum=MAX_MAX_NEW_TOKENS,
96
+ step=1,
97
+ value=DEFAULT_MAX_NEW_TOKENS,
98
+ ),
99
+ gr.Slider(
100
+ label="Temperature",
101
+ minimum=0.1,
102
+ maximum=4.0,
103
+ step=0.1,
104
+ value=0.6,
105
+ ),
106
+ gr.Slider(
107
+ label="Top-p (nucleus sampling)",
108
+ minimum=0.05,
109
+ maximum=1.0,
110
+ step=0.05,
111
+ value=0.9,
112
+ ),
113
+ gr.Slider(
114
+ label="Top-k",
115
+ minimum=1,
116
+ maximum=1000,
117
+ step=1,
118
+ value=50,
119
+ ),
120
+ gr.Slider(
121
+ label="Repetition penalty",
122
+ minimum=1.0,
123
+ maximum=2.0,
124
+ step=0.05,
125
+ value=1.2,
126
+ ),
127
+ ],
128
+ stop_btn=None,
129
+ examples=[
130
+ ["Hello there! How are you doing?"],
131
+ ["Can you explain briefly to me what is the Python programming language?"],
132
+ ["Explain the plot of Cinderella in a sentence."],
133
+ ["How many hours does it take a man to eat a Helicopter?"],
134
+ ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
135
+ ],
136
  )
137
 
138
  with gr.Blocks(css="style.css", fill_height=True) as demo: