rubenroy committed on
Commit
4706d9e
·
verified ·
1 Parent(s): 7786282

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -58
app.py CHANGED
@@ -12,10 +12,10 @@ model = AutoModelForCausalLM.from_pretrained(
12
  tokenizer = AutoTokenizer.from_pretrained(model_name)
13
 
14
  @spaces.GPU
15
- def generate(prompt, history, temperature, top_p, top_k, max_new_tokens, repetition_penalty):
16
  messages = [
17
  {"role": "system", "content": "You are Zurich, a 7 billion parameter Large Language model built on the Qwen 2.5 7B model developed by Alibaba Cloud, and fine-tuned by Ruben Roy. You have been fine-tuned with the GammaCorpus v2 dataset, a dataset filled with structured and filtered multi-turn conversations and was also created by Ruben Roy. You are a helpful assistant."},
18
- {"role": "user", "content": prompt}
19
  ]
20
  text = tokenizer.apply_chat_template(
21
  messages,
@@ -25,12 +25,12 @@ def generate(prompt, history, temperature, top_p, top_k, max_new_tokens, repetit
25
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
26
  generated_ids = model.generate(
27
  **model_inputs,
28
- temperature=temperature,
29
- top_p=top_p,
30
- top_k=top_k,
31
- max_new_tokens=max_new_tokens,
32
- repetition_penalty=repetition_penalty,
33
- do_sample=True if temperature > 0 else False
34
  )
35
  generated_ids = [
36
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
@@ -162,63 +162,71 @@ examples = [
162
  ["What are the key differences between machine learning and deep learning?"]
163
  ]
164
 
165
- def create_generation_settings():
166
- with gr.Group():
167
- with gr.Accordion("Generation Settings", open=False):
168
- temperature = gr.Slider(
169
- minimum=0.0,
170
- maximum=2.0,
171
- value=0.7,
172
- step=0.1,
173
- label="Temperature",
174
- info="Higher values make the output more random, lower values make it more focused and deterministic"
175
- )
176
- top_p = gr.Slider(
177
- minimum=0.0,
178
- maximum=1.0,
179
- value=0.9,
180
- step=0.05,
181
- label="Top P",
182
- info="Used for nucleus sampling - controls the cumulative probability of tokens to consider"
183
- )
184
- top_k = gr.Slider(
185
- minimum=1,
186
- maximum=100,
187
- value=50,
188
- step=1,
189
- label="Top K",
190
- info="Limits the number of tokens to consider for each step of text generation"
191
- )
192
- max_new_tokens = gr.Slider(
193
- minimum=1,
194
- maximum=2048,
195
- value=512,
196
- step=1,
197
- label="Max New Tokens",
198
- info="Maximum number of tokens to generate in the response"
199
- )
200
- repetition_penalty = gr.Slider(
201
- minimum=1.0,
202
- maximum=2.0,
203
- value=1.1,
204
- step=0.1,
205
- label="Repetition Penalty",
206
- info="Higher values prevent the model from repeating the same information"
207
- )
208
- return temperature, top_p, top_k, max_new_tokens, repetition_penalty
209
-
210
  with gr.Blocks() as demo:
211
  gr.HTML(TITLE_HTML)
212
 
213
- # Create generation settings
214
- temperature, top_p, top_k, max_new_tokens, repetition_penalty = create_generation_settings()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
- # Create the chat interface with the additional parameters
217
  chatbot = gr.ChatInterface(
218
- fn=lambda msg, history: generate(msg, history, temperature.value, top_p.value, top_k.value, max_new_tokens.value, repetition_penalty.value),
 
 
 
 
 
 
 
219
  examples=examples,
220
  title="Chat with Zurich",
221
- description="Ask me anything! I'm here to help with explanations, coding, math, writing, and more.",
222
  )
223
 
224
  demo.launch(share=True)
 
12
  tokenizer = AutoTokenizer.from_pretrained(model_name)
13
 
14
  @spaces.GPU
15
+ def generate(message, chat_history, temperature=0.7, top_p=0.9, top_k=50, max_new_tokens=512, repetition_penalty=1.1):
16
  messages = [
17
  {"role": "system", "content": "You are Zurich, a 7 billion parameter Large Language model built on the Qwen 2.5 7B model developed by Alibaba Cloud, and fine-tuned by Ruben Roy. You have been fine-tuned with the GammaCorpus v2 dataset, a dataset filled with structured and filtered multi-turn conversations and was also created by Ruben Roy. You are a helpful assistant."},
18
+ {"role": "user", "content": message}
19
  ]
20
  text = tokenizer.apply_chat_template(
21
  messages,
 
25
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
26
  generated_ids = model.generate(
27
  **model_inputs,
28
+ temperature=float(temperature),
29
+ top_p=float(top_p),
30
+ top_k=int(top_k),
31
+ max_new_tokens=int(max_new_tokens),
32
+ repetition_penalty=float(repetition_penalty),
33
+ do_sample=True if float(temperature) > 0 else False
34
  )
35
  generated_ids = [
36
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
 
162
  ["What are the key differences between machine learning and deep learning?"]
163
  ]
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  with gr.Blocks() as demo:
166
  gr.HTML(TITLE_HTML)
167
 
168
+ with gr.Accordion("Generation Settings", open=False):
169
+ with gr.Row():
170
+ with gr.Column():
171
+ temperature = gr.Slider(
172
+ minimum=0.0,
173
+ maximum=2.0,
174
+ value=0.7,
175
+ step=0.1,
176
+ label="Temperature",
177
+ info="Higher values make the output more random, lower values make it more focused and deterministic",
178
+ interactive=True
179
+ )
180
+ top_p = gr.Slider(
181
+ minimum=0.0,
182
+ maximum=1.0,
183
+ value=0.9,
184
+ step=0.05,
185
+ label="Top P",
186
+ info="Controls the cumulative probability threshold for nucleus sampling",
187
+ interactive=True
188
+ )
189
+ top_k = gr.Slider(
190
+ minimum=1,
191
+ maximum=100,
192
+ value=50,
193
+ step=1,
194
+ label="Top K",
195
+ info="Limits the number of tokens to consider for each generation step",
196
+ interactive=True
197
+ )
198
+ with gr.Column():
199
+ max_new_tokens = gr.Slider(
200
+ minimum=1,
201
+ maximum=2048,
202
+ value=512,
203
+ step=1,
204
+ label="Max New Tokens",
205
+ info="Maximum number of tokens to generate in the response",
206
+ interactive=True
207
+ )
208
+ repetition_penalty = gr.Slider(
209
+ minimum=1.0,
210
+ maximum=2.0,
211
+ value=1.1,
212
+ step=0.1,
213
+ label="Repetition Penalty",
214
+ info="Higher values prevent the model from repeating the same information",
215
+ interactive=True
216
+ )
217
 
 
218
  chatbot = gr.ChatInterface(
219
+ fn=generate,
220
+ additional_inputs=[
221
+ temperature,
222
+ top_p,
223
+ top_k,
224
+ max_new_tokens,
225
+ repetition_penalty
226
+ ],
227
  examples=examples,
228
  title="Chat with Zurich",
229
+ description="Ask me anything! I'm here to help with explanations, coding, math, writing, and more."
230
  )
231
 
232
  demo.launch(share=True)