Tonic committed on
Commit ea82e95
1 parent: ca5433e

Update app.py

Files changed (1): app.py (+6, -4)
app.py CHANGED
@@ -17,7 +17,8 @@ model = AutoModelForCausalLM.from_pretrained("larryvrh/Yi-34B-200K-Llamafied", d
 # tokenizer = YiTokenizer.from_pretrained("./")
 # model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto", trust_remote_code=True, quantization_config=gptq_config)
 
-def predict(message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800):
+def predict(message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800, do_sample=False):
+
     prompt = message.strip()
     input_ids = tokenizer.encode(prompt, return_tensors='pt')
     input_ids = input_ids.to(model.device)
@@ -28,7 +29,7 @@ def predict(message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800)
         top_p=top_p,
         top_k=top_k,
         pad_token_id=tokenizer.eos_token_id,
-        do_sample=True
+        do_sample=do_sample
     )
     response = tokenizer.decode(response_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
     return [("bot", response)]
@@ -47,14 +48,15 @@ with gr.Blocks(theme='ParityError/Anime') as demo:
     chatbot = gr.Chatbot(label='TonicYi-30B-200K')
 
     with gr.Accordion(label='Advanced options', open=False):
-        max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=3800)
+        max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=8000)
         temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=1.2)
         top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
         top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=900)
+        do_sample_checkbox = gr.Checkbox(label='Do Sample', value=True, tooltip="Disable for faster inference")
 
     submit_button.click(
         fn=predict,
-        inputs=[textbox, max_new_tokens, temperature, top_p, top_k],
+        inputs=[textbox, max_new_tokens, temperature, top_p, top_k, do_sample_checkbox],
         outputs=chatbot
     )
 
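
Below is a minimal, self-contained sketch (not part of this commit) of what the do_sample flag wired up above controls in transformers' generate(): with do_sample=False the model decodes greedily and deterministically, and sampling parameters such as temperature, top_p, and top_k have no effect; with do_sample=True they shape the token distribution. The small model sshleifer/tiny-gpt2 is an assumption here, chosen only so the sketch runs quickly; the Space itself loads larryvrh/Yi-34B-200K-Llamafied.

from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical small model, assumed only to keep the sketch runnable.
tok = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
lm = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")

ids = tok.encode("Hello", return_tensors="pt")

# do_sample=False: deterministic greedy decoding; temperature/top_p/top_k are ignored.
greedy = lm.generate(ids, max_new_tokens=8, do_sample=False,
                     pad_token_id=tok.eos_token_id)

# do_sample=True: temperature/top_p/top_k now shape the sampling distribution.
sampled = lm.generate(ids, max_new_tokens=8, do_sample=True, temperature=1.2,
                      top_p=0.9, top_k=50, pad_token_id=tok.eos_token_id)

print(tok.decode(greedy[0], skip_special_tokens=True))
print(tok.decode(sampled[0], skip_special_tokens=True))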