thesven committed on
Commit
a61f894
1 Parent(s): f91c3ab
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -15,7 +15,7 @@ DESCRIPTION = """
15
  """
16
 
17
  tokenizer = AutoTokenizer.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
18
- model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True)
19
 
20
  def format_prompt(user_message, system_message="You are an expert developer in all programming languages. Help me with my code. Answer any questions I have with code examples."):
21
  prompt = f"<|im_start|>assistant\n{system_message}<|im_end|>\n<|im_start|>\nuser\n{user_message}<|im_end|>\nassistant\n"
@@ -59,8 +59,8 @@ with gr.Blocks() as demo:
59
  submit_button = gr.Button('Submit', variant='primary')
60
 
61
  with gr.Accordion(label='Advanced options', open=False):
62
- max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=4056)
63
- temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=1.2)
64
  top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
65
  top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=40)
66
  do_sample_checkbox = gr.Checkbox(label='Disable for faster inference', value=True)
 
15
  """
16
 
17
  tokenizer = AutoTokenizer.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
18
+ model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", torch_dtype=torch.bfloat16, trust_remote_code=True)
19
 
20
  def format_prompt(user_message, system_message="You are an expert developer in all programming languages. Help me with my code. Answer any questions I have with code examples."):
21
  prompt = f"<|im_start|>assistant\n{system_message}<|im_end|>\n<|im_start|>\nuser\n{user_message}<|im_end|>\nassistant\n"
 
59
  submit_button = gr.Button('Submit', variant='primary')
60
 
61
  with gr.Accordion(label='Advanced options', open=False):
62
+ max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=512)
63
+ temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=0.1)
64
  top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
65
  top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=40)
66
  do_sample_checkbox = gr.Checkbox(label='Disable for faster inference', value=True)