lewtun (HF staff) committed
Commit af96a70
1 Parent(s): 51fe19a

Switch to inference API and extend max_new_tokens

Files changed (1)
app.py +4 -4
app.py CHANGED
@@ -18,7 +18,7 @@ DIALOGUES_DATASET = "HuggingFaceH4/starchat_playground_dialogues"
 
 model2endpoint = {
     "starchat-alpha": "https://api-inference.huggingface.co/models/HuggingFaceH4/starcoderbase-finetuned-oasst1",
-    "starchat-beta": "https://ddimh86h0wqthbhy.us-east-1.aws.endpoints.huggingface.cloud",
+    "starchat-beta": "https://api-inference.huggingface.co/models/HuggingFaceH4/starchat-beta",
 }
 model_names = list(model2endpoint.keys())
 
@@ -136,7 +136,7 @@ def generate(
         top_p=top_p,
         repetition_penalty=repetition_penalty,
         do_sample=True,
-        truncate=999,
+        truncate=4096,
         seed=42,
         stop_sequences=["<|end|>"],
     )
@@ -295,9 +295,9 @@ with gr.Blocks(analytics_enabled=False, css=custom_css) as demo:
     )
     max_new_tokens = gr.Slider(
         label="Max new tokens",
-        value=512,
+        value=1024,
         minimum=0,
-        maximum=1024,
+        maximum=2048,
         step=4,
         interactive=True,
         info="The maximum numbers of new tokens",