Commit a322642 by Felladrin · 1 Parent(s): 7cca115

Update app.py

Files changed (1)
  1. app.py +12 -14
app.py CHANGED
@@ -9,19 +9,18 @@ def generate(
     template_name,
     user_input,
     temperature=0.4,
-    top_p=0.95,
-    top_k=50,
+    top_p=0.25,
+    top_k=7,
     max_new_tokens=256,
 ):
     pipe = load_model(model_name)
-    # Need to add additional options later.
-    if template_name == "Falcon 1B Template":
+    if template_name == "User-Assistant":
         message_template = [
             {"role": "user", "content": "Hello!"},
-            {"role": "assistant", "content": "Hello! How can I assist you today?"},
+            {"role": "assistant", "content": "Hi! How can I assist you today?"},
             {"role": "user", "content": user_input},
         ]
-    else: # Default to "TinyLlama Template"
+    else:
         message_template = [
             {
                 "role": "system",
@@ -30,15 +29,14 @@ def generate(
             {"role": "user", "content": user_input},
         ]
 
-    # Set tokenize correctly. Otherwise ticking the box breaks it.
     prompt = pipe.tokenizer.apply_chat_template(message_template, tokenize=False, add_generation_prompt=True)
     outputs = pipe(prompt, max_new_tokens=max_new_tokens, do_sample=True,
                    temperature=temperature, top_k=top_k, top_p=top_p, repetition_penalty=1.10)
     return outputs[0]["generated_text"]
 
-model_choices = ["TinyLlama/TinyLlama-1.1B-Chat-v1.0", "ericzzz/falcon-rw-1b-chat"]
-template_choices = ["TinyLlama Template", "Falcon Template"]
-# What at the best options?
+model_choices = ["Felladrin/Pythia-31M-Chat-v1", "Felladrin/Llama-160M-Chat-v1", "Felladrin/Smol-Llama-101M-Chat-v1", "Felladrin/TinyMistral-248M-SFT-v4"]
+template_choices = ["System-User-Assistant", "User-Assistant"]
+
 g = gr.Interface(
     fn=generate,
     inputs=[
@@ -46,13 +44,13 @@ g = gr.Interface(
         gr.components.Dropdown(choices=template_choices, label="Template", value=template_choices[0], interactive=True),
         gr.components.Textbox(lines=2, label="Prompt", value="How many planets are in our solar system?"),
         gr.components.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
-        gr.components.Slider(minimum=0, maximum=1, value=0.95, label="Top p"),
-        gr.components.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
+        gr.components.Slider(minimum=0, maximum=1, value=0.25, label="Top p"),
+        gr.components.Slider(minimum=0, maximum=100, step=1, value=7, label="Top k"),
         gr.components.Slider(minimum=1, maximum=1024, step=1, value=256, label="Max tokens"),
     ],
     outputs=[gr.Textbox(lines=10, label="Output")],
-    title="Hugging Face Transformers Model",
-    description="A simple interface for generating text with a Hugging Face Transformers model.",
+    title="Chat with Felladrin's LLMs",
+    description="Note that the inference happens on free-tier hardware, which may lead to slower outputs during periods of high demand.",
     concurrency_limit=1
 )
 
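
The updated generate function still relies on a load_model helper that is defined elsewhere in app.py and is untouched by this commit. As a rough sketch of how the pieces fit together (the caching strategy and the specific checkpoint below are assumptions for illustration, not part of this diff), a minimal helper could wrap a transformers text-generation pipeline, after which apply_chat_template turns the message list into the model-specific prompt string:

# Sketch only: load_model is not shown in this commit; this assumes it simply
# caches a transformers text-generation pipeline per model name.
from functools import lru_cache

from transformers import pipeline


@lru_cache(maxsize=1)
def load_model(model_name):
    # Hypothetical implementation; the real app may pass device or dtype options.
    return pipeline("text-generation", model=model_name)


if __name__ == "__main__":
    # One of the models added in this commit (the choice here is illustrative).
    pipe = load_model("Felladrin/Llama-160M-Chat-v1")
    messages = [
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi! How can I assist you today?"},
        {"role": "user", "content": "How many planets are in our solar system?"},
    ]
    # Returns the model's own chat-formatted prompt string, ready to pass to pipe(...).
    prompt = pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    print(prompt)

Note also that the new sampling defaults (top_p=0.25 and top_k=7, down from 0.95 and 50) restrict generation to a much smaller set of high-probability tokens, so the listed small models should produce more focused, less random completions.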