Tonic committed
Commit fd37061
Parent: 714d3ed

Update app.py

Files changed (1):
  app.py (+41, -23)
app.py CHANGED
@@ -5,9 +5,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import gradio as gr
 import sentencepiece
+from tokenization_xgen import XgenTokenizer
 
 title = "Welcome to 🙋🏻‍♂️Tonic's🌷Tulu Chat!"
-description = "[allenai/tulu-2-dpo-7b](https://huggingface.co/allenai/tulu-2-dpo-7b) and larger Tulu-2 models are Instruct Llama Finetunes using the [mistralai/Mistral-7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) recipe. You can use [allenai/tulu-2-13b](https://huggingface.co/allenai/tulu-2-13b) here via API using Gradio by scrolling down and clicking Use 'Via API' or privately by [cloning this space on huggingface](https://huggingface.co/spaces/Tonic1/TuluDemo?duplicate=true) See also the large model here : [allenai/tulu-2-dpo-70b](https://huggingface.co/allenai/tulu-2-dpo-70b) . [Join my active builders' server on discord](https://discord.gg/VqTxc76K3u). Let's build together!. [add this space as a discord bot on your server](https://discord.com/oauth2/authorize?client_id=1176628808212828231&scope=bot+applications.commands&permissions=326417525824) Big thanks to 🤗Huggingface for the🫂Community Grant!"
+description = "[allenai/tulu-2-dpo-7b](https://huggingface.co/allenai/tulu-2-dpo-7b) and larger Tulu-2 models are Instruct Llama Finetunes using the [mistralai/Mistral-7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) recipe. You can use [allenai/tulu-2-13b](https://huggingface.co/allenai/tulu-2-13b) here via API using Gradio by scrolling down and clicking Use 'Via API' or privately by [cloning this space on huggingface](https://huggingface.co/spaces/Tonic1/TuluDemo?duplicate=true) See also the large model here : [allenai/tulu-2-dpo-70b](https://huggingface.co/allenai/tulu-2-dpo-70b) . [Join my active builders' server on discord](https://discord.gg/VqTxc76K3u). Let's build together!."
 
 os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -28,7 +29,7 @@ class TuluChatBot:
         prompt = f"<|assistant|>\n {self.system_message}\n\n <|user|>{user_message}\n\n<|assistant|>\n"
         return prompt
 
-    def predict(self, user_message, temperature=0.4, max_new_tokens=70, top_p=0.99, repetition_penalty=1.9):
+    def predict(self, user_message, temperature, max_new_tokens, top_p, repetition_penalty, do_sample):
         prompt = self.format_prompt(user_message)
         inputs = self.tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
         input_ids = inputs["input_ids"].to(self.model.device)
@@ -42,33 +43,50 @@ class TuluChatBot:
             temperature=temperature,
             top_p=top_p,
             repetition_penalty=repetition_penalty,
-            do_sample=True
+            do_sample=do_sample
         )
 
         response = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
         return response
 
-def gradio_predict(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty):
+def gradio_predict(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample):
     Tulu_bot.set_system_message(system_message)
-    response = Tulu_bot.predict(user_message, temperature, max_new_tokens, top_p, repetition_penalty)
+    if not do_sample:
+        max_length = 1269
+        temperature = 1.2 # Default value
+        top_p = 0.9 # Default value
+        repetition_penalty = 0.9 # Default value
+    response = Tulu_bot.predict(user_message, temperature, max_new_tokens, top_p, repetition_penalty, do_sample)
     return response
-
+
 Tulu_bot = TuluChatBot(model, tokenizer)
 
-iface = gr.Interface(
-    fn=gradio_predict,
-    title=title,
-    description=description,
-    inputs=[
-        gr.Textbox(label="Your Message", type="text", lines=3),
-        gr.Textbox(label="Introduce a Character Here or Set a Scene (system prompt)", type="text", lines=2),
-        gr.Slider(label="Max new tokens", value=1269, minimum=550, maximum=3200, step=1),
-        gr.Slider(label="Temperature", value=1.2, minimum=0.05, maximum=4.0, step=0.05),
-        gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.01, maximum=0.99, step=0.05),
-        gr.Slider(label="Repetition penalty", value=1.9, minimum=1.0, maximum=2.0, step=0.05)
-    ],
-    outputs="text",
-    theme="ParityError/Anime"
-)
-
-iface.queue(max_size=5).launch()
+
+with gr.Blocks() as demo:
+    with gr.Row():
+        system_message = gr.Textbox(label="Optional 🌷Tulu Assistant Message", lines=2)
+        user_message = gr.Textbox(label="Your Message", lines=3)
+    with gr.Row():
+        do_sample = gr.Checkbox(label="Advanced", value=False)
+
+    with gr.Accordion("Advanced Settings", open=lambda do_sample: do_sample):
+        with gr.Row():
+            max_new_tokens = gr.Slider(label="Max new tokens", value=1269, minimum=550, maximum=3200, step=1)
+            temperature = gr.Slider(label="Temperature", value=1.2, minimum=0.05, maximum=4.0, step=0.05)
+            top_p = gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.01, maximum=0.99, step=0.05)
+            repetition_penalty = gr.Slider(label="Repetition penalty", value=1.9, minimum=1.0, maximum=2.0, step=0.05)
+
+    submit_button = gr.Button("Submit")
+    output_text = gr.Textbox(label="🌷Tulu Response")
+
+    def process(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample):
+        return gradio_predict(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample)
+
+    submit_button.click(
+        process,
+        inputs=[user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample],
+        outputs=output_text
+    )
+
+    theme = "ParityError/Anime"
+    demo.launch()
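
The behavioral core of this commit is that do_sample is now threaded through to model.generate instead of being hard-coded to True. With do_sample=False, transformers falls back to greedy decoding and ignores temperature and top_p, which is what the reset branch in gradio_predict relies on. A self-contained sketch of the two modes; the tiny model name is only a stand-in to keep the example cheap to run:

# Stand-in model; a causal LM such as allenai/tulu-2-dpo-7b behaves the same way.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
input_ids = tokenizer("The tulip is", return_tensors="pt").input_ids

# do_sample=False: deterministic greedy decoding; temperature/top_p have no effect.
greedy = model.generate(input_ids, max_new_tokens=20, do_sample=False)

# do_sample=True: stochastic decoding shaped by the sampling knobs from the sliders.
sampled = model.generate(input_ids, max_new_tokens=20, do_sample=True,
                         temperature=1.2, top_p=0.9, repetition_penalty=1.9)

print(tokenizer.decode(greedy[0], skip_special_tokens=True))
print(tokenizer.decode(sampled[0], skip_special_tokens=True))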
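The description mentions using the Space "via API"; below is a minimal client-side sketch using the gradio_client package. The Space id Tonic1/TuluDemo comes from the description, while fn_index=0 and the argument order are assumptions read off the inputs list wired to submit_button.click, so the Space's "Use via API" panel remains the authoritative reference for the signature.

# Hypothetical client call, mirroring the inputs wired to submit_button.click.
from gradio_client import Client

client = Client("Tonic1/TuluDemo")  # Space id taken from the description above
result = client.predict(
    "How do polar bears stay warm?",          # user_message
    "You are 🌷Tulu, a friendly assistant.",  # system_message
    1269,                                     # max_new_tokens (slider default)
    1.2,                                      # temperature (slider default)
    0.90,                                     # top_p (slider default)
    1.9,                                      # repetition_penalty (slider default)
    False,                                    # do_sample ("Advanced" checkbox)
    fn_index=0,                               # assumed: first registered endpoint
)
print(result)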
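One wiring detail for anyone adapting the new Blocks layout: gr.Accordion's open argument is a plain boolean evaluated at build time, so conditional show/hide behavior is conventionally driven by an event listener on the checkbox instead. A sketch of that pattern, assuming a Gradio version where gr.update(open=...) is accepted for an Accordion output:

# Conventional pattern: toggle the accordion from the checkbox via a .change event.
import gradio as gr

with gr.Blocks() as demo:
    advanced = gr.Checkbox(label="Advanced", value=False)
    with gr.Accordion("Advanced Settings", open=False) as settings:
        temperature = gr.Slider(label="Temperature", value=1.2, minimum=0.05, maximum=4.0, step=0.05)

    # Assumes the installed Gradio version supports updating Accordion's open state.
    advanced.change(lambda checked: gr.update(open=checked), inputs=advanced, outputs=settings)

demo.launch()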