tonic committed on
Commit
0c6cf3e
1 Parent(s): 2c11956

initial commit

Files changed (2)
  1. app.py +55 -69
  2. requirements.txt +3 -3
app.py CHANGED
@@ -1,90 +1,76 @@
  import spaces
- import gradio as gr
  import torch
  import sys
  import html
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
  from threading import Thread

- model_name_or_path = 'TencentARC/Mistral_Pro_8B_v0.1'
-
- tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=False)
- model = AutoModelForCausalLM.from_pretrained(model_name_or_path)

- model.half().cuda()

- def convert_message(message):
-     message_text = ""
-     if message["content"] is None and message["role"] == "assistant":
-         message_text += "<|assistant|>\n"  # final msg
-     elif message["role"] == "system":
-         message_text += "<|system|>\n" + message["content"].strip() + "\n"
-     elif message["role"] == "user":
-         message_text += "<|user|>\n" + message["content"].strip() + "\n"
-     elif message["role"] == "assistant":
-         message_text += "<|assistant|>\n" + message["content"].strip() + "\n"
-     else:
-         raise ValueError("Invalid role: {}".format(message["role"]))
-     # gradio cleaning - it converts stuff to html entities
-     # we would need special handling for where we want to keep the html...
-     message_text = html.unescape(message_text)
-     # it also converts newlines to <br>, undo this.
-     message_text = message_text.replace("<br>", "\n")
-     return message_text

- def convert_history(chat_history, max_input_length=1024):
-     history_text = ""
-     idx = len(chat_history) - 1
-     # add messages in reverse order until we hit max_input_length
-     while len(tokenizer(history_text).input_ids) < max_input_length and idx >= 0:
-         user_message, chatbot_message = chat_history[idx]
-         user_message = convert_message({"role": "user", "content": user_message})
-         chatbot_message = convert_message({"role": "assistant", "content": chatbot_message})
-         history_text = user_message + chatbot_message + history_text
-         idx = idx - 1
-     # if nothing was added, add <|assistant|> to start generation.
-     if history_text == "":
-         history_text = "<|assistant|>\n"
-     return history_text

- @spaces.GPU
  @torch.inference_mode()
- def instruct(instruction, max_token_output=1024):
-     input_text = instruction
-     input_ids = tokenizer(input_text, return_tensors='pt', truncation=True).to('cuda')
-     outputs = model.generate(**input_ids, max_length=max_token_output, do_sample=False)
-     return tokenizer.decode(outputs[0], skip_special_tokens=True)
-

- with gr.Blocks() as demo:
-     # chatbot-style model
-     with gr.Tab("Chatbot"):
-         chatbot = gr.Chatbot([], elem_id="chatbot")
-         msg = gr.Textbox()
-         clear = gr.Button("Clear")
-         send_button = gr.Button("Send")  # Add a send button
-         # fn to add user message to history
-         def user(user_message, history):
-             return "", history + [[user_message, None]]

-         def bot(history):
-             prompt = convert_history(history)
-             streaming_out = instruct(prompt)
-             history[-1][1] = ""
-             for new_token in streaming_out:
-                 history[-1][1] += new_token
-                 yield history

-         msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-             bot, chatbot, chatbot
-         )

-         send_button.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-             bot, chatbot, chatbot
        )

-         clear.click(lambda: None, None, chatbot, queue=False)

  if __name__ == "__main__":
-     demo.queue().launch(share=True)
  import spaces
  import torch
  import sys
  import html
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
  from threading import Thread
+ import gradio as gr
+ from gradio_rich_textbox import RichTextbox
+ from transformers import GenerationConfig  # used to set model.generation_config below

+ title = """# 🙋🏻‍♂️Welcome to🌟Tonic's🫡📉MetaMath
+ This is Tencent's Mistral DPO finetune for mathematics. You can build on this endpoint using 🫡📉MetaMath, available here: [TencentARC/Mistral_Pro_8B_v0.1](https://huggingface.co/TencentARC/Mistral_Pro_8B_v0.1). We're using 🤖[introspector/unimath](https://huggingface.co/datasets/introspector/unimath) for cool examples, so check it out below! The demo is still a work in progress, and we're looking forward to building downstream tasks that showcase outstanding mathematical reasoning. Have any ideas? Join us below!
+ You can also use 🫡📉MetaMath by cloning this space. Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/MetaMath?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
+ Join us: 🌟TeamTonic🌟 is always making cool demos! Join our active builders' 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) Math with [introspector](https://huggingface.co/introspector) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟[SciTonic](https://github.com/Tonic-AI/scitonic). 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
+ """

+ model_name = 'TencentARC/Mistral_Pro_8B_v0.1'
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
+ model.generation_config = GenerationConfig.from_pretrained(model_name)
+ model.generation_config.pad_token_id = model.generation_config.eos_token_id

+ @spaces.GPU
  @torch.inference_mode()
+ def predict_math_bot(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, use_custom_settings=False):
+     # use_custom_settings comes from the UI checkbox; generation always uses the slider values for now.
+     prompt = f"<|system|>\n{system_message}\n<|user|>\n{user_message}\n<|assistant|>" if system_message else user_message
+     inputs = tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
+     input_ids = inputs["input_ids"].to(model.device)

+     output_ids = model.generate(
+         input_ids,
+         max_length=input_ids.shape[1] + max_new_tokens,
+         temperature=temperature,
+         top_p=top_p,
+         repetition_penalty=repetition_penalty,
+         pad_token_id=tokenizer.eos_token_id,
+         do_sample=True
+     )

+     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+     return response

+ def main():
+     with gr.Blocks() as demo:
+         gr.Markdown(title)
+         output_text = RichTextbox(label="🫡📉MetaMath", show_label=True, interactive=True)
+
+         with gr.Accordion("Advanced Settings"):
+             with gr.Row():
+                 max_new_tokens = gr.Slider(label="Max new tokens", value=125, minimum=25, maximum=1250)
+                 temperature = gr.Slider(label="Temperature", value=0.1, minimum=0.05, maximum=1.0)
+                 top_p = gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.01, maximum=0.99)
+                 repetition_penalty = gr.Slider(label="Repetition penalty", value=1.9, minimum=1.0, maximum=2.0)
+                 use_custom_settings = gr.Checkbox(label="Use custom settings", value=False)
+
+         with gr.Row():
+             user_message = gr.Textbox(label="🫡Your Message", lines=3, placeholder="Enter your math query here...")
+             system_message = gr.Textbox(label="📉System Prompt", lines=2, placeholder="Optional: Set a scene or introduce a character...")

+         gr.Button("Generate").click(
+             predict_math_bot,
+             inputs=[user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, use_custom_settings],
+             outputs=output_text
        )

+         # Example placeholders
+         gr.Examples(
+             examples=[
+                 ["Calculate the integral of x^2 from 0 to 1", "produce a complete mathematical explanation of the following in rich text format:", 125, 0.1, 0.9, 1.9, False],
+                 ["What is the derivative of sin(x)?", "produce a complete mathematical explanation of the following in rich text format:", 125, 0.1, 0.9, 1.9, False]
+             ],
+             inputs=[user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, use_custom_settings]
+         )

+     demo.launch()

  if __name__ == "__main__":
+     main()
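
Note: the new app.py still imports TextIteratorStreamer and Thread but no longer streams; predict_math_bot decodes the whole output in one shot. A minimal sketch of how streaming could be wired back in with those imports (stream_math_bot is a hypothetical helper, reusing the model and tokenizer loaded above):

# Sketch only: incremental decoding with the imports app.py already carries.
# Assumes `model` and `tokenizer` are loaded as in the new app.py above.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_math_bot(prompt, max_new_tokens=125):
    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so run it in a background thread and consume tokens here.
    thread = Thread(target=model.generate,
                    kwargs=dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens))
    thread.start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # Gradio re-renders the output component on each yield
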
requirements.txt CHANGED
@@ -1,8 +1,8 @@
  accelerate
  bitsandbytes
- gradio
  protobuf
  scipy
  sentencepiece
- torch
- git+https://github.com/huggingface/transformers@main

+ torch
+ transformers
  accelerate
  bitsandbytes
  protobuf
  scipy
  sentencepiece
+ gradio_rich_textbox
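
To sanity-check the new dependency set outside the Space, a minimal smoke test (a sketch; assumes a GPU with enough memory for the 8B model in bfloat16):

# Hypothetical local check that the requirements above load and run the model.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = 'TencentARC/Mistral_Pro_8B_v0.1'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")

ids = tokenizer("What is the derivative of sin(x)?", return_tensors='pt').input_ids.to(model.device)
out = model.generate(ids, max_new_tokens=32, do_sample=False)
print(tokenizer.decode(out[0], skip_special_tokens=True))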