InternLM-Math committed
Commit d21c507
1 parent: dc6726d

Update app.py

Files changed (1)
  1. app.py +129 -176
app.py CHANGED
@@ -1,177 +1,130 @@
- from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
- import torch
- import streamlit as st
- from openxlab.model import download
- from modelscope import snapshot_download
  import os
-
- # level = os.getenv('level')
- level = '7'
-
- with st.sidebar:
-     st.markdown('[InternLM Math GitHub Page](https://github.com/InternLM/InternLM-Math)')
-     max_length = st.slider("max_length", 0, 1024, 512, step=1)
-     # system_prompt = st.text_input("System_Prompt", "")
-
- st.title("InternLM2-math-7B")
- st.caption("🚀 Powered by Shanghai AI Lab")
-
- # Define the model path
- ## ModelScope
- # model_id = 'Shanghai_AI_Laboratory/internlm2-chat-'+ str(level) +'b'
- # mode_name_or_path = snapshot_download(model_id, revision='master')
- mode_name_or_path = "internlm/internlm2-math-7b"
- # OpenXLab
- # model_repo = "OpenLMLab/internlm2-chat-7b"
- # mode_name_or_path = download(model_repo=model_repo)
-
-
- @st.cache_resource
- def get_model():
-     tokenizer = AutoTokenizer.from_pretrained(mode_name_or_path, trust_remote_code=True)
-     model = AutoModelForCausalLM.from_pretrained(mode_name_or_path, trust_remote_code=True, torch_dtype=torch.bfloat16).cuda()
-     model.eval()
-     return tokenizer, model
-
- tokenizer, model = get_model()
- if "messages" not in st.session_state:
-     st.session_state["messages"] = []
- for msg in st.session_state.messages:
-     st.chat_message("user").write(msg[0])
-     st.chat_message("assistant").write(msg[1])
- if prompt := st.chat_input():
-     st.chat_message("user").write(prompt)
-     response, history = model.chat(tokenizer, prompt, meta_instruction='', history=st.session_state.messages)
-     st.session_state.messages.append((prompt, response))
-     st.chat_message("assistant").write(response)
-
- # import os
- # os.system("pip uninstall -y gradio")
- # os.system("pip install gradio==3.43.0")
- # from lmdeploy.serve.gradio.turbomind_coupled import *
- # from lmdeploy.messages import TurbomindEngineConfig
- # from lmdeploy import ChatTemplateConfig
-
- # chat_template = ChatTemplateConfig(model_name='internlm2-chat-7b', system='', eosys='', meta_instruction='')
- # backend_config = TurbomindEngineConfig(model_name='internlm2-chat-7b', max_batch_size=1, cache_max_entry_count=0.05)  # , model_format='awq')
- # model_path = 'internlm/internlm2-math-7b'
-
- # InterFace.async_engine = AsyncEngine(
- #     model_path=model_path,
- #     backend='turbomind',
- #     backend_config=backend_config,
- #     chat_template_config=chat_template,
- #     tp=1)
-
- # async def reset_local_func(instruction_txtbox: gr.Textbox,
- #                            state_chatbot: Sequence, session_id: int):
- #     """reset the session.
-
- #     Args:
- #         instruction_txtbox (str): user's prompt
- #         state_chatbot (Sequence): the chatting history
- #         session_id (int): the session id
- #     """
- #     state_chatbot = []
- #     # end the session
- #     with InterFace.lock:
- #         InterFace.global_session_id += 1
- #         session_id = InterFace.global_session_id
- #     return (state_chatbot, state_chatbot, gr.Textbox.update(value=''), session_id)
-
- # async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button,
- #                             reset_btn: gr.Button, session_id: int):
- #     """stop the session.
-
- #     Args:
- #         state_chatbot (Sequence): the chatting history
- #         cancel_btn (gr.Button): the cancel button
- #         reset_btn (gr.Button): the reset button
- #         session_id (int): the session id
- #     """
- #     yield (state_chatbot, disable_btn, disable_btn, session_id)
- #     InterFace.async_engine.stop_session(session_id)
- #     # pytorch backend does not support resume chat history now
- #     if InterFace.async_engine.backend == 'pytorch':
- #         yield (state_chatbot, disable_btn, enable_btn, session_id)
- #     else:
- #         with InterFace.lock:
- #             InterFace.global_session_id += 1
- #             session_id = InterFace.global_session_id
- #         messages = []
- #         for qa in state_chatbot:
- #             messages.append(dict(role='user', content=qa[0]))
- #             if qa[1] is not None:
- #                 messages.append(dict(role='assistant', content=qa[1]))
- #         gen_config = GenerationConfig(max_new_tokens=0)
- #         async for out in InterFace.async_engine.generate(messages,
- #                                                          session_id,
- #                                                          gen_config=gen_config,
- #                                                          stream_response=True,
- #                                                          sequence_start=True,
- #                                                          sequence_end=False):
- #             pass
- #         yield (state_chatbot, disable_btn, enable_btn, session_id)
-
- # with gr.Blocks(css=CSS, theme=THEME) as demo:
- #     state_chatbot = gr.State([])
- #     state_session_id = gr.State(0)
-
- #     with gr.Column(elem_id='container'):
- #         gr.Markdown('## LMDeploy Playground')
- #         gr.Markdown('[InternLM Math GitHub Page](https://github.com/InternLM/InternLM-Math)')
-
- #         chatbot = gr.Chatbot(
- #             elem_id='chatbot',
- #             label=InterFace.async_engine.engine.model_name)
- #         instruction_txtbox = gr.Textbox(
- #             placeholder='Please input the instruction',
- #             label='Instruction')
- #         with gr.Row():
- #             cancel_btn = gr.Button(value='Cancel', interactive=False)
- #             reset_btn = gr.Button(value='Reset')
- #         with gr.Row():
- #             request_output_len = gr.Slider(1,
- #                                            1024,
- #                                            value=512,
- #                                            step=1,
- #                                            label='Maximum new tokens')
- #             top_p = gr.Slider(0.01, 1, value=1.0, step=0.01, label='Top_p')
- #             temperature = gr.Slider(0.01,
- #                                     1.5,
- #                                     value=0.01,
- #                                     step=0.01,
- #                                     label='Temperature')
-
- #     send_event = instruction_txtbox.submit(chat_stream_local, [
- #         instruction_txtbox, state_chatbot, cancel_btn, reset_btn,
- #         state_session_id, top_p, temperature, request_output_len
- #     ], [state_chatbot, chatbot, cancel_btn, reset_btn])
- #     instruction_txtbox.submit(
- #         lambda: gr.Textbox.update(value=''),
- #         [],
- #         [instruction_txtbox],
- #     )
- #     cancel_btn.click(
- #         cancel_local_func,
- #         [state_chatbot, cancel_btn, reset_btn, state_session_id],
- #         [state_chatbot, cancel_btn, reset_btn, state_session_id],
- #         cancels=[send_event])
-
- #     reset_btn.click(reset_local_func,
- #                     [instruction_txtbox, state_chatbot, state_session_id],
- #                     [state_chatbot, chatbot, instruction_txtbox, state_session_id],
- #                     cancels=[send_event])
-
- #     def init():
- #         with InterFace.lock:
- #             InterFace.global_session_id += 1
- #             new_session_id = InterFace.global_session_id
- #         return new_session_id
-
- #     demo.load(init, inputs=None, outputs=[state_session_id])
-
- # # demo.queue(concurrency_count=InterFace.async_engine.instance_num,
- # #            max_size=100).launch()
- # demo.queue(max_size=1000).launch(max_threads=InterFace.async_engine.instance_num)
+ os.system("pip uninstall -y gradio")
+ os.system("pip install gradio==3.43.0")
+ from lmdeploy.serve.gradio.turbomind_coupled import *
+ from lmdeploy.messages import TurbomindEngineConfig
+ from lmdeploy import ChatTemplateConfig
+
+ chat_template = ChatTemplateConfig(model_name='internlm2-chat-7b', system='', eosys='', meta_instruction='')
+ backend_config = TurbomindEngineConfig(model_name='internlm2-chat-7b', max_batch_size=1, cache_max_entry_count=0.05)  # , model_format='awq')
+ model_path = 'internlm/internlm2-math-7b'
+
+ InterFace.async_engine = AsyncEngine(
+     model_path=model_path,
+     backend='turbomind',
+     backend_config=backend_config,
+     chat_template_config=chat_template,
+     tp=1)
+
+ async def reset_local_func(instruction_txtbox: gr.Textbox,
+                            state_chatbot: Sequence, session_id: int):
+     """reset the session.
+
+     Args:
+         instruction_txtbox (str): user's prompt
+         state_chatbot (Sequence): the chatting history
+         session_id (int): the session id
+     """
+     state_chatbot = []
+     # end the session
+     with InterFace.lock:
+         InterFace.global_session_id += 1
+         session_id = InterFace.global_session_id
+     return (state_chatbot, state_chatbot, gr.Textbox.update(value=''), session_id)
+
+ async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button,
+                             reset_btn: gr.Button, session_id: int):
+     """stop the session.
+
+     Args:
+         state_chatbot (Sequence): the chatting history
+         cancel_btn (gr.Button): the cancel button
+         reset_btn (gr.Button): the reset button
+         session_id (int): the session id
+     """
+     yield (state_chatbot, disable_btn, disable_btn, session_id)
+     InterFace.async_engine.stop_session(session_id)
+     # pytorch backend does not support resume chat history now
+     if InterFace.async_engine.backend == 'pytorch':
+         yield (state_chatbot, disable_btn, enable_btn, session_id)
+     else:
+         with InterFace.lock:
+             InterFace.global_session_id += 1
+             session_id = InterFace.global_session_id
+         messages = []
+         for qa in state_chatbot:
+             messages.append(dict(role='user', content=qa[0]))
+             if qa[1] is not None:
+                 messages.append(dict(role='assistant', content=qa[1]))
+         gen_config = GenerationConfig(max_new_tokens=0)
+         async for out in InterFace.async_engine.generate(messages,
+                                                          session_id,
+                                                          gen_config=gen_config,
+                                                          stream_response=True,
+                                                          sequence_start=True,
+                                                          sequence_end=False):
+             pass
+         yield (state_chatbot, disable_btn, enable_btn, session_id)
+
+ with gr.Blocks(css=CSS, theme=THEME) as demo:
+     state_chatbot = gr.State([])
+     state_session_id = gr.State(0)
+
+     with gr.Column(elem_id='container'):
+         gr.Markdown('## LMDeploy Playground')
+         gr.Markdown('[InternLM Math GitHub Page](https://github.com/InternLM/InternLM-Math)')
+
+         chatbot = gr.Chatbot(
+             elem_id='chatbot',
+             label=InterFace.async_engine.engine.model_name)
+         instruction_txtbox = gr.Textbox(
+             placeholder='Please input the instruction',
+             label='Instruction')
+         with gr.Row():
+             cancel_btn = gr.Button(value='Cancel', interactive=False)
+             reset_btn = gr.Button(value='Reset')
+         with gr.Row():
+             request_output_len = gr.Slider(1,
+                                            2048,
+                                            value=1024,
+                                            step=1,
+                                            label='Maximum new tokens')
+             top_p = gr.Slider(0.01, 1, value=1.0, step=0.01, label='Top_p')
+             temperature = gr.Slider(0.01,
+                                     1.5,
+                                     value=0.01,
+                                     step=0.01,
+                                     label='Temperature')
+
+     send_event = instruction_txtbox.submit(chat_stream_local, [
+         instruction_txtbox, state_chatbot, cancel_btn, reset_btn,
+         state_session_id, top_p, temperature, request_output_len
+     ], [state_chatbot, chatbot, cancel_btn, reset_btn])
+     instruction_txtbox.submit(
+         lambda: gr.Textbox.update(value=''),
+         [],
+         [instruction_txtbox],
+     )
+     cancel_btn.click(
+         cancel_local_func,
+         [state_chatbot, cancel_btn, reset_btn, state_session_id],
+         [state_chatbot, cancel_btn, reset_btn, state_session_id],
+         cancels=[send_event])
+
+     reset_btn.click(reset_local_func,
+                     [instruction_txtbox, state_chatbot, state_session_id],
+                     [state_chatbot, chatbot, instruction_txtbox, state_session_id],
+                     cancels=[send_event])
+
+     def init():
+         with InterFace.lock:
+             InterFace.global_session_id += 1
+             new_session_id = InterFace.global_session_id
+         return new_session_id
+
+     demo.load(init, inputs=None, outputs=[state_session_id])
+
+ # demo.queue(concurrency_count=InterFace.async_engine.instance_num,
+ #            max_size=100).launch()
+ demo.queue(max_size=1000).launch(max_threads=InterFace.async_engine.instance_num)
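
For reference, the same model can be exercised without the Gradio UI. Below is a minimal sketch, assuming lmdeploy's high-level pipeline API (which is not used by this commit) and reusing the engine and chat-template settings the Space configures above; the example prompt is illustrative only.

# Minimal sketch (assumption: lmdeploy's high-level `pipeline` API), mirroring
# the TurbomindEngineConfig and ChatTemplateConfig values set in app.py above.
from lmdeploy import pipeline, ChatTemplateConfig, TurbomindEngineConfig

pipe = pipeline(
    'internlm/internlm2-math-7b',
    backend_config=TurbomindEngineConfig(max_batch_size=1,
                                         cache_max_entry_count=0.05),
    chat_template_config=ChatTemplateConfig(model_name='internlm2-chat-7b',
                                            system='', eosys='',
                                            meta_instruction=''))

# pipe() takes a list of prompts and returns one response object per prompt.
print(pipe(['Find the minimum value of x^2 - 4x + 7.'])[0].text)

Whether this API is available depends on the lmdeploy version pinned in the Space; the commit itself drives the lower-level AsyncEngine through lmdeploy's turbomind_coupled Gradio glue.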