InternLM-Math committed
Commit d85bad6
1 parent: 1ace293

Update app.py

Files changed (1)
  1. app.py +176 -129
app.py CHANGED
@@ -1,130 +1,177 @@
  import os
- os.system("pip uninstall -y gradio")
- os.system("pip install gradio==3.43.0")
- from lmdeploy.serve.gradio.turbomind_coupled import *
- from lmdeploy.messages import TurbomindEngineConfig
- from lmdeploy import ChatTemplateConfig
-
- chat_template = ChatTemplateConfig(model_name='internlm2-chat-7b', system='', eosys='', meta_instruction='')
- backend_config = TurbomindEngineConfig(model_name='internlm2-chat-7b', max_batch_size=1, cache_max_entry_count=0.05)#, model_format='awq')
- model_path = 'internlm/internlm2-math-7b'
-
- InterFace.async_engine = AsyncEngine(
-     model_path=model_path,
-     backend='turbomind',
-     backend_config=backend_config,
-     chat_template_config=chat_template,
-     tp=1)
-
- async def reset_local_func(instruction_txtbox: gr.Textbox,
-                            state_chatbot: Sequence, session_id: int):
-     """reset the session.
-
-     Args:
-         instruction_txtbox (str): user's prompt
-         state_chatbot (Sequence): the chatting history
-         session_id (int): the session id
-     """
-     state_chatbot = []
-     # end the session
-     with InterFace.lock:
-         InterFace.global_session_id += 1
-         session_id = InterFace.global_session_id
-     return (state_chatbot, state_chatbot, gr.Textbox.update(value=''), session_id)
-
- async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button,
-                             reset_btn: gr.Button, session_id: int):
-     """stop the session.
-
-     Args:
-         instruction_txtbox (str): user's prompt
-         state_chatbot (Sequence): the chatting history
-         cancel_btn (gr.Button): the cancel button
-         reset_btn (gr.Button): the reset button
-         session_id (int): the session id
-     """
-     yield (state_chatbot, disable_btn, disable_btn, session_id)
-     InterFace.async_engine.stop_session(session_id)
-     # pytorch backend does not support resume chat history now
-     if InterFace.async_engine.backend == 'pytorch':
-         yield (state_chatbot, disable_btn, enable_btn, session_id)
-     else:
-         with InterFace.lock:
-             InterFace.global_session_id += 1
-             session_id = InterFace.global_session_id
-         messages = []
-         for qa in state_chatbot:
-             messages.append(dict(role='user', content=qa[0]))
-             if qa[1] is not None:
-                 messages.append(dict(role='assistant', content=qa[1]))
-         gen_config = GenerationConfig(max_new_tokens=0)
-         async for out in InterFace.async_engine.generate(messages,
-                                                          session_id,
-                                                          gen_config=gen_config,
-                                                          stream_response=True,
-                                                          sequence_start=True,
-                                                          sequence_end=False):
-             pass
-         yield (state_chatbot, disable_btn, enable_btn, session_id)
-
- with gr.Blocks(css=CSS, theme=THEME) as demo:
-     state_chatbot = gr.State([])
-     state_session_id = gr.State(0)
-
-     with gr.Column(elem_id='container'):
-         gr.Markdown('## LMDeploy Playground')
-         gr.Markdown('[InternLM Math GitHub Page](https://github.com/InternLM/InternLM-Math)')
-
-         chatbot = gr.Chatbot(
-             elem_id='chatbot',
-             label=InterFace.async_engine.engine.model_name)
-         instruction_txtbox = gr.Textbox(
-             placeholder='Please input the instruction',
-             label='Instruction')
-         with gr.Row():
-             cancel_btn = gr.Button(value='Cancel', interactive=False)
-             reset_btn = gr.Button(value='Reset')
-         with gr.Row():
-             request_output_len = gr.Slider(1,
-                                            1024,
-                                            value=512,
-                                            step=1,
-                                            label='Maximum new tokens')
-             top_p = gr.Slider(0.01, 1, value=1.0, step=0.01, label='Top_p')
-             temperature = gr.Slider(0.01,
-                                     1.5,
-                                     value=0.01,
-                                     step=0.01,
-                                     label='Temperature')
-
-     send_event = instruction_txtbox.submit(chat_stream_local, [
-         instruction_txtbox, state_chatbot, cancel_btn, reset_btn,
-         state_session_id, top_p, temperature, request_output_len
-     ], [state_chatbot, chatbot, cancel_btn, reset_btn])
-     instruction_txtbox.submit(
-         lambda: gr.Textbox.update(value=''),
-         [],
-         [instruction_txtbox],
-     )
-     cancel_btn.click(
-         cancel_local_func,
-         [state_chatbot, cancel_btn, reset_btn, state_session_id],
-         [state_chatbot, cancel_btn, reset_btn, state_session_id],
-         cancels=[send_event])
-
-     reset_btn.click(reset_local_func,
-                     [instruction_txtbox, state_chatbot, state_session_id],
-                     [state_chatbot, chatbot, instruction_txtbox, state_session_id],
-                     cancels=[send_event])
-
-     def init():
-         with InterFace.lock:
-             InterFace.global_session_id += 1
-             new_session_id = InterFace.global_session_id
-         return new_session_id
-
-     demo.load(init, inputs=None, outputs=[state_session_id])
-
- # demo.queue(concurrency_count=InterFace.async_engine.instance_num,
- #            max_size=100).launch()
- demo.queue(max_size=1000).launch(max_threads=InterFace.async_engine.instance_num)
+ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
+ import torch
+ import streamlit as st
+ from openxlab.model import download
+ from modelscope import snapshot_download
  import os
+
+ # level = os.getenv('level')
+ level = '7'
+
+ with st.sidebar:
+     st.markdown('[InternLM Math GitHub Page](https://github.com/InternLM/InternLM-Math)')
+     max_length = st.slider("max_length", 0, 1024, 512, step=1)
+     # system_prompt = st.text_input("System_Prompt", "")
+
+ st.title("InternLM2-Math-7B")
+ st.caption("🚀 Powered by Shanghai AI Lab")
+
+ # Define the model path
+ ## ModelScope
+ # model_id = 'Shanghai_AI_Laboratory/internlm2-chat-' + str(level) + 'b'
+ # model_name_or_path = snapshot_download(model_id, revision='master')
+ model_name_or_path = "internlm/internlm2-math-7b"
+ # OpenXLab
+ # model_repo = "OpenLMLab/internlm2-chat-7b"
+ # model_name_or_path = download(model_repo=model_repo)
+
+
+ @st.cache_resource
+ def get_model():
+     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
+     model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype=torch.bfloat16).cuda()
+     model.eval()
+     return tokenizer, model
+
+ tokenizer, model = get_model()
+ if "messages" not in st.session_state:
+     st.session_state["messages"] = []
+ for msg in st.session_state.messages:
+     st.chat_message("user").write(msg[0])
+     st.chat_message("assistant").write(msg[1])
+ if prompt := st.chat_input():
+     st.chat_message("user").write(prompt)
+     response, history = model.chat(tokenizer, prompt, meta_instruction='', history=st.session_state.messages)
+     st.session_state.messages.append((prompt, response))
+     st.chat_message("assistant").write(response)
+
+ # import os
+ # os.system("pip uninstall -y gradio")
+ # os.system("pip install gradio==3.43.0")
+ # from lmdeploy.serve.gradio.turbomind_coupled import *
+ # from lmdeploy.messages import TurbomindEngineConfig
+ # from lmdeploy import ChatTemplateConfig
+
+ # chat_template = ChatTemplateConfig(model_name='internlm2-chat-7b', system='', eosys='', meta_instruction='')
+ # backend_config = TurbomindEngineConfig(model_name='internlm2-chat-7b', max_batch_size=1, cache_max_entry_count=0.05)#, model_format='awq')
+ # model_path = 'internlm/internlm2-math-7b'
+
+ # InterFace.async_engine = AsyncEngine(
+ #     model_path=model_path,
+ #     backend='turbomind',
+ #     backend_config=backend_config,
+ #     chat_template_config=chat_template,
+ #     tp=1)
+
+ # async def reset_local_func(instruction_txtbox: gr.Textbox,
+ #                            state_chatbot: Sequence, session_id: int):
+ #     """reset the session.
+
+ #     Args:
+ #         instruction_txtbox (str): user's prompt
+ #         state_chatbot (Sequence): the chatting history
+ #         session_id (int): the session id
+ #     """
+ #     state_chatbot = []
+ #     # end the session
+ #     with InterFace.lock:
+ #         InterFace.global_session_id += 1
+ #         session_id = InterFace.global_session_id
+ #     return (state_chatbot, state_chatbot, gr.Textbox.update(value=''), session_id)
+
+ # async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button,
+ #                             reset_btn: gr.Button, session_id: int):
+ #     """stop the session.
+
+ #     Args:
+ #         instruction_txtbox (str): user's prompt
+ #         state_chatbot (Sequence): the chatting history
+ #         cancel_btn (gr.Button): the cancel button
+ #         reset_btn (gr.Button): the reset button
+ #         session_id (int): the session id
+ #     """
+ #     yield (state_chatbot, disable_btn, disable_btn, session_id)
+ #     InterFace.async_engine.stop_session(session_id)
+ #     # pytorch backend does not support resume chat history now
+ #     if InterFace.async_engine.backend == 'pytorch':
+ #         yield (state_chatbot, disable_btn, enable_btn, session_id)
+ #     else:
+ #         with InterFace.lock:
+ #             InterFace.global_session_id += 1
+ #             session_id = InterFace.global_session_id
+ #         messages = []
+ #         for qa in state_chatbot:
+ #             messages.append(dict(role='user', content=qa[0]))
+ #             if qa[1] is not None:
+ #                 messages.append(dict(role='assistant', content=qa[1]))
+ #         gen_config = GenerationConfig(max_new_tokens=0)
+ #         async for out in InterFace.async_engine.generate(messages,
+ #                                                          session_id,
+ #                                                          gen_config=gen_config,
+ #                                                          stream_response=True,
+ #                                                          sequence_start=True,
+ #                                                          sequence_end=False):
+ #             pass
+ #         yield (state_chatbot, disable_btn, enable_btn, session_id)
+
+ # with gr.Blocks(css=CSS, theme=THEME) as demo:
+ #     state_chatbot = gr.State([])
+ #     state_session_id = gr.State(0)
+
+ #     with gr.Column(elem_id='container'):
+ #         gr.Markdown('## LMDeploy Playground')
+ #         gr.Markdown('[InternLM Math GitHub Page](https://github.com/InternLM/InternLM-Math)')
+
+ #         chatbot = gr.Chatbot(
+ #             elem_id='chatbot',
+ #             label=InterFace.async_engine.engine.model_name)
+ #         instruction_txtbox = gr.Textbox(
+ #             placeholder='Please input the instruction',
+ #             label='Instruction')
+ #         with gr.Row():
+ #             cancel_btn = gr.Button(value='Cancel', interactive=False)
+ #             reset_btn = gr.Button(value='Reset')
+ #         with gr.Row():
+ #             request_output_len = gr.Slider(1,
+ #                                            1024,
+ #                                            value=512,
+ #                                            step=1,
+ #                                            label='Maximum new tokens')
+ #             top_p = gr.Slider(0.01, 1, value=1.0, step=0.01, label='Top_p')
+ #             temperature = gr.Slider(0.01,
+ #                                     1.5,
+ #                                     value=0.01,
+ #                                     step=0.01,
+ #                                     label='Temperature')
+
+ #     send_event = instruction_txtbox.submit(chat_stream_local, [
+ #         instruction_txtbox, state_chatbot, cancel_btn, reset_btn,
+ #         state_session_id, top_p, temperature, request_output_len
+ #     ], [state_chatbot, chatbot, cancel_btn, reset_btn])
+ #     instruction_txtbox.submit(
+ #         lambda: gr.Textbox.update(value=''),
+ #         [],
+ #         [instruction_txtbox],
+ #     )
+ #     cancel_btn.click(
+ #         cancel_local_func,
+ #         [state_chatbot, cancel_btn, reset_btn, state_session_id],
+ #         [state_chatbot, cancel_btn, reset_btn, state_session_id],
+ #         cancels=[send_event])
+
+ #     reset_btn.click(reset_local_func,
+ #                     [instruction_txtbox, state_chatbot, state_session_id],
+ #                     [state_chatbot, chatbot, instruction_txtbox, state_session_id],
+ #                     cancels=[send_event])
+
+ #     def init():
+ #         with InterFace.lock:
+ #             InterFace.global_session_id += 1
+ #             new_session_id = InterFace.global_session_id
+ #         return new_session_id
+
+ #     demo.load(init, inputs=None, outputs=[state_session_id])
+
+ # # demo.queue(concurrency_count=InterFace.async_engine.instance_num,
+ # #            max_size=100).launch()
+ # demo.queue(max_size=1000).launch(max_threads=InterFace.async_engine.instance_num)
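
For readers following the change: the rewritten app drives generation through InternLM2's remote-code `chat` helper, keeping the conversation as a list of `(prompt, response)` tuples in `st.session_state.messages` and passing that list back as `history` on each turn. Below is a minimal sketch of the same loop outside Streamlit, assuming only what the app itself relies on (`chat` takes the tokenizer, a query string, and a `history` list of tuples, and returns `(response, updated_history)`); the sample queries are illustrative, not from the commit.

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_name_or_path = "internlm/internlm2-math-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path, trust_remote_code=True,
    torch_dtype=torch.bfloat16).cuda()
model.eval()

history = []  # list of (query, response) tuples, as in app.py
for query in ["Solve x^2 - 4 = 0.", "Now factor x^2 - 9."]:
    # meta_instruction='' disables the default system prompt, matching app.py
    response, history = model.chat(tokenizer, query, meta_instruction='',
                                   history=history)
    print(response)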
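Two details of the new script worth noting: `@st.cache_resource` makes Streamlit load the tokenizer and model once per process instead of on every script rerun, and the `max_length` slider in the sidebar is defined but never passed to `model.chat`, so it currently has no effect on generation length.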
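On Spaces the platform runs the script itself; to try the app locally, a launcher along these lines should work (hypothetical, not part of the commit; it assumes streamlit, torch, and transformers are installed and a CUDA GPU is available, since get_model() calls .cuda()):

import subprocess

# Start the Streamlit server for the app from the repository root.
subprocess.run(["streamlit", "run", "app.py"], check=True)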