# InternLM2-Math-7B Streamlit chat demo (Hugging Face Space).
# Streamlit chat front-end for InternLM2-Math-7B.
# NOTE(review): the trailing " | |" table artifacts from the scraped page were
# removed from every line — they made the whole file a SyntaxError.
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
import streamlit as st
from openxlab.model import download          # only used by the commented-out OpenXLab path below
from modelscope import snapshot_download     # only used by the commented-out ModelScope path below
import os

# Model size in billions of parameters; previously read from the environment.
# level = os.getenv('level')
level = '7'

with st.sidebar:
    st.markdown('[InternLM Math GitHub Page](https://github.com/InternLM/InternLM-Math)')
    # Upper bound on new tokens generated per reply (wired to model.chat below).
    max_length = st.slider("max_length", 0, 1024, 512, step=1)
    # system_prompt = st.text_input("System_Prompt", "")

st.title("InternLM2-math-7B")
st.caption("🚀 Powered By Shanghai Ai Lab")

# Model path.
## ModelScope
# model_id = 'Shanghai_AI_Laboratory/internlm2-chat-'+ str(level) +'b'
# mode_name_or_path = snapshot_download(model_id, revision='master')
mode_name_or_path = "internlm/internlm2-math-7b"
# OpenXLab
# model_repo = "OpenLMLab/internlm2-chat-7b"
# mode_name_or_path = download(model_repo=model_repo)


@st.cache_resource
def get_model():
    """Load the tokenizer and model once per process.

    Cached with ``st.cache_resource`` so Streamlit does not re-download and
    re-load the 7B checkpoint on every UI rerun (the original reloaded it on
    each interaction).

    Returns:
        tuple: ``(tokenizer, model)`` — model in eval mode on the GPU.
    """
    tokenizer = AutoTokenizer.from_pretrained(mode_name_or_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        mode_name_or_path, trust_remote_code=True, torch_dtype=torch.bfloat16
    ).cuda()
    model.eval()
    return tokenizer, model


tokenizer, model = get_model()

# Chat history lives in session state as (user_text, assistant_text) pairs.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay the conversation so far on each rerun.
for msg in st.session_state.messages:
    st.chat_message("user").write(msg[0])
    st.chat_message("assistant").write(msg[1])

if prompt := st.chat_input():
    st.chat_message("user").write(prompt)
    # Wire the sidebar slider into generation; previously max_length was unused.
    # NOTE(review): assumes InternLM2's remote-code chat() accepts
    # max_new_tokens — confirm against the checkpoint's modeling code.
    response, history = model.chat(
        tokenizer,
        prompt,
        meta_instruction='',
        history=st.session_state.messages,
        max_new_tokens=max_length,
    )
    st.session_state.messages.append((prompt, response))
    st.chat_message("assistant").write(response)
# --- Alternative LMDeploy + Gradio implementation (disabled; kept for reference) ---
# import os
# os.system("pip uninstall -y gradio") | |
# os.system("pip install gradio==3.43.0") | |
# from lmdeploy.serve.gradio.turbomind_coupled import * | |
# from lmdeploy.messages import TurbomindEngineConfig | |
# from lmdeploy import ChatTemplateConfig | |
# chat_template = ChatTemplateConfig(model_name='internlm2-chat-7b', system='', eosys='', meta_instruction='') | |
# backend_config = TurbomindEngineConfig(model_name='internlm2-chat-7b', max_batch_size=1, cache_max_entry_count=0.05)#, model_format='awq') | |
# model_path = 'internlm/internlm2-math-7b' | |
# InterFace.async_engine = AsyncEngine( | |
# model_path=model_path, | |
# backend='turbomind', | |
# backend_config=backend_config, | |
# chat_template_config=chat_template, | |
# tp=1) | |
# async def reset_local_func(instruction_txtbox: gr.Textbox, | |
# state_chatbot: Sequence, session_id: int): | |
# """reset the session. | |
# Args: | |
# instruction_txtbox (str): user's prompt | |
# state_chatbot (Sequence): the chatting history | |
# session_id (int): the session id | |
# """ | |
# state_chatbot = [] | |
# # end the session | |
# with InterFace.lock: | |
# InterFace.global_session_id += 1 | |
# session_id = InterFace.global_session_id | |
# return (state_chatbot, state_chatbot, gr.Textbox.update(value=''), session_id) | |
# async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button, | |
# reset_btn: gr.Button, session_id: int): | |
# """stop the session. | |
# Args: | |
# instruction_txtbox (str): user's prompt | |
# state_chatbot (Sequence): the chatting history | |
# cancel_btn (gr.Button): the cancel button | |
# reset_btn (gr.Button): the reset button | |
# session_id (int): the session id | |
# """ | |
# yield (state_chatbot, disable_btn, disable_btn, session_id) | |
# InterFace.async_engine.stop_session(session_id) | |
# # pytorch backend does not support resume chat history now | |
# if InterFace.async_engine.backend == 'pytorch': | |
# yield (state_chatbot, disable_btn, enable_btn, session_id) | |
# else: | |
# with InterFace.lock: | |
# InterFace.global_session_id += 1 | |
# session_id = InterFace.global_session_id | |
# messages = [] | |
# for qa in state_chatbot: | |
# messages.append(dict(role='user', content=qa[0])) | |
# if qa[1] is not None: | |
# messages.append(dict(role='assistant', content=qa[1])) | |
# gen_config = GenerationConfig(max_new_tokens=0) | |
# async for out in InterFace.async_engine.generate(messages, | |
# session_id, | |
# gen_config=gen_config, | |
# stream_response=True, | |
# sequence_start=True, | |
# sequence_end=False): | |
# pass | |
# yield (state_chatbot, disable_btn, enable_btn, session_id) | |
# with gr.Blocks(css=CSS, theme=THEME) as demo: | |
# state_chatbot = gr.State([]) | |
# state_session_id = gr.State(0) | |
# with gr.Column(elem_id='container'): | |
# gr.Markdown('## LMDeploy Playground') | |
# gr.Markdown('[InternLM Math GitHub Page](https://github.com/InternLM/InternLM-Math)') | |
# chatbot = gr.Chatbot( | |
# elem_id='chatbot', | |
# label=InterFace.async_engine.engine.model_name) | |
# instruction_txtbox = gr.Textbox( | |
# placeholder='Please input the instruction', | |
# label='Instruction') | |
# with gr.Row(): | |
# cancel_btn = gr.Button(value='Cancel', interactive=False) | |
# reset_btn = gr.Button(value='Reset') | |
# with gr.Row(): | |
# request_output_len = gr.Slider(1, | |
# 1024, | |
# value=512, | |
# step=1, | |
# label='Maximum new tokens') | |
# top_p = gr.Slider(0.01, 1, value=1.0, step=0.01, label='Top_p') | |
# temperature = gr.Slider(0.01, | |
# 1.5, | |
# value=0.01, | |
# step=0.01, | |
# label='Temperature') | |
# send_event = instruction_txtbox.submit(chat_stream_local, [ | |
# instruction_txtbox, state_chatbot, cancel_btn, reset_btn, | |
# state_session_id, top_p, temperature, request_output_len | |
# ], [state_chatbot, chatbot, cancel_btn, reset_btn]) | |
# instruction_txtbox.submit( | |
# lambda: gr.Textbox.update(value=''), | |
# [], | |
# [instruction_txtbox], | |
# ) | |
# cancel_btn.click( | |
# cancel_local_func, | |
# [state_chatbot, cancel_btn, reset_btn, state_session_id], | |
# [state_chatbot, cancel_btn, reset_btn, state_session_id], | |
# cancels=[send_event]) | |
# reset_btn.click(reset_local_func, | |
# [instruction_txtbox, state_chatbot, state_session_id], | |
# [state_chatbot, chatbot, instruction_txtbox, state_session_id], | |
# cancels=[send_event]) | |
# def init(): | |
# with InterFace.lock: | |
# InterFace.global_session_id += 1 | |
# new_session_id = InterFace.global_session_id | |
# return new_session_id | |
# demo.load(init, inputs=None, outputs=[state_session_id]) | |
# # demo.queue(concurrency_count=InterFace.async_engine.instance_num, | |
# # max_size=100).launch() | |
# demo.queue(max_size=1000).launch(max_threads=InterFace.async_engine.instance_num) |