Spaces:

xzyun2011
/

wulewule

Running

App Files Files Community

wulewule / app.py

xzyun2011

solve env conflict

d05a9e7 6 months ago

raw

history blame contribute delete

9.25 kB

	import hydra
	from hydra.core.global_hydra import GlobalHydra
	from omegaconf import DictConfig, OmegaConf
	import streamlit as st
	from PIL import Image
	import os
	import sys
	sys.path.append(os.path.dirname(__file__))
	from download_models import download_model


	@st.cache_resource
	def load_simple_rag(config, used_lmdeploy=False):
	## load config
	data_source_dir = config["data_source_dir"]
	db_persist_directory = config["db_persist_directory"]
	llm_model = config["llm_model"]
	embeddings_model = config["embeddings_model"]
	reranker_model = config["reranker_model"]
	llm_system_prompt = config["llm_system_prompt"]
	rag_prompt_template = config["rag_prompt_template"]
	from rag.simple_rag import WuleRAG

	if not used_lmdeploy:
	from rag.simple_rag import InternLM, WuleRAG
	base_mode = InternLM(model_path=llm_model, llm_system_prompt=llm_system_prompt)
	else:
	from deploy.lmdeploy_model import LmdeployLM, GenerationConfig
	cache_max_entry_count = config.get("cache_max_entry_count", 0.2)
	base_mode = LmdeployLM(model_path=llm_model, llm_system_prompt=llm_system_prompt, cache_max_entry_count=cache_max_entry_count)

	## loda final rag model
	wulewule_rag = WuleRAG(data_source_dir, db_persist_directory, base_mode, embeddings_model, reranker_model, rag_prompt_template)
	return wulewule_rag


	@st.cache_resource
	def load_wulewule_agent(config):
	from agent.wulewule_agent import MultiModalAssistant, Settings
	use_remote = config["use_remote"]
	SiliconFlow_api = config["SiliconFlow_api"]
	data_source_dir = config["data_source_dir"]
	if len(SiliconFlow_api)<51 and os.environ.get('SiliconFlow_api', ""):
	SiliconFlow_api = os.environ.get('SiliconFlow_api')

	print(f"======= loading llm =======")
	if use_remote:
	from llama_index.llms.siliconflow import SiliconFlow
	from llama_index.embeddings.siliconflow import SiliconFlowEmbedding
	api_base_url = "https://api.siliconflow.cn/v1/chat/completions"
	# model = "Qwen/Qwen2.5-72B-Instruct"
	# model = "deepseek-ai/DeepSeek-V2.5"
	remote_llm = config["remote_llm"]
	remote_embeddings_model = config["remote_embeddings_model"]
	llm = SiliconFlow( model=remote_llm, base_url=api_base_url, api_key=SiliconFlow_api, max_tokens=4096)
	embed_model = SiliconFlowEmbedding( model=remote_embeddings_model, api_key=SiliconFlow_api)
	else:
	from llama_index.embeddings.huggingface import HuggingFaceEmbedding
	from llama_index.llms.huggingface import HuggingFaceLLM
	local_llm = config["llm_model"]
	local_embeddings_model = config["agent_embeddings_model"]
	llm = HuggingFaceLLM(
	model_name=local_llm,
	tokenizer_name=local_llm,
	model_kwargs={"trust_remote_code":True},
	tokenizer_kwargs={"trust_remote_code":True},
	# context_window=4096,
	# max_new_tokens=4096,
	)
	embed_model = HuggingFaceEmbedding(
	model_name=local_embeddings_model
	)
	# settings
	Settings.llm = llm
	Settings.embed_model = embed_model
	wulewule_assistant = MultiModalAssistant(data_source_dir, llm, SiliconFlow_api)
	print(f"======= finished loading ! =======")
	return wulewule_assistant


	GlobalHydra.instance().clear()
	@hydra.main(version_base=None, config_path="./configs", config_name="model_cfg")
	def main(cfg):
	# omegaconf.dictcfg.DictConfig 转换为普通字典
	config_dict = OmegaConf.to_container(cfg, resolve=True)

	## download model from modelscope
	if not config_dict["use_remote"] and not os.path.exists(config_dict["llm_model"]):
	download_model(llm_model_path =config_dict["llm_model"])

	## agent mode, used llama-index, rturn off lmdeloy and chroma rag
	if cfg.agent_mode:
	## load wulewule agent
	wulewule_assistant = load_wulewule_agent(config_dict)
	cfg.use_rag = False
	cfg.use_lmdepoly = False

	if cfg.use_rag:
	## load rag model
	wulewule_model = load_simple_rag(config_dict, used_lmdeploy=cfg.use_lmdepoly)
	elif ( cfg.use_lmdepoly):
	## load lmdeploy model
	from deploy.lmdeploy_model import load_turbomind_model, GenerationConfig
	wulewule_model = load_turbomind_model(config_dict["llm_model"], config_dict["llm_system_prompt"], config_dict["cache_max_entry_count"])

	## streamlit setting
	if "messages" not in st.session_state:
	st.session_state["messages"] = []
	# 在侧边栏中创建一个标题和一个链接
	with st.sidebar:
	st.markdown("## 悟了悟了💡")
	logo_path = "assets/sd_wulewule.webp"
	if os.path.exists(logo_path):
	image = Image.open(logo_path)
	st.image(image, caption='wulewule')
	"[InternLM](https://github.com/InternLM)"
	"[悟了悟了](https://github.com/xzyun2011/wulewule.git)"


	# 创建一个标题
	st.title("悟了悟了：黑神话悟空AI助手🐒")

	# 遍历session_state中的所有消息，并显示在聊天界面上
	for msg in st.session_state.messages:
	st.chat_message("user").write(msg["user"])
	assistant_res = msg["assistant"]
	if isinstance(assistant_res, str):
	st.chat_message("assistant").write(assistant_res)
	elif cfg.agent_mode and isinstance(assistant_res, dict):
	image_url = assistant_res["image_url"]
	audio_text = assistant_res["audio_text"]
	st.chat_message("assistant").write(assistant_res["response"])
	if image_url:
	# 使用st.image展示URL图像，并设置使用列宽
	st.image( image_url, width=256 )
	if audio_text:
	# 使用st.audio函数播放音频
	st.audio("audio.mp3")
	st.write(f"语音内容为: \n\n{audio_text}")

	# Get user input
	if prompt := st.chat_input("请输入你的问题，换行使用Shfit+Enter。"):
	# Display user input
	st.chat_message("user").write(prompt)
	## 初始化完整的回答字符串
	full_answer = ""
	if cfg.agent_mode:
	with st.chat_message('robot'):
	message_placeholder = st.empty()
	response_dict = wulewule_assistant.chat(prompt)
	image_url = response_dict["image_url"]
	audio_text = response_dict["audio_text"]
	for cur_response in response_dict["response"]:
	full_answer += cur_response
	# Display robot response in chat message container
	message_placeholder.markdown(full_answer + '▌')
	message_placeholder.markdown(full_answer)
	# 将问答结果添加到 session_state 的消息历史中
	st.session_state.messages.append({"user": prompt, "assistant": response_dict})
	if image_url:
	# 使用st.image展示URL图像，并设置使用列宽
	st.image( image_url, width=256 )

	if audio_text:
	# 使用st.audio函数播放音频
	st.audio("audio.mp3")
	st.write(f"语音内容为: \n\n{audio_text}")

	# 流式显示, used streaming result
	else:
	if cfg.stream_response:
	# rag
	with st.chat_message('robot'):
	message_placeholder = st.empty()
	if cfg.use_rag:
	for cur_response in wulewule_model.query_stream(prompt):
	full_answer += cur_response
	# Display robot response in chat message container
	message_placeholder.markdown(full_answer + '▌')
	elif cfg.use_lmdepoly:
	# gen_config = GenerationConfig(top_p=0.8,
	# top_k=40,
	# temperature=0.8,
	# max_new_tokens=2048,
	# repetition_penalty=1.05)
	messages = [{'role': 'user', 'content': f'{prompt}'}]
	for response in wulewule_model.stream_infer(messages):
	full_answer += response.text
	# Display robot response in chat message container
	message_placeholder.markdown(full_answer + '▌')

	message_placeholder.markdown(full_answer)
	# 一次性显示结果
	else:
	if cfg.use_lmdepoly:
	messages = [{'role': 'user', 'content': f'{prompt}'}]
	full_answer = wulewule_model(messages).text
	elif cfg.use_rag:
	full_answer = wulewule_model.query(prompt)
	# 显示回答
	st.chat_message("assistant").write(full_answer)

	# 将问答结果添加到 session_state 的消息历史中
	st.session_state.messages.append({"user": prompt, "assistant": full_answer})


	if __name__ == "__main__":
	main()