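"""Streamlit chat app for 悟了悟了 (wulewule), an AI assistant for Black Myth: Wukong.

Configuration is managed with Hydra (configs/model_cfg.yaml). Depending on the config,
answers come from a RAG pipeline (WuleRAG) or a plain LLM backend, optionally served
with LMDeploy, and responses can be streamed or returned in one shot.

Typically launched with `streamlit run` on this file (the exact entry-point name is
project-specific and not shown here).
"""
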
import hydra
from hydra.core.global_hydra import GlobalHydra
from omegaconf import DictConfig, OmegaConf
import streamlit as st
from PIL import Image
import os
import sys
sys.path.append(os.path.dirname(__file__))
import torch
from download_models import download_model


@st.cache_resource
def load_simple_rag(config, used_lmdeploy=False):
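    """Build and cache the WuleRAG pipeline described by the config.

    `config` is a plain dict converted from the Hydra DictConfig. When
    `used_lmdeploy` is True the base LLM is served through LMDeploy (LmdeployLM);
    otherwise the HuggingFace InternLM wrapper is used. `@st.cache_resource`
    ensures the models are loaded only once across Streamlit reruns.
    """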
    ## unpack config values
    data_source_dir = config["data_source_dir"]
    db_persist_directory = config["db_persist_directory"]
    llm_model = config["llm_model"]
    embeddings_model = config["embeddings_model"]
    reranker_model = config["reranker_model"]
    llm_system_prompt = config["llm_system_prompt"]
    rag_prompt_template = config["rag_prompt_template"]
    from rag.simple_rag import WuleRAG

    if not used_lmdeploy:
        from rag.simple_rag import InternLM
        base_model = InternLM(model_path=llm_model, llm_system_prompt=llm_system_prompt)
    else:
        from deploy.lmdeploy_model import LmdeployLM
        cache_max_entry_count = config.get("cache_max_entry_count", 0.2)
        base_model = LmdeployLM(model_path=llm_model, llm_system_prompt=llm_system_prompt, cache_max_entry_count=cache_max_entry_count)

    ## load the assembled RAG pipeline
    wulewule_rag = WuleRAG(data_source_dir, db_persist_directory, base_model, embeddings_model, reranker_model, rag_prompt_template)
    return wulewule_rag

GlobalHydra.instance().clear()
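# Keys expected in configs/model_cfg.yaml, based on how the config is read below.
# The values shown are illustrative placeholders, not the project's actual settings:
#
#   llm_model: ./models/internlm2-chat-7b     # local path; downloaded from ModelScope if missing
#   embeddings_model: ./models/embeddings     # placeholder path
#   reranker_model: ./models/reranker         # placeholder path
#   data_source_dir: ./data
#   db_persist_directory: ./chroma_db
#   llm_system_prompt: "..."
#   rag_prompt_template: "..."
#   cache_max_entry_count: 0.2                # LMDeploy KV-cache memory ratio
#   use_rag: true
#   use_lmdepoly: false                       # key is spelled this way throughout the code
#   stream_response: true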
@hydra.main(version_base=None, config_path="./configs", config_name="model_cfg")
def main(cfg: DictConfig):
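    """Hydra entry point: load the configured model backend and run the Streamlit chat UI."""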
    # convert the omegaconf DictConfig into a plain dict
    config_dict = OmegaConf.to_container(cfg, resolve=True)

    ## download model from modelscope if it is not already present locally
    if not os.path.exists(config_dict["llm_model"]):
        download_model(llm_model_path=config_dict["llm_model"])

    if cfg.use_rag:
        ## load rag model
        wulewule_model = load_simple_rag(config_dict, used_lmdeploy=cfg.use_lmdepoly)
    elif cfg.use_lmdepoly:
        ## load lmdeploy model
        from deploy.lmdeploy_model import load_turbomind_model, GenerationConfig
        wulewule_model = load_turbomind_model(config_dict["llm_model"], config_dict["llm_system_prompt"], config_dict["cache_max_entry_count"])
    else:
        # neither backend selected; fail early instead of hitting a NameError later
        raise ValueError("Config must enable either use_rag or use_lmdepoly.")
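    # Interface assumed of the loaded backend (inferred from its usage below):
    #   RAG (WuleRAG):        .query(prompt) -> str, .query_stream(prompt) -> iterator of str chunks
    #   LMDeploy (turbomind): model(messages) -> response with .text, .stream_infer(messages) -> iterator of responses with .text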

    ## streamlit setting
    if "messages" not in st.session_state:
        st.session_state["messages"] = []
    # sidebar: title, logo, and project links
    with st.sidebar:
        st.markdown("## 悟了悟了💡")
        logo_path = "assets/sd_wulewule.webp"
        if os.path.exists(logo_path):
            image = Image.open(logo_path)
            st.image(image, caption='wulewule')
        "[InternLM](https://github.com/InternLM)"
        "[悟了悟了](https://github.com/xzyun2011/wulewule.git)"


    # page title
    st.title("悟了悟了:黑神话悟空AI助手🐒")

    # replay previous conversation turns stored in session_state
    for msg in st.session_state.messages:
        st.chat_message("user").write(msg["user"])
        st.chat_message("assistant").write(msg["assistant"])

    # Get user input
    if prompt := st.chat_input("请输入你的问题,换行使用Shift+Enter。"):
        # Display user input
        st.chat_message("user").write(prompt)
        # stream the response incrementally
        if cfg.stream_response:
            # accumulate streamed answer chunks into one string
            full_answer = ""
            with st.chat_message("assistant"):
                message_placeholder = st.empty()
                if cfg.use_rag:
                    for cur_response in wulewule_model.query_stream(prompt):
                        full_answer += cur_response
                        # Display robot response in chat message container
                        message_placeholder.markdown(full_answer + '▌')
                elif cfg.use_lmdepoly:
                    # gen_config = GenerationConfig(top_p=0.8,
                    #             top_k=40,
                    #             temperature=0.8,
                    #             max_new_tokens=2048,
                    #             repetition_penalty=1.05)
                    messages = [{'role': 'user', 'content': prompt}]
                    for response in wulewule_model.stream_infer(messages):
                        full_answer += response.text
                        # Display robot response in chat message container
                        message_placeholder.markdown(full_answer + '▌')

                message_placeholder.markdown(full_answer)
        # show the full answer in one shot
        else:
            if cfg.use_lmdepoly:
                messages = [{'role': 'user', 'content': prompt}]
                full_answer = wulewule_model(messages).text
            elif cfg.use_rag:
                full_answer = wulewule_model.query(prompt)
            # display the answer
            st.chat_message("assistant").write(full_answer)

        # append this Q&A turn to the session message history
        st.session_state.messages.append({"user": prompt, "assistant": full_answer})
        torch.cuda.empty_cache()


if __name__ == "__main__":
    main()