Rebase v3.0
- .gitignore +0 -1
- Dockerfile+ChatGLM +50 -0
- config.py +4 -1
- crazy_functional.py +6 -1
- crazy_functions/crazy_utils.py +6 -2
- crazy_functions/代码重写为全英文_多线程.py +1 -1
- crazy_functions/解析项目源代码.py +1 -1
- crazy_functions/询问多个大语言模型.py +28 -0
- main.py +5 -2
- request_llm/README.md +32 -14
- request_llm/bridge_all.py +135 -0
- request_llm/bridge_chatglm.py +83 -0
- request_llm/bridge_tgui.py +33 -29
- request_llm/requirements_chatglm.txt +6 -0
- requirements.txt +1 -1
- toolbox.py +3 -62
- version +2 -2
.gitignore
CHANGED
@@ -55,7 +55,6 @@ coverage.xml
 *.pot
 github
 .github
-.idea/
 TEMP
 TRASH
Dockerfile+ChatGLM
ADDED
@@ -0,0 +1,50 @@
+# How to build: docker build -t gpt-academic --network=host -f Dockerfile+ChatGLM .
+# How to run (1) directly: docker run --rm -it --net=host --gpus=all gpt-academic
+# How to run (2) enter the container first to make adjustments: docker run --rm -it --net=host --gpus=all gpt-academic bash
+
+# Build from the NVIDIA base image so the GPU can be used (the host's nvidia-smi must report CUDA >= 11.3)
+FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04
+ARG useProxyNetwork=''
+RUN apt-get update
+RUN apt-get install -y curl proxychains curl
+RUN apt-get install -y git python python3 python-dev python3-dev --fix-missing
+
+# Configure the proxy network (used while building the Docker image)
+# # comment out below if you do not need proxy network - delete from this line downward
+RUN $useProxyNetwork curl cip.cc
+RUN sed -i '$ d' /etc/proxychains.conf
+RUN sed -i '$ d' /etc/proxychains.conf
+RUN echo "socks5 127.0.0.1 10880" >> /etc/proxychains.conf
+ARG useProxyNetwork=proxychains
+# # comment out above if you do not need proxy network - delete from this line upward
+
+
+# use python3 as the system default python
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8
+
+# Clone the v3.0 branch
+WORKDIR /gpt
+RUN $useProxyNetwork git clone https://github.com/binary-husky/chatgpt_academic.git -b v3.0
+WORKDIR /gpt/chatgpt_academic
+RUN $useProxyNetwork python3 -m pip install -r requirements.txt
+RUN $useProxyNetwork python3 -m pip install -r request_llm/requirements_chatglm.txt
+RUN $useProxyNetwork python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu113
+
+# Warm up the ChatGLM weights (optional step)
+RUN echo ' \n\
+from transformers import AutoModel, AutoTokenizer \n\
+chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) \n\
+chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float() ' >> warm_up_chatglm.py
+RUN python3 -u warm_up_chatglm.py
+RUN $useProxyNetwork git pull
+
+# Configure the proxy and API key for chatgpt-academic (optional step)
+RUN echo ' \n\
+API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \n\
+USE_PROXY = True \n\
+LLM_MODEL = "chatglm" \n\
+LOCAL_MODEL_DEVICE = "cuda" \n\
+proxies = { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } ' >> config_private.py
+
+# Launch
+CMD ["python3", "-u", "main.py"]
config.py
CHANGED
@@ -45,7 +45,10 @@ WEB_PORT = -1
 MAX_RETRY = 2
 
 # OpenAI model selection (GPT-4 is currently only available to approved applicants)
-LLM_MODEL = "gpt-3.5-turbo"
+LLM_MODEL = "gpt-3.5-turbo" # alternatives: "chatglm", "tgui:anymodel@localhost:7865"
+
+# Execution device (CPU/GPU) for local LLMs such as ChatGLM
+LOCAL_MODEL_DEVICE = "cpu" # alternative: "cuda"
 
 # OpenAI API_URL
 API_URL = "https://api.openai.com/v1/chat/completions"
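For orientation, a minimal sketch of the matching config_private.py overrides; this is the file the Dockerfile above generates and that main.py recommends for keeping local secrets. The values below are placeholders, not defaults from the repo:

```python
# config_private.py - local overrides of config.py (placeholder values, adjust to your setup)
API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"   # only needed for the gpt-* models
LLM_MODEL = "chatglm"                                # or "gpt-3.5-turbo", or "tgui:<model>@<host>:<port>"
LOCAL_MODEL_DEVICE = "cuda"                          # run ChatGLM on GPU instead of the default "cpu"
```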
crazy_functional.py
CHANGED
@@ -16,15 +16,20 @@ def get_crazy_functions():
     from crazy_functions.高级功能函数模板 import 高阶功能模板函数
     from crazy_functions.代码重写为全英文_多线程 import 全项目切换英文
     from crazy_functions.Latex全文润色 import Latex英文润色
+    from crazy_functions.询问多个大语言模型 import 同时问询
     from crazy_functions.解析项目源代码 import 解析一个Lua项目
     function_plugins = {
-
+        "询问多个GPT模型": {
+            "Color": "stop",  # button color
+            "Function": HotReload(同时问询)
+        },
        "解析整个Python项目": {
            "Color": "stop",  # button color
            "Function": HotReload(解析一个Python项目)
        },
        "解析整个C++项目头文件": {
            "Color": "stop",  # button color
+           "AsButton": False,  # put into the dropdown menu instead of a button
            "Function": HotReload(解析一个C项目的头文件)
        },
        "解析整个C++项目(.cpp/.hpp/.c/.h)": {
crazy_functions/crazy_utils.py
CHANGED
@@ -61,7 +61,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
     """
     import time
     from concurrent.futures import ThreadPoolExecutor
-    from request_llm.
+    from request_llm.bridge_all import predict_no_ui_long_connection
     # user feedback
     chatbot.append([inputs_show_user, ""])
     yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
@@ -167,13 +167,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
     """
     import time, random
     from concurrent.futures import ThreadPoolExecutor
-    from request_llm.
+    from request_llm.bridge_all import predict_no_ui_long_connection
     assert len(inputs_array) == len(history_array)
     assert len(inputs_array) == len(sys_prompt_array)
     if max_workers == -1: # read the config file
         try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
         except: max_workers = 8
     if max_workers <= 0 or max_workers >= 20: max_workers = 8
+    # Disable multi-threading for chatglm; it can cause severe stalls
+    if not llm_kwargs['llm_model'].startswith('gpt-'):
+        max_workers = 1
+
     executor = ThreadPoolExecutor(max_workers=max_workers)
     n_frag = len(inputs_array)
     # user feedback
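The worker gating added above can be illustrated with a small self-contained sketch (the helper name is ours, not part of the repo): local models such as ChatGLM handle one request at a time, so fanning many threads at them only causes stalls, while the hosted gpt-* endpoints keep the configured worker count.

```python
# Hypothetical helper mirroring the rule added to crazy_utils.py above.
def choose_max_workers(llm_model: str, configured_workers: int = 8) -> int:
    # Only hosted gpt-* endpoints benefit from parallel requests; local models
    # (e.g. "chatglm", "tgui:...") are throttled to a single worker.
    if not llm_model.startswith('gpt-'):
        return 1
    return configured_workers

assert choose_max_workers('gpt-3.5-turbo') == 8
assert choose_max_workers('chatglm') == 1
```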
crazy_functions/代码重写为全英文_多线程.py
CHANGED
@@ -1,5 +1,5 @@
 import threading
-from request_llm.
+from request_llm.bridge_all import predict_no_ui_long_connection
 from toolbox import update_ui
 from toolbox import CatchException, write_results_to_file, report_execption
 from .crazy_utils import breakdown_txt_to_satisfy_token_limit
crazy_functions/解析项目源代码.py
CHANGED
@@ -12,7 +12,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
     sys_prompt_array = []
     report_part_1 = []
 
-    assert len(file_manifest) <=
+    assert len(file_manifest) <= 512, "源文件太多(超过512个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。"
     ############################## <Step 1: analyze each file, multi-threaded> ##################################
     for index, fp in enumerate(file_manifest):
         # read the file
crazy_functions/询问多个大语言模型.py
ADDED
@@ -0,0 +1,28 @@
+from toolbox import CatchException, update_ui
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+import datetime
+@CatchException
+def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    txt             text typed into the input box, e.g. a paragraph to translate or a path containing files to process
+    llm_kwargs      GPT model parameters such as temperature and top_p; usually passed through unchanged
+    plugin_kwargs   plugin parameters such as temperature and top_p; usually passed through unchanged
+    chatbot         handle of the chat display box, used to show output to the user
+    history         chat history (the context so far)
+    system_prompt   the silent system prompt for GPT
+    web_port        port the application is currently running on
+    """
+    history = []    # clear the history to avoid overflowing the input
+    chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……"))
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI; requesting GPT takes a while, so push a prompt update first
+
+    llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo'
+    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=txt, inputs_show_user=txt,
+        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
+        sys_prompt=system_prompt
+    )
+
+    history.append(txt)
+    history.append(gpt_say)
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
main.py
CHANGED
@@ -1,6 +1,6 @@
 import os; os.environ['no_proxy'] = '*' # avoid unexpected pollution from the proxy network
 import gradio as gr
-from request_llm.
+from request_llm.bridge_all import predict
 from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
 
 # It is recommended to keep your secrets (API key, proxy address) in a copy named config_private.py, so they are not accidentally pushed to GitHub
@@ -97,7 +97,10 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
 system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
 top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01, interactive=True, label="Top-p (nucleus sampling)",)
 temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
+max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="MaxLength",)
 checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
+md_dropdown = gr.Dropdown(["gpt-3.5-turbo", "chatglm"], value=LLM_MODEL, label="").style(container=False)
+
 gr.Markdown(description)
 with gr.Accordion("备选输入区", open=True, visible=False) as area_input_secondary:
     with gr.Row():
@@ -118,7 +121,7 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
     return ret
 checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2] )
 # group the widget handles that are used repeatedly
-input_combo = [cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt]
+input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt]
 output_combo = [cookies, chatbot, history, status]
 predict_args = dict(fn=ArgsGeneralWrapper(predict), inputs=input_combo, outputs=output_combo)
 # submit button, reset button
request_llm/README.md
CHANGED
@@ -1,35 +1,53 @@
 # How to use other large language models (v3.0 branch, in testing)
 
-##
-``` sh
-
-
-# 安装text-generation的额外依赖
-pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
-
-
-
+## ChatGLM
+
+- Install the dependencies: `pip install -r request_llm/requirements_chatglm.txt`
+- Edit the configuration: in config.py, set LLM_MODEL to "chatglm"
+``` sh
+LLM_MODEL = "chatglm"
+```
+- Run!
+``` sh
+python main.py
+```
+
+---
+## Text-Generation-UI (TGUI)
+
+### 1. Deploy TGUI
+``` sh
+# 1 Clone the repository
+git clone https://github.com/oobabooga/text-generation-webui.git
+# 2 The latest code in that repository has problems; roll back a few weeks
+git reset --hard fcda3f87767e642d1c0411776e549e1d3894843d
+# 3 Change into the directory
+cd text-generation-webui
+# 4 Install text-generation's extra dependencies
+pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
+# 5 Download a model
 python download-model.py facebook/galactica-1.3b
 # other options, e.g. facebook/opt-1.3b
+#                     facebook/galactica-1.3b
 #                     facebook/galactica-6.7b
 #                     facebook/galactica-120b
 #                     facebook/pygmalion-1.3b etc.
 # see https://github.com/oobabooga/text-generation-webui for details
 
-# 启动text-generation
-python server.py --cpu --listen --listen-port
+# 6 Launch text-generation
+python server.py --cpu --listen --listen-port 7865 --model facebook_galactica-1.3b
 ```
 
-
+### 2. Edit config.py
 ``` sh
-# LLM_MODEL
-LLM_MODEL = "
+# LLM_MODEL format: tgui:[model]@[ws address]:[ws port]; the port must match the one given above
+LLM_MODEL = "tgui:galactica-1.3b@localhost:7860"
 ```
 
-
+### 3. Run!
 ``` sh
 cd chatgpt-academic
 python main.py
request_llm/bridge_all.py
ADDED
@@ -0,0 +1,135 @@
+
+"""
+This file mainly contains two functions:
+
+Without multi-threading capability:
+    1. predict: used for normal conversation, with full interactive features; not thread-safe
+
+With multi-threading capability:
+    2. predict_no_ui_long_connection: during experiments we found that calling predict_no_ui on long documents
+       tends to drop the connection to OpenAI; this function works around that by streaming, and also supports multi-threaded calls
+"""
+
+from concurrent.futures import ThreadPoolExecutor
+
+from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui
+from .bridge_chatgpt import predict as chatgpt_ui
+
+from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
+from .bridge_chatglm import predict as chatglm_ui
+
+from .bridge_tgui import predict_no_ui_long_connection as tgui_noui
+from .bridge_tgui import predict as tgui_ui
+
+methods = {
+    "openai-no-ui": chatgpt_noui,
+    "openai-ui": chatgpt_ui,
+
+    "chatglm-no-ui": chatglm_noui,
+    "chatglm-ui": chatglm_ui,
+
+    "tgui-no-ui": tgui_noui,
+    "tgui-ui": tgui_ui,
+}
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
+    """
+    Send to the LLM and wait for the complete reply in one shot, without showing intermediate output.
+    Internally it streams so the connection is not cut off halfway.
+    inputs:
+        the input of this query
+    sys_prompt:
+        the silent system prompt
+    llm_kwargs:
+        internal tuning parameters of the LLM
+    history:
+        the list of previous dialogue turns
+    observe_window = None:
+        used to pass the partially generated output across threads; most of the time it only serves a fancy
+        visual effect and can be left empty. observe_window[0]: observation window. observe_window[1]: watchdog
+    """
+    import threading, time, copy
+
+    model = llm_kwargs['llm_model']
+    n_model = 1
+    if '&' not in model:
+        assert not model.startswith("tgui"), "TGUI不支持函数插件的实现"
+
+        # if only one LLM is being queried:
+        if model.startswith('gpt'):
+            method = methods['openai-no-ui']
+        elif model == 'chatglm':
+            method = methods['chatglm-no-ui']
+        elif model.startswith('tgui'):
+            method = methods['tgui-no-ui']
+        return method(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience)
+    else:
+        # if several LLMs are being queried at the same time:
+        executor = ThreadPoolExecutor(max_workers=16)
+        models = model.split('&')
+        n_model = len(models)
+
+        window_len = len(observe_window)
+        if window_len==0:
+            window_mutex = [[] for _ in range(n_model)] + [True]
+        elif window_len==1:
+            window_mutex = [[""] for _ in range(n_model)] + [True]
+        elif window_len==2:
+            window_mutex = [["", time.time()] for _ in range(n_model)] + [True]
+
+        futures = []
+        for i in range(n_model):
+            model = models[i]
+            if model.startswith('gpt'):
+                method = methods['openai-no-ui']
+            elif model == 'chatglm':
+                method = methods['chatglm-no-ui']
+            elif model.startswith('tgui'):
+                method = methods['tgui-no-ui']
+            llm_kwargs_feedin = copy.deepcopy(llm_kwargs)
+            llm_kwargs_feedin['llm_model'] = model
+            future = executor.submit(method, inputs, llm_kwargs_feedin, history, sys_prompt, window_mutex[i], console_slience)
+            futures.append(future)
+
+        def mutex_manager(window_mutex, observe_window):
+            while True:
+                time.sleep(0.2)
+                if not window_mutex[-1]: break
+                # watchdog
+                for i in range(n_model):
+                    window_mutex[i][1] = observe_window[1]
+                # observation window
+                chat_string = []
+                for i in range(n_model):
+                    chat_string.append( f"[{str(models[i])} 说]: {window_mutex[i][0]}" )
+                res = '\n\n---\n\n'.join(chat_string)
+                # # # # # # # # # # #
+                observe_window[0] = res
+
+        t_model = threading.Thread(target=mutex_manager, args=(window_mutex, observe_window), daemon=True)
+        t_model.start()
+
+        return_string_collect = []
+        for i, future in enumerate(futures):  # wait and get
+            return_string_collect.append( f"[{str(models[i])} 说]: {future.result()}" )
+        window_mutex[-1] = False  # stop mutex thread
+        res = '\n\n---\n\n'.join(return_string_collect)
+        return res
+
+
+def predict(inputs, llm_kwargs, *args, **kwargs):
+    """
+    Send to the LLM and stream the output, for the basic conversation feature.
+    inputs is the input of this query
+    top_p, temperature are internal tuning parameters of the LLM
+    history is the list of previous dialogue turns (note: if either inputs or history is too long, a token-overflow error is triggered)
+    chatbot is the dialogue list shown in the WebUI; modify it and yield it out to update the chat interface directly
+    additional_fn indicates which button was clicked; see functional.py for the buttons
+    """
+    if llm_kwargs['llm_model'].startswith('gpt'):
+        method = methods['openai-ui']
+    elif llm_kwargs['llm_model'] == 'chatglm':
+        method = methods['chatglm-ui']
+    elif llm_kwargs['llm_model'].startswith('tgui'):
+        method = methods['tgui-ui']
+
+    yield from method(inputs, llm_kwargs, *args, **kwargs)
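A usage sketch of the new dispatcher, assuming the ChatGPT and ChatGLM bridges are both available; the prompt, key, and threading scaffold are illustrative, not taken from the repo. An '&'-joined model name triggers the multi-model branch above, and the caller keeps refreshing observe_window[1] so the per-bridge watchdogs do not time out:

```python
import threading, time
from request_llm.bridge_all import predict_no_ui_long_connection

llm_kwargs = {
    'api_key': 'sk-...',                    # placeholder; only the gpt-* bridge needs it
    'llm_model': 'chatglm&gpt-3.5-turbo',   # '&' selects the multi-model path
    'top_p': 1.0, 'temperature': 1.0, 'max_length': 512,
}
observe_window = ["", time.time()]          # [0]: text so far, [1]: watchdog timestamp
result = {}

def worker():
    result['reply'] = predict_no_ui_long_connection(
        inputs="Introduce yourself in one sentence.", llm_kwargs=llm_kwargs,
        history=[], sys_prompt="", observe_window=observe_window)

t = threading.Thread(target=worker, daemon=True)
t.start()
while t.is_alive():
    observe_window[1] = time.time()   # feed the watchdog so the bridges keep streaming
    time.sleep(1)
# result['reply'] holds one answer per model, joined by "---" separators
```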
request_llm/bridge_chatglm.py
ADDED
@@ -0,0 +1,83 @@
+
+from transformers import AutoModel, AutoTokenizer
+import time
+import importlib
+from toolbox import update_ui, get_conf
+
+
+global chatglm_model, chatglm_tokenizer
+
+chatglm_model = None
+chatglm_tokenizer = None
+
+def model_loader():
+    global chatglm_model, chatglm_tokenizer
+    if chatglm_tokenizer is None:
+        chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+    if chatglm_model is None:  # not loaded yet
+        device, = get_conf('LOCAL_MODEL_DEVICE')
+        if device=='cpu':
+            chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
+        else:
+            chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
+        chatglm_model = chatglm_model.eval()
+    chatglm_model = chatglm_model.eval()
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
+    """
+    See request_llm/bridge_all.py for the documentation of this function
+    """
+    global chatglm_model, chatglm_tokenizer
+    if chatglm_model is None:
+        observe_window[0] = "ChatGLM尚未加载,加载需要一段时间 ……"
+
+    model_loader()
+    # chatglm has no sys_prompt interface, so fold the prompt into the history
+    history_feedin = []
+    for i in range(len(history)//2):
+        history_feedin.append(["What can I do?", sys_prompt] )
+        history_feedin.append([history[2*i], history[2*i+1]] )
+
+    watch_dog_patience = 5  # watchdog patience; 5 seconds is enough
+    response = ""
+    for response, history in chatglm_model.stream_chat(chatglm_tokenizer, inputs, history=history_feedin, max_length=llm_kwargs['max_length'],
+                                                       top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+        # observation window: expose the text generated so far
+        observe_window[0] = response
+        # watchdog: terminate if it has not been fed before the deadline
+        if len(observe_window) >= 2:
+            if (time.time()-observe_window[1]) > watch_dog_patience:
+                raise RuntimeError("程序终止。")
+        # if not console_slience:
+        #     print(response)
+    return response
+
+
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+    """
+    See request_llm/bridge_all.py for the documentation of this function
+    """
+    global chatglm_model, chatglm_tokenizer
+    chatbot.append((inputs, ""))
+    if chatglm_model is None:
+        chatbot[-1] = (inputs, "ChatGLM尚未加载,加载需要一段时间 ……")
+        yield from update_ui(chatbot=chatbot, history=[])
+    model_loader()
+
+    if additional_fn is not None:
+        import core_functional
+        importlib.reload(core_functional)    # hot-reload the prompts
+        core_functional = core_functional.get_core_functions()
+        if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs)  # apply the pre-processing function (if any)
+        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
+
+
+    history_feedin = []
+    for i in range(len(history)//2):
+        history_feedin.append(["What can I do?", system_prompt] )
+        history_feedin.append([history[2*i], history[2*i+1]] )
+
+    for response, history in chatglm_model.stream_chat(chatglm_tokenizer, inputs, history=history_feedin, max_length=llm_kwargs['max_length'],
+                                                       top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+        chatbot[-1] = (inputs, response)
+        yield from update_ui(chatbot=chatbot, history=history)
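A minimal sketch of calling this bridge directly, assuming the THUDM/chatglm-6b weights can be downloaded; the prompt and parameters are illustrative. Passing a one-element observe_window lets the bridge stream into slot 0 while skipping the watchdog check, which is convenient for a blocking one-off call:

```python
from request_llm.bridge_chatglm import predict_no_ui_long_connection

llm_kwargs = {'max_length': 512, 'top_p': 1.0, 'temperature': 1.0}
observe_window = [""]    # length 1: partial output lands in slot 0, the watchdog is skipped
answer = predict_no_ui_long_connection(
    inputs="你好", llm_kwargs=llm_kwargs, history=[],
    sys_prompt="", observe_window=observe_window)
print(answer)
```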
request_llm/bridge_tgui.py
CHANGED
@@ -13,23 +13,18 @@ import time
 import threading
 import importlib
 from toolbox import get_conf, update_ui
-LLM_MODEL, = get_conf('LLM_MODEL')
 
-# "TGUI:galactica-1.3b@localhost:7860"
-model_name, addr_port = LLM_MODEL.split('@')
-assert ':' in addr_port, "LLM_MODEL 格式不正确!" + LLM_MODEL
-addr, port = addr_port.split(':')
 
 def random_hash():
     letters = string.ascii_lowercase + string.digits
     return ''.join(random.choice(letters) for i in range(9))
 
-async def run(context, max_token=512):
+async def run(context, max_token, temperature, top_p, addr, port):
     params = {
         'max_new_tokens': max_token,
         'do_sample': True,
-        'temperature':
-        'top_p':
+        'temperature': temperature,
+        'top_p': top_p,
         'typical_p': 1,
         'repetition_penalty': 1.05,
         'encoder_repetition_penalty': 1.0,
@@ -90,7 +85,7 @@ async def run(context, max_token=512):
 
 
 
-def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt=
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
     """
     Send to chatGPT and stream the output.
     Used for the basic conversation feature.
@@ -108,18 +103,26 @@ def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt=
     inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
 
     raw_input = "What I would like to say is the following: " + inputs
-    logging.info(f'[raw_input] {raw_input}')
     history.extend([inputs, ""])
     chatbot.append([inputs, ""])
     yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # refresh the UI
 
-    prompt =
+    prompt = raw_input
     tgui_say = ""
 
+    model_name, addr_port = llm_kwargs['llm_model'].split('@')
+    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
+    addr, port = addr_port.split(':')
+
+
     mutable = ["", time.time()]
     def run_coorotine(mutable):
         async def get_result(mutable):
-
+            # "tgui:galactica-1.3b@localhost:7860"
+
+            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
+                                      temperature=llm_kwargs['temperature'],
+                                      top_p=llm_kwargs['top_p'], addr=addr, port=port):
                 print(response[len(mutable[0]):])
                 mutable[0] = response
                 if (time.time() - mutable[1]) > 3:
@@ -140,28 +143,29 @@ def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt=
     chatbot[-1] = (history[-2], history[-1])
     yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
 
-    logging.info(f'[response] {tgui_say}')
 
 
 
-def
+def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
     raw_input = "What I would like to say is the following: " + inputs
-    prompt =
+    prompt = raw_input
     tgui_say = ""
+    model_name, addr_port = llm_kwargs['llm_model'].split('@')
+    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
+    addr, port = addr_port.split(':')
+
+
+    def run_coorotine(observe_window):
+        async def get_result(observe_window):
+            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
+                                      temperature=llm_kwargs['temperature'],
+                                      top_p=llm_kwargs['top_p'], addr=addr, port=port):
+                print(response[len(observe_window[0]):])
+                observe_window[0] = response
+                if (time.time() - observe_window[1]) > 5:
                     print('exit when no listener')
                     break
-        asyncio.run(get_result(
-        thread_listen = threading.Thread(target=run_coorotine, args=(
+        asyncio.run(get_result(observe_window))
+    thread_listen = threading.Thread(target=run_coorotine, args=(observe_window,))
     thread_listen.start()
-
-    time.sleep(1)
-    mutable[1] = time.time()
-    tgui_say = mutable[0]
-    return tgui_say
+    return observe_window[0]
request_llm/requirements_chatglm.txt
ADDED
@@ -0,0 +1,6 @@
+protobuf
+transformers==4.27.1
+cpm_kernels
+torch>=1.10
+mdtex2html
+sentencepiece
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
-gradio
+gradio>=3.25.0
 tiktoken>=0.3.3
 requests[socks]
 transformers
toolbox.py
CHANGED
@@ -27,7 +27,7 @@ def ArgsGeneralWrapper(f):
     """
    Decorator that repacks the input arguments, changing their order and structure.
     """
-    def decorated(cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
+    def decorated(cookies, max_length, llm_model, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
         txt_passon = txt
         if txt == "" and txt2 != "": txt_passon = txt2
         # introduce a chatbot that carries cookies
@@ -37,8 +37,9 @@ def ArgsGeneralWrapper(f):
         })
         llm_kwargs = {
             'api_key': cookies['api_key'],
-            'llm_model':
+            'llm_model': llm_model,
             'top_p':top_p,
+            'max_length': max_length,
             'temperature':temperature,
         }
         plugin_kwargs = {
@@ -75,66 +76,6 @@ def get_reduce_token_percent(text):
     except:
         return 0.5, '不详'
 
-def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, llm_kwargs, history=[], sys_prompt='', long_connection=True):
-    """
-    * 此函数未来将被弃用(替代函数 request_gpt_model_in_new_thread_with_ui_alive 文件 chatgpt_academic/crazy_functions/crazy_utils)
-
-    调用简单的predict_no_ui接口,但是依然保留了些许界面心跳功能,当对话太长时,会自动采用二分法截断
-    i_say: 当前输入
-    i_say_show_user: 显示到对话界面上的当前输入,例如,输入整个文件时,你绝对不想把文件的内容都糊到对话界面上
-    chatbot: 对话界面句柄
-    top_p, temperature: gpt参数
-    history: gpt参数 对话历史
-    sys_prompt: gpt参数 sys_prompt
-    long_connection: 是否采用更稳定的连接方式(推荐)(已弃用)
-    """
-    import time
-    from request_llm.bridge_chatgpt import predict_no_ui_long_connection
-    from toolbox import get_conf
-    TIMEOUT_SECONDS, MAX_RETRY = get_conf('TIMEOUT_SECONDS', 'MAX_RETRY')
-    # 多线程的时候,需要一个mutable结构在不同线程之间传递信息
-    # list就是最简单的mutable结构,我们第一个位置放gpt输出,第二个位置传递报错信息
-    mutable = [None, '']
-    # multi-threading worker
-
-    def mt(i_say, history):
-        while True:
-            try:
-                mutable[0] = predict_no_ui_long_connection(
-                    inputs=i_say, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt)
-
-            except ConnectionAbortedError as token_exceeded_error:
-                # 尝试计算比例,尽可能多地保留文本
-                p_ratio, n_exceed = get_reduce_token_percent(
-                    str(token_exceeded_error))
-                if len(history) > 0:
-                    history = [his[int(len(his) * p_ratio):]
-                               for his in history if his is not None]
-                else:
-                    i_say = i_say[: int(len(i_say) * p_ratio)]
-                mutable[1] = f'警告,文本过长将进行截断,Token溢出数:{n_exceed},截断比例:{(1-p_ratio):.0%}。'
-            except TimeoutError as e:
-                mutable[0] = '[Local Message] 请求超时。'
-                raise TimeoutError
-            except Exception as e:
-                mutable[0] = f'[Local Message] 异常:{str(e)}.'
-                raise RuntimeError(f'[Local Message] 异常:{str(e)}.')
-    # 创建新线程发出http请求
-    thread_name = threading.Thread(target=mt, args=(i_say, history))
-    thread_name.start()
-    # 原来的线程则负责持续更新UI,实现一个超时倒计时,并等待新线程的任务完成
-    cnt = 0
-    while thread_name.is_alive():
-        cnt += 1
-        chatbot[-1] = (i_say_show_user,
-                       f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS*2*(MAX_RETRY+1)}"+''.join(['.']*(cnt % 4)))
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        time.sleep(1)
-    # 把gpt的输出从mutable中取出来
-    gpt_say = mutable[0]
-    if gpt_say == '[Local Message] Failed with timeout.':
-        raise TimeoutError
-    return gpt_say
 
 
 def write_results_to_file(history, file_name=None):
version
CHANGED
@@ -1,5 +1,5 @@
 {
-    "version":
+    "version": 3.0,
     "show_feature": true,
-    "new_feature": "
+    "new_feature": "支持ChatGLM <-> 支持多LLM模型同时对话"
 }