提升稳定性 (Improve stability)
- crazy_functional.py +1 -1
- crazy_functions/Latex全文润色.py +70 -0
- crazy_functions/crazy_utils.py +225 -24
- crazy_functions/代码重写为全英文_多线程.py +4 -5
- crazy_functions/批量翻译PDF文档_多线程.py +2 -1
- crazy_functions/解析项目源代码.py +83 -79
- objdump.tmp +0 -0
- request_llm/bridge_chatgpt.py +3 -4
- toolbox.py +2 -0
- version +2 -2
crazy_functional.py
CHANGED
````diff
@@ -29,7 +29,7 @@ def get_crazy_functions():
         "Color": "stop",    # 按钮颜色
         "Function": HotReload(解析一个C项目的头文件)
     },
-    "解析整个C++项目(.cpp/.h)": {
+    "解析整个C++项目(.cpp/.hpp/.c/.h)": {
         "Color": "stop",    # 按钮颜色
         "AsButton": False,  # 加入下拉菜单中
         "Function": HotReload(解析一个C项目)
````
crazy_functions/Latex全文润色.py
ADDED
````diff
@@ -0,0 +1,70 @@
+from request_llm.bridge_chatgpt import predict_no_ui
+from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
+fast_debug = False
+
+
+def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
+    import time, glob, os
+    print('begin analysis on:', file_manifest)
+    for index, fp in enumerate(file_manifest):
+        with open(fp, 'r', encoding='utf-8') as f:
+            file_content = f.read()
+
+        prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
+        i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
+        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
+        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
+        print('[1] yield chatbot, history')
+        yield chatbot, history, '正常'
+
+        if not fast_debug:
+            msg = '正常'
+            # ** gpt request **
+            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])  # 带超时倒计时
+
+            print('[2] end gpt req')
+            chatbot[-1] = (i_say_show_user, gpt_say)
+            history.append(i_say_show_user); history.append(gpt_say)
+            print('[3] yield chatbot, history')
+            yield chatbot, history, msg
+            print('[4] next')
+            if not fast_debug: time.sleep(2)
+
+    all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)])
+    i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
+    chatbot.append((i_say, "[Local Message] waiting gpt response."))
+    yield chatbot, history, '正常'
+
+    if not fast_debug:
+        msg = '正常'
+        # ** gpt request **
+        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history)  # 带超时倒计时
+
+        chatbot[-1] = (i_say, gpt_say)
+        history.append(i_say); history.append(gpt_say)
+        yield chatbot, history, msg
+        res = write_results_to_file(history)
+        chatbot.append(("完成了吗?", res))
+        yield chatbot, history, msg
+
+
+
+@CatchException
+def 读文章写摘要(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
+    history = []    # 清空历史,以免输入溢出
+    import glob, os
+    if os.path.exists(txt):
+        project_folder = txt
+    else:
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        yield chatbot, history, '正常'
+        return
+    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] # + \
+                    # [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
+                    # [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
+    if len(file_manifest) == 0:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
+        yield chatbot, history, '正常'
+        return
+    yield from 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
````
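The new plugin follows the project's generator convention: every plugin yields `(chatbot, history, status)` after each step so Gradio can repaint while long requests run. A minimal sketch of driving such a generator outside the UI (the `FakeChatbot` class and the folder path are hypothetical stand-ins, not part of this commit):

```python
# Minimal driver for a generator-style plugin such as 读文章写摘要.
class FakeChatbot(list):
    """A list of (user_message, bot_message) pairs, like Gradio's Chatbot state."""

def drive_plugin(plugin_gen):
    # Consume every UI refresh the plugin yields; the last yield holds the final state.
    for chatbot, history, status in plugin_gen:
        print(f"[{status}] {len(chatbot)} messages so far")

# drive_plugin(读文章写摘要('/path/to/tex/project', top_p=1.0, temperature=1.0,
#                          chatbot=FakeChatbot(), history=[],
#                          systemPromptTxt='', WEB_PORT=None))
```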
crazy_functions/crazy_utils.py
CHANGED
````diff
@@ -1,19 +1,115 @@
 import traceback
+from toolbox import update_ui
 
+def input_clipping(inputs, history, max_token_limit):
+    import tiktoken
+    import numpy as np
+    from toolbox import get_conf
+    enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
+    def get_token_num(txt): return len(enc.encode(txt))
+
+    mode = 'input-and-history'
+    # 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
+    input_token_num = get_token_num(inputs)
+    if input_token_num < max_token_limit//2:
+        mode = 'only-history'
+        max_token_limit = max_token_limit - input_token_num
+
+    everything = [inputs] if mode == 'input-and-history' else ['']
+    everything.extend(history)
+    n_token = get_token_num('\n'.join(everything))
+    everything_token = [get_token_num(e) for e in everything]
+    delta = max(everything_token) // 16  # 截断时的颗粒度
+
+    while n_token > max_token_limit:
+        where = np.argmax(everything_token)
+        encoded = enc.encode(everything[where])
+        clipped_encoded = encoded[:len(encoded)-delta]
+        everything[where] = enc.decode(clipped_encoded)[:-1]  # -1 to remove the may-be illegal char
+        everything_token[where] = get_token_num(everything[where])
+        n_token = get_token_num('\n'.join(everything))
+
+    if mode == 'input-and-history':
+        inputs = everything[0]
+    else:
+        pass
+    history = everything[1:]
+    return inputs, history
+
````
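`input_clipping` is the heart of the new token-overflow handling: it repeatedly trims `delta` tokens off whichever entry is currently largest until input plus history fit the budget, clipping only the history when the input itself is small. A standalone sketch of the same greedy loop, assuming a fixed `gpt-3.5-turbo` encoder instead of the configured `LLM_MODEL`:

```python
import tiktoken
import numpy as np

enc = tiktoken.encoding_for_model("gpt-3.5-turbo")  # assumption: fixed model name
def n_tokens(txt): return len(enc.encode(txt))

history = ["short turn", "a very long turn " * 400, "another turn"]
limit = 512
sizes = [n_tokens(h) for h in history]
delta = max(sizes) // 16                  # clipping granularity
while sum(sizes) > limit:                 # approximate total, as in the real helper
    where = int(np.argmax(sizes))         # always cut the biggest entry
    ids = enc.encode(history[where])
    history[where] = enc.decode(ids[:len(ids) - delta])
    sizes[where] = n_tokens(history[where])
print(sum(sizes), "tokens after clipping")
```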
````diff
+def request_gpt_model_in_new_thread_with_ui_alive(
+        inputs, inputs_show_user, top_p, temperature,
+        chatbot, history, sys_prompt, refresh_interval=0.2,
+        handle_token_exceed=True,
+        retry_times_at_unknown_error=2,
+        ):
+    """
+    Request GPT model,请求GPT模型同时维持用户界面活跃。
+
+    输入参数 Args (以_array结尾的输入变量都是列表,列表长度为子任务的数量,执行时,会把列表拆解,放到每个子线程中分别执行):
+        inputs (string): List of inputs (输入)
+        inputs_show_user (string): List of inputs to show user(展现在报告中的输入,借助此参数,在汇总报告中隐藏啰嗦的真实输入,增强报告的可读性)
+        top_p (float): Top p value for sampling from model distribution (GPT参数,浮点数)
+        temperature (float): Temperature value for sampling from model distribution(GPT参数,浮点数)
+        chatbot: chatbot inputs and outputs (用户界面对话窗口句柄,用于数据流可视化)
+        history (list): List of chat history (历史,对话历史列表)
+        sys_prompt (string): List of system prompts (系统输入,列表,用于输入给GPT的前提提示,比如你是翻译官怎样怎样)
+        refresh_interval (float, optional): Refresh interval for UI (default: 0.2) (刷新时间间隔频率,建议低于1,不可高于3,仅仅服务于视觉效果)
+        handle_token_exceed:是否自动处理token溢出的情况,如果选择自动处理,则会在溢出时暴力截断,默认开启
+        retry_times_at_unknown_error:失败时的重试次数
+
+    输出 Returns:
+        future: 输出,GPT返回的结果
+    """
     import time
     from concurrent.futures import ThreadPoolExecutor
     from request_llm.bridge_chatgpt import predict_no_ui_long_connection
     # 用户反馈
     chatbot.append([inputs_show_user, ""])
     msg = '正常'
-    yield chatbot, []
+    yield from update_ui(chatbot=chatbot, history=[])
     executor = ThreadPoolExecutor(max_workers=16)
     mutable = ["", time.time()]
+    def _req_gpt(inputs, history, sys_prompt):
+        retry_op = retry_times_at_unknown_error
+        exceeded_cnt = 0
+        while True:
+            try:
+                # 【第一种情况】:顺利完成
+                result = predict_no_ui_long_connection(
+                    inputs=inputs, top_p=top_p, temperature=temperature,
+                    history=history, sys_prompt=sys_prompt, observe_window=mutable)
+                return result
+            except ConnectionAbortedError as token_exceeded_error:
+                # 【第二种情况】:Token溢出,
+                if handle_token_exceed:
+                    exceeded_cnt += 1
+                    # 【选择处理】 尝试计算比例,尽可能多地保留文本
+                    from toolbox import get_reduce_token_percent
+                    p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
+                    MAX_TOKEN = 4096
+                    EXCEED_ALLO = 512 + 512 * exceeded_cnt
+                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
+                    mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
+                    continue  # 返回重试
+                else:
+                    # 【选择放弃】
+                    tb_str = '```\n' + traceback.format_exc() + '```'
+                    mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
+                    return mutable[0]  # 放弃
+            except:
+                # 【第三种情况】:其他错误
+                tb_str = '```\n' + traceback.format_exc() + '```'
+                mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
+                if retry_op > 0:
+                    retry_op -= 1
+                    mutable[0] += f"[Local Message] 重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}:\n\n"
+                    time.sleep(5)
+                    continue  # 返回重试
+                else:
+                    time.sleep(5)
+                    return mutable[0]  # 放弃
+
+    future = executor.submit(_req_gpt, inputs, history, sys_prompt)
     while True:
         # yield一次以刷新前端页面
         time.sleep(refresh_interval)
````
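`request_gpt_model_in_new_thread_with_ui_alive` moves the blocking request into a worker thread and keeps yielding so the page stays responsive; the shared `mutable` list is the channel between the two (slot 0: streamed text, slot 1: watchdog timestamp). A stripped-down sketch of that polling pattern, with a hypothetical `slow_worker` standing in for `predict_no_ui_long_connection`:

```python
import time
from concurrent.futures import ThreadPoolExecutor

# Shared observe window: [partial_text, last_feed_time]
mutable = ["", time.time()]

def slow_worker(observe_window):
    # Hypothetical stand-in for predict_no_ui_long_connection.
    for chunk in ["Hello", ", ", "world", "!"]:
        time.sleep(0.5)
        observe_window[0] += chunk       # stream partial output to the UI thread
    return observe_window[0]

executor = ThreadPoolExecutor(max_workers=1)
future = executor.submit(slow_worker, mutable)
while not future.done():
    time.sleep(0.2)                      # refresh_interval
    mutable[1] = time.time()             # feed the watchdog
    print("UI refresh:", repr(mutable[0]))
print("final:", future.result())
```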
````diff
@@ -27,8 +123,42 @@ def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_
     return future.result()
 
 
-def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+        inputs_array, inputs_show_user_array, top_p, temperature,
+        chatbot, history_array, sys_prompt_array,
+        refresh_interval=0.2, max_workers=10, scroller_max_len=30,
+        handle_token_exceed=True, show_user_at_complete=False,
+        retry_times_at_unknown_error=2,
+        ):
+    """
+    Request GPT model using multiple threads with UI and high efficiency
+    请求GPT模型的[多线程]版。
+    具备以下功能:
+        实时在UI上反馈远程数据流
+        使用线程池,可调节线程池的大小避免openai的流量限制错误
+        处理中途中止的情况
+        网络等出问题时,会把traceback和已经接收的数据转入输出
+
+    输入参数 Args (以_array结尾的输入变量都是列表,列表长度为子任务的数量,执行时,会把列表拆解,放到每个子线程中分别执行):
+        inputs_array (list): List of inputs (每个子任务的输入)
+        inputs_show_user_array (list): List of inputs to show user(每个子任务展现在报告中的输入,借助此参数,在汇总报告中隐藏啰嗦的真实输入,增强报告的可读性)
+        top_p (float): Top p value for sampling from model distribution (GPT参数,浮点数)
+        temperature (float): Temperature value for sampling from model distribution(GPT参数,浮点数)
+        chatbot: chatbot (用户界面对话窗口句柄,用于数据流可视化)
+        history_array (list): List of chat history (历史对话输入,双层列表,第一层列表是子任务分解,第二层列表是对话历史)
+        sys_prompt_array (list): List of system prompts (系统输入,列表,用于输入给GPT的前提提示,比如你是翻译官怎样怎样)
+        refresh_interval (float, optional): Refresh interval for UI (default: 0.2) (刷新时间间隔频率,建议低于1,不可高于3,仅仅服务于视觉效果)
+        max_workers (int, optional): Maximum number of threads (default: 10) (最大线程数,如果子任务非常多,需要用此选项防止高频地请求openai导致错误)
+        scroller_max_len (int, optional): Maximum length for scroller (default: 30)(数据流的显示最后收到的多少个字符,仅仅服务于视觉效果)
+        handle_token_exceed (bool, optional): (是否在输入过长时,自动缩减文本)
+        handle_token_exceed:是否自动处理token溢出的情况,如果选择自动处理,则会在溢出时暴力截断,默认开启
+        show_user_at_complete (bool, optional): (在结束时,把完整输入-输出结果显示在聊天框)
+        retry_times_at_unknown_error:子任务失败时的重试次数
+
+    输出 Returns:
+        list: List of GPT model responses (每个子任务的输出汇总,如果某个子任务出错,response中会携带traceback报错信息,方便调试和定位问题。)
+    """
+    import time, random
     from concurrent.futures import ThreadPoolExecutor
     from request_llm.bridge_chatgpt import predict_no_ui_long_connection
     assert len(inputs_array) == len(history_array)
@@ -40,20 +170,61 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
     msg = '正常'
     yield chatbot, [], msg
     # 异步原子
-    mutable = [["", time.time()] for _ in range(n_frag)]
+    mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]
 
     def _req_gpt(index, inputs, history, sys_prompt):
+        gpt_say = ""
+        retry_op = retry_times_at_unknown_error
+        exceeded_cnt = 0
+        mutable[index][2] = "执行中"
+        while True:
+            try:
+                # 【第一种情况】:顺利完成
+                # time.sleep(10); raise RuntimeError("测试")
+                gpt_say = predict_no_ui_long_connection(
+                    inputs=inputs, top_p=top_p, temperature=temperature, history=history,
+                    sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
+                )
+                mutable[index][2] = "已成功"
+                return gpt_say
+            except ConnectionAbortedError as token_exceeded_error:
+                # 【第二种情况】:Token溢出,
+                if handle_token_exceed:
+                    exceeded_cnt += 1
+                    # 【选择处理】 尝试计算比例,尽可能多地保留文本
+                    from toolbox import get_reduce_token_percent
+                    p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
+                    MAX_TOKEN = 4096
+                    EXCEED_ALLO = 512 + 512 * exceeded_cnt
+                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
+                    gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
+                    mutable[index][2] = f"截断重试"
+                    continue  # 返回重试
+                else:
+                    # 【选择放弃】
+                    tb_str = '```\n' + traceback.format_exc() + '```'
+                    gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
+                    if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
+                    mutable[index][2] = "输入过长已放弃"
+                    return gpt_say  # 放弃
+            except:
+                # 【第三种情况】:其他错误
+                tb_str = '```\n' + traceback.format_exc() + '```'
+                gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
+                if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
+                if retry_op > 0:
+                    retry_op -= 1
+                    wait = random.randint(5, 20)
+                    for i in range(wait):  # 也许等待十几秒后,情况会好转
+                        mutable[index][2] = f"等待重试 {wait-i}"; time.sleep(1)
+                    mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
+                    continue  # 返回重试
+                else:
+                    mutable[index][2] = "已失败"
+                    wait = 5
+                    time.sleep(5)
+                    return gpt_say  # 放弃
+
     # 异步任务开始
     futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
         range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
@@ -68,6 +239,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
             break
         # 更好的UI视觉效果
         observe_win = []
+        # print([mutable[thread_index][2] for thread_index, _ in enumerate(worker_done)])
        # 每个线程都要“喂狗”(看门狗)
        for thread_index, _ in enumerate(worker_done):
            mutable[thread_index][1] = time.time()
@@ -77,10 +249,10 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
                replace('\n', '').replace('```', '...').replace(
                ' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
            observe_win.append(print_something_really_funny)
-        stat_str = ''.join([f'
+        stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
+                            if not done else f'`{mutable[thread_index][2]}`\n\n'
+                            for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
+        chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
         msg = "正常"
         yield chatbot, [], msg
     # 异步任务结束
@@ -88,9 +260,38 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
     for inputs_show_user, f in zip(inputs_show_user_array, futures):
         gpt_res = f.result()
         gpt_response_collection.extend([inputs_show_user, gpt_res])
+
+    if show_user_at_complete:
+        for inputs_show_user, f in zip(inputs_show_user_array, futures):
+            gpt_res = f.result()
+            chatbot.append([inputs_show_user, gpt_res])
+            yield chatbot, [], msg
+            time.sleep(1)
     return gpt_response_collection
````
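A hedged sketch of how a plugin would call the multi-threaded helper: the `_array` arguments are parallel lists, one entry per sub-task, and the return value interleaves shown inputs and answers. The fragment texts and the wrapper function name here are made up for illustration:

```python
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

def 解析多文件示例(chatbot, top_p, temperature):
    # Sketch: runs inside a generator plugin; fragment texts are hypothetical.
    fragments = ["def foo(): ...", "def bar(): ..."]
    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=[f"请概述这段代码:\n{frag}" for frag in fragments],
        inputs_show_user_array=[f"概述代码片段 {i}" for i, _ in enumerate(fragments)],
        history_array=[[] for _ in fragments],        # each sub-task starts with empty history
        sys_prompt_array=["你是一个程序架构分析师。"] * len(fragments),
        top_p=top_p, temperature=temperature, chatbot=chatbot,
        show_user_at_complete=True,
    )
    # Result interleaves [shown_input_1, answer_1, shown_input_2, answer_2, ...]
    return gpt_response_collection
```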
````diff
+
+
+def WithRetry(f):
+    """
+    装饰器函数,用于自动重试。
+    """
+    def decorated(retry, res_when_fail, *args, **kwargs):
+        assert retry >= 0
+        while True:
+            try:
+                res = yield from f(*args, **kwargs)
+                return res
+            except:
+                retry -= 1
+                if retry<0:
+                    print("达到最大重试次数")
+                    break
+                else:
+                    print("重试中……")
+                    continue
+        return res_when_fail
+    return decorated
````
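`WithRetry` re-runs a wrapped generator on any exception, and the wrapper takes two extra leading arguments: the retry budget and the fallback result. A hypothetical usage sketch (names invented for illustration):

```python
from crazy_functions.crazy_utils import WithRetry

def flaky_plugin(x):
    # Hypothetical generator plugin that sometimes fails.
    yield f"working on {x}"
    if x < 0: raise ValueError("bad input")
    return x * 2

def caller():
    safe_plugin = WithRetry(flaky_plugin)
    # The wrapper consumes two extra leading args: retry budget and fallback value.
    result = yield from safe_plugin(2, None, 21)   # up to 3 runs, else returns None
    print(result)                                  # 42 on success
```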
````diff
+
+
 def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
     def cut(txt_tocut, must_break_at_empty_line):  # 递归
         if get_token_fn(txt_tocut) <= limit:
````
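`breakdown_txt_to_satisfy_token_limit` (only its opening lines are visible in this hunk) recursively cuts a text until every fragment passes `get_token_fn(...) <= limit`. A simplified sketch of such a recursion, preferring paragraph boundaries; this illustrates the idea and is not the repository's exact algorithm:

```python
def breakdown_to_limit(txt, get_token_fn, limit):
    # Simplified recursive splitter; an illustration, not the repo's exact algorithm.
    if get_token_fn(txt) <= limit or len(txt) < 2:
        return [txt]
    paragraphs = txt.split('\n\n')
    if len(paragraphs) > 1:
        mid = len(paragraphs) // 2                   # prefer paragraph boundaries
        left, right = '\n\n'.join(paragraphs[:mid]), '\n\n'.join(paragraphs[mid:])
    else:
        mid = len(txt) // 2                          # fall back to a hard split
        left, right = txt[:mid], txt[mid:]
    return breakdown_to_limit(left, get_token_fn, limit) + \
           breakdown_to_limit(right, get_token_fn, limit)

# usage: fragments = breakdown_to_limit(long_text, get_token_fn, 1024)
```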
crazy_functions/代码重写为全英文_多线程.py
CHANGED
````diff
@@ -58,11 +58,10 @@ def 全项目切换英文(txt, top_p, temperature, chatbot, history, sys_prompt,
 
     # 第5步:Token限制下的截断与处理
     MAX_TOKEN = 3000
-    get_token_fn
-    print('加载tokenizer结束')
+    import tiktoken
+    from toolbox import get_conf
+    enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
+    def get_token_fn(txt): return len(enc.encode(txt))
 
 
     # 第6步:任务函数
````
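This change, mirrored in the PDF-translation plugin below, is the token-counting fix named in the version notes: counting with `tiktoken` keyed to the configured `LLM_MODEL` matches what the OpenAI endpoint actually sees, so the splitter no longer produces over-long fragments. A standalone sketch, assuming a hard-coded model name in place of `get_conf('LLM_MODEL')`:

```python
import tiktoken

# Count tokens the same way the API does; assumption: model fixed to gpt-3.5-turbo.
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
def get_token_fn(txt: str) -> int:
    return len(enc.encode(txt))

print(get_token_fn("hello world"))   # 2
print(get_token_fn("你好,世界"))      # CJK text often costs more tokens per character
```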
crazy_functions/批量翻译PDF文档_多线程.py
CHANGED
````diff
@@ -148,7 +148,8 @@ def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, histor
         file_content, page_one = read_and_clean_pdf_text(fp)
         # 递归地切割PDF文件
         from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
+        from toolbox import get_conf
+        enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
         def get_token_num(txt): return len(enc.encode(txt))
         # 分解文本
         paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
````
crazy_functions/解析项目源代码.py
CHANGED
````diff
@@ -2,92 +2,96 @@ from request_llm.bridge_chatgpt import predict_no_ui
 from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
 fast_debug = False
 
+
+def 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
+    import os, copy
+    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+    from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, WithRetry
+    msg = '正常'
+    inputs_array = []
+    inputs_show_user_array = []
+    history_array = []
+    sys_prompt_array = []
+    report_part_1 = []
+
+    ############################## <第一步,逐个文件分析,多线程> ##################################
     for index, fp in enumerate(file_manifest):
         with open(fp, 'r', encoding='utf-8') as f:
             file_content = f.read()
         prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
         i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```'
         i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
+        # 装载请求内容
+        inputs_array.append(i_say)
+        inputs_show_user_array.append(i_say_show_user)
+        history_array.append([])
+        sys_prompt_array.append("你是一个程序架构分析师,正在分析一个源代码项目。你的回答必须简单明了。")
+
+    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+        inputs_array = inputs_array,
+        inputs_show_user_array = inputs_show_user_array,
+        history_array = history_array,
+        sys_prompt_array = sys_prompt_array,
+        top_p = top_p,
+        temperature = temperature,
+        chatbot = chatbot,
+        show_user_at_complete = True
+    )
+
+    report_part_1 = copy.deepcopy(gpt_response_collection)
+    history_to_return = report_part_1
+    res = write_results_to_file(report_part_1)
+    chatbot.append(("完成?", "逐个文件分析已完成。" + res + "\n\n正在开始汇总。"))
+    yield chatbot, history_to_return, msg
+
+    ############################## <第二步,综合,单线程,分组+迭代处理> ##################################
+    batchsize = 16  # 10个文件为一组
+    report_part_2 = []
+    previous_iteration_files = []
+    while True:
+        if len(file_manifest) == 0: break
+        this_iteration_file_manifest = file_manifest[:batchsize]
+        this_iteration_gpt_response_collection = gpt_response_collection[:batchsize*2]
+        file_rel_path = [os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)]
+        # 把“请对下面的程序文件做一个概述” 替换成 精简的 "文件名:{all_file[index]}"
+        for index, content in enumerate(this_iteration_gpt_response_collection):
+            if index%2==0: this_iteration_gpt_response_collection[index] = f"文件名:{file_rel_path[index//2]}"
+        previous_iteration_files.extend([os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)])
+        previous_iteration_files_string = ', '.join(previous_iteration_files)
+        current_iteration_focus = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)])
+        i_say = f'根据以上分析,对程序的整体功能和构架重新做出概括。然后用一张markdown表格整理每个文件的功能(包括{previous_iteration_files_string})。'
+        inputs_show_user = f'根据以上分析,对程序的整体功能和构架重新做出概括,由于输入长度限制,可能需要分组处理,本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
+        this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
+        this_iteration_history.extend(report_part_2)
+        result = yield from request_gpt_model_in_new_thread_with_ui_alive(
+            inputs=i_say, inputs_show_user=inputs_show_user, top_p=top_p, temperature=temperature, chatbot=chatbot,
+            history=this_iteration_history,  # 迭代之前的分析
+            sys_prompt="你是一个程序架构分析师,正在分析一个源代码项目。")
+        report_part_2.extend([i_say, result])
+
+        file_manifest = file_manifest[batchsize:]
+        gpt_response_collection = gpt_response_collection[batchsize*2:]
+
+    ############################## <END> ##################################
+    history_to_return.extend(report_part_2)
+    res = write_results_to_file(history_to_return)
+    chatbot.append(("完成了吗?", res))
+    yield chatbot, history_to_return, msg
 
 
 @CatchException
 def 解析项目本身(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
     history = []  # 清空历史,以免输入溢出
-    import
+    import glob
     file_manifest = [f for f in glob.glob('./*.py') if ('test_project' not in f) and ('gpt_log' not in f)] + \
-                    [f for f in glob.glob('./crazy_functions/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]
-        prefix = "接下来请你分析自己的程序构成,别紧张," if index==0 else ""
-        i_say = prefix + f'请对下面的程序文件做一个概述文件名是{fp},文件代码是 ```{file_content}```'
-        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
-        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
-        yield chatbot, history, '正常'
-
-        if not fast_debug:
-            # ** gpt request **
-            # gpt_say = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature)
-            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], long_connection=True)  # 带超时倒计时
-
-            chatbot[-1] = (i_say_show_user, gpt_say)
-            history.append(i_say_show_user); history.append(gpt_say)
-            yield chatbot, history, '正常'
-            time.sleep(2)
-
-    i_say = f'根据以上你自己的分析,对程序的整体功能和构架做出概括。然后用一张markdown表格整理每个文件的功能(包括{file_manifest})。'
-    chatbot.append((i_say, "[Local Message] waiting gpt response."))
-    yield chatbot, history, '正常'
-
-    if not fast_debug:
-        # ** gpt request **
-        # gpt_say = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature, history=history)
-        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history, long_connection=True)  # 带超时倒计时
-
-        chatbot[-1] = (i_say, gpt_say)
-        history.append(i_say); history.append(gpt_say)
-        yield chatbot, history, '正常'
-        res = write_results_to_file(history)
-        chatbot.append(("完成了吗?", res))
+                    [f for f in glob.glob('./crazy_functions/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]+ \
+                    [f for f in glob.glob('./request_llm/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]
+    project_folder = './'
+    if len(file_manifest) == 0:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
         yield chatbot, history, '正常'
+        return
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 @CatchException
 def 解析一个Python项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
@@ -105,7 +109,7 @@ def 解析一个Python项目(txt, top_p, temperature, chatbot, history, systemPr
         report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
         yield chatbot, history, '正常'
         return
-    yield from
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 
 @CatchException
@@ -126,7 +130,7 @@ def 解析一个C项目的头文件(txt, top_p, temperature, chatbot, history, s
         report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
         yield chatbot, history, '正常'
         return
-    yield from
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 @CatchException
 def 解析一个C项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
@@ -147,7 +151,7 @@ def 解析一个C项目(txt, top_p, temperature, chatbot, history, systemPromptT
         report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
         yield chatbot, history, '正常'
         return
-    yield from
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 
 @CatchException
@@ -169,7 +173,7 @@ def 解析一个Java项目(txt, top_p, temperature, chatbot, history, systemProm
         report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何java文件: {txt}")
         yield chatbot, history, '正常'
         return
-    yield from
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 
 @CatchException
@@ -192,7 +196,7 @@ def 解析一个Rect项目(txt, top_p, temperature, chatbot, history, systemProm
         report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何Rect文件: {txt}")
         yield chatbot, history, '正常'
         return
-    yield from
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 
 @CatchException
@@ -211,4 +215,4 @@ def 解析一个Golang项目(txt, top_p, temperature, chatbot, history, systemPr
         report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何golang文件: {txt}")
         yield chatbot, history, '正常'
         return
-    yield from
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
````
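In the new second phase, `gpt_response_collection` interleaves prompts and answers, so a batch of `batchsize` files consumes `batchsize*2` entries, and the even slots are rewritten to short `文件名:...` markers before being replayed as history. A small sketch of that pairing arithmetic (file names are made up):

```python
# gpt_response_collection alternates [prompt, answer, prompt, answer, ...]
gpt_response_collection = ["prompt a.py", "summary of a.py",
                           "prompt b.py", "summary of b.py",
                           "prompt c.py", "summary of c.py"]
files = ["a.py", "b.py", "c.py"]
batchsize = 2                                   # the commit uses 16

batch = gpt_response_collection[:batchsize*2]   # 2 files -> 4 entries
for i, _ in enumerate(batch):
    if i % 2 == 0:                              # even slots are prompts
        batch[i] = f"文件名:{files[i//2]}"       # shrink them to bare file names
print(batch)  # ['文件名:a.py', 'summary of a.py', '文件名:b.py', 'summary of b.py']
```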
objdump.tmp
ADDED
Binary file (26.6 kB)
request_llm/bridge_chatgpt.py
CHANGED
````diff
@@ -72,7 +72,7 @@ def predict_no_ui(inputs, top_p, temperature, history=[], sys_prompt=""):
         raise ConnectionAbortedError("Json解析不合常规,可能是文本过长" + response.text)
 
 
-def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_prompt="", observe_window=None):
+def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_prompt="", observe_window=None, console_slience=False):
     """
     发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。
     inputs:
@@ -121,7 +121,7 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr
             if "role" in delta: continue
             if "content" in delta:
                 result += delta["content"]
-                print(delta["content"], end='')
+                if not console_slience: print(delta["content"], end='')
                 if observe_window is not None:
                     # 观测窗,把已经获取的数据显示出去
                     if len(observe_window) >= 1: observe_window[0] += delta["content"]
@@ -264,8 +264,7 @@ def generate_payload(inputs, top_p, temperature, history, system_prompt, stream)
         "presence_penalty": 0,
         "frequency_penalty": 0,
     }
-
-    print(f" {LLM_MODEL} : {conversation_cnt} : {inputs}")
+    print(f" {LLM_MODEL} : {conversation_cnt} : {inputs[:100]}")
     return headers,payload
````
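The `observe_window` argument is the contract between `predict_no_ui_long_connection` and the UI helpers in crazy_utils: slot 0 accumulates streamed text, slot 1 is a watchdog timestamp fed by the UI loop, and the multi-thread helper passes `mutable[index]`, whose third slot holds a status string. A hedged sketch of a caller honoring that contract, with `fake_stream` standing in for the real SSE decoding loop:

```python
import time

def fake_stream():
    # Hypothetical stand-in for the decoded SSE chunks.
    yield from ["analysis", " of ", "file 1"]

def long_connection_sketch(observe_window=None, console_slience=False):
    result = ""
    for content in fake_stream():
        result += content
        if not console_slience: print(content, end='')  # the commit adds this per-thread silencing
        if observe_window is not None:
            if len(observe_window) >= 1: observe_window[0] += content  # live partial text
            # slot 1 is the watchdog timestamp fed by the UI loop; a real
            # implementation may abort if it goes stale for too long
    return result

window = ["", time.time(), "执行中"]  # shape of mutable[index] in the multi-thread helper
print("->", long_connection_sketch(observe_window=window, console_slience=True))
```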
toolbox.py
CHANGED
````diff
@@ -21,6 +21,8 @@ def ArgsGeneralWrapper(f):
         yield from f(txt_passon, *args, **kwargs)
     return decorated
 
+def update_ui(chatbot, history, msg='正常', *args, **kwargs):
+    yield chatbot, history, msg
 
 def get_reduce_token_percent(text):
     try:
````
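`update_ui` wraps the `yield chatbot, history, msg` triple behind one call; because it is itself a generator, a plugin forwards its single refresh with `yield from`, and future UI changes only need to touch this one place. Usage inside any generator plugin:

```python
from toolbox import update_ui

def some_plugin(txt, chatbot, history):
    chatbot.append([txt, ""])
    # One refresh of the frontend; msg defaults to '正常'.
    yield from update_ui(chatbot=chatbot, history=history)
```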
version
CHANGED
````diff
@@ -1,5 +1,5 @@
 {
-    "version": 2.
+    "version": 2.6,
     "show_feature": true,
-    "new_feature": "
+    "new_feature": "增强多线程稳定性(涉及代码解析、PDF翻译等)<->修复Token计数错误(解决PDF翻译的分割不合理的问题)"
 }
````

(The `new_feature` note reads: stronger multi-threading stability, covering code analysis and PDF translation, plus a fix for the token-counting error that caused unreasonable splitting in PDF translation.)