gpt-academic

Build error

App Files Files Community

Jasonai committed on Mar 30, 2023

Commit

44e77dc

1 Parent(s): 80e0c4e

feat(toolbox):调整了空格的问题

Browse files

Files changed (2) hide show

requirements.txt +2 -9
toolbox.py +27 -51

requirements.txt CHANGED Viewed

@@ -1,10 +1,3 @@
 gradio>=3.23
-requests[socks]~=2.28.2
-mdtex2html~=1.2.0
-markdown~=3.4.3
-latex2mathml~=3.75.1
-numpy~=1.21.6
-rarfile~=4.0
-py7zr~=0.20.4

 gradio>=3.23
+requests[socks]
+mdtex2html

toolbox.py CHANGED Viewed

@@ -2,7 +2,6 @@ import markdown, mdtex2html, threading, importlib, traceback
 from show_math import convert as convert_math
 from functools import wraps
 def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt=''):
     """
         调用简单的predict_no_ui接口，但是依然保留了些许界面心跳功能，当对话太长时，会自动采用二分法截断
@@ -14,43 +13,36 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp
     # 多线程的时候，需要一个mutable结构在不同线程之间传递信息
     # list就是最简单的mutable结构，我们第一个位置放gpt输出，第二个位置传递报错信息
     mutable = [None, '']
     # multi-threading worker
     def mt(i_say, history):
         while True:
             try:
-                mutable[0] = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature, history=history,
-                                           sys_prompt=sys_prompt)
                 break
             except ConnectionAbortedError as e:
                 if len(history) > 0:
-                    history = [his[len(his) // 2:] for his in history if his is not None]
                     mutable[1] = 'Warning! History conversation is too long, cut into half. '
                 else:
-                    i_say = i_say[:len(i_say) // 2]
                     mutable[1] = 'Warning! Input file is too long, cut into half. '
             except TimeoutError as e:
                 mutable[0] = '[Local Message] Failed with timeout.'
                 raise TimeoutError
     # 创建新线程发出http请求
-    thread_name = threading.Thread(target=mt, args=(i_say, history));
-    thread_name.start()
     # 原来的线程则负责持续更新UI，实现一个超时倒计时，并等待新线程的任务完成
     cnt = 0
     while thread_name.is_alive():
         cnt += 1
-        chatbot[-1] = (i_say_show_user,
-                       f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS * 2 * (MAX_RETRY + 1)}" + ''.join(
-                           ['.'] * (cnt % 4)))
         yield chatbot, history, '正常'
         time.sleep(1)
     # 把gpt的输出从mutable中取出来
     gpt_say = mutable[0]
-    if gpt_say == '[Local Message] Failed with timeout.': raise TimeoutError
     return gpt_say
 def write_results_to_file(history, file_name=None):
     """
         将对话记录history以Markdown格式写入文件中。如果没有指定文件名，则使用当前时间生成文件名。
@@ -60,17 +52,16 @@ def write_results_to_file(history, file_name=None):
         # file_name = time.strftime("chatGPT分析报告%Y-%m-%d-%H-%M-%S", time.localtime()) + '.md'
         file_name = 'chatGPT分析报告' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.md'
     os.makedirs('./gpt_log/', exist_ok=True)
-    with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
         f.write('# chatGPT 分析报告\n')
         for i, content in enumerate(history):
-            if i % 2 == 0: f.write('## ')
             f.write(content)
             f.write('\n\n')
     res = '以上材料已经被写入' + os.path.abspath(f'./gpt_log/{file_name}')
     print(res)
     return res
 def regular_txt_to_markdown(text):
     """
         将普通文本转换为Markdown格式的文本。
@@ -80,12 +71,10 @@ def regular_txt_to_markdown(text):
     text = text.replace('\n\n\n', '\n\n')
     return text
 def CatchException(f):
     """
         装饰器函数，捕捉函数f中的异常并封装到一个生成器中返回，并显示到聊天当中。
     """
     @wraps(f)
     def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
         try:
@@ -95,21 +84,16 @@ def CatchException(f):
             from toolbox import get_conf
             proxies, = get_conf('proxies')
             tb_str = regular_txt_to_markdown(traceback.format_exc())
-            chatbot[-1] = (
-                chatbot[-1][0], f"[Local Message] 实验性函数调用出错: \n\n {tb_str} \n\n 当前代理可用性: \n\n {check_proxy(proxies)}")
             yield chatbot, history, f'异常 {e}'
     return decorated
 def report_execption(chatbot, history, a, b):
     """
         向chatbot中添加错误信息
     """
     chatbot.append((a, b))
-    history.append(a);
-    history.append(b)
 def text_divide_paragraph(text):
     """
@@ -126,16 +110,15 @@ def text_divide_paragraph(text):
         text = "</br>".join(lines)
         return text
 def markdown_convertion(txt):
     """
         将Markdown格式的文本转换为HTML格式。如果包含数学公式，则先将公式转换为HTML格式。
     """
     if ('$' in txt) and ('```' not in txt):
-        return markdown.markdown(txt, extensions=['fenced_code', 'tables']) + '<br><br>' + \
-               markdown.markdown(convert_math(txt, splitParagraphs=False), extensions=['fenced_code', 'tables'])
     else:
-        return markdown.markdown(txt, extensions=['fenced_code', 'tables'])
 def format_io(self, y):
@@ -144,9 +127,9 @@ def format_io(self, y):
     """
     if y is None or y == []: return []
     i_ask, gpt_reply = y[-1]
-    i_ask = text_divide_paragraph(i_ask)  # 输入部分太自由，预处理一波
     y[-1] = (
-        None if i_ask is None else markdown.markdown(i_ask, extensions=['fenced_code', 'tables']),
         None if gpt_reply is None else markdown_convertion(gpt_reply)
     )
     return y
@@ -168,7 +151,6 @@ def extract_archive(file_path, dest_dir):
     import zipfile
     import tarfile
     import os
     # Get the file extension of the input file
     file_extension = os.path.splitext(file_path)[1]
@@ -183,17 +165,18 @@ def extract_archive(file_path, dest_dir):
             tarobj.extractall(path=dest_dir)
             print("Successfully extracted tar archive to {}".format(dest_dir))
     elif file_extension == '.rar':
-        # 这是个第三方库，需要预先pip install rarfile
-        # 此外，Windows上还需要安装winrar软件，配置其Path环境变量，如"C:\Program Files\WinRAR"才可以
         try:
             import rarfile
             with rarfile.RarFile(file_path) as rf:
                 rf.extractall(path=dest_dir)
                 print("Successfully extracted rar archive to {}".format(dest_dir))
         except:
-            print("rar格式需要安装额外依赖")
     elif file_extension == '.7z':
         try:
             import py7zr
@@ -201,12 +184,11 @@ def extract_archive(file_path, dest_dir):
                 f.extractall(path=dest_dir)
                 print("Successfully extracted 7z archive to {}".format(dest_dir))
         except:
-            print("7z格式需要安装额外依赖")
     else:
         return
 def find_recent_files(directory):
     """
         me: find files that is created with in one minutes under a directory with python, write a function
@@ -233,10 +215,8 @@ def on_file_uploaded(files, chatbot, txt):
     if len(files) == 0: return chatbot, txt
     import shutil, os, time, glob
     from toolbox import extract_archive
-    try:
-        shutil.rmtree('./private_upload/')
-    except:
-        pass
     time_tag = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
     os.makedirs(f'private_upload/{time_tag}', exist_ok=True)
     for file in files:
@@ -260,25 +240,21 @@ def on_report_generated(files, chatbot):
     chatbot.append(['汇总报告如何远程获取？', '汇总报告已经添加到右侧文件上传区，请查收。'])
     return report_files, chatbot
 def get_conf(*args):
     # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
     res = []
     for arg in args:
-        try:
-            r = getattr(importlib.import_module('config_private'), arg)
-        except:
-            r = getattr(importlib.import_module('config'), arg)
         res.append(r)
         # 在读取API_KEY时，检查一下是不是忘了改config
-        if arg == 'API_KEY' and len(r) != 51:
             assert False, "正确的API_KEY密钥是51位，请在config文件中修改API密钥, 添加海外代理之后再运行。" + \
-                          "（如果您刚更新过代码，请确保旧版config_private文件中没有遗留任何新增键值）"
     return res
 def clear_line_break(txt):
     txt = txt.replace('\n', ' ')
     txt = txt.replace('  ', ' ')
     txt = txt.replace('  ', ' ')
-    return txt

 from show_math import convert as convert_math
 from functools import wraps
 def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt=''):
     """
         调用简单的predict_no_ui接口，但是依然保留了些许界面心跳功能，当对话太长时，会自动采用二分法截断
     # 多线程的时候，需要一个mutable结构在不同线程之间传递信息
     # list就是最简单的mutable结构，我们第一个位置放gpt输出，第二个位置传递报错信息
     mutable = [None, '']
     # multi-threading worker
     def mt(i_say, history):
         while True:
             try:
+                mutable[0] = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt)
                 break
             except ConnectionAbortedError as e:
                 if len(history) > 0:
+                    history = [his[len(his)//2:] for his in history if his is not None]
                     mutable[1] = 'Warning! History conversation is too long, cut into half. '
                 else:
+                    i_say = i_say[:len(i_say)//2]
                     mutable[1] = 'Warning! Input file is too long, cut into half. '
             except TimeoutError as e:
                 mutable[0] = '[Local Message] Failed with timeout.'
                 raise TimeoutError
     # 创建新线程发出http请求
+    thread_name = threading.Thread(target=mt, args=(i_say, history)); thread_name.start()
     # 原来的线程则负责持续更新UI，实现一个超时倒计时，并等待新线程的任务完成
     cnt = 0
     while thread_name.is_alive():
         cnt += 1
+        chatbot[-1] = (i_say_show_user, f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS*2*(MAX_RETRY+1)}"+''.join(['.']*(cnt%4)))
         yield chatbot, history, '正常'
         time.sleep(1)
     # 把gpt的输出从mutable中取出来
     gpt_say = mutable[0]
+    if gpt_say=='[Local Message] Failed with timeout.': raise TimeoutError
     return gpt_say
 def write_results_to_file(history, file_name=None):
     """
         将对话记录history以Markdown格式写入文件中。如果没有指定文件名，则使用当前时间生成文件名。
         # file_name = time.strftime("chatGPT分析报告%Y-%m-%d-%H-%M-%S", time.localtime()) + '.md'
         file_name = 'chatGPT分析报告' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.md'
     os.makedirs('./gpt_log/', exist_ok=True)
+    with open(f'./gpt_log/{file_name}', 'w', encoding = 'utf8') as f:
         f.write('# chatGPT 分析报告\n')
         for i, content in enumerate(history):
+            if i%2==0: f.write('## ')
             f.write(content)
             f.write('\n\n')
     res = '以上材料已经被写入' + os.path.abspath(f'./gpt_log/{file_name}')
     print(res)
     return res
 def regular_txt_to_markdown(text):
     """
         将普通文本转换为Markdown格式的文本。
     text = text.replace('\n\n\n', '\n\n')
     return text
 def CatchException(f):
     """
         装饰器函数，捕捉函数f中的异常并封装到一个生成器中返回，并显示到聊天当中。
     """
     @wraps(f)
     def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
         try:
             from toolbox import get_conf
             proxies, = get_conf('proxies')
             tb_str = regular_txt_to_markdown(traceback.format_exc())
+            chatbot[-1] = (chatbot[-1][0], f"[Local Message] 实验性函数调用出错: \n\n {tb_str} \n\n 当前代理可用性: \n\n {check_proxy(proxies)}")
             yield chatbot, history, f'异常 {e}'
     return decorated
 def report_execption(chatbot, history, a, b):
     """
         向chatbot中添加错误信息
     """
     chatbot.append((a, b))
+    history.append(a); history.append(b)
 def text_divide_paragraph(text):
     """
         text = "</br>".join(lines)
         return text
 def markdown_convertion(txt):
     """
         将Markdown格式的文本转换为HTML格式。如果包含数学公式，则先将公式转换为HTML格式。
     """
     if ('$' in txt) and ('```' not in txt):
+        return markdown.markdown(txt,extensions=['fenced_code','tables']) + '<br><br>' + \
+            markdown.markdown(convert_math(txt, splitParagraphs=False),extensions=['fenced_code','tables'])
     else:
+        return markdown.markdown(txt,extensions=['fenced_code','tables'])
 def format_io(self, y):
     """
     if y is None or y == []: return []
     i_ask, gpt_reply = y[-1]
+    i_ask = text_divide_paragraph(i_ask) # 输入部分太自由，预处理一波
     y[-1] = (
+        None if i_ask is None else markdown.markdown(i_ask, extensions=['fenced_code','tables']),
         None if gpt_reply is None else markdown_convertion(gpt_reply)
     )
     return y
     import zipfile
     import tarfile
     import os
     # Get the file extension of the input file
     file_extension = os.path.splitext(file_path)[1]
             tarobj.extractall(path=dest_dir)
             print("Successfully extracted tar archive to {}".format(dest_dir))
+    # 第三方库，需要预先pip install rarfile
+    # 此外，Windows上还需要安装winrar软件，配置其Path环境变量，如"C:\Program Files\WinRAR"才可以
     elif file_extension == '.rar':
         try:
             import rarfile
             with rarfile.RarFile(file_path) as rf:
                 rf.extractall(path=dest_dir)
                 print("Successfully extracted rar archive to {}".format(dest_dir))
         except:
+            print("Rar format requires additional dependencies to install")
+    # 第三方库，需要预先pip install py7zr
     elif file_extension == '.7z':
         try:
             import py7zr
                 f.extractall(path=dest_dir)
                 print("Successfully extracted 7z archive to {}".format(dest_dir))
         except:
+            print("7z format requires additional dependencies to install")
     else:
         return
 def find_recent_files(directory):
     """
         me: find files that is created with in one minutes under a directory with python, write a function
     if len(files) == 0: return chatbot, txt
     import shutil, os, time, glob
     from toolbox import extract_archive
+    try: shutil.rmtree('./private_upload/')
+    except: pass
     time_tag = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
     os.makedirs(f'private_upload/{time_tag}', exist_ok=True)
     for file in files:
     chatbot.append(['汇总报告如何远程获取？', '汇总报告已经添加到右侧文件上传区，请查收。'])
     return report_files, chatbot
 def get_conf(*args):
     # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
     res = []
     for arg in args:
+        try: r = getattr(importlib.import_module('config_private'), arg)
+        except: r = getattr(importlib.import_module('config'), arg)
         res.append(r)
         # 在读取API_KEY时，检查一下是不是忘了改config
+        if arg=='API_KEY' and len(r) != 51:
             assert False, "正确的API_KEY密钥是51位，请在config文件中修改API密钥, 添加海外代理之后再运行。" + \
+                        "（如果您刚更新过代码，请确保旧版config_private文件中没有遗留任何新增键值）"
     return res
 def clear_line_break(txt):
     txt = txt.replace('\n', ' ')
     txt = txt.replace('  ', ' ')
     txt = txt.replace('  ', ' ')
+    return txt