UTF8 Ignore read file errors
Browse files- Dockerfile+ChatGLM +47 -0
- crazy_functions/Latex全文润色.py +1 -1
- crazy_functions/Latex全文翻译.py +1 -1
- crazy_functions/代码重写为全英文_多线程.py +2 -2
- crazy_functions/批量总结PDF文档pdfminer.py +1 -1
- crazy_functions/生成函数注释.py +1 -1
- crazy_functions/解析项目源代码.py +1 -1
- crazy_functions/读文章写摘要.py +1 -1
Dockerfile+ChatGLM
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# How to build | 如何构建: docker build -t gpt-academic --network=host -f Dockerfile+ChatGLM .
|
2 |
+
# How to run | 如何运行 (1) 直接运行: docker run --rm -it --net=host gpt-academic
|
3 |
+
# How to run | 如何运行 (2) 我想运行之前进容器做一些调整: docker run --rm -it --net=host --gpus=all gpt-academic bash
|
4 |
+
|
5 |
+
# 从NVIDIA源构建,从而支持显卡运行(检查宿主的nvidia-smi中的cuda版本必须>=11.3)
|
6 |
+
FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04
|
7 |
+
ARG useProxyNetwork=''
|
8 |
+
RUN apt-get update
|
9 |
+
RUN apt-get install -y curl proxychains curl
|
10 |
+
RUN apt-get install -y git python python3 python-dev python3-dev --fix-missing
|
11 |
+
|
12 |
+
# 配置代理网络(构建Docker镜像时使用)
|
13 |
+
# # comment out below if you do not need proxy network | 如果不需要翻墙 - 从此行向下删除
|
14 |
+
RUN $useProxyNetwork curl cip.cc
|
15 |
+
RUN sed -i '$ d' /etc/proxychains.conf
|
16 |
+
RUN sed -i '$ d' /etc/proxychains.conf
|
17 |
+
RUN echo "socks5 127.0.0.1 10880" >> /etc/proxychains.conf
|
18 |
+
ARG useProxyNetwork=proxychains
|
19 |
+
# # comment out above if you do not need proxy network | 如果不需要翻墙 - 从此行向上删除
|
20 |
+
|
21 |
+
|
22 |
+
# use python3 as the system default python
|
23 |
+
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8
|
24 |
+
|
25 |
+
# 下载分支
|
26 |
+
WORKDIR /gpt
|
27 |
+
RUN $useProxyNetwork git clone https://github.com/binary-husky/chatgpt_academic.git -b v3.0
|
28 |
+
WORKDIR /gpt/chatgpt_academic
|
29 |
+
RUN $useProxyNetwork python3 -m pip install -r requirements.txt
|
30 |
+
RUN $useProxyNetwork python3 -m pip install -r request_llm/requirements_chatglm.txt
|
31 |
+
RUN $useProxyNetwork python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu113
|
32 |
+
|
33 |
+
# 为chatgpt-academic配置代理和API-KEY (非必要 可选步骤)
|
34 |
+
RUN echo ' \n\
|
35 |
+
API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \n\
|
36 |
+
USE_PROXY = True \n\
|
37 |
+
proxies = { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } ' >> config_private.py
|
38 |
+
|
39 |
+
# 预热CHATGLM参数(非必要 可选步骤)
|
40 |
+
RUN echo ' \n\
|
41 |
+
from transformers import AutoModel, AutoTokenizer \n\
|
42 |
+
chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) \n\
|
43 |
+
chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float() ' >> warm_up_chatglm.py
|
44 |
+
RUN python3 -u warm_up_chatglm.py
|
45 |
+
|
46 |
+
# 启动
|
47 |
+
CMD ["python3", "-u", "main.py"]
|
crazy_functions/Latex全文润色.py
CHANGED
@@ -45,7 +45,7 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
|
45 |
pfg = PaperFileGroup()
|
46 |
|
47 |
for index, fp in enumerate(file_manifest):
|
48 |
-
with open(fp, 'r', encoding='utf-8') as f:
|
49 |
file_content = f.read()
|
50 |
# 定义注释的正则表达式
|
51 |
comment_pattern = r'%.*'
|
|
|
45 |
pfg = PaperFileGroup()
|
46 |
|
47 |
for index, fp in enumerate(file_manifest):
|
48 |
+
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
49 |
file_content = f.read()
|
50 |
# 定义注释的正则表达式
|
51 |
comment_pattern = r'%.*'
|
crazy_functions/Latex全文翻译.py
CHANGED
@@ -44,7 +44,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
|
44 |
pfg = PaperFileGroup()
|
45 |
|
46 |
for index, fp in enumerate(file_manifest):
|
47 |
-
with open(fp, 'r', encoding='utf-8') as f:
|
48 |
file_content = f.read()
|
49 |
# 定义注释的正则表达式
|
50 |
comment_pattern = r'%.*'
|
|
|
44 |
pfg = PaperFileGroup()
|
45 |
|
46 |
for index, fp in enumerate(file_manifest):
|
47 |
+
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
48 |
file_content = f.read()
|
49 |
# 定义注释的正则表达式
|
50 |
comment_pattern = r'%.*'
|
crazy_functions/代码重写为全英文_多线程.py
CHANGED
@@ -49,7 +49,7 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
|
|
49 |
# 第4步:随便显示点什么防止卡顿的感觉
|
50 |
for index, fp in enumerate(file_manifest):
|
51 |
# if 'test_project' in fp: continue
|
52 |
-
with open(fp, 'r', encoding='utf-8') as f:
|
53 |
file_content = f.read()
|
54 |
i_say_show_user =f'[{index}/{len(file_manifest)}] 接下来请将以下代码中包含的所有中文转化为英文,只输出转化后的英文代码,请用代码块输出代码: {os.path.abspath(fp)}'
|
55 |
i_say_show_user_buffer.append(i_say_show_user)
|
@@ -72,7 +72,7 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
|
|
72 |
if index > 10:
|
73 |
time.sleep(60)
|
74 |
print('Openai 限制免费用户每分钟20次请求,降低请求频率中。')
|
75 |
-
with open(fp, 'r', encoding='utf-8') as f:
|
76 |
file_content = f.read()
|
77 |
i_say_template = lambda fp, file_content: f'接下来请将以下代码中包含的所有中文转化为英文,只输出代码,文件名是{fp},文件代码是 ```{file_content}```'
|
78 |
try:
|
|
|
49 |
# 第4步:随便显示点什么防止卡顿的感觉
|
50 |
for index, fp in enumerate(file_manifest):
|
51 |
# if 'test_project' in fp: continue
|
52 |
+
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
53 |
file_content = f.read()
|
54 |
i_say_show_user =f'[{index}/{len(file_manifest)}] 接下来请将以下代码中包含的所有中文转化为英文,只输出转化后的英文代码,请用代码块输出代码: {os.path.abspath(fp)}'
|
55 |
i_say_show_user_buffer.append(i_say_show_user)
|
|
|
72 |
if index > 10:
|
73 |
time.sleep(60)
|
74 |
print('Openai 限制免费用户每分钟20次请求,降低请求频率中。')
|
75 |
+
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
76 |
file_content = f.read()
|
77 |
i_say_template = lambda fp, file_content: f'接下来请将以下代码中包含的所有中文转化为英文,只输出代码,文件名是{fp},文件代码是 ```{file_content}```'
|
78 |
try:
|
crazy_functions/批量总结PDF文档pdfminer.py
CHANGED
@@ -68,7 +68,7 @@ def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo
|
|
68 |
print('begin analysis on:', file_manifest)
|
69 |
for index, fp in enumerate(file_manifest):
|
70 |
if ".tex" in fp:
|
71 |
-
with open(fp, 'r', encoding='utf-8') as f:
|
72 |
file_content = f.read()
|
73 |
if ".pdf" in fp.lower():
|
74 |
file_content = readPdf(fp)
|
|
|
68 |
print('begin analysis on:', file_manifest)
|
69 |
for index, fp in enumerate(file_manifest):
|
70 |
if ".tex" in fp:
|
71 |
+
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
72 |
file_content = f.read()
|
73 |
if ".pdf" in fp.lower():
|
74 |
file_content = readPdf(fp)
|
crazy_functions/生成函数注释.py
CHANGED
@@ -7,7 +7,7 @@ def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
|
|
7 |
import time, os
|
8 |
print('begin analysis on:', file_manifest)
|
9 |
for index, fp in enumerate(file_manifest):
|
10 |
-
with open(fp, 'r', encoding='utf-8') as f:
|
11 |
file_content = f.read()
|
12 |
|
13 |
i_say = f'请对下面的程序文件做一个概述,并对文件中的所有函数生成注释,使用markdown表格输出结果,文件名是{os.path.relpath(fp, project_folder)},文件内容是 ```{file_content}```'
|
|
|
7 |
import time, os
|
8 |
print('begin analysis on:', file_manifest)
|
9 |
for index, fp in enumerate(file_manifest):
|
10 |
+
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
11 |
file_content = f.read()
|
12 |
|
13 |
i_say = f'请对下面的程序文件做一个概述,并对文件中的所有函数生成注释,使用markdown表格输出结果,文件名是{os.path.relpath(fp, project_folder)},文件内容是 ```{file_content}```'
|
crazy_functions/解析项目源代码.py
CHANGED
@@ -14,7 +14,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
|
|
14 |
|
15 |
############################## <第一步,逐个文件分析,多线程> ##################################
|
16 |
for index, fp in enumerate(file_manifest):
|
17 |
-
with open(fp, 'r', encoding='utf-8') as f:
|
18 |
file_content = f.read()
|
19 |
prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
|
20 |
i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```'
|
|
|
14 |
|
15 |
############################## <第一步,逐个文件分析,多线程> ##################################
|
16 |
for index, fp in enumerate(file_manifest):
|
17 |
+
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
18 |
file_content = f.read()
|
19 |
prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
|
20 |
i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```'
|
crazy_functions/读文章写摘要.py
CHANGED
@@ -8,7 +8,7 @@ def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo
|
|
8 |
import time, glob, os
|
9 |
print('begin analysis on:', file_manifest)
|
10 |
for index, fp in enumerate(file_manifest):
|
11 |
-
with open(fp, 'r', encoding='utf-8') as f:
|
12 |
file_content = f.read()
|
13 |
|
14 |
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
|
|
|
8 |
import time, glob, os
|
9 |
print('begin analysis on:', file_manifest)
|
10 |
for index, fp in enumerate(file_manifest):
|
11 |
+
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
12 |
file_content = f.read()
|
13 |
|
14 |
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
|