3v324v23 committed on
Commit
bf3eb0b
·
1 Parent(s): 15d9d9a

加入 arxiv 小助手插件

Browse files
crazy_functions/下载arxiv论文翻译摘要.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from predict import predict_no_ui
2
+ from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down, get_conf
3
+ import re, requests, unicodedata, os
4
+
5
# Characters that are illegal or awkward inside a file name, mapped to
# full-width look-alikes so the paper title stays readable. Newlines are dropped.
_FILENAME_TRANSLATION = str.maketrans({
    '?': '\uff1f',
    ':': '\uff1a',
    '"': '\u201c',
    '/': '\uff0f',
    '\\': '\uff3c',
    '\n': None,
})


def _sanitize_filename(name):
    """Return ``name`` made safe to use as a single file-name component."""
    return re.sub(r' {2,}', ' ', name.translate(_FILENAME_TRANSLATION))


def download_arxiv_(url_pdf):
    """Download an arXiv paper as a PDF into ``./gpt_log/arxiv/``.

    Args:
        url_pdf: An arXiv ``abs`` or ``pdf`` URL, or a bare identifier
            (contains a dot and no slash, e.g. ``1712.00559``) which is
            expanded to ``https://arxiv.org/abs/<id>``.

    Returns:
        A ``(file_path, other_info)`` tuple, where ``file_path`` is the path
        of the saved PDF and ``other_info`` is the metadata dict returned by
        ``get_name``. Returns ``None`` when the input is neither an arXiv URL
        nor a bare identifier.

    Raises:
        requests.HTTPError: if the PDF request returns an error status.
    """
    if 'arxiv.org' not in url_pdf:
        if '.' in url_pdf and '/' not in url_pdf:
            new_url = 'https://arxiv.org/abs/' + url_pdf
            print('下载编号:', url_pdf, '自动定位:', new_url)
            url_pdf = new_url
        else:
            print('不能识别的URL!')
            return None

    # Normalise to a matching pair of URLs. Only the path segment is swapped,
    # so an "abs"/"pdf" substring elsewhere in the URL is left untouched.
    if '/abs/' in url_pdf:
        url_pdf = url_pdf.replace('/abs/', '/pdf/', 1) + '.pdf'
    url_abs = url_pdf.replace('/pdf/', '/abs/', 1)
    if url_abs.endswith('.pdf'):
        url_abs = url_abs[:-len('.pdf')]

    title, other_info = get_name(_url_=url_abs)

    # Prefix the file name with the year and any recognised venue.
    if '2' in other_info['year']:
        title = other_info['year'] + ' ' + title
    for venue in ('NeurIPS', 'NIPS', 'Nature', 'Science', 'ICLR', 'AAAI'):
        if venue in other_info['comment']:
            title = venue + ' ' + title

    download_dir = './gpt_log/arxiv/'
    os.makedirs(download_dir, exist_ok=True)
    file_path = download_dir + _sanitize_filename(title)

    print('下载中')
    proxies, = get_conf('proxies')
    response = requests.get(url_pdf, proxies=proxies)
    # Fail loudly instead of saving an HTML error page under a .pdf name.
    response.raise_for_status()
    with open(file_path, 'wb') as f:
        f.write(response.content)
    print('下载完成')

    return file_path, other_info
65
+
66
+
67
def get_name(_url_):
    """Fetch an arXiv abstract page and extract the title and basic metadata.

    Args:
        _url_: URL of the abstract page to request.

    Returns:
        A ``(file_name, other_details)`` tuple. ``file_name`` is the page's
        ``<title>`` text with ``'.pdf'`` appended. ``other_details`` is a dict
        with the keys ``'year'``, ``'abstract'``, ``'authors'`` and
        ``'comment'``; any field that cannot be extracted is set to ``''``.

    Raises:
        requests.HTTPError: if the page request returns an error status.
    """
    # Imported here rather than at module level, so importing this module
    # does not itself require bs4.
    from bs4 import BeautifulSoup
    print('正在获取文献名!')
    print(_url_)

    proxies, = get_conf('proxies')
    res = requests.get(_url_, proxies=proxies)
    res.raise_for_status()

    bs = BeautifulSoup(res.text, 'html.parser')
    other_details = {}

    # Each field is best-effort and extracted independently, so one missing
    # element cannot wipe out a field that was already found.
    try:
        dateline = bs.find_all(class_='dateline')[0].text
        other_details['year'] = re.search(r'(\d{4})', dateline).group(1)
    except (IndexError, AttributeError):
        other_details['year'] = ''
        print('年份获取失败')

    try:
        other_details['abstract'] = bs.find_all(class_='abstract mathjax')[0].text
    except (IndexError, AttributeError):
        other_details['abstract'] = ''
        print('abstract获取失败')

    try:
        authors = bs.find_all(class_='authors')[0].text
        other_details['authors'] = authors.split('Authors:')[1]
    except (IndexError, AttributeError):
        other_details['authors'] = ''
        print('authors获取失败')

    # Keep the whole "Comments: ..." row, whitespace-normalised, so callers
    # can search it for venue names. As before, the last matching row wins.
    # NOTE(review): assumes the metatable element contains <tr> rows - confirm
    # against the live page markup.
    other_details['comment'] = ''
    try:
        metatable = bs.find_all(class_='metatable')[0]
        for row in metatable.find_all('tr'):
            row_text = ' '.join(row.text.split())
            if 'Comments' in row_text:
                other_details['comment'] = row_text
    except (IndexError, AttributeError):
        print('comment获取失败')

    title_str = bs.find('title').contents[0]
    print('获取成功:', title_str)

    return title_str + '.pdf', other_details
131
+
132
+
133
+
134
@CatchException
def 下载arxiv论文并翻译摘要(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """Plugin: download an arXiv paper and ask the model to translate its abstract.

    Generator that yields ``(chatbot, history, status)`` tuples as it progresses.

    Args:
        txt: User input, passed unchanged to ``download_arxiv_``.
        top_p: Forwarded to ``predict_no_ui_but_counting_down``.
        temperature: Forwarded to ``predict_no_ui_but_counting_down``.
        chatbot: Chat display list; question/answer entries are appended to it.
        history: Incoming history; replaced by a fresh list inside this function.
        systemPromptTxt: Not used by this function.
        WEB_PORT: Not used by this function.
    """
    CRAZY_FUNCTION_INFO = "下载arxiv论文并翻译摘要,函数插件作者[binary-husky]。正在提取摘要并下载PDF文档……"
    import os
    import shutil

    # Basic information: what the plugin does and who contributed it.
    chatbot.append(["函数插件功能?", CRAZY_FUNCTION_INFO])
    yield chatbot, history, '正常'

    # Probe the extra dependencies; give an install hint if any is missing.
    try:
        import pdfminer, bs4
    except ImportError:
        report_execption(chatbot, history,
            a = f"解析项目: {txt}",
            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pdfminer beautifulsoup4```。")
        yield chatbot, history, '正常'
        return

    # Clear the history so the request does not overflow the input.
    history = []

    # Extract the metadata and download the PDF. A None result (unrecognised
    # input) is reported the same way as a failed download.
    try:
        download_result = download_arxiv_(txt)
    except Exception:
        download_result = None
    if download_result is None:
        report_execption(chatbot, history,
            a = f"解析项目: {txt}",
            b = "下载pdf文件未成功")
        yield chatbot, history, '正常'
        return
    pdf_path, info = download_result

    # Ask the model to extract and translate the abstract.
    i_say = f"请你阅读以下学术论文相关的材料,提取摘要,翻译为中文。材料如下:{str(info)}"
    i_say_show_user = f'请你阅读以下学术论文相关的材料,提取摘要,翻译为中文。论文:{pdf_path}'
    chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
    yield chatbot, history, '正常'
    msg = '正常'
    # ** gpt request ** (with a timeout countdown)
    gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])
    chatbot[-1] = (i_say_show_user, gpt_say)
    history.extend([i_say_show_user, gpt_say])
    yield chatbot, history, msg

    # Write the results to file. The PDF is copied and the original removed
    # (original author's note: this resets the file's creation time).
    shutil.copyfile(pdf_path, f'./gpt_log/{os.path.basename(pdf_path)}')
    os.remove(pdf_path)
    res = write_results_to_file(history)
    chatbot.append(("完成了吗?", res + "\n\nPDF文件也已经下载"))
    yield chatbot, history, msg
186
+
functional_crazy.py CHANGED
@@ -1,13 +1,8 @@
1
  from toolbox import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
2
 
3
- # UserVisibleLevel是过滤器参数。
4
- # 由于UI界面空间有限,所以通过这种方式决定UI界面中显示哪些插件
5
- # 默认函数插件 VisibleLevel 是 0
6
- # 当 UserVisibleLevel >= 函数插件的 VisibleLevel 时,该函数插件才会被显示出来
7
- UserVisibleLevel = 1
8
-
9
-
10
  def get_crazy_functionals():
 
 
11
  from crazy_functions.读文章写摘要 import 读文章写摘要
12
  from crazy_functions.生成函数注释 import 批量生成函数注释
13
  from crazy_functions.解析项目源代码 import 解析项目本身
@@ -52,33 +47,44 @@ def get_crazy_functionals():
52
  "Function": HotReload(高阶功能模板函数)
53
  },
54
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- # VisibleLevel=1 经过测试,但功能上距离达到完美状态还差一点点
57
- if UserVisibleLevel >= 1:
58
- from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
59
- from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
60
- from crazy_functions.总结word文档 import 总结word文档
61
  function_plugins.update({
62
- "[仅供开发调试] 批量总结PDF文档": {
63
- "Color": "stop",
64
- "Function": HotReload(批量总结PDF文档) # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
65
- },
66
- "[仅供开发调试] 批量总结PDF文档pdfminer": {
67
  "Color": "stop",
68
  "AsButton": False, # 加入下拉菜单中
69
- "Function": HotReload(批量总结PDF文档pdfminer)
70
- },
71
- "[仅供开发调试] 批量总结Word文档": {
72
- "Color": "stop",
73
- "Function": HotReload(总结word文档)
74
- },
75
  })
 
 
 
76
 
77
- # VisibleLevel=2 尚未充分测试的函数插件,放在这里
78
- if UserVisibleLevel >= 2:
79
- function_plugins.update({
80
- })
81
 
 
82
  return function_plugins
83
 
84
 
 
1
  from toolbox import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
2
 
 
 
 
 
 
 
 
3
  def get_crazy_functionals():
4
+ ###################### 第一组插件 ###########################
5
+ # [第一组插件]: 最早期编写的项目插件和一些demo
6
  from crazy_functions.读文章写摘要 import 读文章写摘要
7
  from crazy_functions.生成函数注释 import 批量生成函数注释
8
  from crazy_functions.解析项目源代码 import 解析项目本身
 
47
  "Function": HotReload(高阶功能模板函数)
48
  },
49
  }
50
+ ###################### 第二组插件 ###########################
51
+ # [第二组插件]: 经过充分测试,但功能上距离达到完美状态还差一点点
52
+ from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
53
+ from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
54
+ from crazy_functions.总结word文档 import 总结word文档
55
+ function_plugins.update({
56
+ "[仅供开发调试] 批量总结PDF文档": {
57
+ "Color": "stop",
58
+ "Function": HotReload(批量总结PDF文档) # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
59
+ },
60
+ "[仅供开发调试] 批量总结PDF文档pdfminer": {
61
+ "Color": "stop",
62
+ "AsButton": False, # 加入下拉菜单中
63
+ "Function": HotReload(批量总结PDF文档pdfminer)
64
+ },
65
+ "[仅供开发调试] 批量总结Word文档": {
66
+ "Color": "stop",
67
+ "Function": HotReload(总结word文档)
68
+ },
69
+ })
70
 
71
+ ###################### 第三组插件 ###########################
72
+ # [第三组插件]: 尚未充分测试的函数插件,放在这里
73
+ try:
74
+ from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要
 
75
  function_plugins.update({
76
+ "下载arxiv论文并翻译摘要": {
 
 
 
 
77
  "Color": "stop",
78
  "AsButton": False, # 加入下拉菜单中
79
+ "Function": HotReload(下载arxiv论文并翻译摘要)
80
+ }
 
 
 
 
81
  })
82
+ except Exception as err:
83
+ print(f'[下载arxiv论文并翻译摘要] 插件导入失败 {str(err)}')
84
+
85
 
 
 
 
 
86
 
87
+ ###################### 第n组插件 ###########################
88
  return function_plugins
89
 
90
 
main.py CHANGED
@@ -119,7 +119,7 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as de
119
  dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt] )
120
  # 随变按钮的回调函数注册
121
  def route(k, *args, **kwargs):
122
- if k in [r"打开插件列表", r"先从插件列表中选择"]: return
123
  yield from crazy_fns[k]["Function"](*args, **kwargs)
124
  click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo)
125
  click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
 
119
  dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt] )
120
  # 随变按钮的回调函数注册
121
  def route(k, *args, **kwargs):
122
+ if k in [r"打开插件列表", r"请先从插件列表中选择"]: return
123
  yield from crazy_fns[k]["Function"](*args, **kwargs)
124
  click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo)
125
  click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])