qingxu99 committed on
Commit
0b3f7b8
·
1 Parent(s): e8cf757

format file

Browse files
crazy_functions/crazy_utils.py CHANGED
@@ -1,31 +1,32 @@
1
 
2
 
3
-
4
  def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_p, temperature, chatbot, history, sys_prompt, refresh_interval=0.2):
5
  import time
6
  from concurrent.futures import ThreadPoolExecutor
7
  from request_llm.bridge_chatgpt import predict_no_ui_long_connection
8
  # 用户反馈
9
- chatbot.append([inputs_show_user, ""]); msg = '正常'
 
10
  yield chatbot, [], msg
11
  executor = ThreadPoolExecutor(max_workers=16)
12
  mutable = ["", time.time()]
13
  future = executor.submit(lambda:
14
- predict_no_ui_long_connection(inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable)
15
- )
 
16
  while True:
17
  # yield一次以刷新前端页面
18
  time.sleep(refresh_interval)
19
  # “喂狗”(看门狗)
20
  mutable[1] = time.time()
21
- if future.done(): break
22
- chatbot[-1] = [chatbot[-1][0], mutable[0]]; msg = "正常"
 
 
23
  yield chatbot, [], msg
24
  return future.result()
25
 
26
 
27
-
28
-
29
  def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inputs_array, inputs_show_user_array, top_p, temperature, chatbot, history_array, sys_prompt_array, refresh_interval=0.2, max_workers=10, scroller_max_len=30):
30
  import time
31
  from concurrent.futures import ThreadPoolExecutor
@@ -35,34 +36,46 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
35
  executor = ThreadPoolExecutor(max_workers=max_workers)
36
  n_frag = len(inputs_array)
37
  # 用户反馈
38
- chatbot.append(["请开始多线程操作。", ""]); msg = '正常'
 
39
  yield chatbot, [], msg
40
  # 异步原子
41
  mutable = [["", time.time()] for _ in range(n_frag)]
 
42
  def _req_gpt(index, inputs, history, sys_prompt):
43
  gpt_say = predict_no_ui_long_connection(
44
- inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[index]
 
45
  )
46
  return gpt_say
47
  # 异步任务开始
48
- futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
 
49
  cnt = 0
50
  while True:
51
  # yield一次以刷新前端页面
52
- time.sleep(refresh_interval); cnt += 1
 
53
  worker_done = [h.done() for h in futures]
54
- if all(worker_done): executor.shutdown(); break
 
 
55
  # 更好的UI视觉效果
56
  observe_win = []
57
  # 每个线程都要“喂狗”(看门狗)
58
- for thread_index, _ in enumerate(worker_done): mutable[thread_index][1] = time.time()
 
59
  # 在前端打印些好玩的东西
60
- for thread_index, _ in enumerate(worker_done):
61
  print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
62
- replace('\n','').replace('```','...').replace(' ','.').replace('<br/>','.....').replace('$','.')+"`... ]"
 
63
  observe_win.append(print_something_really_funny)
64
- stat_str = ''.join([f'执行中: {obs}\n\n' if not done else '已完成\n\n' for done, obs in zip(worker_done, observe_win)])
65
- chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt%10+1))]; msg = "正常"
 
 
 
66
  yield chatbot, [], msg
67
  # 异步任务结束
68
  gpt_response_collection = []
@@ -72,23 +85,23 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
72
  return gpt_response_collection
73
 
74
 
75
-
76
-
77
  def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
78
- def cut(txt_tocut, must_break_at_empty_line): # 递归
79
  if get_token_fn(txt_tocut) <= limit:
80
  return [txt_tocut]
81
  else:
82
  lines = txt_tocut.split('\n')
83
- estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
84
  estimated_line_cut = int(estimated_line_cut)
85
  for cnt in reversed(range(estimated_line_cut)):
86
- if must_break_at_empty_line:
87
- if lines[cnt] != "": continue
 
88
  print(cnt)
89
  prev = "\n".join(lines[:cnt])
90
  post = "\n".join(lines[cnt:])
91
- if get_token_fn(prev) < limit: break
 
92
  if cnt == 0:
93
  print('what the fuck ?')
94
  raise RuntimeError("存在一行极长的文本!")
@@ -102,22 +115,25 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
102
  except RuntimeError:
103
  return cut(txt, must_break_at_empty_line=False)
104
 
 
105
  def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
106
- def cut(txt_tocut, must_break_at_empty_line): # 递归
107
  if get_token_fn(txt_tocut) <= limit:
108
  return [txt_tocut]
109
  else:
110
  lines = txt_tocut.split('\n')
111
- estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
112
  estimated_line_cut = int(estimated_line_cut)
113
  cnt = 0
114
  for cnt in reversed(range(estimated_line_cut)):
115
- if must_break_at_empty_line:
116
- if lines[cnt] != "": continue
 
117
  print(cnt)
118
  prev = "\n".join(lines[:cnt])
119
  post = "\n".join(lines[cnt:])
120
- if get_token_fn(prev) < limit: break
 
121
  if cnt == 0:
122
  # print('what the fuck ? 存在一行极长的文本!')
123
  raise RuntimeError("存在一行极长的文本!")
@@ -135,4 +151,3 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
135
  # 这个中文的句号是故意的,作为一个标识而存在
136
  res = cut(txt.replace('.', '。\n'), must_break_at_empty_line=False)
137
  return [r.replace('。\n', '.') for r in res]
138
-
 
1
 
2
 
 
3
  def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_p, temperature, chatbot, history, sys_prompt, refresh_interval=0.2):
4
  import time
5
  from concurrent.futures import ThreadPoolExecutor
6
  from request_llm.bridge_chatgpt import predict_no_ui_long_connection
7
  # 用户反馈
8
+ chatbot.append([inputs_show_user, ""])
9
+ msg = '正常'
10
  yield chatbot, [], msg
11
  executor = ThreadPoolExecutor(max_workers=16)
12
  mutable = ["", time.time()]
13
  future = executor.submit(lambda:
14
+ predict_no_ui_long_connection(
15
+ inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable)
16
+ )
17
  while True:
18
  # yield一次以刷新前端页面
19
  time.sleep(refresh_interval)
20
  # “喂狗”(看门狗)
21
  mutable[1] = time.time()
22
+ if future.done():
23
+ break
24
+ chatbot[-1] = [chatbot[-1][0], mutable[0]]
25
+ msg = "正常"
26
  yield chatbot, [], msg
27
  return future.result()
28
 
29
 
 
 
30
  def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inputs_array, inputs_show_user_array, top_p, temperature, chatbot, history_array, sys_prompt_array, refresh_interval=0.2, max_workers=10, scroller_max_len=30):
31
  import time
32
  from concurrent.futures import ThreadPoolExecutor
 
36
  executor = ThreadPoolExecutor(max_workers=max_workers)
37
  n_frag = len(inputs_array)
38
  # 用户反馈
39
+ chatbot.append(["请开始多线程操作。", ""])
40
+ msg = '正常'
41
  yield chatbot, [], msg
42
  # 异步原子
43
  mutable = [["", time.time()] for _ in range(n_frag)]
44
+
45
  def _req_gpt(index, inputs, history, sys_prompt):
46
  gpt_say = predict_no_ui_long_connection(
47
+ inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[
48
+ index]
49
  )
50
  return gpt_say
51
  # 异步任务开始
52
+ futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
53
+ range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
54
  cnt = 0
55
  while True:
56
  # yield一次以刷新前端页面
57
+ time.sleep(refresh_interval)
58
+ cnt += 1
59
  worker_done = [h.done() for h in futures]
60
+ if all(worker_done):
61
+ executor.shutdown()
62
+ break
63
  # 更好的UI视觉效果
64
  observe_win = []
65
  # 每个线程都要“喂狗”(看门狗)
66
+ for thread_index, _ in enumerate(worker_done):
67
+ mutable[thread_index][1] = time.time()
68
  # 在前端打印些好玩的东西
69
+ for thread_index, _ in enumerate(worker_done):
70
  print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
71
+ replace('\n', '').replace('```', '...').replace(
72
+ ' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
73
  observe_win.append(print_something_really_funny)
74
+ stat_str = ''.join([f'执行中: {obs}\n\n' if not done else '已完成\n\n' for done, obs in zip(
75
+ worker_done, observe_win)])
76
+ chatbot[-1] = [chatbot[-1][0],
77
+ f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
78
+ msg = "正常"
79
  yield chatbot, [], msg
80
  # 异步任务结束
81
  gpt_response_collection = []
 
85
  return gpt_response_collection
86
 
87
 
 
 
88
  def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
89
+ def cut(txt_tocut, must_break_at_empty_line): # 递归
90
  if get_token_fn(txt_tocut) <= limit:
91
  return [txt_tocut]
92
  else:
93
  lines = txt_tocut.split('\n')
94
+ estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
95
  estimated_line_cut = int(estimated_line_cut)
96
  for cnt in reversed(range(estimated_line_cut)):
97
+ if must_break_at_empty_line:
98
+ if lines[cnt] != "":
99
+ continue
100
  print(cnt)
101
  prev = "\n".join(lines[:cnt])
102
  post = "\n".join(lines[cnt:])
103
+ if get_token_fn(prev) < limit:
104
+ break
105
  if cnt == 0:
106
  print('what the fuck ?')
107
  raise RuntimeError("存在一行极长的文本!")
 
115
  except RuntimeError:
116
  return cut(txt, must_break_at_empty_line=False)
117
 
118
+
119
  def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
120
+ def cut(txt_tocut, must_break_at_empty_line): # 递归
121
  if get_token_fn(txt_tocut) <= limit:
122
  return [txt_tocut]
123
  else:
124
  lines = txt_tocut.split('\n')
125
+ estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
126
  estimated_line_cut = int(estimated_line_cut)
127
  cnt = 0
128
  for cnt in reversed(range(estimated_line_cut)):
129
+ if must_break_at_empty_line:
130
+ if lines[cnt] != "":
131
+ continue
132
  print(cnt)
133
  prev = "\n".join(lines[:cnt])
134
  post = "\n".join(lines[cnt:])
135
+ if get_token_fn(prev) < limit:
136
+ break
137
  if cnt == 0:
138
  # print('what the fuck ? 存在一行极长的文本!')
139
  raise RuntimeError("存在一行极长的文本!")
 
151
  # 这个中文的句号是故意的,作为一个标识而存在
152
  res = cut(txt.replace('.', '。\n'), must_break_at_empty_line=False)
153
  return [r.replace('。\n', '.') for r in res]
 
crazy_functions/批量翻译PDF文档_多线程.py CHANGED
@@ -2,6 +2,7 @@ from toolbox import CatchException, report_execption, write_results_to_file
2
  from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
3
  from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
4
 
 
5
  def read_and_clean_pdf_text(fp):
6
  """
7
  **输入参数说明**
@@ -20,7 +21,8 @@ def read_and_clean_pdf_text(fp):
20
  - 清除重复的换行
21
  - 将每个换行符替换为两个换行符,使每个段落之间有两个换行符分隔
22
  """
23
- import fitz, re
 
24
  import numpy as np
25
  # file_content = ""
26
  with fitz.open(fp) as doc:
@@ -31,10 +33,13 @@ def read_and_clean_pdf_text(fp):
31
  text_areas = page.get_text("dict") # 获取页面上的文本信息
32
 
33
  # 块元提取 for each word segment with in line for each line cross-line words for each block
34
- meta_txt.extend( [ " ".join(["".join( [wtf['text'] for wtf in l['spans'] ]) for l in t['lines'] ]).replace('- ','') for t in text_areas['blocks'] if 'lines' in t])
35
- meta_font.extend([ np.mean( [ np.mean([wtf['size'] for wtf in l['spans'] ]) for l in t['lines'] ]) for t in text_areas['blocks'] if 'lines' in t])
36
- if index==0:
37
- page_one_meta = [" ".join(["".join( [wtf['text'] for wtf in l['spans'] ]) for l in t['lines'] ]).replace('- ','') for t in text_areas['blocks'] if 'lines' in t]
 
 
 
38
 
39
  def 把字符太少的块清除为回车(meta_txt):
40
  for index, block_txt in enumerate(meta_txt):
@@ -61,8 +66,10 @@ def read_and_clean_pdf_text(fp):
61
  for _ in range(100):
62
  for index, block_txt in enumerate(meta_txt):
63
  if starts_with_lowercase_word(block_txt):
64
- if meta_txt[index-1]!='\n': meta_txt[index-1] += ' '
65
- else: meta_txt[index-1] = ''
 
 
66
  meta_txt[index-1] += meta_txt[index]
67
  meta_txt[index] = '\n'
68
  return meta_txt
@@ -72,13 +79,14 @@ def read_and_clean_pdf_text(fp):
72
  meta_txt = '\n'.join(meta_txt)
73
  # 清除重复的换行
74
  for _ in range(5):
75
- meta_txt = meta_txt.replace('\n\n','\n')
76
 
77
  # 换行 -> 双换行
78
  meta_txt = meta_txt.replace('\n', '\n\n')
79
 
80
  return meta_txt, page_one_meta
81
 
 
82
  @CatchException
83
  def 批量翻译PDF文档(txt, top_p, temperature, chatbot, history, sys_prompt, WEB_PORT):
84
  import glob
@@ -92,7 +100,8 @@ def 批量翻译PDF文档(txt, top_p, temperature, chatbot, history, sys_prompt,
92
 
93
  # 尝试导入依赖,如果缺少依赖,则给出安装建议
94
  try:
95
- import fitz, tiktoken
 
96
  except:
97
  report_execption(chatbot, history,
98
  a=f"解析项目: {txt}",
@@ -129,13 +138,8 @@ def 批量翻译PDF文档(txt, top_p, temperature, chatbot, history, sys_prompt,
129
  yield from 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, sys_prompt)
130
 
131
 
132
-
133
-
134
  def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, sys_prompt):
135
- import time
136
- import glob
137
  import os
138
- import fitz
139
  import tiktoken
140
  TOKEN_LIMIT_PER_FRAGMENT = 1600
141
  generated_conclusion_files = []
@@ -145,39 +149,44 @@ def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, histor
145
  # 递归地切割PDF文件
146
  from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
147
  enc = tiktoken.get_encoding("gpt2")
148
- get_token_num = lambda txt: len(enc.encode(txt))
149
  # 分解文本
150
- paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
151
  txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
152
  page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
153
  txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4)
154
  # 为了更好的效果,我们剥离Introduction之后的部分
155
- paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0]
 
156
  # 单线,获取文章meta信息
157
  paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
158
- inputs=f"以下是一篇学术论文的基础信息,请从中提取出“标题”、“收录会议或期刊”、“作者”、“摘要”、“编号”、“作者邮箱”这六个部分。请用markdown格式输出,最后用中文翻译摘要部分。请提取:{paper_meta}",
159
- inputs_show_user=f"请从{fp}中提取出“标题”、“收录会议或期刊”等基本信息。",
160
  top_p=top_p, temperature=temperature,
161
  chatbot=chatbot, history=[],
162
  sys_prompt="Your job is to collect information from materials。",
163
  )
164
  # 多线,翻译
165
  gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
166
- inputs_array = [f"以下是你需要翻译的文章段落:\n{frag}" for frag in paper_fragments],
167
- inputs_show_user_array = [f"" for _ in paper_fragments],
 
168
  top_p=top_p, temperature=temperature,
169
  chatbot=chatbot,
170
  history_array=[[paper_meta] for _ in paper_fragments],
171
- sys_prompt_array=["请你作为一个学术翻译,把整个段落翻译成中文,要求语言简洁,禁止重复输出原文。" for _ in paper_fragments],
172
- max_workers=16 # OpenAI所允许的最大并行过载
 
173
  )
174
 
175
  final = ["", paper_meta_info + '\n\n---\n\n---\n\n---\n\n']
176
  final.extend(gpt_response_collection)
177
  create_report_file_name = f"{os.path.basename(fp)}.trans.md"
178
  res = write_results_to_file(final, file_name=create_report_file_name)
179
- generated_conclusion_files.append(f'./gpt_log/{create_report_file_name}')
180
- chatbot.append((f"{fp}完成了吗?", res)); msg = "完成"
 
 
181
  yield chatbot, history, msg
182
 
183
  # 准备文件的下载
@@ -185,8 +194,10 @@ def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, histor
185
  for pdf_path in generated_conclusion_files:
186
  # 重命名文件
187
  rename_file = f'./gpt_log/总结论文-{os.path.basename(pdf_path)}'
188
- if os.path.exists(rename_file): os.remove(rename_file)
189
- shutil.copyfile(pdf_path, rename_file);
190
- if os.path.exists(pdf_path): os.remove(pdf_path)
 
 
191
  chatbot.append(("给出输出文件清单", str(generated_conclusion_files)))
192
- yield chatbot, history, msg
 
2
  from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
3
  from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
4
 
5
+
6
  def read_and_clean_pdf_text(fp):
7
  """
8
  **输入参数说明**
 
21
  - 清除重复的换行
22
  - 将每个换行符替换为两个换行符,使每个段落之间有两个换行符分隔
23
  """
24
+ import fitz
25
+ import re
26
  import numpy as np
27
  # file_content = ""
28
  with fitz.open(fp) as doc:
 
33
  text_areas = page.get_text("dict") # 获取页面上的文本信息
34
 
35
  # 块元提取 for each word segment with in line for each line cross-line words for each block
36
+ meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
37
+ '- ', '') for t in text_areas['blocks'] if 'lines' in t])
38
+ meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
39
+ for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
40
+ if index == 0:
41
+ page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
42
+ '- ', '') for t in text_areas['blocks'] if 'lines' in t]
43
 
44
  def 把字符太少的块清除为回车(meta_txt):
45
  for index, block_txt in enumerate(meta_txt):
 
66
  for _ in range(100):
67
  for index, block_txt in enumerate(meta_txt):
68
  if starts_with_lowercase_word(block_txt):
69
+ if meta_txt[index-1] != '\n':
70
+ meta_txt[index-1] += ' '
71
+ else:
72
+ meta_txt[index-1] = ''
73
  meta_txt[index-1] += meta_txt[index]
74
  meta_txt[index] = '\n'
75
  return meta_txt
 
79
  meta_txt = '\n'.join(meta_txt)
80
  # 清除重复的换行
81
  for _ in range(5):
82
+ meta_txt = meta_txt.replace('\n\n', '\n')
83
 
84
  # 换行 -> 双换行
85
  meta_txt = meta_txt.replace('\n', '\n\n')
86
 
87
  return meta_txt, page_one_meta
88
 
89
+
90
  @CatchException
91
  def 批量翻译PDF文档(txt, top_p, temperature, chatbot, history, sys_prompt, WEB_PORT):
92
  import glob
 
100
 
101
  # 尝试导入依赖,如果缺少依赖,则给出安装建议
102
  try:
103
+ import fitz
104
+ import tiktoken
105
  except:
106
  report_execption(chatbot, history,
107
  a=f"解析项目: {txt}",
 
138
  yield from 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, sys_prompt)
139
 
140
 
 
 
141
  def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, sys_prompt):
 
 
142
  import os
 
143
  import tiktoken
144
  TOKEN_LIMIT_PER_FRAGMENT = 1600
145
  generated_conclusion_files = []
 
149
  # 递归地切割PDF文件
150
  from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
151
  enc = tiktoken.get_encoding("gpt2")
152
+ def get_token_num(txt): return len(enc.encode(txt))
153
  # 分解文本
154
+ paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
155
  txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
156
  page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
157
  txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4)
158
  # 为了更好的效果,我们剥离Introduction之后的部分
159
+ paper_meta = page_one_fragments[0].split('introduction')[0].split(
160
+ 'Introduction')[0].split('INTRODUCTION')[0]
161
  # 单线,获取文章meta信息
162
  paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
163
+ inputs=f"以下是一篇学术论文的基础信息,请从中提取出“标题”、“收录会议或期刊”、“作者”、“摘要”、“编号”、“作者邮箱”这六个部分。请用markdown格式输出,最后用中文翻译摘要部分。请提取:{paper_meta}",
164
+ inputs_show_user=f"请从{fp}中提取出“标题”、“收录会议或期刊”等基本信息。",
165
  top_p=top_p, temperature=temperature,
166
  chatbot=chatbot, history=[],
167
  sys_prompt="Your job is to collect information from materials。",
168
  )
169
  # 多线,翻译
170
  gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
171
+ inputs_array=[
172
+ f"以下是你需要翻译的文章段落:\n{frag}" for frag in paper_fragments],
173
+ inputs_show_user_array=[f"" for _ in paper_fragments],
174
  top_p=top_p, temperature=temperature,
175
  chatbot=chatbot,
176
  history_array=[[paper_meta] for _ in paper_fragments],
177
+ sys_prompt_array=[
178
+ "请你作为一个学术翻译,把整个段落翻译成中文,要求语言简洁,禁止重复输出原文。" for _ in paper_fragments],
179
+ max_workers=16 # OpenAI所允许的最大并行过载
180
  )
181
 
182
  final = ["", paper_meta_info + '\n\n---\n\n---\n\n---\n\n']
183
  final.extend(gpt_response_collection)
184
  create_report_file_name = f"{os.path.basename(fp)}.trans.md"
185
  res = write_results_to_file(final, file_name=create_report_file_name)
186
+ generated_conclusion_files.append(
187
+ f'./gpt_log/{create_report_file_name}')
188
+ chatbot.append((f"{fp}完成了吗?", res))
189
+ msg = "完成"
190
  yield chatbot, history, msg
191
 
192
  # 准备文件的下载
 
194
  for pdf_path in generated_conclusion_files:
195
  # 重命名文件
196
  rename_file = f'./gpt_log/总结论文-{os.path.basename(pdf_path)}'
197
+ if os.path.exists(rename_file):
198
+ os.remove(rename_file)
199
+ shutil.copyfile(pdf_path, rename_file)
200
+ if os.path.exists(pdf_path):
201
+ os.remove(pdf_path)
202
  chatbot.append(("给出输出文件清单", str(generated_conclusion_files)))
203
+ yield chatbot, history, msg