Spaces: wangrongsheng/ChatPaper (Running)

wangrongsheng committed · Commit 1328244 · Parent(s): 757bf4e

Upload app.py

app.py CHANGED
@@ -13,8 +13,7 @@ import gradio
 import markdown
 import json
 import tiktoken
-
-from optimizeOpenAI import chatPaper
+
 def parse_text(text):
     lines = text.split("\n")
     for i,line in enumerate(lines):
@@ -31,47 +30,26 @@ def parse_text(text):
         lines[i] = '<br/>'+line.replace(" ", " ")
     return "".join(lines)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-valid_api_keys = []
+def get_response(system, context, myKey, raw = False):
+    openai.api_key = myKey
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[system, *context],
+    )
+    openai.api_key = ""
+    if raw:
+        return response
+    else:
+        message = response["choices"][0]["message"]["content"]
+        message_with_stats = f'{message}'
+        return message, parse_text(message_with_stats)
 
-def api_key_check(api_key):
+def valid_apikey(api_key):
     try:
-        chat = chatPaper(api_keys=[api_key])
-        if chat.check_api_available():
-            return api_key
-        else:
-            return None
+        get_response({"role": "system", "content": "You are a helpful assistant."}, [{"role": "user", "content": "test"}], api_key)
+        return "可用的api-key"
     except:
-        return None
-
-def valid_apikey(api_keys):
-    api_keys = api_keys.replace(' ', '')
-    api_key_list = api_keys.split(',')
-    print(api_key_list)
-    global valid_api_keys
-    with concurrent.futures.ThreadPoolExecutor() as executor:
-        future_results = {executor.submit(api_key_check, api_key): api_key for api_key in api_key_list}
-        for future in concurrent.futures.as_completed(future_results):
-            result = future.result()
-            if result:
-                valid_api_keys.append(result)
-    if len(valid_api_keys) > 0:
-        return "有效的api-key一共有{}个,分别是:{}, 现在可以提交你的paper".format(len(valid_api_keys), valid_api_keys)
-    return "无效的api-key"
-
+        return "无效的api-key"
 
 class Paper:
     def __init__(self, path, title='', url='', abs='', authers=[], sl=[]):
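Note: this hunk replaces the pooled key check (api_key_check fanned out over a ThreadPoolExecutor plus the global valid_api_keys list) with a single-key probe that fires one throwaway completion. A usage sketch of the two new helpers, assuming the pre-1.0 openai SDK (which still exposes openai.ChatCompletion) and a placeholder key:

# Sketch only: valid_apikey/get_response are the helpers added above;
# "sk-placeholder" stands in for a real key.
system = {"role": "system", "content": "You are a helpful assistant."}
context = [{"role": "user", "content": "Reply with one word."}]

if valid_apikey("sk-placeholder") == "可用的api-key":
    message, rendered = get_response(system, context, "sk-placeholder")
    print(rendered)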
@@ -325,9 +303,8 @@ class Reader:
     def __init__(self, key_word='', query='', filter_keys='',
                  root_path='./',
                  gitee_key='',
-                 sort=arxiv.SortCriterion.SubmittedDate, user_name='defualt', language='cn', api_keys=[], model_name="gpt-3.5-turbo", p=1.0, temperature=1.0):
-        self.api_keys = api_keys
-        self.chatPaper = chatPaper(api_keys=self.api_keys, apiTimeInterval=10, temperature=temperature, top_p=p, model_name=model_name)  # wrapper around the OpenAI API
+                 sort=arxiv.SortCriterion.SubmittedDate, user_name='defualt', language='cn', key='', model_name="gpt-3.5-turbo", p=1.0, temperature=1.0):
+        self.key = str(key)  # OpenAI key
         self.user_name = user_name  # reader name
         self.key_word = key_word  # keywords the reader is interested in
         self.query = query  # the reader's search query
@@ -458,7 +435,7 @@ class Reader:
 
         return image_url
 
-    def summary_with_chat(self, paper_list):
+    def summary_with_chat(self, paper_list, key, model_name, p, temperature):
         htmls = []
         utoken = 0
         ctoken = 0
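Note: with this signature change the Reader no longer holds credentials; the key, model, and sampling settings travel with each call instead. A minimal calling sketch mirroring how upload_pdf further down drives it (the path and key are placeholders):

paper_list = [Paper(path="paper.pdf", sl=["Abstract", "Introduction", "Conclusion"])]
reader = Reader()
sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key="sk-placeholder",
                                          model_name="gpt-3.5-turbo", p=1.0, temperature=1.0)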
@@ -474,7 +451,7 @@
             text += list(paper.section_text_dict.values())[0]
             #max_token = 2500 * 4
             #text = text[:max_token]
-            chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text)
+            chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
             htmls.append(chat_summary_text)
 
             # TODO: insert the paper's largest image into the md document; this could be made smarter:
@@ -492,7 +469,7 @@
             # methods
             method_text += paper.section_text_dict[method_key]
             text = summary_text + "\n<Methods>:\n" + method_text
-            chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text)
+            chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
             htmls.append(chat_method_text)
         else:
             chat_method_text = ''
@@ -515,7 +492,7 @@
             text = summary_text + "\n <Conclusion>:\n" + conclusion_text
         else:
             text = summary_text
-        chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text)
+        chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
         htmls.append(chat_conclusion_text)
         htmls.append("\n")
         # token accounting
@@ -530,20 +507,24 @@
             "cost": str(cost),
         }
         md_text = "\n".join(htmls)
+
         return markdown.markdown(md_text), pos_count
 
 
     @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                     stop=tenacity.stop_after_attempt(5),
                     reraise=True)
-    def chat_conclusion(self, text):
+    def chat_conclusion(self, text, key, model_name, p, temperature):
+        openai.api_key = key
         conclusion_prompt_token = 650
         text_token = len(self.encoding.encode(text))
         clip_text_index = int(len(text)*(self.max_token_num-conclusion_prompt_token)/text_token)
-        clip_text = text[:clip_text_index]
-
-
-
+        clip_text = text[:clip_text_index]
+
+        messages=[
+            {"role": "system", "content": "You are a reviewer in the field of ["+self.key_word+"] and you need to critically review this article"},  # chatgpt role
+            {"role": "assistant", "content": "This is the <summary> and <conclusion> part of an English literature, where <summary> you have already summarized, but <conclusion> part, I need your help to summarize the following questions:"+clip_text},  # background knowledge; cf. the OpenReview review process
+            {"role": "user", "content": """
 8. Make the following summary.Be sure to use Chinese answers (proper nouns need to be marked in English).
 - (1):What is the significance of this piece of work?
 - (2):Summarize the strengths and weaknesses of this article in three dimensions: innovation point, performance, and workload.
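Note: the clipping line above budgets the prompt by characters, scaled by the measured token density of the text. A worked example of the arithmetic (self.max_token_num is initialized outside this diff; 4096 here is an assumed value and the other numbers are made up for illustration):

max_token_num = 4096              # assumed model context budget (not shown in this diff)
conclusion_prompt_token = 650     # reserved for the fixed prompt, as above
text_token = 6000                 # tiktoken count of the full text
text_chars = 24000                # character count of the full text
clip_text_index = int(text_chars * (max_token_num - conclusion_prompt_token) / text_token)
# int(24000 * 3446 / 6000) == 13784 characters survive the clip
print(clip_text_index)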
@@ -554,26 +535,42 @@
 - (2):Innovation point: xxx; Performance: xxx; Workload: xxx;\n
 
 Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, do not repeat the content of the previous <summary>, the value of the use of the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed, ....... means fill in according to the actual requirements, if not, you can not write.
-        """
-
-
-
-
+        """},
+        ]
+        response = openai.ChatCompletion.create(
+            model=model_name,
+            # the prompt should be rewritten in English to use fewer tokens
+            messages=messages,
+            temperature=temperature,  # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+            top_p=p  # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
         )
-
-
+
+        result = ''
+        for choice in response.choices:
+            result += choice.message.content
+        #print("prompt_token_used:", response.usage.prompt_tokens,
+        #      "completion_token_used:", response.usage.completion_tokens,
+        #      "total_token_used:", response.usage.total_tokens)
+        #print("response_time:", response.response_ms/1000.0, 's')
+        usage_token = response.usage.prompt_tokens
+        com_token = response.usage.completion_tokens
+        total_token = response.usage.total_tokens
+
+        return result, usage_token, com_token, total_token
 
     @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                     stop=tenacity.stop_after_attempt(5),
                     reraise=True)
-    def chat_method(self, text):
+    def chat_method(self, text, key, model_name, p, temperature):
+        openai.api_key = key
         method_prompt_token = 650
         text_token = len(self.encoding.encode(text))
         clip_text_index = int(len(text)*(self.max_token_num-method_prompt_token)/text_token)
-        clip_text = text[:clip_text_index]
-
-
-
+        clip_text = text[:clip_text_index]
+        messages=[
+            {"role": "system", "content": "You are a researcher in the field of ["+self.key_word+"] who is good at summarizing papers using concise statements"},  # chatgpt role
+            {"role": "assistant", "content": "This is the <summary> and <Method> part of an English document, where <summary> you have summarized, but the <Methods> part, I need your help to read and summarize the following questions."+clip_text},  # background knowledge
+            {"role": "user", "content": """
 7. Describe in detail the methodological idea of this article. Be sure to use Chinese answers (proper nouns need to be marked in English). For example, its steps are.
 - (1):...
 - (2):...
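Note: all three chat_* methods keep the same tenacity policy, unchanged by this commit. Shown standalone, it retries a transient failure with exponential backoff capped between 4 and 10 seconds, at most 5 attempts, then re-raises the last exception:

import tenacity

@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                stop=tenacity.stop_after_attempt(5),
                reraise=True)
def flaky_request():
    # any exception raised here triggers the backoff-and-retry cycle
    return "ok"

print(flaky_request())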
@@ -587,26 +584,42 @@
 ....... \n\n
 
 Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, do not repeat the content of the previous <summary>, the value of the use of the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed, ....... means fill in according to the actual requirements, if not, you can not write.
-        """
-
-
-
-
+        """},
+        ]
+        response = openai.ChatCompletion.create(
+            model=model_name,
+            messages=messages,
+            temperature=temperature,  # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+            top_p=p  # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
         )
-
-
+
+        result = ''
+        for choice in response.choices:
+            result += choice.message.content
+        print("method_result:\n", result)
+        #print("prompt_token_used:", response.usage.prompt_tokens,
+        #      "completion_token_used:", response.usage.completion_tokens,
+        #      "total_token_used:", response.usage.total_tokens)
+        #print("response_time:", response.response_ms/1000.0, 's')
+        usage_token = response.usage.prompt_tokens
+        com_token = response.usage.completion_tokens
+        total_token = response.usage.total_tokens
+
+        return result, usage_token, com_token, total_token
 
     @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                     stop=tenacity.stop_after_attempt(5),
                     reraise=True)
-    def chat_summary(self, text):
+    def chat_summary(self, text, key, model_name, p, temperature):
+        openai.api_key = key
         summary_prompt_token = 1000
         text_token = len(self.encoding.encode(text))
         clip_text_index = int(len(text)*(self.max_token_num-summary_prompt_token)/text_token)
         clip_text = text[:clip_text_index]
-
-
-
+        messages=[
+            {"role": "system", "content": "You are a researcher in the field of ["+self.key_word+"] who is good at summarizing papers using concise statements"},
+            {"role": "assistant", "content": "This is the title, author, link, abstract and introduction of an English document. I need your help to read and summarize the following questions: "+clip_text},
+            {"role": "user", "content": """
 1. Mark the title of the paper (with Chinese translation)
 2. list all the authors' names (use English)
 3. mark the first author's affiliation (output Chinese translation only)
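Note: text_token comes from the tiktoken encoder held on self.encoding, whose initialization sits outside this diff. A sketch of that counting (the "gpt2" encoding name is an assumption for illustration):

import tiktoken

encoding = tiktoken.get_encoding("gpt2")
text_token = len(encoding.encode("This is the abstract of a paper."))
print(text_token)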
@@ -630,14 +643,29 @@
 - (4):xxx.\n\n
 
 Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, do not have too much repetitive information, numerical values using the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed.
-        """
-
-
-
-
+        """},
+        ]
+
+        response = openai.ChatCompletion.create(
+            model=model_name,
+            messages=messages,
+            temperature=temperature,  # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+            top_p=p  # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
         )
-
-
+
+        result = ''
+        for choice in response.choices:
+            result += choice.message.content
+        print("summary_result:\n", result)
+        #print("prompt_token_used:", response.usage.prompt_tokens,
+        #      "completion_token_used:", response.usage.completion_tokens,
+        #      "total_token_used:", response.usage.total_tokens)
+        #print("response_time:", response.response_ms/1000.0, 's')
+        usage_token = response.usage.prompt_tokens
+        com_token = response.usage.completion_tokens
+        total_token = response.usage.total_tokens
+
+        return result, usage_token, com_token, total_token
 
     def export_to_markdown(self, text, file_name, mode='w'):
         # use the markdown module's convert method to turn the text into HTML
@@ -653,16 +681,10 @@ class Reader:
     print(f"Query: {self.query}")
     print(f"Sort: {self.sort}")
 
-def upload_pdf(api_keys, text, model_name, p, temperature, file):
+def upload_pdf(key, text, model_name, p, temperature, file):
     # check that none of the inputs is empty
-    api_key_list = []
-    if api_keys:
-        api_key_list = api_keys.split(',')
-    elif not api_keys and valid_api_keys!=[]:
-        api_key_list = valid_api_keys
-    if not text or not file or not api_key_list:
+    if not key or not text or not file:
         return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
-
     # check the PDF file
     #if file and file.name.split(".")[-1].lower() != "pdf":
     #    return '请勿上传非 PDF 文件!'
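Note: the handler's contract shrinks from "comma-separated keys, with a fallback to the globally validated pool" to "exactly one key, and all three inputs required". In a sketch (the key and file path are placeholders):

result = upload_pdf(key="sk-placeholder", text="Abstract,Introduction,Conclusion",
                    model_name="gpt-3.5-turbo", p=1.0, temperature=1.0, file="paper.pdf")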
@@ -670,29 +692,22 @@ def upload_pdf(api_keys, text, model_name, p, temperature, file):
     section_list = text.split(',')
     paper_list = [Paper(path=file, sl=section_list)]
     # create a Reader object
-
-
-    sum_info, cost = reader.summary_with_chat(paper_list=paper_list) # type: ignore
+    reader = Reader()
+    sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key, model_name=model_name, p=p, temperature=temperature)
     return cost, sum_info
 
 api_title = "api-key可用验证"
 api_description = '''<div align='left'>
-
 <img src='https://visitor-badge.laobi.icu/badge?page_id=https://huggingface.co/spaces/wangrongsheng/ChatPaper'>
-
 <img align='right' src='https://i.328888.xyz/2023/03/12/vH9dU.png' width="150">
-
 Use ChatGPT to summary the papers.Star our Github [🌟ChatPaper](https://github.com/kaixindelele/ChatPaper) .
-
 💗如果您觉得我们的项目对您有帮助,还请您给我们一些鼓励!💗
-
 🔴请注意:千万不要用于严肃的学术场景,只能用于论文阅读前的初筛!
-
 </div>
 '''
 
 api_input = [
-    gradio.inputs.Textbox(label="请输入你的
+    gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password')
 ]
 api_gui = gradio.Interface(fn=valid_apikey, inputs=api_input, outputs="text", title=api_title, description=api_description)
@@ -700,26 +715,20 @@ api_gui = gradio.Interface(fn=valid_apikey, inputs=api_input, outputs="text", title=api_title, description=api_description)
 title = "ChatPaper"
 # description
 description = '''<div align='left'>
-
 <img src='https://visitor-badge.laobi.icu/badge?page_id=https://huggingface.co/spaces/wangrongsheng/ChatPaper'>
-
 <img align='right' src='https://i.328888.xyz/2023/03/12/vH9dU.png' width="150">
-
 Use ChatGPT to summary the papers.Star our Github [🌟ChatPaper](https://github.com/kaixindelele/ChatPaper) .
-
 💗如果您觉得我们的项目对您有帮助,还请您给我们一些鼓励!💗
-
 🔴请注意:千万不要用于严肃的学术场景,只能用于论文阅读前的初筛!
-
 </div>
 '''
 # build the Gradio interface
 ip = [
-    gradio.inputs.Textbox(label="请输入你的
+    gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password'),
     gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
     gradio.inputs.Radio(choices=["gpt-3.5-turbo", "gpt-3.5-turbo-0301"], default="gpt-3.5-turbo", label="Select model"),
     gradio.inputs.Slider(minimum=-0, maximum=1.0, default=1.0, step=0.05, label="Top-p (nucleus sampling)"),
-    gradio.inputs.Slider(minimum=-0, maximum=5.0, default=0
+    gradio.inputs.Slider(minimum=-0, maximum=5.0, default=1.0, step=0.1, label="Temperature"),
    gradio.inputs.File(label="请上传论文PDF(必填)")
 ]
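Note: the diff ends at the input list; how ip is wired into a second interface and how the app launches are outside this commit. Presumably the assembly looks something like the following sketch (the names chat_paper_gui, demo, the outputs list, and the tab labels are all hypothetical):

chat_paper_gui = gradio.Interface(fn=upload_pdf, inputs=ip, outputs=["json", "html"],
                                  title=title, description=description)
demo = gradio.TabbedInterface([api_gui, chat_paper_gui], ["Validate api-key", "ChatPaper"])
demo.launch()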