|
import numpy as np |
|
import os |
|
import re |
|
import jieba |
|
from io import BytesIO |
|
import datetime |
|
import time |
|
import openai, tenacity |
|
import argparse |
|
import configparser |
|
import json |
|
import tiktoken |
|
import PyPDF2 |
|
import gradio |
|
|
|
|
|
def contains_chinese(text):
    """Return True if *text* has at least one character in U+4E00..U+9FFF.

    Only that single code-point range is tested; any other character
    (including an empty string) yields False.
    """
    return any('\u4e00' <= char <= '\u9fff' for char in text)
|
|
|
def insert_sentence(text, sentence, interval):
    """Insert *sentence* after every *interval* words of each line of *text*.

    Each line is processed independently (the word counter restarts per
    line).  Lines for which ``contains_chinese`` is true are tokenised with
    ``jieba.cut`` and re-joined without a separator; all other lines are
    split on whitespace and re-joined with single spaces.

    Args:
        text: Input text; lines are separated by ``'\\n'``.
        sentence: The string to insert.
        interval: Number of words between insertions.

    Returns:
        The rebuilt text, lines joined by ``'\\n'``.
    """
    rebuilt_lines = []

    for raw_line in text.split('\n'):
        # Pick the tokeniser and the matching glue string for this line.
        if contains_chinese(raw_line):
            tokens, glue = list(jieba.cut(raw_line)), ''
        else:
            tokens, glue = raw_line.split(), ' '

        pieces = []
        for position, token in enumerate(tokens, start=1):
            pieces.append(token)
            # Every interval-th token is followed by the inserted sentence.
            if position % interval == 0:
                pieces.append(sentence)

        rebuilt_lines.append(glue.join(pieces))

    return '\n'.join(rebuilt_lines)
|
|
|
|
|
class Reviewer:
    """Produce an LLM-written review of a PDF paper.

    The PDF text is extracted with PyPDF2, cut down to a token budget and
    sent to the OpenAI chat-completion endpoint together with the
    caller-supplied review instructions.
    """

    def __init__(self, api, review_format, paper_pdf, language):
        """Store the request parameters.

        Args:
            api: OpenAI API key; assigned to ``openai.api_key`` on each request.
            review_format: Requirements/format text appended to the system prompt.
            paper_pdf: PDF content as a bytes-like object (wrapped in ``BytesIO``
                when parsed).
            language: Language name interpolated into both prompt messages.
        """
        self.api = api
        self.review_format = review_format

        self.language = language
        self.paper_pdf = paper_pdf
        # Token budget used by chat_review to decide how much paper text to
        # send; a fixed reserve for the prompt is subtracted there.
        self.max_token_num = 14097
        # NOTE(review): the request below targets gpt-3.5-turbo-16k, whose
        # tokenizer presumably differs from "gpt2", so counts are only an
        # estimate -- confirm before relying on the budget being exact.
        self.encoding = tiktoken.get_encoding("gpt2")

    def review_by_chatgpt(self, paper_list):
        """Extract the paper text and return ``(review_text, tokens_used)``.

        Args:
            paper_list: Unused; kept for caller compatibility. The paper is
                always read from ``self.paper_pdf``.
        """
        text = self.extract_chapter(self.paper_pdf)
        chat_review_text, total_token_used = self.chat_review(text=text)
        return chat_review_text, total_token_used

    # NOTE: failures of the API call are caught inside the method and turned
    # into an error message, so this retry policy does not re-attempt failed
    # requests; it only applies to exceptions raised outside the inner
    # try/except blocks.
    @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                    stop=tenacity.stop_after_attempt(5),
                    reraise=True)
    def chat_review(self, text):
        """Send the (truncated) paper text to the chat model.

        Args:
            text: Full extracted paper text.

        Returns:
            A ``(result, usage)`` tuple. On success ``result`` is the model
            output followed by an ethics statement and ``usage`` is the total
            token count reported by the API. On failure ``result`` is an error
            message and ``usage`` is the placeholder string ``'xxxxx'``.
        """
        openai.api_key = self.api
        review_prompt_token = 1000  # tokens reserved for the prompt itself
        try:
            text_token = len(self.encoding.encode(text))
        except Exception:
            # Best-effort fallback when the text cannot be tokenised.
            text_token = 3000
        # Scale the character cut-off by the ratio of the token budget to the
        # estimated token count; "+ 1" avoids division by zero on empty text.
        input_text_index = int(
            len(text) * (self.max_token_num - review_prompt_token) / (text_token + 1)
        )
        input_text = "This is the paper for your review:" + text[:input_text_index]
        messages = [
            {"role": "system", "content": "You are a professional reviewer. Now I will give you a paper. You need to give a complete review opinion according to the following requirements and format:" + self.review_format + "Be sure to use {} answers".format(self.language)},
            {"role": "user", "content": input_text + " Translate the output into {}.".format(self.language)},
        ]
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-16k",
                messages=messages,
                temperature=0.7,
            )
            result = "".join(choice.message.content for choice in response.choices)
            result += "\n\n⚠声明/Ethics statement:\n--以上内容仅供参考,请合理使用本工具!\n--The above content is for reference only. Please use this tool responsibly!"
            usage = response.usage.total_tokens
        except Exception as e:
            # Surface the failure in the UI instead of raising.
            result = "⚠:非常抱歉>_<,发生了一个错误:" + str(e)
            usage = 'xxxxx'
        print("********" * 10)
        print(result)
        print("********" * 10)
        return result, usage

    def extract_chapter(self, pdf_path):
        """Return the concatenated text of every page of the PDF.

        Args:
            pdf_path: Despite the name, the PDF content as a bytes-like
                object; it is wrapped in ``BytesIO`` before parsing.

        Returns:
            The text of all pages, in page order, joined without a separator.
        """
        pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_path))
        # A page that yields no text contributes an empty string.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
|
|
|
def main(api, review_format, paper_pdf, language):
    """Gradio callback: review the uploaded paper and report resource usage.

    Args:
        api: OpenAI API key.
        review_format: Review requirements/format text.
        paper_pdf: Uploaded PDF content.
        language: Output language name.

    Returns:
        A ``(comments, stats)`` pair of strings. When any of *api*,
        *review_format* or *paper_pdf* is missing/empty, both elements are the
        same warning message; otherwise *comments* is the review text and
        *stats* reports the token count and elapsed seconds.
    """
    start_time = time.time()

    # Guard clause: all three inputs are required.
    if not (api and review_format and paper_pdf):
        warning = "⚠:API-key或审稿要求或论文pdf未输入!请检测!"
        return warning, warning

    reviewer = Reviewer(api, review_format, paper_pdf, language)
    comments, total_token_used = reviewer.review_by_chatgpt(paper_list=paper_pdf)

    elapsed = round(time.time() - start_time, 2)
    stats = "使用token数:" + str(total_token_used) + "\n花费时间:" + str(elapsed) + "秒"
    return comments, stats
|
|
|
|
|
|
|
|
|
|
|
# Page title shown by the Gradio interface.
title = "论文真实性验证"

# HTML blurb rendered under the title.
description = '''<div align='center'><p><strong>ChatReviewer是一款基于ChatGPT-3.5的API开发的智能论文分析助手。</strong></p>

<p>其用途如下:</p>

<p>⭐️针对论文内容真实性进行验证,并针对文章内容随机生成问题,提供给学生进行回答,从而让评审人员判断文章是否为学生自行创作。</p>

</div>
'''

# Input widgets, in the positional order of main(api, review_format,
# paper_pdf, language).
# NOTE(review): `gradio.inputs.*` with `default=` looks like the legacy input
# API; confirm the pinned Gradio version before upgrading.
inp = [
    gradio.inputs.Textbox(label="请输入你的API-key(sk开头的字符串)",
                          default="",
                          type='password'),
    gradio.inputs.Textbox(lines=5,
                          label="请输入特定的分析要求和格式(否则为默认格式)",
                          default="""Please provide a numbered, specific, detailed, and clear list of questions that is customized to my essay based on the following points:
(1) My motivation for writing this specific essay
(2) Research I've done to support the settings I created in this specific essay
(3) Character development I've done for the characters in this specific essay
(4) My word choices in this specific essay
(5) My writing style in the specific essay
(6) Obstacles I faced in the specific writing process
(7) My takeaways from the specific writing process

These questions should challenge the details of the creative process, the evaluation settings, or additional qualities supporting the my work. The questions should be formulated in a way that allows for a comprehensive evaluation of my essay's originality, creativity and quality after the I have answered them during the oral defense."""
                          ),
    # type="bytes" hands the file content to main as bytes, which is what
    # Reviewer.extract_chapter wraps in BytesIO.
    gradio.inputs.File(label="请上传论文PDF文件(请务必等pdf上传完成后再点击Submit!)", type="bytes"),
    # The selected value is interpolated verbatim into the LLM prompt, so the
    # language names must be spelled correctly.
    gradio.inputs.Radio(choices=["English", "Chinese", "French", "German", "Japanese"],
                        default="English",
                        label="选择输出语言"),
]

# Two text outputs matching main's (comments, stats) return pair.
chat_reviewer_gui = gradio.Interface(fn=main,
                                     inputs=inp,
                                     outputs=[gradio.Textbox(lines=25, label="分析结果"),
                                              gradio.Textbox(lines=2, label="资源统计")],
                                     title=title,
                                     description=description)

# Start the web app when the module is executed.
chat_reviewer_gui.launch(quiet=True, show_api=False)