File size: 7,703 Bytes
5fbcf17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b9c400
5fbcf17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee7e585
5fbcf17
 
ee7e585
5fbcf17
 
 
ee7e585
5fbcf17
 
 
 
 
 
820271c
5fbcf17
 
 
5e6feaa
 
 
 
 
 
 
 
 
 
5fbcf17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import numpy as np
import os
import re
import jieba
from io import BytesIO
import datetime
import time
import openai, tenacity
import argparse
import configparser
import json
import tiktoken
import PyPDF2
import gradio


def contains_chinese(text):
    """Return True if *text* has at least one character in U+4E00..U+9FFF.

    That range is the basic CJK Unified Ideographs block; characters outside
    it (e.g. kana or Latin letters) do not count. An empty string gives False.
    """
    return any('\u4e00' <= char <= '\u9fff' for char in text)

def insert_sentence(text, sentence, interval):
    """Insert *sentence* after every *interval* words of each line of *text*.

    Lines are processed independently (the word counter restarts on every
    line). A line containing Chinese characters is tokenised with
    ``jieba.cut`` and re-joined without a separator; any other line is split
    on whitespace and re-joined with single spaces.

    Args:
        text: Input text; lines are separated by ``'\\n'``.
        sentence: The string to insert.
        interval: Number of words between insertions.

    Returns:
        The rebuilt text, with lines joined by ``'\\n'``.
    """
    rebuilt_lines = []

    for raw_line in text.split('\n'):
        # Choose the tokenisation and the matching join string per line.
        if contains_chinese(raw_line):
            tokens, glue = list(jieba.cut(raw_line)), ''
        else:
            tokens, glue = raw_line.split(), ' '

        pieces = []
        for position, token in enumerate(tokens, start=1):
            pieces.append(token)
            if position % interval == 0:
                pieces.append(sentence)

        rebuilt_lines.append(glue.join(pieces))

    return '\n'.join(rebuilt_lines)
    
# 定义Reviewer类
class Reviewer:
    """Produce a ChatGPT-written review of a paper supplied as PDF bytes."""

    def __init__(self, api, review_format, paper_pdf, language):
        """Store the settings used by the review request.

        Args:
            api: OpenAI API key; assigned to ``openai.api_key`` in
                ``chat_review``.
            review_format: Requirements/format text appended to the system
                prompt.
            paper_pdf: PDF content; it is wrapped in ``BytesIO`` by
                ``extract_chapter``, so it must be bytes-like.
            language: Output language name interpolated into the prompts.
        """
        self.api = api
        self.review_format = review_format

        self.language = language
        self.paper_pdf = paper_pdf
        # Token budget used to decide how much of the paper text is sent.
        self.max_token_num = 14097
        # Tokenizer used only to estimate the token length of the paper text.
        self.encoding = tiktoken.get_encoding("gpt2")

    def review_by_chatgpt(self, paper_list):
        """Extract the text of ``self.paper_pdf`` and request a review of it.

        Args:
            paper_list: Unused; the PDF stored on the instance is reviewed.

        Returns:
            The ``(review_text, total_tokens_used)`` tuple produced by
            ``chat_review``.
        """
        text = self.extract_chapter(self.paper_pdf)
        chat_review_text, total_token_used = self.chat_review(text=text)
        return chat_review_text, total_token_used

    # Exceptions from the API call are caught inside the method, so the retry
    # policy only applies to exceptions raised outside that inner handler.
    @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                    stop=tenacity.stop_after_attempt(5),
                    reraise=True)
    def chat_review(self, text):
        """Send (a prefix of) *text* to the chat model and return its review.

        The paper text is truncated proportionally so that its estimated
        token count stays within ``self.max_token_num`` minus a fixed
        allowance for the prompt.

        Args:
            text: Extracted paper text.

        Returns:
            ``(result, usage)``. On success ``result`` is the concatenated
            content of all returned choices plus a disclaimer and ``usage``
            is ``response.usage.total_tokens``. If the API call raises,
            ``result`` is an error message containing the exception text and
            ``usage`` is the placeholder string ``'xxxxx'``.
        """
        openai.api_key = self.api   # use the caller-supplied API key
        review_prompt_token = 1000
        try:
            text_token = len(self.encoding.encode(text))
        except Exception:
            # Fall back to a fixed estimate when the text cannot be encoded.
            # (Was a bare ``except:``, which also caught KeyboardInterrupt
            # and SystemExit.)
            text_token = 3000
        # Characters to keep, scaled by the ratio of available tokens to the
        # estimated tokens; ``+1`` avoids division by zero for empty text.
        input_text_index = int(len(text)*(self.max_token_num-review_prompt_token)/(text_token+1))
        input_text = "This is the paper for your review:" + text[:input_text_index]
        messages = [
            {"role": "system", "content": "You are a professional reviewer. Now I will give you a paper. You need to give a complete review opinion according to the following requirements and format:"+ self.review_format + "Be sure to use {} answers".format(self.language)},
            {"role": "user", "content": input_text + " Translate the output into {}.".format(self.language)},
        ]
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-16k",
                messages=messages,
                temperature=0.7
            )
            result = ''.join(choice.message.content for choice in response.choices)
            # result = insert_sentence(result, '**Generated by ChatGPT, no copying allowed!**', 50)
            result += "\n\n⚠声明/Ethics statement:\n--以上内容仅供参考,请合理使用本工具!\n--The above content is for reference only. Please use this tool responsibly!"
            usage = response.usage.total_tokens
        except Exception as e:
            # Report any failure to the user instead of raising.
            result = "⚠:非常抱歉>_<,生了一个错误:"+ str(e)
            usage = 'xxxxx'
        print("********"*10)
        print(result)
        print("********"*10)
        return result, usage

    def extract_chapter(self, pdf_path):
        """Return the concatenated text of every page of a PDF.

        Args:
            pdf_path: Despite the name, this is the PDF *content*: it is
                passed straight to ``BytesIO``, so it must be bytes-like.

        Returns:
            The text of all pages, in page order, joined without separators.
        """
        file_object = BytesIO(pdf_path)
        pdf_reader = PyPDF2.PdfReader(file_object)
        # The previous implementation tracked a start page and had a ``break``
        # whose condition could never be true, so it always read every page;
        # that dead state is dropped and all pages are read directly.
        return "".join(page.extract_text() for page in pdf_reader.pages)

def main(api, review_format, paper_pdf, language):
    """Gradio callback: review the uploaded PDF and report usage statistics.

    Args:
        api: OpenAI API key.
        review_format: Review requirements/format text.
        paper_pdf: Uploaded PDF content.
        language: Output language name.

    Returns:
        ``(comments, stats)``. If any of *api*, *review_format* or
        *paper_pdf* is falsy, both elements are the same warning message.
        Otherwise ``comments`` is the review text and ``stats`` reports the
        token count and the elapsed time in seconds (rounded to 2 decimals).
    """
    started_at = time.time()

    # Guard clause: all three inputs are required.
    if not (api and review_format and paper_pdf):
        missing_input_notice = "⚠:API-key或审稿要求或论文pdf未输入!请检测!"
        return missing_input_notice, missing_input_notice

    reviewer = Reviewer(api, review_format, paper_pdf, language)
    review_text, tokens_used = reviewer.review_by_chatgpt(paper_list=paper_pdf)
    elapsed = time.time() - started_at
    stats = f"使用token数:{tokens_used}\n花费时间:{round(elapsed, 2)}秒"
    return review_text, stats
        


########################################################################################################    
# Page title; passed as `title=` to the gradio.Interface built below.
title = "论文真实性验证"
# HTML description; passed as `description=` to the gradio.Interface built below.

description = '''<div align='center'><p><strong>ChatReviewer是一款基于ChatGPT-3.5的API开发的智能论文分析助手。</strong></p>

<p>其用途如下:</p>

<p>⭐️针对论文内容真实性记性验证,并针对文章内容随机生成问题,提供给学生进行回答,从而让评审人员判断文章是否为学生自行创作。</p>

</div>
'''

# Build the Gradio interface.
# The four input components are passed positionally to `main`, so their order
# must match its parameters: api, review_format, paper_pdf, language.
# NOTE(review): the `gradio.inputs` namespace and its `default=` keyword look
# like the legacy (deprecated) Gradio input API — confirm against the pinned
# Gradio version before upgrading.
inp = [
    # API key, masked as a password field.
    gradio.inputs.Textbox(label="请输入你的API-key(sk开头的字符串)",
                          default="",
                          type='password'),
    # Review requirements/format; pre-filled with a default prompt.
    gradio.inputs.Textbox(lines=5,
        label="请输入特定的分析要求和格式(否则为默认格式)",
        default="""Please provide a numbered, specific, detailed, and clear list of questions that is customized to my essay based on the following points:
(1) My motivation for writing this specific essay
(2) Research I've done to support the settings I created in this specific essay
(3) Character development I've done for the characters in this specific essay
(4) My word choices in this specific essay
(5) My writing style in the specific essay
(6) Obstacles I faced in the specific writing process
(7) My takeaways from the specific writing process

These questions should challenge the details of the creative process, the evaluation settings, or additional qualities supporting the my work. The questions should be formulated in a way that allows for a comprehensive evaluation of my essay's originality, creativity and quality after the I have answered them during the oral defense."""
    ),
    # Uploaded PDF, requested as raw bytes (type="bytes").
    gradio.inputs.File(label="请上传论文PDF文件(请务必等pdf上传完成后再点击Submit!)",type="bytes"),
    # Output language; the selected value is interpolated into the LLM prompt,
    # so the spelling matters ("Japenese" typo corrected to "Japanese").
    gradio.inputs.Radio(choices=["English", "Chinese", "French", "German", "Japanese"],
                        default="English",
                        label="选择输出语言"),
]

# Two text outputs, matching the (comments, output2) tuple returned by `main`.
chat_reviewer_gui = gradio.Interface(fn=main,
                                 inputs=inp,
                                 outputs = [gradio.Textbox(lines=25, label="分析结果"), gradio.Textbox(lines=2, label="资源统计")],
                                 title=title,
                                 description=description)

# Start server
chat_reviewer_gui.launch(quiet=True, show_api=False)