"""ChatReviewer: a Gradio app that asks an OpenAI chat model to review a PDF."""

import argparse
import configparser
import datetime
import json
import os
import re
import time
from io import BytesIO

import gradio
import jieba
import numpy as np
import openai
import PyPDF2
import tenacity
import tiktoken

# Names of the ``openai.error`` classes treated as transient (worth retrying).
_TRANSIENT_OPENAI_ERRORS = (
    "APIConnectionError",
    "APIError",
    "RateLimitError",
    "ServiceUnavailableError",
    "Timeout",
)


def contains_chinese(text):
    """Return True if *text* has a character in the range U+4E00..U+9FFF."""
    return any("\u4e00" <= ch <= "\u9fff" for ch in text)


def insert_sentence(text, sentence, interval):
    """Insert *sentence* after every *interval* words of each line of *text*.

    Lines that contain Chinese characters are segmented with ``jieba`` and
    re-joined without a separator; all other lines are split on whitespace and
    re-joined with single spaces. The word counter restarts on every line.

    Raises:
        ZeroDivisionError: If *interval* is 0 and a line has at least one word.
    """
    new_lines = []
    for line in text.split("\n"):
        if contains_chinese(line):
            words = list(jieba.cut(line))
            separator = ""
        else:
            words = line.split()
            separator = " "
        new_words = []
        for count, word in enumerate(words, start=1):
            new_words.append(word)
            if count % interval == 0:
                new_words.append(sentence)
        new_lines.append(separator.join(new_words))
    return "\n".join(new_lines)


def _is_transient_openai_error(exc):
    """Return True if *exc* is an OpenAI error that a retry may resolve.

    The classes are looked up by name at call time, so importing this module
    does not fail when ``openai.error`` is missing.
    """
    error_module = getattr(openai, "error", None)
    return any(
        isinstance(exc, getattr(error_module, name, ()))
        for name in _TRANSIENT_OPENAI_ERRORS
    )


class Reviewer:
    """Produce a review of a PDF paper with an OpenAI chat model."""

    def __init__(self, api, review_format, paper_pdf, language):
        """Store the request settings.

        Args:
            api: OpenAI API key.
            review_format: Requirements/format text appended to the system
                prompt.
            paper_pdf: Raw bytes of the PDF to review.
            language: Stored on the instance; no method in this class reads
                it (both prompts ask for Chinese output unconditionally).
        """
        self.api = api
        self.review_format = review_format
        self.language = language
        self.paper_pdf = paper_pdf
        # Token budget used when truncating the paper text.
        self.max_token_num = 12000
        # Token counts are estimated with tiktoken's "gpt2" encoding.
        self.encoding = tiktoken.get_encoding("gpt2")

    def review_by_chatgpt(self, paper_list):
        """Extract the paper text and return ``(review_text, total_tokens)``.

        *paper_list* is accepted for interface compatibility but is not used;
        the PDF bytes stored on the instance are what gets reviewed.
        """
        text = self.extract_chapter(self.paper_pdf)
        return self.chat_review(text=text)

    @tenacity.retry(
        wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
        stop=tenacity.stop_after_attempt(5),
        retry=tenacity.retry_if_exception(_is_transient_openai_error),
        reraise=True,
    )
    def _request_completion(self, messages):
        """Send *messages* to the chat model, retrying transient failures.

        The retry policy lives here, around the bare API call, because
        ``chat_review`` converts every exception into an error message and
        would otherwise hide failures from tenacity.
        """
        return openai.ChatCompletion.create(
            model="gpt-3.5-turbo-16k",
            messages=messages,
            temperature=0.7,
        )

    def chat_review(self, text):
        """Ask the chat model for a review of *text*.

        The paper text is cut proportionally so that its estimated token count
        stays within ``max_token_num`` minus a 1000-token allowance for the
        review prompt.

        Returns:
            A ``(result, usage)`` tuple. On success ``result`` is the
            concatenated content of all returned choices and ``usage`` is the
            total token count reported by the API. On failure ``result`` is a
            user-facing error message and ``usage`` is the placeholder
            ``'xxxxx'``; the exception is not propagated.
        """
        openai.api_key = self.api
        review_prompt_token = 1000
        try:
            text_token = len(self.encoding.encode(text))
        except Exception:
            # Tokenisation failed: assume a count above the budget so the
            # text is still truncated.
            text_token = 13000
        # Scale the character count by (token budget / estimated tokens);
        # the +1 avoids division by zero for empty text.
        input_text_index = int(
            len(text)
            * (self.max_token_num - review_prompt_token)
            / (text_token + 1)
        )
        input_text = (
            "This is the paper for your review:" + text[:input_text_index]
        )
        messages = [
            {
                "role": "system",
                "content": "You are a professional reviewer. Now I will give you a paper. You need to give a complete review opinion according to the following requirements and format:"
                + self.review_format
                + "Be sure to use Chinese answers",
            },
            {
                "role": "user",
                "content": input_text + " Translate the output into Chinese.",
            },
        ]
        try:
            response = self._request_completion(messages)
            result = "".join(
                choice.message.content for choice in response.choices
            )
            usage = response.usage.total_tokens
        except Exception as e:
            # Report any failure to the user instead of crashing the app.
            result = "⚠:非常抱歉>_<,生了一个错误:" + str(e)
            usage = 'xxxxx'
        print("********" * 10)
        print(result)
        print("********" * 10)
        return result, usage

    def extract_chapter(self, pdf_path):
        """Return the concatenated text of every page of the PDF.

        Args:
            pdf_path: Raw PDF bytes (despite the name, not a filesystem
                path); they are wrapped in ``BytesIO`` before parsing.
        """
        pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_path))
        # ``or ""`` keeps the join safe if a page yields no text.
        return "".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )


def main(api, review_format, paper_pdf, language="Chinese"):
    """Gradio callback: review the uploaded PDF and report resource usage.

    Args:
        api: OpenAI API key.
        review_format: Review requirements/format text.
        paper_pdf: Raw bytes of the uploaded PDF.
        language: Forwarded to ``Reviewer``. It has a default because the
            interface below supplies only three inputs.

    Returns:
        A ``(comments, output2)`` tuple: the review text (or an error
        message) and a summary of tokens used and elapsed seconds. When any
        of the three required inputs is empty, both elements are the same
        warning message.
    """
    start_time = time.time()
    if not api or not review_format or not paper_pdf:
        warning = "⚠:API-key或审稿要求或论文pdf未输入!请检测!"
        return warning, warning

    reviewer1 = Reviewer(api, review_format, paper_pdf, language)
    comments, total_token_used = reviewer1.review_by_chatgpt(
        paper_list=paper_pdf
    )
    time_used = time.time() - start_time
    output2 = (
        "使用token数:" + str(total_token_used)
        + "\n花费时间:" + str(round(time_used, 2)) + "秒"
    )
    return comments, output2


########################################################################################################
# Title
title = "🤖ChatReviewer🤖"
# Description
description = ""

# Default review template shown in the second input box.
# NOTE(review): the reviewed source shows this template as one run of
# space-separated text; if the original had line breaks between sections,
# restore them here.
DEFAULT_REVIEW_FORMAT = (
    "* Overall Review "
    "Please briefly summarize the main points and contributions of this paper. "
    "xxx "
    "* Paper Strength "
    "Please provide a list of the strengths of this paper, including but not limited to: "
    "innovative and practical methodology, insightful empirical findings or in-depth "
    "theoretical analysis, well-structured review of relevant literature, and any other "
    "factors that may make the paper valuable to readers. "
    "(Maximum length: 2,000 characters) "
    "(1) xxx (2) xxx (3) xxx "
    "* Paper Weakness "
    "Please provide a numbered list of your main concerns regarding this paper "
    "(so authors could respond to the concerns individually). "
    "These may include, but are not limited to: inadequate implementation details for "
    "reproducing the study, limited evaluation and ablation studies for the proposed "
    "method, correctness of the theoretical analysis or experimental results, lack of "
    "comparisons or discussions with widely-known baselines in the field, lack of clarity "
    "in exposition, or any other factors that may impede the reader's understanding or "
    "benefit from the paper. "
    "Please kindly refrain from providing a general assessment of the paper's novelty "
    "without providing detailed explanations. "
    "(Maximum length: 2,000 characters) "
    "(1) xxx (2) xxx (3) xxx "
    "* Questions To Authors And Suggestions For Rebuttal "
    "Please provide a numbered list of specific and clear questions that pertain to the "
    "details of the proposed method, evaluation setting, or additional results that would "
    "aid in supporting the authors' claims. "
    "The questions should be formulated in a manner that, after the authors have answered "
    "them during the rebuttal, it would enable a more thorough assessment of the paper's "
    "quality. "
    "(Maximum length: 2,000 characters) "
    "*Overall score (1-10) "
    "The paper is scored on a scale of 1-10, with 10 being the full mark, and 6 stands for "
    "borderline accept. "
    "Then give the reason for your rating. "
    "xxx"
)

# Build the Gradio interface. The inputs map positionally onto the first
# three parameters of ``main``.
inp = [
    gradio.inputs.Textbox(
        label="请输入你的API-key(sk开头的字符串)",
        default="",
        type='password',
    ),
    gradio.inputs.Textbox(
        lines=5,
        label="请输入特定的分析要求和格式(否则为默认格式)",
        default=DEFAULT_REVIEW_FORMAT,
    ),
    gradio.inputs.File(
        label="请上传论文PDF文件(请务必等pdf上传完成后再点击Submit!)",
        type="bytes",
    ),
]

chat_reviewer_gui = gradio.Interface(
    fn=main,
    inputs=inp,
    outputs=[
        gradio.Textbox(lines=25, label="分析结果"),
        gradio.Textbox(lines=2, label="资源统计"),
    ],
    title=title,
    description=description,
)

# Start server
chat_reviewer_gui.launch(quiet=True, show_api=False)