import numpy as np import os import re import jieba from io import BytesIO import datetime import time import openai, tenacity import argparse import configparser import json import tiktoken import PyPDF2 import gradio def contains_chinese(text): for ch in text: if u'\u4e00' <= ch <= u'\u9fff': return True return False def insert_sentence(text, sentence, interval): lines = text.split('\n') new_lines = [] for line in lines: if contains_chinese(line): words = list(jieba.cut(line)) separator = '' else: words = line.split() separator = ' ' new_words = [] count = 0 for word in words: new_words.append(word) count += 1 if count % interval == 0: new_words.append(sentence) new_lines.append(separator.join(new_words)) return '\n'.join(new_lines) # 定义Reviewer类 class Reviewer: # 初始化方法,设置属性 def __init__(self, api, review_format, paper_pdf, language): self.api = api self.review_format = review_format self.language = language self.paper_pdf = paper_pdf self.max_token_num = 14097 self.encoding = tiktoken.get_encoding("gpt2") def review_by_chatgpt(self, paper_list): text = self.extract_chapter(self.paper_pdf) chat_review_text, total_token_used = self.chat_review(text=text) return chat_review_text, total_token_used @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5), reraise=True) def chat_review(self, text): openai.api_key = self.api # 读取api review_prompt_token = 1000 try: text_token = len(self.encoding.encode(text)) except: text_token = 3000 input_text_index = int(len(text)*(self.max_token_num-review_prompt_token)/(text_token+1)) input_text = "This is the paper for your review:" + text[:input_text_index] messages=[ {"role": "system", "content": "You are a professional reviewer. Now I will give you a paper. You need to give a complete review opinion according to the following requirements and format:"+ self.review_format + "Be sure to use {} answers".format(self.language)} , {"role": "user", "content": input_text + " Translate the output into {}.".format(self.language)}, ] try: response = openai.ChatCompletion.create( model="gpt-3.5-turbo-16k", messages=messages, temperature=0.7 ) result = '' for choice in response.choices: result += choice.message.content # result = insert_sentence(result, '**Generated by ChatGPT, no copying allowed!**', 50) result += "\n\n⚠声明/Ethics statement:\n--以上内容仅供参考,请合理使用本工具!\n--The above content is for reference only. Please use this tool responsibly!" usage = response.usage.total_tokens except Exception as e: # 处理其他的异常 result = "⚠:非常抱歉>_<,生了一个错误:"+ str(e) usage = 'xxxxx' print("********"*10) print(result) print("********"*10) return result, usage def extract_chapter(self, pdf_path): file_object = BytesIO(pdf_path) pdf_reader = PyPDF2.PdfReader(file_object) # 获取PDF的总页数 num_pages = len(pdf_reader.pages) # 初始化提取状态和提取文本 extraction_started = False extracted_text = "" # 遍历PDF中的每一页 for page_number in range(num_pages): page = pdf_reader.pages[page_number] page_text = page.extract_text() # 开始提取 extraction_started = True page_number_start = page_number # 如果提取已开始,将页面文本添加到提取文本中 if extraction_started: extracted_text += page_text # 停止提取 if page_number_start + 1 < page_number: break return extracted_text def main(api, review_format, paper_pdf, language): start_time = time.time() comments = '' output2 = '' if not api or not review_format or not paper_pdf: comments = "⚠:API-key或审稿要求或论文pdf未输入!请检测!" output2 = "⚠:API-key或审稿要求或论文pdf未输入!请检测!" # 判断PDF文件 else: # 创建一个Reader对象 reviewer1 = Reviewer(api, review_format, paper_pdf, language) # 开始判断是路径还是文件: comments, total_token_used = reviewer1.review_by_chatgpt(paper_list=paper_pdf) time_used = time.time() - start_time output2 ="使用token数:"+ str(total_token_used)+"\n花费时间:"+ str(round(time_used, 2)) +"秒" return comments, output2 ######################################################################################################## # 标题 title = "论文真实性验证" # 描述 description = '''
ChatReviewer是一款基于ChatGPT-3.5的API开发的智能论文分析助手。
其用途如下:
⭐️针对论文内容真实性记性验证,并针对文章内容随机生成问题,提供给学生进行回答,从而让评审人员判断文章是否为学生自行创作。