ChatPDF-GUI / gpt_reader /pdf_reader.py
epochs-demos's picture
Duplicate from Alpaca233/ChatPDF-GUI
f87ab8f
raw
history blame contribute delete
No virus
5.43 kB
from PyPDF2 import PdfReader
import openai
from .prompt import BASE_POINTS, READING_PROMT_V2
from .paper import Paper
from .model_interface import OpenAIModel
# Paper summarization helper; the OpenAI API key is set in PaperReader.__init__.
class PaperReader:
    """
    A class for summarizing research papers using the OpenAI API.

    The paper is fed to the model part by part; each per-part reply is
    collected, and the collected replies are then sent back to the model
    to obtain a summary of the whole paper. Follow-up questions can be
    asked against the collected per-part summaries.

    Attributes:
        init_prompt (str): System prompt used while reading the paper,
            built from READING_PROMT_V2 and the points to focus on.
        summary_prompt (str): System prompt used for the final summary
            and for follow-up questions.
        messages (list): Rolling conversation used while reading parts.
        summary_msg (list): Per-part summaries plus any follow-up Q&A.
        token_len (int): The length of text to send to the API at a time
            (stored here; not read by any method of this class).
        keep_round (int): Rounds of previous dialogue kept in `messages`.
        verbose (bool): A flag to enable/disable verbose logging.
        model (OpenAIModel): Wrapper used to send messages to the API.
    """

    def __init__(self, openai_key, token_length=4000, model="gpt-3.5-turbo",
                 points_to_focus=BASE_POINTS, verbose=False):
        # Setting the API key to use the OpenAI API (module-global setting)
        openai.api_key = openai_key

        # Initializing prompts for the conversation
        self.init_prompt = READING_PROMT_V2.format(points_to_focus)
        self.summary_prompt = 'You are a researcher helper bot. Now you need to read the summaries of a research paper.'

        self.messages = []  # Conversation messages used while reading
        self.summary_msg = []  # Per-part summaries and follow-up Q&A
        self.token_len = token_length  # Token length to use
        self.keep_round = 2  # Rounds of previous dialogues to keep in conversation
        self.verbose = verbose  # Flag to enable/disable verbose logging

        # The model name is only needed to build the wrapper; the attribute
        # holds the wrapper itself, not the name string.
        self.model = OpenAIModel(api_key=openai_key, model=model)

    def drop_conversation(self, msg):
        """
        Drop the oldest dialogue round once the history grows too long.

        When `msg` holds at least `keep_round + 1` rounds (two messages per
        round) in addition to its first message, a new list is returned
        containing the first message followed by everything from index 3
        onwards, i.e. the messages at indexes 1 and 2 are removed.
        Otherwise `msg` itself is returned unchanged.
        """
        if len(msg) < (self.keep_round + 1) * 2 + 1:
            return msg
        return msg[:1] + msg[3:]

    def send_msg(self, msg):
        """Send a list of chat messages through the model wrapper and return its result."""
        return self.model.send_msg(msg)

    def _chat(self, message):
        """
        Send one user message in the reading conversation and return the reply.

        The user message and the model's reply are both appended to
        `self.messages`, after which the history is trimmed with
        `drop_conversation`.
        """
        self.messages.append({"role": "user", "content": message})
        response = self.send_msg(self.messages)
        # Model output belongs under the "assistant" role; recording it as
        # "system" would replay earlier replies to the model as instructions.
        self.messages.append({"role": "assistant", "content": response})
        # Keep the conversation history short
        self.messages = self.drop_conversation(self.messages)
        return response

    def summarize(self, paper: Paper):
        """
        Summarize a paper part by part, then summarize the whole paper.

        Both conversations are reset first. Every (page, part, text) item
        yielded by `paper.iter_pages()` is sent to the model, and each reply
        is stored in `self.summary_msg` as a user message. Finally the model
        is asked for a summary of the whole paper, and that result is
        returned.
        """
        # Start both conversations from their system prompts
        self.messages = [{"role": "system", "content": self.init_prompt}]
        self.summary_msg = [{"role": "system", "content": self.summary_prompt}]

        for page_idx, part_idx, text in paper.iter_pages():
            print(f'page: {page_idx}, part: {part_idx}')
            summary = self._chat(
                f'now I send you page {page_idx}, part {part_idx}:{text}'
            )
            if self.verbose:
                print(summary)
            # Part summaries are handed to the summarizing conversation as
            # user-provided material.
            self.summary_msg.append({"role": "user", "content": f'{summary}'})

        self.summary_msg.append(
            {"role": "user", "content": 'Now please make a summary of the whole paper'}
        )
        return self.send_msg(self.summary_msg)

    def read_pdf_and_summarize(self, pdf_path):
        """Read a paper from the PDF at `pdf_path` and return its summary."""
        paper = Paper(PdfReader(pdf_path))
        print('reading pdf finished')
        return self.summarize(paper)

    def get_summary_of_each_part(self):
        """Return the summary message list (the live list, not a copy)."""
        return self.summary_msg

    def question(self, question):
        """
        Ask a follow-up question against the collected summaries.

        The question and the model's reply are both appended to
        `self.summary_msg`, so later questions see earlier answers.
        Returns the model's reply.
        """
        self.summary_msg.append({"role": "user", "content": question})
        response = self.send_msg(self.summary_msg)
        # Record the reply under the "assistant" role so it is not replayed
        # to the model as a system instruction on the next question.
        self.summary_msg.append({"role": "assistant", "content": response})
        return response