from PyPDF2 import PdfReader
import openai
from .prompt import BASE_POINTS, READING_PROMT_V2
from .paper import Paper
from .model_interface import OpenAIModel


class PaperReader:

    """
    A class for summarizing research papers using the OpenAI API.

    Attributes:
        openai_key (str): The API key to use the OpenAI API.
        token_length (int): The length of text to send to the API at a time.
        model (str): The GPT model to use for summarization.
        points_to_focus (str): The key points to focus on while summarizing.
        verbose (bool): A flag to enable/disable verbose logging.

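    Example (illustrative sketch; the API key and PDF path below are placeholders):

        reader = PaperReader(openai_key="sk-...")
        summary = reader.read_pdf_and_summarize("paper.pdf")
        answer = reader.question("Which datasets does the paper use?")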
    """

    def __init__(self, openai_key, token_length=4000, model="gpt-3.5-turbo",
                 points_to_focus=BASE_POINTS, verbose=False):

        # Setting the API key to use the OpenAI API
        openai.api_key = openai_key

        # Initializing prompts for the conversation
        self.init_prompt = READING_PROMT_V2.format(points_to_focus)

        self.summary_prompt = 'You are a researcher helper bot. Now you need to read the summaries of a research paper.'
        self.messages = []  # Initializing the conversation messages
        self.summary_msg = []  # Initializing the summary messages
        self.token_len = token_length  # Setting the token length to use
        self.keep_round = 2  # Rounds of previous dialogues to keep in conversation
        self.verbose = verbose  # Flag to enable/disable verbose logging
        # Wrapping API access in a model interface; all requests go through this object
        self.model = OpenAIModel(api_key=openai_key, model=model)

    def drop_conversation(self, msg):
        # This method drops the oldest exchange from the conversation once it grows past
        # the configured number of kept rounds, always preserving the initial system
        # prompt (msg[0]).
        # Example: with keep_round=2, [prompt, q1, r1, q2, r2, q3, r3] becomes
        # [prompt, q2, r2, q3, r3].
        if len(msg) >= (self.keep_round + 1) * 2 + 1:
            return [msg[0]] + msg[3:]
        else:
            return msg

    def send_msg(self, msg):
        # Delegating the actual API request to the model wrapper
        return self.model.send_msg(msg)

    def _chat(self, message):
        # This method is used to send a message and get a response from the OpenAI API

        # Adding the user message to the conversation messages
        self.messages.append({"role": "user", "content": message})
        # Sending the messages to the API and getting the response
        response = self.send_msg(self.messages)
        # Adding the system response to the conversation messages
        self.messages.append({"role": "system", "content": response})
        # Dropping previous conversation messages to keep the conversation history short
        self.messages = self.drop_conversation(self.messages)
        # Returning the system response
        return response

    def summarize(self, paper: Paper):
        # This method is used to summarize a given research paper

        # Adding the initial prompt to the conversation messages
        self.messages = [
            {"role": "system", "content": self.init_prompt},
        ]
        # Adding the summary prompt to the summary messages
        self.summary_msg = [{"role": "system", "content": self.summary_prompt}]

        # Reading and summarizing each part of the research paper
        for (page_idx, part_idx, text) in paper.iter_pages():
            print('page: {}, part: {}'.format(page_idx, part_idx))
            # Sending the text to the API and getting the response
            summary = self._chat('Now I am sending you page {}, part {}: {}'.format(page_idx, part_idx, text))
            # Logging the summary if verbose logging is enabled
            if self.verbose:
                print(summary)
            # Adding the summary of the part to the summary messages
            self.summary_msg.append({"role": "user", "content": '{}'.format(summary)})

        # Adding a prompt for the user to summarize the whole paper to the summary messages
        self.summary_msg.append({"role": "user", "content": 'Now please make a summary of the whole paper'})
        # Sending the summary messages to the API and getting the response
        result = self.send_msg(self.summary_msg)
        # Returning the summary of the whole paper
        return result

    def read_pdf_and_summarize(self, pdf_path):
        # This method is used to read a research paper from a PDF file and summarize it

        # Creating a PdfReader object to read the PDF file
        pdf_reader = PdfReader(pdf_path)
        paper = Paper(pdf_reader)
        print('reading pdf finished')
        # Summarizing the full text of the research paper and returning the summary
        summary = self.summarize(paper)
        return summary

    def get_summary_of_each_part(self):
        # This method is used to get the summary of each part of the research paper
        return self.summary_msg

    def question(self, question):
        # This method is used to ask a question after summarizing a paper

        # Adding the question to the summary messages
        self.summary_msg.append({"role": "user", "content": question})
        # Sending the summary messages to the API and getting the response
        response = self.send_msg(self.summary_msg)
        # Adding the system response to the summary messages
        self.summary_msg.append({"role": "system", "content": response})
        # Returning the system response
        return response
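

# Minimal usage sketch (illustrative, not part of the library): assumes a valid
# OpenAI API key in the OPENAI_API_KEY environment variable and a PDF file at the
# given path. Because this module uses relative imports, run it with `python -m`
# from the package root rather than as a standalone script.
if __name__ == '__main__':
    import os

    reader = PaperReader(openai_key=os.environ['OPENAI_API_KEY'], verbose=True)
    # Summarize the paper part by part, then produce an overall summary
    overall_summary = reader.read_pdf_and_summarize('example_paper.pdf')
    print(overall_summary)
    # Follow-up questions reuse the per-part summaries collected during reading
    print(reader.question('What are the main contributions of the paper?'))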