from PyPDF2 import PdfReader
import openai
from .prompt import BASE_POINTS, READING_PROMT_V2
from .paper import Paper
from .model_interface import OpenAIModel


class PaperReader:

    """
    A class for summarizing research papers using the OpenAI API.

    Attributes:
        openai_key (str): The API key to use the OpenAI API.
        token_length (int): The length of text to send to the API at a time.
        model (str): The GPT model to use for summarization.
        points_to_focus (str): The key points to focus on while summarizing.
        verbose (bool): A flag to enable/disable verbose logging.

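    Example (illustrative sketch; the API key and PDF path below are placeholders):

        reader = PaperReader(openai_key="sk-...")
        summary = reader.read_pdf_and_summarize("paper.pdf")
        answer = reader.question("Which datasets does the paper use?")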
    """

    def __init__(self, openai_key, token_length=4000, model="gpt-3.5-turbo",
                 points_to_focus=BASE_POINTS, verbose=False):

        # Setting the API key to use the OpenAI API
        openai.api_key = openai_key

        # Initializing prompts for the conversation
        self.init_prompt = READING_PROMT_V2.format(points_to_focus)

        self.summary_prompt = 'You are a researcher helper bot. Now you need to read the summaries of a research paper.'
        self.messages = []  # Initializing the conversation messages
        self.summary_msg = []  # Initializing the summary messages
        self.token_len = token_length  # Setting the token length to use
        self.keep_round = 2  # Rounds of previous dialogues to keep in conversation
        self.verbose = verbose  # Flag to enable/disable verbose logging
        # Wrapping API access in a model interface; all requests go through this object
        self.model = OpenAIModel(api_key=openai_key, model=model)

    def drop_conversation(self, msg):
        # This method drops the oldest exchange from the conversation once it grows past
        # the configured number of kept rounds, always preserving the initial system
        # prompt (msg[0]).
        # Example: with keep_round=2, [prompt, q1, r1, q2, r2, q3, r3] becomes
        # [prompt, q2, r2, q3, r3].
        if len(msg) >= (self.keep_round + 1) * 2 + 1:
            return [msg[0]] + msg[3:]
        else:
            return msg

    def send_msg(self, msg):
        # Delegating the actual API request to the model wrapper
        return self.model.send_msg(msg)

    def _chat(self, message):
        # This method is used to send a message and get a response from the OpenAI API

        # Adding the user message to the conversation messages
        self.messages.append({"role": "user", "content": message})
        # Sending the messages to the API and getting the response
        response = self.send_msg(self.messages)
        # Adding the system response to the conversation messages
        self.messages.append({"role": "system", "content": response})
        # Dropping previous conversation messages to keep the conversation history short
        self.messages = self.drop_conversation(self.messages)
        # Returning the system response
        return response

    def summarize(self, paper: Paper):
        # This method is used to summarize a given research paper

        # Adding the initial prompt to the conversation messages
        self.messages = [
            {"role": "system", "content": self.init_prompt},
        ]
        # Adding the summary prompt to the summary messages
        self.summary_msg = [{"role": "system", "content": self.summary_prompt}]

        # Reading and summarizing each part of the research paper
        for (page_idx, part_idx, text) in paper.iter_pages():
            print('page: {}, part: {}'.format(page_idx, part_idx))
            # Sending the text to the API and getting the response
            summary = self._chat('Now I am sending you page {}, part {}: {}'.format(page_idx, part_idx, text))
            # Logging the summary if verbose logging is enabled
            if self.verbose:
                print(summary)
            # Adding the summary of the part to the summary messages
            self.summary_msg.append({"role": "user", "content": '{}'.format(summary)})

        # Adding a prompt for the user to summarize the whole paper to the summary messages
        self.summary_msg.append({"role": "user", "content": 'Now please make a summary of the whole paper'})
        # Sending the summary messages to the API and getting the response
        result = self.send_msg(self.summary_msg)
        # Returning the summary of the whole paper
        return result

    def read_pdf_and_summarize(self, pdf_path):
        # This method is used to read a research paper from a PDF file and summarize it

        # Creating a PdfReader object to read the PDF file
        pdf_reader = PdfReader(pdf_path)
        paper = Paper(pdf_reader)
        print('reading pdf finished')
        # Summarizing the full text of the research paper and returning the summary
        summary = self.summarize(paper)
        return summary

    def get_summary_of_each_part(self):
        # This method is used to get the summary of each part of the research paper
        return self.summary_msg

    def question(self, question):
        # This method is used to ask a question after summarizing a paper

        # Adding the question to the summary messages
        self.summary_msg.append({"role": "user", "content": question})
        # Sending the summary messages to the API and getting the response
        response = self.send_msg(self.summary_msg)
        # Adding the system response to the summary messages
        self.summary_msg.append({"role": "system", "content": response})
        # Returning the system response
        return response
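

# Minimal usage sketch (illustrative, not part of the library): assumes a valid
# OpenAI API key in the OPENAI_API_KEY environment variable and a PDF file at the
# given path. Because this module uses relative imports, run it with `python -m`
# from the package root rather than as a standalone script.
if __name__ == '__main__':
    import os

    reader = PaperReader(openai_key=os.environ['OPENAI_API_KEY'], verbose=True)
    # Summarize the paper part by part, then produce an overall summary
    overall_summary = reader.read_pdf_and_summarize('example_paper.pdf')
    print(overall_summary)
    # Follow-up questions reuse the per-part summaries collected during reading
    print(reader.question('What are the main contributions of the paper?'))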