File size: 7,677 Bytes
c00c005
990b17a
7c6e06f
990b17a
c00c005
990b17a
c00c005
0dea2d0
c00c005
 
 
 
 
 
 
b6b3547
 
56950ed
 
 
 
c00c005
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35b36a4
56950ed
 
1c3a79b
 
56950ed
1c3a79b
56950ed
35b36a4
56950ed
 
 
 
 
 
 
 
 
 
 
c00c005
 
b6b3547
 
 
 
 
 
c00c005
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f44232c
 
c00c005
 
 
56950ed
c00c005
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27e64f2
56950ed
 
 
 
c00c005
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6b3547
 
c260fe6
b6b3547
56950ed
35b36a4
c00c005
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import os
from pypdf import PdfReader
from langchain.document_loaders import PyPDFLoader, OnlinePDFLoader
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
import gradio as gr

# HTML banner shown at the top of the app (rendered via gr.HTML).
title = '''
<div style="text-align: left; font-family:Arial; color:Black; font-size: 16px; max-width: 750px;">
    <h1>Small PDF Summarizer</h1>
    <p style="text-align: left;">This App can be used to summarize small PDF (max. 1 MB, 15 pages)<br/>
    How to Use:<br/>
    1. Upload a .PDF from your computer and fill OpenAI API key.<br/>
    2. Click the "Upload PDF" button, if successful a preview of your PDF text will be shown.<br/>
    3. Click "Summarize!" and the output will be shown on the textbox below.<br/>
    You can also change some LLM configurations from the 'config' tab.<br/>
</div>
'''

# HTML help text for the "Config" tab explaining the custom-prompt feature.
desc_1 = '''
<div style="text-align: left; font-family:Arial; color:Black; font-size: 14px;">
    <h3>Custom Prompt Template</h3>
    <p style="text-align: left;">You can customize input prompt for the map and combine prompt of langchain's Map-Reduce Summarization pipeline 
    using the textbox below.<br/>
    Prompt which will be fed into LLM use the format of : <b>{textbox input} + {pdf_text} +  "SUMMARY:"</b> <br/>
    In essence each page of PDF will be summarized using map prompt, and each summary then be combined for final output using combine prompt.<br/>
    <a href="https://python.langchain.com/docs/use_cases/summarization">More Info on Map-Reduce for Summarization</a>
</div>
'''

# Default per-chunk ("map") prompt; langchain substitutes each chunk into {text}.
MAP_PROMPT = """
                You will be given a page of text which section is enclosed in triple backticks (```).
                Your goal is to give a summary of this section, ignoring references and footnote if present.
                Your response should be at least 200 words only if input classified as academic text. 
                Your response must fully encompass what was said in the page.

                ```{text}```
                SUMMARY:
                """
# Default "combine" prompt applied to the concatenated per-chunk summaries.
COMBINE_PROMPT = """
                    Write a full summary of the following text enclosed in triple backticks (```).
                    Full summary consists of a descriptive summary of at least 100 words (if possible),
                    followed by numbered list which covers key points of the text.

                    ```{text}```
                    SUMMARY:
                    """
# Tooltip strings for the config widgets in main().
config_info = {'temperature': 'Higher means more randomness to the output.',
               'max_tokens' : 'The maximum number of tokens to generate in the output.',
               'llm_list' : ''}
# Maps each selectable model name to the langchain wrapper family it needs
# ('chat' -> ChatOpenAI, 'instruct' -> OpenAI); see summarize_pdf().
model_list = {'gpt-3.5-turbo':'chat',
              'gpt-4':'chat',
              'gpt-3.5-turbo-instruct':'instruct',
              'text-davinci-003':'instruct'}

# Shared splitter: ~10k-character chunks with 250-character overlap,
# splitting preferentially at paragraph then line boundaries.
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=250)
    
def parse_pdf(pdf_file):
    """Load an uploaded PDF, split it into chunks, and return a short preview.

    Side effects: sets the module-level globals ``pdf_docs`` (list of
    langchain Documents) and ``page_count``, which ``summarize_pdf`` and
    ``file_check`` read later.

    Args:
        pdf_file: Gradio file object; only its ``.name`` (path) is used.

    Returns:
        The first 100 characters of the first chunk, shown as a preview.

    Raises:
        gr.Error: if the file is too large, too long, or has no extractable text.
    """
    global pdf_docs, page_count
    # Reject oversized uploads *before* spending time parsing them.
    # `**` binds tighter than `/`, so this is bytes / (1024**2) == MB.
    if os.path.getsize(pdf_file.name) / 1024 ** 2 > 1:
        raise gr.Error("Maximum File Size is 1MB!")

    loader = PyPDFLoader(pdf_file.name)
    pdf_docs = loader.load_and_split(text_splitter)
    page_count = len(pdf_docs)

    file_check(pdf_file)  # enforces the page-count (and size) limits

    # Without this guard an empty/scanned PDF would crash with IndexError.
    if not pdf_docs:
        raise gr.Error("No extractable text found in the PDF!")
    return pdf_docs[0].page_content[:100]
    
def file_check(pdf_file):
  """Validate the uploaded PDF against the app's size and length limits.

  Relies on the module-level ``page_count`` set by ``parse_pdf``.

  Args:
      pdf_file: Gradio file object; only its ``.name`` (path) is used.

  Raises:
      gr.Error: if the file exceeds 1 MB or 15 chunks/pages.
  """
  # `**` binds tighter than `/`, so this is bytes / (1024**2) == megabytes.
  if os.path.getsize(pdf_file.name) / 1024 ** 2 > 1:
    raise gr.Error("Maximum File Size is 1MB!")
  if page_count > 15:
    raise gr.Error("Maximum File Length is 15 Pages!")
      
def summarize_pdf(api_key, 
                  model_name, temperature, llm_max_tokens, 
                  custom_map_prompt, custom_combine_prompt):
  """Summarize the previously parsed PDF with langchain's map-reduce chain.

  Args:
      api_key: OpenAI API key; exported via the environment for langchain.
      model_name: key into ``model_list`` selecting a chat or instruct model.
      temperature: sampling temperature forwarded to the LLM.
      llm_max_tokens: max tokens for each LLM response (coerced to int).
      custom_map_prompt: optional user prompt for the per-chunk (map) step;
          MAP_PROMPT is used when empty.
      custom_combine_prompt: optional user prompt for the combine step;
          COMBINE_PROMPT is used when empty.

  Returns:
      The final summary text produced by the chain.

  Raises:
      gr.Error: if no PDF has been uploaded/parsed yet.
  """
  # `pdf_docs` is a module global created by parse_pdf. A bare `except:`
  # here would also swallow unrelated failures, so test for it explicitly.
  if 'pdf_docs' not in globals() or not pdf_docs:
    raise gr.Error("No PDF File Detected!")

  # Build the LLM: chat and instruct models need different langchain wrappers.
  os.environ["OPENAI_API_KEY"] = api_key
  if model_list[model_name] == 'chat':
    gpt_llm = ChatOpenAI(temperature=temperature, model_name=model_name, max_tokens=int(llm_max_tokens))
  else:
    gpt_llm = OpenAI(temperature=temperature, model_name=model_name, max_tokens=int(llm_max_tokens))

  # Fall back to the default templates when the user supplied no prompt.
  if custom_map_prompt != "":
    map_template = PromptTemplate(template=generate_template(custom_map_prompt), input_variables=["text"])
  else:
    map_template = PromptTemplate(template=MAP_PROMPT, input_variables=["text"])

  if custom_combine_prompt != "":
    combine_template = PromptTemplate(template=generate_template(custom_combine_prompt), input_variables=["text"])
  else:
    combine_template = PromptTemplate(template=COMBINE_PROMPT, input_variables=["text"])

  map_reduce_chain = load_summarize_chain(
    gpt_llm,
    chain_type="map_reduce",
    map_prompt=map_template,
    combine_prompt=combine_template,
    return_intermediate_steps=True,
    token_max=3840 # limit the maximum number of tokens in the combined document (combine prompt).
  )
  map_reduce_outputs = map_reduce_chain({"input_documents": pdf_docs})
  return map_reduce_outputs['output_text']

def generate_template(custom_prompt):
  """Build a full LLM prompt by appending the fixed ```{text}```/SUMMARY: scaffold.

  Args:
      custom_prompt: user-supplied instruction text.

  Returns:
      The prompt with the standard placeholder block appended, matching the
      layout of MAP_PROMPT / COMBINE_PROMPT.
  """
  scaffold = "\n\n      ```{text}```\n      SUMMARY:\n      "
  return custom_prompt + scaffold

def main():
  """Build and launch the Gradio UI.

  Layout: a "Main" tab (upload, API key, summarize button, output) and a
  "Config" tab (model/temperature/max-token controls plus custom map/combine
  prompt editors with previews). Wires the buttons to parse_pdf,
  summarize_pdf and generate_template.
  """
  with gr.Blocks() as demo:
    gr.HTML(title)
    with gr.Tab("Main"):
      with gr.Column():
        pdf_doc = gr.File(label="Uploaded PDF:", file_types=['.pdf'])
        with gr.Row():
            submit_button = gr.Button(value="Upload!")
            pdf_preview = gr.Textbox(label="PDF Preview:", lines=2, interactive=False)
            
        API_KEY = gr.Textbox(label="OpenAI API Key:", lines=1, type="password")
        summarize_button = gr.Button(value="Summarize!")
        summarized_text  = gr.Textbox(label="Summary", lines=10, show_copy_button=True)

    with gr.Tab("Config"):
      # Dropdown expects a list, not a dict_keys view.
      llm_model = gr.Dropdown(choices=list(model_list.keys()), label="LLM model used", value='gpt-3.5-turbo', interactive=True)
      with gr.Row():
        temperature = gr.Slider(minimum=0, maximum=0.5, step=0.1, label="temperature", info=config_info['temperature'])
        llm_max_tokens = gr.Radio(choices=[128, 256, 512], value=256, interactive=True, label="LLM max tokens", info=config_info['max_tokens']) 
      gr.HTML(desc_1)
      with gr.Row():
        user_map_prompt = gr.Textbox(label="Map PROMPT", lines=10, interactive=True)
        user_comb_prompt = gr.Textbox(label="Combine PROMPT", lines=10, interactive=True)

      # Read-only reference copies of the built-in templates.
      with gr.Accordion("Default Template", open=False):
        with gr.Row():
          default_map_prompt = gr.Textbox(label="Map PROMPT", value=MAP_PROMPT, lines=10, interactive=False)
          default_comb_prompt = gr.Textbox(label="Combine PROMPT", value=COMBINE_PROMPT, lines=10, interactive=False)
      # Shows what the user's custom prompts expand to once scaffolded.
      with gr.Accordion("User Custom Prompt Preview", open=False):
        prompt_preview_button = gr.Button(value="View Custom Prompt")
        with gr.Row():
          custom_map_view = gr.Textbox(label="Map PROMPT", lines=10, interactive=False)
          custom_comb_view = gr.Textbox(label="Combine PROMPT", lines=10, interactive=False)

        prompt_preview_button.click(generate_template, inputs=[user_map_prompt], outputs=[custom_map_view])
        prompt_preview_button.click(generate_template, inputs=[user_comb_prompt], outputs=[custom_comb_view])

    # Argument order must match summarize_pdf's signature.
    inputs_list = [API_KEY, llm_model, temperature, llm_max_tokens, user_map_prompt, user_comb_prompt]

    submit_button.click(parse_pdf, inputs=[pdf_doc], outputs=[pdf_preview])
    summarize_button.click(summarize_pdf, inputs=inputs_list, outputs=[summarized_text])
      
  # Single-worker queue: one summarization job at a time.
  demo.queue(concurrency_count=1).launch(share=True)

# Launch the Gradio app only when run as a script, not on import.
if __name__ == "__main__":
    main()