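"""Gradio app that summarizes PDF or pasted text with a Groq-hosted Llama 3.1 8B model.

The input is split into overlapping chunks and summarized with a LangChain
summarize chain; a conciseness slider controls the prompt wording and whether
the "stuff" or "map_reduce" chain type is used.
"""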
import os

import gradio as gr
import PyPDF2
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq

# Read the Groq API key from the environment
groq_api_key = os.environ.get('GROQ_API_KEY')

# Set up the LLM: Groq-hosted Llama 3.1 8B Instant, temperature 0 for deterministic summaries
llm = ChatGroq(temperature=0, model_name='llama-3.1-8b-instant', groq_api_key=groq_api_key)

def extract_text_from_pdf(pdf_file):
    """Concatenate the text of every page in the PDF."""
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for pages without a text layer
        text += page.extract_text() or ""
    return text

def chunk_text(text):
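    """Split the text into overlapping chunks and wrap each one as a LangChain Document."""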
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=4000,
        chunk_overlap=400,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    return [Document(page_content=chunk) for chunk in chunks]

def summarize_chunks(chunks, conciseness):
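    """Summarize the chunks, adapting prompt wording and chain type to the conciseness level (0-1)."""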
    # Adjust the prompts based on the conciseness level
    map_prompt_template = f"""Write a {'very concise' if conciseness > 0.7 else 'detailed'} summary of the following text, focusing on the {'most crucial' if conciseness > 0.7 else 'key'} points:
    "{{text}}"
    {'CONCISE' if conciseness > 0.7 else 'DETAILED'} SUMMARY:"""

    combine_prompt_template = f"""Write a {'highly condensed' if conciseness > 0.7 else 'comprehensive'} summary of the following text, capturing the {'essential' if conciseness > 0.7 else 'key'} points and main ideas:
    "{{text}}"
    {'CONDENSED' if conciseness > 0.7 else 'COMPREHENSIVE'} SUMMARY:"""

    map_prompt = PromptTemplate(template=map_prompt_template, input_variables=["text"])
    combine_prompt = PromptTemplate(template=combine_prompt_template, input_variables=["text"])

    # Choose the chain type: short documents (or very concise requests) fit in a
    # single "stuff" call; longer documents are summarized chunk-by-chunk and
    # then combined with "map_reduce".
    total_length = sum(len(chunk.page_content) for chunk in chunks)
    if total_length < 10000 or conciseness > 0.8:
        chain = load_summarize_chain(
            llm,
            chain_type="stuff",
            prompt=combine_prompt
        )
    else:
        chain = load_summarize_chain(
            llm,
            chain_type="map_reduce",
            map_prompt=map_prompt,
            combine_prompt=combine_prompt,
            verbose=True
        )

    summary = chain.run(chunks)
    return summary

def summarize_content(pdf_file, text_input, conciseness):
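    """Gradio callback: summarize the uploaded PDF if provided, otherwise the pasted text."""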
    if pdf_file is None and not text_input:
        return "Please upload a PDF file or enter text to summarize."

    if pdf_file is not None:
        # Extract text from PDF
        text = extract_text_from_pdf(pdf_file)
    else:
        # Use the input text
        text = text_input

    # Chunk the text
    chunks = chunk_text(text)

    # Summarize chunks with conciseness level
    final_summary = summarize_chunks(chunks, conciseness)
    return final_summary

FOOTER_TEXT = """
<footer>
    <p>If you enjoyed the app, please leave a like!<br>
    Check out more on 
    <a href="https://www.linkedin.com/in/girish-wangikar/" target="_blank">LinkedIn</a> | 
    <a href="https://girishwangikar.github.io/Girish_Wangikar_Portfolio.github.io/" target="_blank">Portfolio</a>
    </p>
</footer>
"""

with gr.Blocks(theme=gr.themes.Soft()) as iface:
    # Add custom CSS for styling
    gr.Markdown(
        """
        <style>
            .title {
                text-align: center;
            }
            .description {
                text-align: center;
            }
            footer {
                text-align: center;
                padding: 10px;
                width: 100%;
                background-color: rgba(240, 240, 240, 0.8);
                z-index: 1000;
                position: relative;
                margin-top: 10px;
                color: black;
            }
            /* Optional: Adjust link styles in footer */
            footer a {
                color: #1a0dab;
                text-decoration: none;
            }
            footer a:hover {
                text-decoration: underline;
            }
        </style>
        """
    )

    # Title and Description with center alignment
    gr.Markdown(
        """
        <div class="title">
            # PDF And Text Summarizer
        </div>
        <div class="description">
            ### Advanced PDF and Text Summarization with Conciseness Control
            - Upload your PDF document or enter text directly, adjust the conciseness level, and let AI generate a summary.
        </div>
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            input_pdf = gr.File(label="Upload PDF (optional)", file_types=[".pdf"])
            input_text = gr.Textbox(
                label="Or enter text here",
                lines=5,
                placeholder="Paste or type your text here..."
            )
            conciseness_slider = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.5,
                step=0.1,
                label="Conciseness Level"
            )
            submit_btn = gr.Button("Generate Summary", variant="primary")

        with gr.Column(scale=2):
            output = gr.Textbox(label="Generated Summary", lines=10)

    gr.Markdown(
        """
        ### How it works
        1. **Upload a PDF file or enter text directly**
        2. **Adjust the conciseness level:**
           - 0 (Most detailed) to 1 (Most concise)
        3. **Click "Generate Summary"**
        4. **Wait for the AI to process and summarize your content**
        5. **Review the generated summary**

        *Powered by the Llama 3.1 8B model and LangChain*
        """
    )

    # Add the footer at the end
    gr.Markdown(FOOTER_TEXT)

    # Define the action for the submit button
    submit_btn.click(
        summarize_content, 
        inputs=[input_pdf, input_text, conciseness_slider], 
        outputs=output
    )

iface.launch()