File size: 1,190 Bytes
90169c5
b2d2cca
90169c5
b2d2cca
90169c5
 
b2d2cca
90169c5
 
8f84efd
90169c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import fitz  # PyMuPDF
import gradio as gr
from transformers import pipeline

# Summarization pipeline, built once at import time and reused by summarize_pdf.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)

def extract_text_from_pdf(file):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        file: Path to the PDF; passed unchanged as the first argument of
            ``fitz.open``.

    Returns:
        The text of all pages joined in page order, with no separator
        inserted between pages.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction raises part-way through.
    with fitz.open(file) as doc:
        return "".join(page.get_text() for page in doc)

def summarize_pdf(file):
    """Summarize the text of an uploaded PDF.

    The extracted text is cut into fixed-size character windows, each
    non-blank window is summarized independently, and the partial summaries
    are joined with single spaces.

    Args:
        file: Path of the PDF to summarize, or ``None`` when nothing was
            uploaded.

    Returns:
        The combined summary, or an empty string when there is no file or
        the PDF yields no text to summarize.
    """
    if file is None:
        # Nothing was uploaded, so there is nothing to open or summarize.
        return ""

    raw_text = extract_text_from_pdf(file)

    # Windows are measured in characters, which only approximates the
    # model's token limit; truncation=True below guards the cases where a
    # window still tokenizes to more than the model accepts.
    max_chunk = 1024
    partial_summaries = []
    for start in range(0, len(raw_text), max_chunk):
        chunk = raw_text[start:start + max_chunk]
        if not chunk.strip():
            # A window of pure whitespace carries no content to summarize.
            continue
        res = summarizer(
            chunk,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        partial_summaries.append(res[0]['summary_text'])
    return " ".join(partial_summaries).strip()

# Web front end: a single PDF upload goes in, a single text box of summary
# comes out. The upload component hands summarize_pdf a file path.
demo = gr.Interface(
    title="📄 PDF Summarizer",
    description="Upload a PDF file and get an AI-generated summary using Hugging Face Transformers.",
    fn=summarize_pdf,
    inputs=gr.File(type="filepath", label="Upload a PDF"),
    outputs=gr.Textbox(label="Summary"),
)

if __name__ == "__main__":
    # Start the app only when run as a script, not when imported.
    demo.launch(ssr_mode=False)