Spaces:
Sleeping
Sleeping
File size: 1,190 Bytes
90169c5 b2d2cca 90169c5 b2d2cca 90169c5 b2d2cca 90169c5 8f84efd 90169c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import fitz # PyMuPDF
import gradio as gr
from transformers import pipeline
# Build the shared summarization pipeline once at import time so every
# request reuses the same loaded model instead of reloading it per call.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: Path of the PDF to read; it is handed to ``fitz.open``
            unchanged. A falsy value (``None`` or ``""``) means there is
            nothing to read.

    Returns:
        The text of all pages joined in page order, or ``""`` when
        ``file`` is falsy.
    """
    if not file:
        # Nothing was supplied, so there is no document to open.
        return ""
    # The context manager closes the document even if a page fails to
    # parse, so no file handle is left open between requests.
    with fitz.open(file) as doc:
        # join() avoids the repeated string copies of `text += ...`.
        return "".join(page.get_text() for page in doc)
def summarize_pdf(file):
    """Summarize a PDF chunk by chunk and return the combined summary.

    The extracted text is cut into fixed-size character windows, each
    non-blank window is summarized independently, and the partial
    summaries are joined with single spaces.

    Args:
        file: Path of the PDF, forwarded to ``extract_text_from_pdf``.
            A falsy value (``None`` or ``""``) yields an empty summary.

    Returns:
        The combined summary text, or ``""`` when there is no file or no
        text to summarize.
    """
    if not file:
        # No upload: skip extraction and the model entirely.
        return ""

    raw_text = extract_text_from_pdf(file)

    # Window size in characters, chosen to keep each request small.
    max_chunk = 1024
    chunks = (
        raw_text[start:start + max_chunk]
        for start in range(0, len(raw_text), max_chunk)
    )

    partial_summaries = [
        summarizer(
            chunk,
            max_length=130,
            min_length=30,
            do_sample=False,
            # The window is measured in characters, not tokens, so let the
            # tokenizer truncate rather than overflow the model's input.
            truncation=True,
        )[0]["summary_text"]
        for chunk in chunks
        # Whitespace-only windows carry nothing to summarize.
        if chunk.strip()
    ]
    return " ".join(partial_summaries).strip()
# Gradio UI: a single PDF upload feeds summarize_pdf and the result is
# shown in a text box. The positional arguments are, in order, the handler,
# the input component and the output component.
demo = gr.Interface(
    summarize_pdf,
    # type="filepath" matches the handler, which passes the value straight
    # to fitz.open() as a path.
    gr.File(type="filepath", label="Upload a PDF"),
    gr.Textbox(label="Summary"),
    title="📄 PDF Summarizer",
    description="Upload a PDF file and get an AI-generated summary using Hugging Face Transformers.",
)
if __name__ == "__main__":
    # Launch the app only when this file is run as a script, not when it
    # is imported. ssr_mode=False is forwarded to launch() unchanged.
    demo.launch(ssr_mode=False)