# Hugging Face Space (status when captured: Sleeping)
import textwrap

import fitz  # PyMuPDF
import gradio as gr
from transformers import pipeline
# Build the summarization pipeline once at import time; summarize_pdf reuses
# this single module-level instance for every chunk it processes.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
def extract_text_from_pdf(file):
    """Return the concatenated plain text of every page of a PDF.

    Args:
        file: Value handed straight to ``fitz.open``. The Gradio input in
            this file is declared with ``type="filepath"``, so this is
            presumably a filesystem path string -- confirm for other callers.

    Returns:
        The result of ``page.get_text()`` for each page, joined in page
        order with no separator. Empty string if the document has no pages
        or no extractable text.
    """
    # Open through a context manager so the document handle is released even
    # if text extraction raises part-way through the pages.
    with fitz.open(file) as doc:
        return "".join(page.get_text() for page in doc)
def summarize_pdf(file):
    """Summarize a PDF by summarizing its text chunk by chunk.

    The extracted text is cut into pieces of at most 1024 characters, each
    piece is summarized independently by the module-level ``summarizer``
    pipeline, and the partial summaries are joined with single spaces.

    Args:
        file: The PDF to summarize; forwarded unchanged to
            ``extract_text_from_pdf``.

    Returns:
        The space-joined chunk summaries with surrounding whitespace
        stripped, or an empty string when the PDF yields no text.
    """
    raw_text = extract_text_from_pdf(file)

    # Limit each model call to avoid token overflow. The limit is expressed
    # in characters, not tokens.
    max_chunk = 1024

    # textwrap.wrap breaks at whitespace, so words are not cut in half at a
    # chunk boundary (only a single word longer than max_chunk is split), and
    # empty or whitespace-only text produces no chunks at all instead of
    # sending blank input to the model.
    chunks = textwrap.wrap(raw_text, width=max_chunk, break_on_hyphens=False)

    partial_summaries = [
        summarizer(chunk, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
        for chunk in chunks
    ]
    return " ".join(partial_summaries).strip()
# Gradio front end: a single PDF upload goes in, a single text box with the
# summary comes out.
# NOTE(review): the leading "π" in the title looks like a mis-decoded emoji
# -- confirm the intended character before changing it.
demo = gr.Interface(
    title="π PDF Summarizer",
    description="Upload a PDF file and get an AI-generated summary using Hugging Face Transformers.",
    fn=summarize_pdf,
    # type="filepath" asks Gradio to hand the upload to fn as a path rather
    # than a file object, which is what fitz.open is called with.
    inputs=gr.File(type="filepath", label="Upload a PDF"),
    outputs=gr.Textbox(label="Summary"),
)


if __name__ == "__main__":
    # Start the web app only when this file is run as a script.
    demo.launch(ssr_mode=False)