# Spaces: Running
import docx
import easyocr
import fitz  # PyMuPDF for PDF
import gradio as gr
import numpy as np
import openpyxl
import pptx
from fastapi import FastAPI
from PIL import Image
from starlette.responses import RedirectResponse
from transformers import pipeline
# Initialize models once at import time so every request reuses them.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
image_captioner = pipeline(
    task="image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)
# OCR reader created with the 'en' and 'fr' language codes.
reader = easyocr.Reader(lang_list=["en", "fr"])

# FastAPI app
app = FastAPI()
# Text extraction functions
def extract_text_from_pdf(file_path):
    """Extract the text of every page of a PDF.

    Args:
        file_path: Path of the PDF file to open.

    Returns:
        The text of all pages joined with newlines. If anything goes wrong
        while opening or reading the file, a message starting with
        "Error reading PDF" (after the status marker) is returned instead
        of raising.
    """
    try:
        # The context manager closes the document even when a page fails
        # to extract, so the underlying file handle is not leaked.
        with fitz.open(file_path) as doc:
            return "\n".join(page.get_text() for page in doc)
    except Exception as e:
        # Deliberately broad: any failure is reported to the caller as text.
        return f"β Error reading PDF: {e}"
def extract_text_from_docx(file):
    """Extract the non-blank paragraphs of a DOCX document.

    Args:
        file: Whatever ``docx.Document`` accepts as its source.

    Returns:
        The paragraph texts joined with newlines, skipping paragraphs that
        contain only whitespace. On any failure an error message string is
        returned instead of raising.
    """
    try:
        paragraphs = docx.Document(file).paragraphs
        non_blank = (para.text for para in paragraphs if para.text.strip())
        return "\n".join(non_blank)
    except Exception as err:
        return f"β Error reading DOCX: {err}"
def extract_text_from_pptx(file):
    """Extract the text of every shape on every slide of a PPTX deck.

    Args:
        file: Whatever ``pptx.Presentation`` accepts as its source.

    Returns:
        The texts of all shapes exposing a ``text`` attribute, in slide
        order, joined with newlines. On any failure an error message string
        is returned instead of raising.
    """
    try:
        presentation = pptx.Presentation(file)
        shape_texts = [
            shape.text
            for slide in presentation.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        ]
        return "\n".join(shape_texts)
    except Exception as err:
        return f"β Error reading PPTX: {err}"
def extract_text_from_xlsx(file):
    """Extract the cell values of every worksheet of an XLSX workbook.

    Args:
        file: Whatever ``openpyxl.load_workbook`` accepts as its source.

    Returns:
        One line per non-empty row, with the row's cell values separated by
        single spaces and rows joined with newlines. On any failure an error
        message string is returned instead of raising.
    """
    try:
        wb = openpyxl.load_workbook(file)
        lines = []
        # Iterate wb.worksheets rather than wb.sheetnames: the latter also
        # lists chart sheets, which have no iter_rows() and would make the
        # whole extraction fail.
        for ws in wb.worksheets:
            for row in ws.iter_rows(values_only=True):
                # Test for "no value" explicitly; a plain truthiness test
                # would silently drop legitimate 0 and False cells.
                cells = [
                    str(cell)
                    for cell in row
                    if cell is not None and cell != ""
                ]
                # Skip fully empty rows so they do not add blank lines.
                if cells:
                    lines.append(" ".join(cells))
        return "\n".join(lines)
    except Exception as e:
        # Deliberately broad: any failure is reported to the caller as text.
        return f"β Error reading XLSX: {e}"
def extract_text_from_image(file):
    """Run OCR on an image and return the recognised text.

    Args:
        file: Whatever ``PIL.Image.open`` accepts as its source.

    Returns:
        The text of every OCR detection, one per line. On any failure an
        error message string is returned instead of raising.
    """
    try:
        # Close the image as soon as the pixel data has been copied into
        # an array; the OCR reader only needs the array.
        with Image.open(file) as img:
            pixels = np.array(img.convert("RGB"))
        # Element 1 of each detection returned by readtext is the text.
        return "\n".join(detection[1] for detection in reader.readtext(pixels))
    except Exception as e:
        # Deliberately broad: any failure is reported to the caller as text.
        return f"β Error reading image with OCR: {e}"
# Main processing function
def analyze_input(file):
    """Caption/OCR an uploaded image, or summarize an uploaded document.

    Args:
        file: The upload to analyze: ``None`` when nothing was uploaded, a
            path string, or an object exposing the file path as ``.name``.

    Returns:
        A human-readable result string: the image caption plus OCR text for
        JPG/JPEG/PNG uploads, a summary for PDF/DOCX/PPTX/XLSX uploads, or
        an explanatory message when nothing was uploaded, the type is
        unsupported, no text was found, or text extraction failed.
    """
    if file is None:
        return "Please upload a document or image."

    # Accept a plain path string as well as an upload object with `.name`,
    # and hand the same path to every extractor for consistency.
    path = file if isinstance(file, str) else file.name
    ext = path.lower().split(".")[-1]

    if ext in ("jpg", "jpeg", "png"):
        # Close the image once the captioning model has consumed it.
        with Image.open(path) as img:
            caption = image_captioner(img)[0]["generated_text"]
        ocr_text = extract_text_from_image(path)
        return f"π· Image Caption:\n{caption}\n\nπ OCR Text:\n{ocr_text}"

    if ext == "pdf":
        extract = extract_text_from_pdf
    elif ext == "docx":
        extract = extract_text_from_docx
    elif ext == "pptx":
        extract = extract_text_from_pptx
    elif ext == "xlsx":
        extract = extract_text_from_xlsx
    else:
        return "Unsupported file type. Please upload PDF, DOCX, PPTX, XLSX, or an image."

    text = extract(path)
    if not text.strip():
        return "β No text could be extracted from the document."

    # The extractors report failures as "<marker> Error reading <TYPE>: ...".
    # Show such a message as-is instead of feeding it to the summarizer.
    if text.partition(" ")[2].startswith("Error reading "):
        return text

    # The character cap keeps the request small; truncation=True additionally
    # guards against inputs that still exceed the model's token limit.
    summary = summarizer(
        text[:3000],
        max_length=200,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return f"π Document Summary:\n{summary[0]['summary_text']}"
# Gradio Interface: one file upload in, one text box out.
iface = gr.Interface(
    fn=analyze_input,
    inputs=gr.File(label="Upload Document or Image"),
    outputs=gr.Textbox(label="Result", lines=10),
    title="Document & Image Analysis Web Service",
    description=(
        "Upload a document (PDF, DOCX, PPTX, XLSX) to get a summary "
        "or an image to get a caption. OCR and AI-powered."
    ),
)
demo = gr.TabbedInterface(
    interface_list=[iface],
    tab_names=["Docs and Images"],
)

# Mount the Gradio UI onto the FastAPI app at the root path.
app = gr.mount_gradio_app(app, demo, path="/")
def root():
    """Build a redirect response that points at "/"."""
    # NOTE(review): no route decorator is visible on this function here, so
    # nothing registers it with the FastAPI app as written — confirm intent.
    redirect = RedirectResponse(url="/")
    return redirect