Spaces:

Jorick-python
/

mcp-slidedeck

Sleeping

App Files Files Community

Jorick-python committed on May 28, 2025

Commit

91b23d7

1 Parent(s): 152fbf2

Update app.py with speed optimization and debug logging

Browse files

Files changed (1) hide show

app.py +85 -63

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
-# app.py (complete and updated)
-import io, os, json
 from typing import Dict, List, Any
 import gradio as gr
 from fastapi import FastAPI, UploadFile
@@ -12,9 +12,7 @@ from transformers import BlipProcessor, BlipForConditionalGeneration
 import torch
 import uvicorn
-import shutil
-import subprocess
 try:
     print("\n--- DEBUG INFO ---")
     tesseract_path = shutil.which("tesseract")
@@ -29,69 +27,86 @@ try:
 except Exception as e:
     print("Error during Tesseract check:", e)
-# ---------  Image Caption Model (BLIP base) -----------
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 blip_model = BlipForConditionalGeneration.from_pretrained(
     "Salesforce/blip-image-captioning-base",
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
 ).eval()
 def _caption_image(img: Image.Image) -> str:
     """Run BLIP to caption a PIL image."""
-    inputs = processor(img.convert("RGB"), return_tensors="pt")
-    with torch.no_grad():
-        out = blip_model.generate(**{k: v.to(blip_model.device) for k, v in inputs.items()})
-    return processor.decode(out[0], skip_special_tokens=True)
-# ---------  Core analysis function -----------
 def analyze_slidepack(file: Any) -> Dict[str, Any]:
-    fname = os.path.basename(file.name)
-    slides_out: List[Dict[str, Any]] = []
-    # ---------- PPTX ----------
-    if fname.lower().endswith(".pptx"):
-        pres = Presentation(file.name)
-        for idx, slide in enumerate(pres.slides, start=1):
-            texts, caps = [], []
-            for shape in slide.shapes:
-                if hasattr(shape, "text"):
-                    text = shape.text.strip()
-                    if text:
-                        texts.append(text)
-                if shape.shape_type == 13:
-                    img_blob = shape.image.blob
-                    img = Image.open(io.BytesIO(img_blob))
-                    caps.append(_caption_image(img))
-            slides_out.append({
-                "slide_index": idx,
-                "textBlocks": texts,
-                "imageCaptions": caps
-            })
-    # ---------- PDF ----------
-    elif fname.lower().endswith(".pdf"):
-        with pdfplumber.open(file.name) as pdf:
-            for idx, page in enumerate(pdf.pages, start=1):
-                texts = [page.extract_text() or ""]
-                caps = []
-                img = page.to_image(resolution=200).original
-                caps.append(_caption_image(img))
-                ocr_text = pytesseract.image_to_string(img)
-                if ocr_text.strip():
-                    texts.append(ocr_text)
                 slides_out.append({
                     "slide_index": idx,
-                    "textBlocks": [t for t in texts if t.strip()],
                     "imageCaptions": caps
                 })
-    else:
-        raise gr.Error("Unsupported file type. Upload a .pptx or .pdf.")
-    return {"file_name": fname, "slides": slides_out}
-# ---------  Gradio Interface -----------
 demo = gr.Interface(
     fn=analyze_slidepack,
     inputs=gr.File(label="Upload PPTX or PDF"),
@@ -100,10 +115,11 @@ demo = gr.Interface(
     description=(
         "Returns **every** text fragment and BLIP-generated image caption in JSON. "
         "No summarisation – perfect for downstream quiz agents."
-    )
 )
-# ---------  FastAPI Tool Endpoint -----------
 api = FastAPI()
 api.add_middleware(
     CORSMiddleware,
@@ -115,18 +131,24 @@ api.add_middleware(
 @api.post("/extract_slidepack")
 async def extract_slidepack(file: UploadFile):
-    path = f"/tmp/{file.filename}"
-    with open(path, "wb") as f:
-        f.write(await file.read())
-    return analyze_slidepack(type("File", (object,), {"name": path}))
 if __name__ == "__main__":
     import asyncio
     async def delayed_startup():
         print("⏳ Waiting before MCP launch to avoid race condition...")
-        await asyncio.sleep(3)  # wait 3 seconds to allow models to finish loading
         print("🚀 Launching with MCP support now.")
         demo.launch(mcp_server=True)
-    asyncio.run(delayed_startup())

+# app.py (with logging and debug improvements)
+import io, os, json, shutil, subprocess, traceback
 from typing import Dict, List, Any
 import gradio as gr
 from fastapi import FastAPI, UploadFile
 import torch
 import uvicorn
+# ----------- Tesseract Debugging -----------
 try:
     print("\n--- DEBUG INFO ---")
     tesseract_path = shutil.which("tesseract")
 except Exception as e:
     print("Error during Tesseract check:", e)
+# ----------- BLIP Image Caption Model -----------
+print("🔄 Loading BLIP model...")
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 blip_model = BlipForConditionalGeneration.from_pretrained(
     "Salesforce/blip-image-captioning-base",
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
 ).eval()
+print("✅ BLIP model loaded")
 def _caption_image(img: Image.Image) -> str:
     """Run BLIP to caption a PIL image."""
+    try:
+        inputs = processor(img.convert("RGB"), return_tensors="pt")
+        with torch.no_grad():
+            out = blip_model.generate(**{k: v.to(blip_model.device) for k, v in inputs.items()})
+        return processor.decode(out[0], skip_special_tokens=True)
+    except Exception as e:
+        print(f"[ERROR] Captioning image failed: {e}")
+        traceback.print_exc()
+        return "[CAPTION_ERROR]"
+# ----------- Slidepack Processing -----------
 def analyze_slidepack(file: Any) -> Dict[str, Any]:
+    try:
+        fname = os.path.basename(file.name)
+        print(f"📂 Analyzing file: {fname}")
+        slides_out: List[Dict[str, Any]] = []
+        # PPTX
+        if fname.lower().endswith(".pptx"):
+            pres = Presentation(file.name)
+            for idx, slide in enumerate(pres.slides, start=1):
+                texts, caps = [], []
+                for shape in slide.shapes:
+                    if hasattr(shape, "text"):
+                        text = shape.text.strip()
+                        if text:
+                            texts.append(text)
+                    if shape.shape_type == 13:
+                        img_blob = shape.image.blob
+                        img = Image.open(io.BytesIO(img_blob))
+                        caps.append(_caption_image(img))
                 slides_out.append({
                     "slide_index": idx,
+                    "textBlocks": texts,
                     "imageCaptions": caps
                 })
+        # PDF
+        elif fname.lower().endswith(".pdf"):
+            with pdfplumber.open(file.name) as pdf:
+                for idx, page in enumerate(pdf.pages, start=1):
+                    texts = [page.extract_text() or ""]
+                    caps = []
+                    try:
+                        img = page.to_image(resolution=200).original
+                        caps.append(_caption_image(img))
+                        ocr_text = pytesseract.image_to_string(img)
+                        if ocr_text.strip():
+                            texts.append(ocr_text)
+                    except Exception as e:
+                        print(f"[WARN] Skipping image/OCR on page {idx} due to error: {e}")
+                    slides_out.append({
+                        "slide_index": idx,
+                        "textBlocks": [t for t in texts if t.strip()],
+                        "imageCaptions": caps
+                    })
+        else:
+            raise gr.Error("Unsupported file type. Upload a .pptx or .pdf.")
+        print("✅ Slidepack analysis completed")
+        return {"file_name": fname, "slides": slides_out}
+    except Exception as e:
+        print(f"[ERROR] Exception during slidepack analysis: {e}")
+        traceback.print_exc()
+        return {"error": str(e)}
+# ----------- Gradio UI -----------
 demo = gr.Interface(
     fn=analyze_slidepack,
     inputs=gr.File(label="Upload PPTX or PDF"),
     description=(
         "Returns **every** text fragment and BLIP-generated image caption in JSON. "
         "No summarisation – perfect for downstream quiz agents."
+    ),
+    live=True
 )
+# ----------- FastAPI REST Endpoint -----------
 api = FastAPI()
 api.add_middleware(
     CORSMiddleware,
 @api.post("/extract_slidepack")
 async def extract_slidepack(file: UploadFile):
+    try:
+        path = f"/tmp/{file.filename}"
+        with open(path, "wb") as f:
+            f.write(await file.read())
+        return analyze_slidepack(type("File", (object,), {"name": path}))
+    except Exception as e:
+        print(f"[ERROR] extract_slidepack endpoint failed: {e}")
+        traceback.print_exc()
+        return {"error": str(e)}
+# ----------- Main Entry -----------
 if __name__ == "__main__":
     import asyncio
     async def delayed_startup():
         print("⏳ Waiting before MCP launch to avoid race condition...")
+        await asyncio.sleep(3)
         print("🚀 Launching with MCP support now.")
         demo.launch(mcp_server=True)
+    asyncio.run(delayed_startup())