Jorick-python committed on
Commit
91b23d7
·
1 Parent(s): 152fbf2

Update app.py with speed optimization and debug logging

Browse files
Files changed (1) hide show
  1. app.py +85 -63
app.py CHANGED
@@ -1,5 +1,5 @@
1
- # app.py (complete and updated)
2
- import io, os, json
3
  from typing import Dict, List, Any
4
  import gradio as gr
5
  from fastapi import FastAPI, UploadFile
@@ -12,9 +12,7 @@ from transformers import BlipProcessor, BlipForConditionalGeneration
12
  import torch
13
  import uvicorn
14
 
15
- import shutil
16
- import subprocess
17
-
18
  try:
19
  print("\n--- DEBUG INFO ---")
20
  tesseract_path = shutil.which("tesseract")
@@ -29,69 +27,86 @@ try:
29
  except Exception as e:
30
  print("Error during Tesseract check:", e)
31
 
32
-
33
- # --------- Image Caption Model (BLIP base) -----------
34
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
35
  blip_model = BlipForConditionalGeneration.from_pretrained(
36
  "Salesforce/blip-image-captioning-base",
37
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
38
  ).eval()
 
39
 
40
  def _caption_image(img: Image.Image) -> str:
41
  """Run BLIP to caption a PIL image."""
42
- inputs = processor(img.convert("RGB"), return_tensors="pt")
43
- with torch.no_grad():
44
- out = blip_model.generate(**{k: v.to(blip_model.device) for k, v in inputs.items()})
45
- return processor.decode(out[0], skip_special_tokens=True)
46
-
47
- # --------- Core analysis function -----------
 
 
 
 
 
48
  def analyze_slidepack(file: Any) -> Dict[str, Any]:
49
- fname = os.path.basename(file.name)
50
- slides_out: List[Dict[str, Any]] = []
51
-
52
- # ---------- PPTX ----------
53
- if fname.lower().endswith(".pptx"):
54
- pres = Presentation(file.name)
55
- for idx, slide in enumerate(pres.slides, start=1):
56
- texts, caps = [], []
57
- for shape in slide.shapes:
58
- if hasattr(shape, "text"):
59
- text = shape.text.strip()
60
- if text:
61
- texts.append(text)
62
- if shape.shape_type == 13:
63
- img_blob = shape.image.blob
64
- img = Image.open(io.BytesIO(img_blob))
65
- caps.append(_caption_image(img))
66
- slides_out.append({
67
- "slide_index": idx,
68
- "textBlocks": texts,
69
- "imageCaptions": caps
70
- })
71
-
72
- # ---------- PDF ----------
73
- elif fname.lower().endswith(".pdf"):
74
- with pdfplumber.open(file.name) as pdf:
75
- for idx, page in enumerate(pdf.pages, start=1):
76
- texts = [page.extract_text() or ""]
77
- caps = []
78
- img = page.to_image(resolution=200).original
79
- caps.append(_caption_image(img))
80
- ocr_text = pytesseract.image_to_string(img)
81
- if ocr_text.strip():
82
- texts.append(ocr_text)
83
  slides_out.append({
84
  "slide_index": idx,
85
- "textBlocks": [t for t in texts if t.strip()],
86
  "imageCaptions": caps
87
  })
88
 
89
- else:
90
- raise gr.Error("Unsupported file type. Upload a .pptx or .pdf.")
91
-
92
- return {"file_name": fname, "slides": slides_out}
93
-
94
- # --------- Gradio Interface -----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  demo = gr.Interface(
96
  fn=analyze_slidepack,
97
  inputs=gr.File(label="Upload PPTX or PDF"),
@@ -100,10 +115,11 @@ demo = gr.Interface(
100
  description=(
101
  "Returns **every** text fragment and BLIP-generated image caption in JSON. "
102
  "No summarisation – perfect for downstream quiz agents."
103
- )
 
104
  )
105
 
106
- # --------- FastAPI Tool Endpoint -----------
107
  api = FastAPI()
108
  api.add_middleware(
109
  CORSMiddleware,
@@ -115,18 +131,24 @@ api.add_middleware(
115
 
116
  @api.post("/extract_slidepack")
117
  async def extract_slidepack(file: UploadFile):
118
- path = f"/tmp/{file.filename}"
119
- with open(path, "wb") as f:
120
- f.write(await file.read())
121
- return analyze_slidepack(type("File", (object,), {"name": path}))
122
-
 
 
 
 
 
 
123
  if __name__ == "__main__":
124
  import asyncio
125
 
126
  async def delayed_startup():
127
  print("⏳ Waiting before MCP launch to avoid race condition...")
128
- await asyncio.sleep(3) # wait 3 seconds to allow models to finish loading
129
  print("🚀 Launching with MCP support now.")
130
  demo.launch(mcp_server=True)
131
 
132
- asyncio.run(delayed_startup())
 
1
+ # app.py (with logging and debug improvements)
2
+ import io, os, json, shutil, subprocess, traceback
3
  from typing import Dict, List, Any
4
  import gradio as gr
5
  from fastapi import FastAPI, UploadFile
 
12
  import torch
13
  import uvicorn
14
 
15
+ # ----------- Tesseract Debugging -----------
 
 
16
  try:
17
  print("\n--- DEBUG INFO ---")
18
  tesseract_path = shutil.which("tesseract")
 
27
  except Exception as e:
28
  print("Error during Tesseract check:", e)
29
 
30
+ # ----------- BLIP Image Caption Model -----------
31
+ print("🔄 Loading BLIP model...")
32
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
33
  blip_model = BlipForConditionalGeneration.from_pretrained(
34
  "Salesforce/blip-image-captioning-base",
35
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
36
  ).eval()
37
+ print("✅ BLIP model loaded")
38
 
39
  def _caption_image(img: Image.Image) -> str:
40
  """Run BLIP to caption a PIL image."""
41
+ try:
42
+ inputs = processor(img.convert("RGB"), return_tensors="pt")
43
+ with torch.no_grad():
44
+ out = blip_model.generate(**{k: v.to(blip_model.device) for k, v in inputs.items()})
45
+ return processor.decode(out[0], skip_special_tokens=True)
46
+ except Exception as e:
47
+ print(f"[ERROR] Captioning image failed: {e}")
48
+ traceback.print_exc()
49
+ return "[CAPTION_ERROR]"
50
+
51
+ # ----------- Slidepack Processing -----------
52
  def analyze_slidepack(file: Any) -> Dict[str, Any]:
53
+ try:
54
+ fname = os.path.basename(file.name)
55
+ print(f"📂 Analyzing file: {fname}")
56
+ slides_out: List[Dict[str, Any]] = []
57
+
58
+ # PPTX
59
+ if fname.lower().endswith(".pptx"):
60
+ pres = Presentation(file.name)
61
+ for idx, slide in enumerate(pres.slides, start=1):
62
+ texts, caps = [], []
63
+ for shape in slide.shapes:
64
+ if hasattr(shape, "text"):
65
+ text = shape.text.strip()
66
+ if text:
67
+ texts.append(text)
68
+ if shape.shape_type == 13:
69
+ img_blob = shape.image.blob
70
+ img = Image.open(io.BytesIO(img_blob))
71
+ caps.append(_caption_image(img))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  slides_out.append({
73
  "slide_index": idx,
74
+ "textBlocks": texts,
75
  "imageCaptions": caps
76
  })
77
 
78
+ # PDF
79
+ elif fname.lower().endswith(".pdf"):
80
+ with pdfplumber.open(file.name) as pdf:
81
+ for idx, page in enumerate(pdf.pages, start=1):
82
+ texts = [page.extract_text() or ""]
83
+ caps = []
84
+ try:
85
+ img = page.to_image(resolution=200).original
86
+ caps.append(_caption_image(img))
87
+ ocr_text = pytesseract.image_to_string(img)
88
+ if ocr_text.strip():
89
+ texts.append(ocr_text)
90
+ except Exception as e:
91
+ print(f"[WARN] Skipping image/OCR on page {idx} due to error: {e}")
92
+ slides_out.append({
93
+ "slide_index": idx,
94
+ "textBlocks": [t for t in texts if t.strip()],
95
+ "imageCaptions": caps
96
+ })
97
+
98
+ else:
99
+ raise gr.Error("Unsupported file type. Upload a .pptx or .pdf.")
100
+
101
+ print("✅ Slidepack analysis completed")
102
+ return {"file_name": fname, "slides": slides_out}
103
+
104
+ except Exception as e:
105
+ print(f"[ERROR] Exception during slidepack analysis: {e}")
106
+ traceback.print_exc()
107
+ return {"error": str(e)}
108
+
109
+ # ----------- Gradio UI -----------
110
  demo = gr.Interface(
111
  fn=analyze_slidepack,
112
  inputs=gr.File(label="Upload PPTX or PDF"),
 
115
  description=(
116
  "Returns **every** text fragment and BLIP-generated image caption in JSON. "
117
  "No summarisation – perfect for downstream quiz agents."
118
+ ),
119
+ live=True
120
  )
121
 
122
+ # ----------- FastAPI REST Endpoint -----------
123
  api = FastAPI()
124
  api.add_middleware(
125
  CORSMiddleware,
 
131
 
132
  @api.post("/extract_slidepack")
133
  async def extract_slidepack(file: UploadFile):
134
+ try:
135
+ path = f"/tmp/{file.filename}"
136
+ with open(path, "wb") as f:
137
+ f.write(await file.read())
138
+ return analyze_slidepack(type("File", (object,), {"name": path}))
139
+ except Exception as e:
140
+ print(f"[ERROR] extract_slidepack endpoint failed: {e}")
141
+ traceback.print_exc()
142
+ return {"error": str(e)}
143
+
144
+ # ----------- Main Entry -----------
145
  if __name__ == "__main__":
146
  import asyncio
147
 
148
  async def delayed_startup():
149
  print("⏳ Waiting before MCP launch to avoid race condition...")
150
+ await asyncio.sleep(3)
151
  print("🚀 Launching with MCP support now.")
152
  demo.launch(mcp_server=True)
153
 
154
+ asyncio.run(delayed_startup())