Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -286,17 +286,32 @@ class ForgeryDetector:
|
|
| 286 |
# Skip to end - image is ready
|
| 287 |
pdf_path = None
|
| 288 |
|
| 289 |
-
# If we got a PDF, convert
|
| 290 |
if pdf_path and os.path.exists(pdf_path):
|
| 291 |
import fitz
|
| 292 |
pdf_document = fitz.open(pdf_path)
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
| 298 |
pdf_document.close()
|
| 299 |
os.unlink(pdf_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
|
| 301 |
except Exception as e:
|
| 302 |
raise ValueError(f"Could not process Word document. Please convert to PDF or image first. Error: {str(e)}")
|
|
@@ -309,15 +324,30 @@ class ForgeryDetector:
|
|
| 309 |
pass
|
| 310 |
|
| 311 |
elif image.lower().endswith('.pdf'):
|
| 312 |
-
# Handle PDF files
|
| 313 |
import fitz # PyMuPDF
|
| 314 |
pdf_document = fitz.open(image)
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
|
|
|
|
|
|
|
|
|
| 320 |
pdf_document.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
else:
|
| 322 |
# Load image file
|
| 323 |
image = Image.open(image)
|
|
|
|
| 286 |
# Skip to end - image is ready
|
| 287 |
pdf_path = None
|
| 288 |
|
| 289 |
+
# If we got a PDF, convert ALL pages to a single tall image
|
| 290 |
if pdf_path and os.path.exists(pdf_path):
|
| 291 |
import fitz
|
| 292 |
pdf_document = fitz.open(pdf_path)
|
| 293 |
+
page_images = []
|
| 294 |
+
for page_num in range(len(pdf_document)):
|
| 295 |
+
page = pdf_document[page_num]
|
| 296 |
+
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
|
| 297 |
+
page_img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
|
| 298 |
+
if pix.n == 4:
|
| 299 |
+
page_img = cv2.cvtColor(page_img, cv2.COLOR_RGBA2RGB)
|
| 300 |
+
page_images.append(page_img)
|
| 301 |
pdf_document.close()
|
| 302 |
os.unlink(pdf_path)
|
| 303 |
+
# Stack all pages vertically into one tall image
|
| 304 |
+
if len(page_images) == 1:
|
| 305 |
+
image = page_images[0]
|
| 306 |
+
else:
|
| 307 |
+
max_width = max(p.shape[1] for p in page_images)
|
| 308 |
+
padded = []
|
| 309 |
+
for p in page_images:
|
| 310 |
+
if p.shape[1] < max_width:
|
| 311 |
+
pad = np.ones((p.shape[0], max_width - p.shape[1], 3), dtype=np.uint8) * 255
|
| 312 |
+
p = np.concatenate([p, pad], axis=1)
|
| 313 |
+
padded.append(p)
|
| 314 |
+
image = np.concatenate(padded, axis=0)
|
| 315 |
|
| 316 |
except Exception as e:
|
| 317 |
raise ValueError(f"Could not process Word document. Please convert to PDF or image first. Error: {str(e)}")
|
|
|
|
| 324 |
pass
|
| 325 |
|
| 326 |
elif image.lower().endswith('.pdf'):
|
| 327 |
+
# Handle PDF files - process ALL pages
|
| 328 |
import fitz # PyMuPDF
|
| 329 |
pdf_document = fitz.open(image)
|
| 330 |
+
page_images = []
|
| 331 |
+
for page_num in range(len(pdf_document)):
|
| 332 |
+
page = pdf_document[page_num]
|
| 333 |
+
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
|
| 334 |
+
page_img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
|
| 335 |
+
if pix.n == 4:
|
| 336 |
+
page_img = cv2.cvtColor(page_img, cv2.COLOR_RGBA2RGB)
|
| 337 |
+
page_images.append(page_img)
|
| 338 |
pdf_document.close()
|
| 339 |
+
# Stack all pages vertically into one tall image
|
| 340 |
+
if len(page_images) == 1:
|
| 341 |
+
image = page_images[0]
|
| 342 |
+
else:
|
| 343 |
+
max_width = max(p.shape[1] for p in page_images)
|
| 344 |
+
padded = []
|
| 345 |
+
for p in page_images:
|
| 346 |
+
if p.shape[1] < max_width:
|
| 347 |
+
pad = np.ones((p.shape[0], max_width - p.shape[1], 3), dtype=np.uint8) * 255
|
| 348 |
+
p = np.concatenate([p, pad], axis=1)
|
| 349 |
+
padded.append(p)
|
| 350 |
+
image = np.concatenate(padded, axis=0)
|
| 351 |
else:
|
| 352 |
# Load image file
|
| 353 |
image = Image.open(image)
|