Spaces:

DocForg
/

Document_Forgery_Detection

Sleeping

App Files Community

JKrishnanandhaa committed on Feb 18

Commit

49ca167

verified ·

1 Parent(s): 5c8f686

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -12

app.py CHANGED Viewed

@@ -286,17 +286,32 @@ class ForgeryDetector:
                             # Skip to end - image is ready
                             pdf_path = None
-                    # If we got a PDF, convert it to image
                     if pdf_path and os.path.exists(pdf_path):
                         import fitz
                         pdf_document = fitz.open(pdf_path)
-                        page = pdf_document[0]
-                        pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
-                        image = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
-                        if pix.n == 4:
-                            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
                         pdf_document.close()
                         os.unlink(pdf_path)
                 except Exception as e:
                     raise ValueError(f"Could not process Word document. Please convert to PDF or image first. Error: {str(e)}")
@@ -309,15 +324,30 @@ class ForgeryDetector:
                             pass
             elif image.lower().endswith('.pdf'):
-                # Handle PDF files
                 import fitz  # PyMuPDF
                 pdf_document = fitz.open(image)
-                page = pdf_document[0]
-                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
-                image = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
-                if pix.n == 4:
-                    image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
                 pdf_document.close()
             else:
                 # Load image file
                 image = Image.open(image)

                             # Skip to end - image is ready
                             pdf_path = None
+                    # If we got a PDF, convert ALL pages to a single tall image
                     if pdf_path and os.path.exists(pdf_path):
                         import fitz
                         pdf_document = fitz.open(pdf_path)
+                        page_images = []
+                        for page_num in range(len(pdf_document)):
+                            page = pdf_document[page_num]
+                            pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
+                            page_img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
+                            if pix.n == 4:
+                                page_img = cv2.cvtColor(page_img, cv2.COLOR_RGBA2RGB)
+                            page_images.append(page_img)
                         pdf_document.close()
                         os.unlink(pdf_path)
+                        # Stack all pages vertically into one tall image
+                        if len(page_images) == 1:
+                            image = page_images[0]
+                        else:
+                            max_width = max(p.shape[1] for p in page_images)
+                            padded = []
+                            for p in page_images:
+                                if p.shape[1] < max_width:
+                                    pad = np.ones((p.shape[0], max_width - p.shape[1], 3), dtype=np.uint8) * 255
+                                    p = np.concatenate([p, pad], axis=1)
+                                padded.append(p)
+                            image = np.concatenate(padded, axis=0)
                 except Exception as e:
                     raise ValueError(f"Could not process Word document. Please convert to PDF or image first. Error: {str(e)}")
                             pass
             elif image.lower().endswith('.pdf'):
+                # Handle PDF files - process ALL pages
                 import fitz  # PyMuPDF
                 pdf_document = fitz.open(image)
+                page_images = []
+                for page_num in range(len(pdf_document)):
+                    page = pdf_document[page_num]
+                    pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
+                    page_img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
+                    if pix.n == 4:
+                        page_img = cv2.cvtColor(page_img, cv2.COLOR_RGBA2RGB)
+                    page_images.append(page_img)
                 pdf_document.close()
+                # Stack all pages vertically into one tall image
+                if len(page_images) == 1:
+                    image = page_images[0]
+                else:
+                    max_width = max(p.shape[1] for p in page_images)
+                    padded = []
+                    for p in page_images:
+                        if p.shape[1] < max_width:
+                            pad = np.ones((p.shape[0], max_width - p.shape[1], 3), dtype=np.uint8) * 255
+                            p = np.concatenate([p, pad], axis=1)
+                        padded.append(p)
+                    image = np.concatenate(padded, axis=0)
             else:
                 # Load image file
                 image = Image.open(image)