Spaces:

Knightmovies
/

ScannerUniversalRotator

Sleeping

App Files Files Community

Knightmovies committed on Sep 21

Commit

2d92b09

verified ·

1 Parent(s): 89243d2

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -32

app.py CHANGED Viewed

@@ -4,10 +4,7 @@ import numpy as np
 from PIL import Image
 import torch
 from transformers import TableTransformerForObjectDetection, DetrImageProcessor
-import matplotlib.pyplot as plt
-import matplotlib.patches as patches
 import pytesseract
-import re
 from scipy.spatial import distance as dist
 # ==============================================================================
@@ -17,6 +14,7 @@ from scipy.spatial import distance as dist
 # For Hugging Face Spaces deployment, you also need these two files:
 # 1. requirements.txt (listing all Python libraries)
 # 2. packages.txt (containing the line "tesseract-ocr")
 # Set Streamlit page configuration
 st.set_page_config(
@@ -81,8 +79,6 @@ def correct_orientation(image):
         osd = pytesseract.image_to_osd(image, output_type=pytesseract.Output.DICT)
         rotation = osd['rotate']
         if rotation in [90, 180, 270]:
-            # The rotation values from Tesseract are counter-clockwise.
-            # OpenCV's rotation constants are clockwise. We need to map them correctly.
             if rotation == 90:
                 rotated_image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
             elif rotation == 180:
@@ -94,39 +90,46 @@ def correct_orientation(image):
         st.warning(f"OSD check failed: {e}. Returning original image.")
     return image
 def extract_and_draw_table_structure(image_bgr):
-    """Takes a BGR image, finds table structure, and returns an image with boxes."""
     image_pil = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
     inputs = processor(images=image_pil, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
-    width, height = image_pil.size
     target_sizes = torch.tensor([image_pil.size[::-1]])
     results = processor.post_process_object_detection(outputs, threshold=0.7, target_sizes=target_sizes)[0]
-    fig, ax = plt.subplots(1, figsize=(width / 100, height / 100), dpi=100)
-    ax.imshow(image_pil)
-    ax.axis('off')
-    colors = {"table row": "green", "table column": "red", "table": "magenta"}
     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
         class_name = model.config.id2label[label.item()]
         if class_name in colors:
-            xmin, ymin, xmax, ymax = box
-            rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, linewidth=1.5, edgecolor=colors[class_name], facecolor='none')
-            ax.add_patch(rect)
-    fig.canvas.draw()
-    # FIX: Replaced deprecated 'tostring_rgb' with 'tobytes_rgb'
-    img_with_boxes = np.frombuffer(fig.canvas.tobytes_rgb(), dtype=np.uint8)
-    img_with_boxes = img_with_boxes.reshape(fig.canvas.get_width_height()[::-1] + (3,))
-    plt.close(fig)
     return img_with_boxes
 # ==============================================================================
-# Streamlit UI
 # ==============================================================================
 st.title("📄 Document Scanner & Table Recognizer")
@@ -136,29 +139,27 @@ uploaded_file = st.file_uploader("Choose a document image...", type=["jpg", "jpe
 if uploaded_file is not None:
     file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
-    input_image = cv2.imdecode(file_bytes, 1)
-    input_image_rgb = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
     st.subheader("1. Original Image")
-    # FIX: Replaced deprecated 'use_column_width' with 'use_container_width'
-    st.image(input_image_rgb, caption="Your Uploaded Image", use_container_width=True)
     with st.spinner("Processing your document... This may take a moment."):
         straightened_image = find_and_straighten_document(input_image)
         image_to_process = straightened_image if straightened_image is not None and straightened_image.size > 0 else input_image
         final_image = correct_orientation(image_to_process)
-        final_image_rgb = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
-        image_with_structure = extract_and_draw_table_structure(final_image)
     st.subheader("2. Corrected Document & Detected Structure")
     col1, col2 = st.columns(2)
     with col1:
-        # FIX: Replaced deprecated 'use_column_width' with 'use_container_width'
         st.image(final_image_rgb, caption="Auto-Corrected & Oriented", use_container_width=True)
-        _, buf = cv2.imencode(".jpg", final_image)
         st.download_button(
             label="Download Clean Image",
             data=buf.tobytes(),
@@ -167,5 +168,5 @@ if uploaded_file is not None:
         )
     with col2:
-        # FIX: Replaced deprecated 'use_column_width' with 'use_container_width'
-        st.image(image_with_structure, caption="Detected Table Structure (Rows: Green, Columns: Red)", use_container_width=True)

 from PIL import Image
 import torch
 from transformers import TableTransformerForObjectDetection, DetrImageProcessor
 import pytesseract
 from scipy.spatial import distance as dist
 # ==============================================================================
 # For Hugging Face Spaces deployment, you also need these two files:
 # 1. requirements.txt (listing all Python libraries)
 # 2. packages.txt (containing the line "tesseract-ocr")
+# NOTE: With this new code, you can remove 'matplotlib' from requirements.txt
 # Set Streamlit page configuration
 st.set_page_config(
         osd = pytesseract.image_to_osd(image, output_type=pytesseract.Output.DICT)
         rotation = osd['rotate']
         if rotation in [90, 180, 270]:
             if rotation == 90:
                 rotated_image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
             elif rotation == 180:
         st.warning(f"OSD check failed: {e}. Returning original image.")
     return image
+# ==============================================================================
+# NEW AND IMPROVED: Table Structure Recognition using OpenCV for Drawing
+# ==============================================================================
 def extract_and_draw_table_structure(image_bgr):
+    """
+    Takes a BGR image, finds table structure, and returns an image with
+    bounding boxes drawn directly using OpenCV.
+    """
+    # 1. Run model inference (same as before)
     image_pil = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
     inputs = processor(images=image_pil, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
     target_sizes = torch.tensor([image_pil.size[::-1]])
     results = processor.post_process_object_detection(outputs, threshold=0.7, target_sizes=target_sizes)[0]
+    # 2. Draw results on a copy of the original image using OpenCV
+    img_with_boxes = image_bgr.copy()
+    # BGR color codes for OpenCV
+    colors = {"table row": (0, 255, 0), "table column": (0, 0, 255), "table": (255, 0, 255)}
     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
         class_name = model.config.id2label[label.item()]
         if class_name in colors:
+            # Get box coordinates and convert to integers
+            xmin, ymin, xmax, ymax = [int(val) for val in box.tolist()]
+            # Get color for the class
+            color = colors[class_name]
+            # Draw rectangle on the image
+            cv2.rectangle(img_with_boxes, (xmin, ymin), (xmax, ymax), color, 2)
     return img_with_boxes
 # ==============================================================================
+# Streamlit UI (same layout; BGR images are now converted to RGB at display time)
 # ==============================================================================
 st.title("📄 Document Scanner & Table Recognizer")
 if uploaded_file is not None:
     file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
+    input_image = cv2.imdecode(file_bytes, 1) # 1 = cv2.IMREAD_COLOR (always decode to a 3-channel BGR image)
     st.subheader("1. Original Image")
+    st.image(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB), caption="Your Uploaded Image", use_container_width=True)
     with st.spinner("Processing your document... This may take a moment."):
         straightened_image = find_and_straighten_document(input_image)
         image_to_process = straightened_image if straightened_image is not None and straightened_image.size > 0 else input_image
         final_image = correct_orientation(image_to_process)
+        # This now returns a BGR image from OpenCV
+        image_with_structure_bgr = extract_and_draw_table_structure(final_image)
     st.subheader("2. Corrected Document & Detected Structure")
     col1, col2 = st.columns(2)
     with col1:
+        final_image_rgb = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
         st.image(final_image_rgb, caption="Auto-Corrected & Oriented", use_container_width=True)
+        _, buf = cv2.imencode(".jpg", final_image) # Use the BGR image for encoding
         st.download_button(
             label="Download Clean Image",
             data=buf.tobytes(),
         )
     with col2:
+        image_with_structure_rgb = cv2.cvtColor(image_with_structure_bgr, cv2.COLOR_BGR2RGB)
+        st.image(image_with_structure_rgb, caption="Detected Table Structure (Rows: Green, Columns: Red)", use_container_width=True)