Update app.py
Browse files
app.py
CHANGED
|
@@ -4,10 +4,7 @@ import numpy as np
|
|
| 4 |
from PIL import Image
|
| 5 |
import torch
|
| 6 |
from transformers import TableTransformerForObjectDetection, DetrImageProcessor
|
| 7 |
-
import matplotlib.pyplot as plt
|
| 8 |
-
import matplotlib.patches as patches
|
| 9 |
import pytesseract
|
| 10 |
-
import re
|
| 11 |
from scipy.spatial import distance as dist
|
| 12 |
|
| 13 |
# ==============================================================================
|
|
@@ -17,6 +14,7 @@ from scipy.spatial import distance as dist
|
|
| 17 |
# For Hugging Face Spaces deployment, you also need these two files:
|
| 18 |
# 1. requirements.txt (listing all Python libraries)
|
| 19 |
# 2. packages.txt (containing the line "tesseract-ocr")
|
|
|
|
| 20 |
|
| 21 |
# Set Streamlit page configuration
|
| 22 |
st.set_page_config(
|
|
@@ -81,8 +79,6 @@ def correct_orientation(image):
|
|
| 81 |
osd = pytesseract.image_to_osd(image, output_type=pytesseract.Output.DICT)
|
| 82 |
rotation = osd['rotate']
|
| 83 |
if rotation in [90, 180, 270]:
|
| 84 |
-
# The rotation values from Tesseract are counter-clockwise.
|
| 85 |
-
# OpenCV's rotation constants are clockwise. We need to map them correctly.
|
| 86 |
if rotation == 90:
|
| 87 |
rotated_image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
|
| 88 |
elif rotation == 180:
|
|
@@ -94,39 +90,46 @@ def correct_orientation(image):
|
|
| 94 |
st.warning(f"OSD check failed: {e}. Returning original image.")
|
| 95 |
return image
|
| 96 |
|
|
|
|
|
|
|
|
|
|
| 97 |
def extract_and_draw_table_structure(image_bgr):
|
| 98 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
image_pil = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
|
| 100 |
inputs = processor(images=image_pil, return_tensors="pt")
|
| 101 |
|
| 102 |
with torch.no_grad():
|
| 103 |
outputs = model(**inputs)
|
| 104 |
|
| 105 |
-
width, height = image_pil.size
|
| 106 |
target_sizes = torch.tensor([image_pil.size[::-1]])
|
| 107 |
results = processor.post_process_object_detection(outputs, threshold=0.7, target_sizes=target_sizes)[0]
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
|
|
|
| 113 |
|
| 114 |
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
|
| 115 |
class_name = model.config.id2label[label.item()]
|
| 116 |
if class_name in colors:
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
return img_with_boxes
|
| 127 |
|
| 128 |
# ==============================================================================
|
| 129 |
-
# Streamlit UI
|
| 130 |
# ==============================================================================
|
| 131 |
|
| 132 |
st.title("📄 Document Scanner & Table Recognizer")
|
|
@@ -136,29 +139,27 @@ uploaded_file = st.file_uploader("Choose a document image...", type=["jpg", "jpe
|
|
| 136 |
|
| 137 |
if uploaded_file is not None:
|
| 138 |
file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
|
| 139 |
-
input_image = cv2.imdecode(file_bytes, 1)
|
| 140 |
-
|
| 141 |
-
|
| 142 |
st.subheader("1. Original Image")
|
| 143 |
-
|
| 144 |
-
st.image(input_image_rgb, caption="Your Uploaded Image", use_container_width=True)
|
| 145 |
|
| 146 |
with st.spinner("Processing your document... This may take a moment."):
|
| 147 |
straightened_image = find_and_straighten_document(input_image)
|
| 148 |
image_to_process = straightened_image if straightened_image is not None and straightened_image.size > 0 else input_image
|
| 149 |
final_image = correct_orientation(image_to_process)
|
| 150 |
-
final_image_rgb = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
|
| 151 |
|
| 152 |
-
|
|
|
|
| 153 |
|
| 154 |
st.subheader("2. Corrected Document & Detected Structure")
|
| 155 |
col1, col2 = st.columns(2)
|
| 156 |
|
| 157 |
with col1:
|
| 158 |
-
|
| 159 |
st.image(final_image_rgb, caption="Auto-Corrected & Oriented", use_container_width=True)
|
| 160 |
|
| 161 |
-
_, buf = cv2.imencode(".jpg", final_image)
|
| 162 |
st.download_button(
|
| 163 |
label="Download Clean Image",
|
| 164 |
data=buf.tobytes(),
|
|
@@ -167,5 +168,5 @@ if uploaded_file is not None:
|
|
| 167 |
)
|
| 168 |
|
| 169 |
with col2:
|
| 170 |
-
|
| 171 |
-
st.image(
|
|
|
|
| 4 |
from PIL import Image
|
| 5 |
import torch
|
| 6 |
from transformers import TableTransformerForObjectDetection, DetrImageProcessor
|
|
|
|
|
|
|
| 7 |
import pytesseract
|
|
|
|
| 8 |
from scipy.spatial import distance as dist
|
| 9 |
|
| 10 |
# ==============================================================================
|
|
|
|
| 14 |
# For Hugging Face Spaces deployment, you also need these two files:
|
| 15 |
# 1. requirements.txt (listing all Python libraries)
|
| 16 |
# 2. packages.txt (containing the line "tesseract-ocr")
|
| 17 |
+
# NOTE: With this new code, you can remove 'matplotlib' from requirements.txt
|
| 18 |
|
| 19 |
# Set Streamlit page configuration
|
| 20 |
st.set_page_config(
|
|
|
|
| 79 |
osd = pytesseract.image_to_osd(image, output_type=pytesseract.Output.DICT)
|
| 80 |
rotation = osd['rotate']
|
| 81 |
if rotation in [90, 180, 270]:
|
|
|
|
|
|
|
| 82 |
if rotation == 90:
|
| 83 |
rotated_image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
|
| 84 |
elif rotation == 180:
|
|
|
|
| 90 |
st.warning(f"OSD check failed: {e}. Returning original image.")
|
| 91 |
return image
|
| 92 |
|
| 93 |
+
# ==============================================================================
|
| 94 |
+
# Table structure recognition (bounding boxes drawn with OpenCV instead of matplotlib)
|
| 95 |
+
# ==============================================================================
|
| 96 |
def extract_and_draw_table_structure(image_bgr):
|
| 97 |
+
"""
|
| 98 |
+
Takes a BGR image, finds table structure, and returns an image with
|
| 99 |
+
bounding boxes drawn directly using OpenCV.
|
| 100 |
+
"""
|
| 101 |
+
# 1. Run model inference (same as before)
|
| 102 |
image_pil = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
|
| 103 |
inputs = processor(images=image_pil, return_tensors="pt")
|
| 104 |
|
| 105 |
with torch.no_grad():
|
| 106 |
outputs = model(**inputs)
|
| 107 |
|
|
|
|
| 108 |
target_sizes = torch.tensor([image_pil.size[::-1]])
|
| 109 |
results = processor.post_process_object_detection(outputs, threshold=0.7, target_sizes=target_sizes)[0]
|
| 110 |
|
| 111 |
+
# 2. Draw results on a copy of the original image using OpenCV
|
| 112 |
+
img_with_boxes = image_bgr.copy()
|
| 113 |
+
|
| 114 |
+
# BGR color codes for OpenCV
|
| 115 |
+
colors = {"table row": (0, 255, 0), "table column": (0, 0, 255), "table": (255, 0, 255)}
|
| 116 |
|
| 117 |
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
|
| 118 |
class_name = model.config.id2label[label.item()]
|
| 119 |
if class_name in colors:
|
| 120 |
+
# Get box coordinates and convert to integers
|
| 121 |
+
xmin, ymin, xmax, ymax = [int(val) for val in box.tolist()]
|
| 122 |
+
|
| 123 |
+
# Get color for the class
|
| 124 |
+
color = colors[class_name]
|
| 125 |
+
|
| 126 |
+
# Draw rectangle on the image
|
| 127 |
+
cv2.rectangle(img_with_boxes, (xmin, ymin), (xmax, ymax), color, 2)
|
| 128 |
+
|
| 129 |
return img_with_boxes
|
| 130 |
|
| 131 |
# ==============================================================================
|
| 132 |
+
# Streamlit UI
|
| 133 |
# ==============================================================================
|
| 134 |
|
| 135 |
st.title("📄 Document Scanner & Table Recognizer")
|
|
|
|
| 139 |
|
| 140 |
if uploaded_file is not None:
|
| 141 |
file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
|
| 142 |
+
input_image = cv2.imdecode(file_bytes, 1) # 1 = cv2.IMREAD_COLOR (always decode to a 3-channel BGR image)
|
| 143 |
+
|
|
|
|
| 144 |
st.subheader("1. Original Image")
|
| 145 |
+
st.image(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB), caption="Your Uploaded Image", use_container_width=True)
|
|
|
|
| 146 |
|
| 147 |
with st.spinner("Processing your document... This may take a moment."):
|
| 148 |
straightened_image = find_and_straighten_document(input_image)
|
| 149 |
image_to_process = straightened_image if straightened_image is not None and straightened_image.size > 0 else input_image
|
| 150 |
final_image = correct_orientation(image_to_process)
|
|
|
|
| 151 |
|
| 152 |
+
# This now returns a BGR image from OpenCV
|
| 153 |
+
image_with_structure_bgr = extract_and_draw_table_structure(final_image)
|
| 154 |
|
| 155 |
st.subheader("2. Corrected Document & Detected Structure")
|
| 156 |
col1, col2 = st.columns(2)
|
| 157 |
|
| 158 |
with col1:
|
| 159 |
+
final_image_rgb = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
|
| 160 |
st.image(final_image_rgb, caption="Auto-Corrected & Oriented", use_container_width=True)
|
| 161 |
|
| 162 |
+
_, buf = cv2.imencode(".jpg", final_image) # Use the BGR image for encoding
|
| 163 |
st.download_button(
|
| 164 |
label="Download Clean Image",
|
| 165 |
data=buf.tobytes(),
|
|
|
|
| 168 |
)
|
| 169 |
|
| 170 |
with col2:
|
| 171 |
+
image_with_structure_rgb = cv2.cvtColor(image_with_structure_bgr, cv2.COLOR_BGR2RGB)
|
| 172 |
+
st.image(image_with_structure_rgb, caption="Detected Table Structure (Rows: Green, Columns: Red)", use_container_width=True)
|