Knightmovies committed (verified)
Commit 2d92b09 · 1 Parent(s): 89243d2

Update app.py

Files changed (1)
  1. app.py +33 -32
app.py CHANGED
@@ -4,10 +4,7 @@ import numpy as np
 from PIL import Image
 import torch
 from transformers import TableTransformerForObjectDetection, DetrImageProcessor
-import matplotlib.pyplot as plt
-import matplotlib.patches as patches
 import pytesseract
-import re
 from scipy.spatial import distance as dist
 
 # ==============================================================================
@@ -17,6 +14,7 @@ from scipy.spatial import distance as dist
 # For Hugging Face Spaces deployment, you also need these two files:
 # 1. requirements.txt (listing all Python libraries)
 # 2. packages.txt (containing the line "tesseract-ocr")
+# NOTE: With this new code, you can remove 'matplotlib' from requirements.txt
 
 # Set Streamlit page configuration
 st.set_page_config(
@@ -81,8 +79,6 @@ def correct_orientation(image):
         osd = pytesseract.image_to_osd(image, output_type=pytesseract.Output.DICT)
         rotation = osd['rotate']
         if rotation in [90, 180, 270]:
-            # The rotation values from Tesseract are counter-clockwise.
-            # OpenCV's rotation constants are clockwise. We need to map them correctly.
             if rotation == 90:
                 rotated_image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
             elif rotation == 180:
@@ -94,39 +90,46 @@ def correct_orientation(image):
         st.warning(f"OSD check failed: {e}. Returning original image.")
         return image
 
+# ==============================================================================
+# NEW AND IMPROVED: Table Structure Recognition using OpenCV for Drawing
+# ==============================================================================
 def extract_and_draw_table_structure(image_bgr):
-    """Takes a BGR image, finds table structure, and returns an image with boxes."""
+    """
+    Takes a BGR image, finds table structure, and returns an image with
+    bounding boxes drawn directly using OpenCV.
+    """
+    # 1. Run model inference (same as before)
     image_pil = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
     inputs = processor(images=image_pil, return_tensors="pt")
 
     with torch.no_grad():
         outputs = model(**inputs)
 
-    width, height = image_pil.size
     target_sizes = torch.tensor([image_pil.size[::-1]])
     results = processor.post_process_object_detection(outputs, threshold=0.7, target_sizes=target_sizes)[0]
 
-    fig, ax = plt.subplots(1, figsize=(width / 100, height / 100), dpi=100)
-    ax.imshow(image_pil)
-    ax.axis('off')
-    colors = {"table row": "green", "table column": "red", "table": "magenta"}
+    # 2. Draw results on a copy of the original image using OpenCV
+    img_with_boxes = image_bgr.copy()
+
+    # BGR color codes for OpenCV
+    colors = {"table row": (0, 255, 0), "table column": (0, 0, 255), "table": (255, 0, 255)}
 
     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
         class_name = model.config.id2label[label.item()]
         if class_name in colors:
-            xmin, ymin, xmax, ymax = box
-            rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, linewidth=1.5, edgecolor=colors[class_name], facecolor='none')
-            ax.add_patch(rect)
-
-    fig.canvas.draw()
-    # FIX: Replaced deprecated 'tostring_rgb' with 'tobytes_rgb'
-    img_with_boxes = np.frombuffer(fig.canvas.tobytes_rgb(), dtype=np.uint8)
-    img_with_boxes = img_with_boxes.reshape(fig.canvas.get_width_height()[::-1] + (3,))
-    plt.close(fig)
+            # Get box coordinates and convert to integers
+            xmin, ymin, xmax, ymax = [int(val) for val in box.tolist()]
+
+            # Get color for the class
+            color = colors[class_name]
+
+            # Draw rectangle on the image
+            cv2.rectangle(img_with_boxes, (xmin, ymin), (xmax, ymax), color, 2)
+
     return img_with_boxes
 
 # ==============================================================================
-# Streamlit UI
+# Streamlit UI (Unchanged)
 # ==============================================================================
 
 st.title("📄 Document Scanner & Table Recognizer")
@@ -136,29 +139,27 @@ uploaded_file = st.file_uploader("Choose a document image...", type=["jpg", "jpe
 
 if uploaded_file is not None:
     file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
-    input_image = cv2.imdecode(file_bytes, 1)
-    input_image_rgb = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
-
+    input_image = cv2.imdecode(file_bytes, 1)  # 1 = cv2.IMREAD_COLOR
+
     st.subheader("1. Original Image")
-    # FIX: Replaced deprecated 'use_column_width' with 'use_container_width'
-    st.image(input_image_rgb, caption="Your Uploaded Image", use_container_width=True)
+    st.image(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB), caption="Your Uploaded Image", use_container_width=True)
 
     with st.spinner("Processing your document... This may take a moment."):
         straightened_image = find_and_straighten_document(input_image)
         image_to_process = straightened_image if straightened_image is not None and straightened_image.size > 0 else input_image
         final_image = correct_orientation(image_to_process)
-        final_image_rgb = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
 
-        image_with_structure = extract_and_draw_table_structure(final_image)
+        # This now returns a BGR image from OpenCV
+        image_with_structure_bgr = extract_and_draw_table_structure(final_image)
 
     st.subheader("2. Corrected Document & Detected Structure")
     col1, col2 = st.columns(2)
 
     with col1:
-        # FIX: Replaced deprecated 'use_column_width' with 'use_container_width'
+        final_image_rgb = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
        st.image(final_image_rgb, caption="Auto-Corrected & Oriented", use_container_width=True)
 
-        _, buf = cv2.imencode(".jpg", final_image)
+        _, buf = cv2.imencode(".jpg", final_image)  # Use the BGR image for encoding
         st.download_button(
             label="Download Clean Image",
             data=buf.tobytes(),
@@ -167,5 +168,5 @@ if uploaded_file is not None:
         )
 
     with col2:
-        # FIX: Replaced deprecated 'use_column_width' with 'use_container_width'
-        st.image(image_with_structure, caption="Detected Table Structure (Rows: Green, Columns: Red)", use_container_width=True)
+        image_with_structure_rgb = cv2.cvtColor(image_with_structure_bgr, cv2.COLOR_BGR2RGB)
+        st.image(image_with_structure_rgb, caption="Detected Table Structure (Rows: Green, Columns: Red)", use_container_width=True)
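
For reference, the comments at the top of app.py describe the two extra files a Hugging Face Space needs. Below is a minimal sketch of what they might contain after this commit, based only on the imports and comments visible in the diff; the exact package names (e.g. opencv-python-headless vs. opencv-python), the absence of version pins, and the completeness of the list are assumptions, not part of the commit.

requirements.txt (matplotlib no longer required after this change; list is an illustrative assumption):
    streamlit
    opencv-python-headless
    numpy
    Pillow
    torch
    transformers
    pytesseract
    scipy

packages.txt (system dependency for pytesseract, as stated in the code comment):
    tesseract-ocr

Because the drawing step now calls cv2.rectangle on a copy of the BGR frame instead of rendering a Matplotlib figure, matplotlib and matplotlib.patches can be dropped from the dependency list, which is what the removed imports and the new NOTE line reflect.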