omarelsayeed committed on
Commit
14db543
1 Parent(s): b3157e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -2
app.py CHANGED
@@ -54,7 +54,7 @@ def parse_logits(logits: torch.Tensor, length: int) -> List[int]:
54
  return ret
55
 
56
  def get_orders(image_path, boxes):
57
- print(boxes)
58
  inputs = boxes2inputs(boxes)
59
  inputs = {k: v.to(layout_model.device) for k, v in inputs.items()} # Move inputs to model device
60
  logits = layout_model(**inputs).logits.cpu().squeeze(0) # Perform inference and get logits
@@ -111,7 +111,7 @@ def draw_bboxes_on_image(image_path, bboxes, classes, reading_order):
111
  }
112
 
113
  # Open the image using PIL
114
- image = image_path
115
 
116
  # Prepare to draw on the image
117
  draw = ImageDraw.Draw(image)
@@ -157,6 +157,50 @@ def draw_bboxes_on_image(image_path, bboxes, classes, reading_order):
157
 
158
  # Return the modified image as a PIL image object
159
  return image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  from PIL import Image, ImageDraw
161
 
162
  def is_inside(box1, box2):
@@ -199,6 +243,8 @@ def remove_overlapping_and_inside_boxes(boxes, classes):
199
  del classes[idx]
200
 
201
  return boxes, classes
 
 
202
  def full_predictions(IMAGE_PATH, conf_threshold, iou_threshold):
203
  bboxes, classes = detect_layout(IMAGE_PATH, conf_threshold, iou_threshold)
204
  bboxes, classes = remove_overlapping_and_inside_boxes(bboxes, classes)
@@ -207,6 +253,7 @@ def full_predictions(IMAGE_PATH, conf_threshold, iou_threshold):
207
  return final_image
208
 
209
 
 
210
  iface = gr.Interface(
211
  fn=full_predictions,
212
  inputs=[
 
54
  return ret
55
 
56
  def get_orders(image_path, boxes):
57
+ boxes = scale_and_normalize_boxes(boxes)
58
  inputs = boxes2inputs(boxes)
59
  inputs = {k: v.to(layout_model.device) for k, v in inputs.items()} # Move inputs to model device
60
  logits = layout_model(**inputs).logits.cpu().squeeze(0) # Perform inference and get logits
 
111
  }
112
 
113
  # Open the image using PIL
114
+ image = Image.open(image_path)
115
 
116
  # Prepare to draw on the image
117
  draw = ImageDraw.Draw(image)
 
157
 
158
  # Return the modified image as a PIL image object
159
  return image
160
+
161
+
162
def scale_and_normalize_boxes(bboxes, old_width=1024, old_height=1024, new_width=640, new_height=640, normalize_width=1000, normalize_height=1000):
    """
    Rescale bounding boxes and map them onto an integer normalization grid.

    Each coordinate is first multiplied by the ``new / old`` ratio of its
    axis, then divided by the ``new`` size of that axis and multiplied by the
    normalization size, and finally truncated with ``int()``.

    Args:
        bboxes: Iterable of boxes, each unpackable into exactly four values
            ``(x_min, y_min, x_max, y_max)``.
        old_width: Width the incoming x coordinates are expressed in.
        old_height: Height the incoming y coordinates are expressed in.
        new_width: Intermediate width used for the rescale step.
        new_height: Intermediate height used for the rescale step.
        normalize_width: Upper end of the output range on the x axis.
        normalize_height: Upper end of the output range on the y axis.

    Returns:
        A new list with one ``[x_min, y_min, x_max, y_max]`` list of ints per
        input box, in the same order as ``bboxes``.
    """
    ratio_x = new_width / old_width
    ratio_y = new_height / old_height

    def _project(value, ratio, scaled_size, target_size):
        # The intermediate size cancels out algebraically, but the operation
        # order is kept as-is so floating-point rounding (and therefore the
        # int() truncation) stays the same.
        scaled = value * ratio
        return int(target_size * (scaled / scaled_size))

    normalized = []
    for bbox in bboxes:
        # Unpacking enforces the four-coordinate layout.
        left, top, right, bottom = bbox
        normalized.append(
            [
                _project(left, ratio_x, new_width, normalize_width),
                _project(top, ratio_y, new_height, normalize_height),
                _project(right, ratio_x, new_width, normalize_width),
                _project(bottom, ratio_y, new_height, normalize_height),
            ]
        )
    return normalized
201
+
202
+
203
+
204
  from PIL import Image, ImageDraw
205
 
206
  def is_inside(box1, box2):
 
243
  del classes[idx]
244
 
245
  return boxes, classes
246
+
247
+
248
  def full_predictions(IMAGE_PATH, conf_threshold, iou_threshold):
249
  bboxes, classes = detect_layout(IMAGE_PATH, conf_threshold, iou_threshold)
250
  bboxes, classes = remove_overlapping_and_inside_boxes(bboxes, classes)
 
253
  return final_image
254
 
255
 
256
+
257
  iface = gr.Interface(
258
  fn=full_predictions,
259
  inputs=[