omarelsayeed committed
Commit 14db543
Parent(s): b3157e9
Update app.py

app.py CHANGED
@@ -54,7 +54,7 @@ def parse_logits(logits: torch.Tensor, length: int) -> List[int]:
     return ret
 
 def get_orders(image_path, boxes):
-
+    boxes = scale_and_normalize_boxes(boxes)
     inputs = boxes2inputs(boxes)
     inputs = {k: v.to(layout_model.device) for k, v in inputs.items()}  # Move inputs to model device
     logits = layout_model(**inputs).logits.cpu().squeeze(0)  # Perform inference and get logits
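For context, a minimal sketch of how get_orders reads after this hunk. The final line is not shown in the diff and is an assumption here (parse_logits is assumed to take the logits plus the box count as its length argument):

# Sketch only; the last line is assumed, not part of the diff.
def get_orders(image_path, boxes):
    boxes = scale_and_normalize_boxes(boxes)  # new: bring boxes into the 0-1000 range
    inputs = boxes2inputs(boxes)
    inputs = {k: v.to(layout_model.device) for k, v in inputs.items()}  # move inputs to model device
    logits = layout_model(**inputs).logits.cpu().squeeze(0)  # run inference
    return parse_logits(logits, len(boxes))  # assumed: decode logits into a reading order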
@@ -111,7 +111,7 @@ def draw_bboxes_on_image(image_path, bboxes, classes, reading_order):
     }
 
     # Open the image using PIL
-    image = image_path
+    image = Image.open(image_path)
 
     # Prepare to draw on the image
     draw = ImageDraw.Draw(image)
@@ -157,6 +157,50 @@ def draw_bboxes_on_image(image_path, bboxes, classes, reading_order):
 
     # Return the modified image as a PIL image object
     return image
+
+
+def scale_and_normalize_boxes(bboxes, old_width=1024, old_height=1024, new_width=640, new_height=640, normalize_width=1000, normalize_height=1000):
+    """
+    Scales and normalizes bounding boxes from original dimensions to new dimensions.
+
+    Args:
+        bboxes (list of lists): List of bounding boxes in [x_min, y_min, x_max, y_max] format.
+        old_width (int or float): Width of the original image.
+        old_height (int or float): Height of the original image.
+        new_width (int or float): Width of the scaled image.
+        new_height (int or float): Height of the scaled image.
+        normalize_width (int or float): Width of the normalization range (e.g., target resolution width).
+        normalize_height (int or float): Height of the normalization range (e.g., target resolution height).
+
+    Returns:
+        list of lists: Scaled and normalized bounding boxes in [x_min, y_min, x_max, y_max] format.
+    """
+    scale_x = new_width / old_width
+    scale_y = new_height / old_height
+
+    def scale_and_normalize_single(bbox):
+        # Extract coordinates
+        x_min, y_min, x_max, y_max = bbox
+
+        # Scale to new dimensions
+        x_min *= scale_x
+        y_min *= scale_y
+        x_max *= scale_x
+        y_max *= scale_y
+
+        # Normalize to the target range
+        x_min = int(normalize_width * (x_min / new_width))
+        y_min = int(normalize_height * (y_min / new_height))
+        x_max = int(normalize_width * (x_max / new_width))
+        y_max = int(normalize_height * (y_max / new_height))
+
+        return [x_min, y_min, x_max, y_max]
+
+    # Process all bounding boxes
+    return [scale_and_normalize_single(bbox) for bbox in bboxes]
+
+
+
 from PIL import Image, ImageDraw
 
 def is_inside(box1, box2):
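A quick usage sketch of the new helper with its default arguments (the input box is illustrative, not taken from the app). The scale-then-normalize steps compose to a single mapping from the 0-1024 pixel range into the 0-1000 range, presumably the coordinate range that boxes2inputs and the layout model expect:

# Illustrative input, chosen for the sketch:
boxes = [[100, 200, 300, 400]]
print(scale_and_normalize_boxes(boxes))
# -> [[97, 195, 292, 390]], i.e. each coordinate is effectively int(v * 1000 / 1024)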
@@ -199,6 +243,8 @@ def remove_overlapping_and_inside_boxes(boxes, classes):
            del classes[idx]
 
    return boxes, classes
+
+
 def full_predictions(IMAGE_PATH, conf_threshold, iou_threshold):
    bboxes, classes = detect_layout(IMAGE_PATH, conf_threshold, iou_threshold)
    bboxes, classes = remove_overlapping_and_inside_boxes(bboxes, classes)
@@ -207,6 +253,7 @@ def full_predictions(IMAGE_PATH, conf_threshold, iou_threshold):
     return final_image
 
 
+
 iface = gr.Interface(
     fn=full_predictions,
     inputs=[
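Taken together, the hunks above touch both call sites that full_predictions depends on: get_orders now normalizes its boxes before inference, and draw_bboxes_on_image now opens the image from its path. A sketch of the assumed glue code between the lines shown in the diff (the two middle calls are assumptions, not part of this commit):

# Sketch only; the two middle lines are assumed from the surrounding signatures.
def full_predictions(IMAGE_PATH, conf_threshold, iou_threshold):
    bboxes, classes = detect_layout(IMAGE_PATH, conf_threshold, iou_threshold)
    bboxes, classes = remove_overlapping_and_inside_boxes(bboxes, classes)
    reading_order = get_orders(IMAGE_PATH, bboxes)  # assumed call; boxes normalized inside get_orders
    final_image = draw_bboxes_on_image(IMAGE_PATH, bboxes, classes, reading_order)  # assumed call
    return final_image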