Update app.py
app.py CHANGED
@@ -1,19 +1,22 @@
 import gradio as gr
 import numpy as np
-
-from PIL import Image
-from ultralytics import YOLOv10
+import fitz  # PyMuPDF
 import spaces
+from ultralytics import YOLOv10
+
 # Load the trained model
+
 model = YOLOv10("best.pt")
 
+
 # Define the class indices for figures and tables
 figure_class_index = 3  # class index for figures
 table_class_index = 4  # class index for tables
 
 # Function to perform inference on an image and return bounding boxes for figures and tables
+
 def infer_image_and_get_boxes(image, confidence_threshold=0.6):
-    results = model.predict(
+    results = model.predict(image)
     boxes = [
         (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))
         for result in results for box in result.boxes
@@ -22,39 +25,46 @@ def infer_image_and_get_boxes(image, confidence_threshold=0.6):
     return boxes
 
 # Function to crop images from the boxes
+
 def crop_images_from_boxes(image, boxes, scale_factor):
     cropped_images = [
-        image
+        image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
         for (x1, y1, x2, y2) in boxes
     ]
     return cropped_images
 
 @spaces.GPU
 def process_pdf(pdf_file):
+    # Open the PDF file
+    doc = fitz.open(pdf_file)
     all_cropped_images = []
 
     # Set the DPI for inference and high resolution for cropping
     low_dpi = 50
     high_dpi = 300
 
-    # Convert PDF pages to images at low DPI
-    low_res_images = convert_from_path(pdf_file.name, dpi=low_dpi)
-
     # Calculate the scaling factor
     scale_factor = high_dpi / low_dpi
 
-
+    # Pre-cache all page pixmaps at low DPI
+    low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
+
+    # Loop through each page
+    for page_num, low_res_pix in enumerate(low_res_pixmaps):
+        low_res_img = np.frombuffer(low_res_pix.samples, dtype=np.uint8).reshape(low_res_pix.height, low_res_pix.width, 3)
+
         # Get bounding boxes from low DPI image
         boxes = infer_image_and_get_boxes(low_res_img)
 
         if boxes:
-            #
-
+            # Load high DPI image for cropping only if boxes are found
+            high_res_pix = doc[page_num].get_pixmap(dpi=high_dpi)
+            high_res_img = np.frombuffer(high_res_pix.samples, dtype=np.uint8).reshape(high_res_pix.height, high_res_pix.width, 3)
 
             # Crop images at high DPI
             cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)
             all_cropped_images.extend(cropped_imgs)
-
+
     return all_cropped_images
 
 # Create Gradio interface
@@ -69,3 +79,4 @@ iface = gr.Interface(
 # Launch the app
 iface.launch()
 
+
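
Note on the conversion pattern introduced above: the patch renders each page with PyMuPDF and reinterprets the pixmap's byte buffer as a NumPy array. A minimal standalone sketch of that step, assuming PyMuPDF is installed and using a placeholder "sample.pdf" path (not part of the Space's code):

import fitz  # PyMuPDF
import numpy as np

doc = fitz.open("sample.pdf")      # hypothetical local PDF, stand-in for the uploaded file
pix = doc[0].get_pixmap(dpi=50)    # render the first page at the low inference DPI
# pix.samples is a flat byte buffer with pix.n channels per pixel; get_pixmap()
# defaults to RGB (pix.n == 3), which is what the reshape in the patch relies on.
img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
print(img.shape, img.dtype)        # e.g. (page_height_px, page_width_px, 3) uint8
doc.close()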
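
The cropping logic scales low-DPI box coordinates up to the high-DPI render: with low_dpi = 50 and high_dpi = 300, scale_factor is 6.0, so every coordinate is multiplied by 6 before slicing the high-resolution array. A small worked check with a hypothetical box (values invented for illustration):

# Hypothetical box detected on the 50 DPI render, scaled for the 300 DPI render.
low_dpi, high_dpi = 50, 300
scale_factor = high_dpi / low_dpi          # 6.0

x1, y1, x2, y2 = 10, 20, 110, 220          # made-up low-DPI box coordinates
high_res_box = (int(x1 * scale_factor), int(y1 * scale_factor),
                int(x2 * scale_factor), int(y2 * scale_factor))
print(high_res_box)                        # (60, 120, 660, 1320)
# crop_images_from_boxes() applies the same scaling inline when it slices
# high_res_img[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)].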