Spaces:
Running
Running
Abdur Rahman
committed on
Commit
•
af39285
1
Parent(s):
56742cc
Migrating from Kraken to YoloV8 (Finetuned on UrduDoc)
Browse files
- 2.jpg +2 -2
- 3.jpg +2 -2
- app.py +9 -6
- modules/cnn/__pycache__/unet.cpython-310.pyc +0 -0
- requirements.txt +1 -1
2.jpg
CHANGED
Git LFS Details
|
Git LFS Details
|
3.jpg
CHANGED
Git LFS Details
|
Git LFS Details
|
app.py
CHANGED
@@ -3,8 +3,7 @@ import gradio as gr
|
|
3 |
from read import text_recognizer
|
4 |
from model import Model
|
5 |
from utils import CTCLabelConverter
|
6 |
-
from
|
7 |
-
from kraken import pageseg as detection_model
|
8 |
from PIL import ImageDraw
|
9 |
|
10 |
""" vocab / character number configuration """
|
@@ -16,24 +15,28 @@ content = content+" "
|
|
16 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
17 |
converter = CTCLabelConverter(content)
|
18 |
recognition_model = Model(num_class=len(converter.character), device=device)
|
19 |
-
|
20 |
recognition_model.load_state_dict(torch.load("best_norm_ED.pth", map_location=device))
|
21 |
recognition_model.eval()
|
22 |
|
|
|
|
|
23 |
examples = ["1.jpg","2.jpg","3.jpg"]
|
24 |
|
25 |
input = gr.Image(type="pil",image_mode="RGB", label="Input Image")
|
26 |
|
27 |
def predict(input):
|
28 |
"Line Detection"
|
29 |
-
|
30 |
-
bounding_boxes =
|
31 |
bounding_boxes.sort(key=lambda x: x[1])
|
32 |
|
33 |
"Draw the bounding boxes"
|
34 |
draw = ImageDraw.Draw(input)
|
35 |
for box in bounding_boxes:
|
36 |
-
draw
|
|
|
|
|
37 |
|
38 |
"Crop the detected lines"
|
39 |
cropped_images = []
|
|
|
3 |
from read import text_recognizer
|
4 |
from model import Model
|
5 |
from utils import CTCLabelConverter
|
6 |
+
from ultralytics import YOLO
|
|
|
7 |
from PIL import ImageDraw
|
8 |
|
9 |
""" vocab / character number configuration """
|
|
|
15 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
16 |
converter = CTCLabelConverter(content)
|
17 |
recognition_model = Model(num_class=len(converter.character), device=device)
|
18 |
+
recognition_model = recognition_model.to(device)
|
19 |
recognition_model.load_state_dict(torch.load("best_norm_ED.pth", map_location=device))
|
20 |
recognition_model.eval()
|
21 |
|
22 |
+
detection_model = YOLO("yolov8m_UrduDoc.pt")
|
23 |
+
|
24 |
examples = ["1.jpg","2.jpg","3.jpg"]
|
25 |
|
26 |
input = gr.Image(type="pil",image_mode="RGB", label="Input Image")
|
27 |
|
28 |
def predict(input):
|
29 |
"Line Detection"
|
30 |
+
detection_results = detection_model.predict(source=input, conf=0.2, imgsz=1280, save=False, nms=True, device=device)
|
31 |
+
bounding_boxes = detection_results[0].boxes.xyxy.cpu().numpy().tolist()
|
32 |
bounding_boxes.sort(key=lambda x: x[1])
|
33 |
|
34 |
"Draw the bounding boxes"
|
35 |
draw = ImageDraw.Draw(input)
|
36 |
for box in bounding_boxes:
|
37 |
+
# draw rectangle outline with random color and width=5
|
38 |
+
from numpy import random
|
39 |
+
draw.rectangle(box, fill=None, outline=tuple(random.randint(0,255,3)), width=5)
|
40 |
|
41 |
"Crop the detected lines"
|
42 |
cropped_images = []
|
modules/cnn/__pycache__/unet.cpython-310.pyc
CHANGED
Binary files a/modules/cnn/__pycache__/unet.cpython-310.pyc and b/modules/cnn/__pycache__/unet.cpython-310.pyc differ
|
|
requirements.txt
CHANGED
@@ -8,6 +8,6 @@ pillow==10.2.0
|
|
8 |
tqdm==4.66.1
|
9 |
opencv-python==4.9.0.80
|
10 |
opencv-contrib-python==4.9.0.80
|
11 |
-
|
12 |
gradio==4.16.0
|
13 |
spaces==0.22.0
|
|
|
8 |
tqdm==4.66.1
|
9 |
opencv-python==4.9.0.80
|
10 |
opencv-contrib-python==4.9.0.80
|
11 |
+
ultralytics==8.1.8
|
12 |
gradio==4.16.0
|
13 |
spaces==0.22.0
|