Abdur Rahman committed on
Commit
af39285
1 Parent(s): 56742cc

Migrating from Kraken to YoloV8 (Finetuned on UrduDoc)

Browse files
Files changed (5) hide show
  1. 2.jpg +2 -2
  2. 3.jpg +2 -2
  3. app.py +9 -6
  4. modules/cnn/__pycache__/unet.cpython-310.pyc +0 -0
  5. requirements.txt +1 -1
2.jpg CHANGED

Git LFS Details

  • SHA256: 76307f1c49fb5f488fbb2e01f3fc2160044d7ec82d9edf62948fd7a84441e574
  • Pointer size: 132 Bytes
  • Size of remote file: 2.55 MB

Git LFS Details

  • SHA256: ebef0d7f3abdf9aa57dcfd7a092a77f599ac1605a63ca6cd458dd4a90d70d29c
  • Pointer size: 132 Bytes
  • Size of remote file: 2.54 MB
3.jpg CHANGED

Git LFS Details

  • SHA256: d53124c728fcb3f52f6325b03389ca37511430c4af001316940e1cb722b6f780
  • Pointer size: 132 Bytes
  • Size of remote file: 2.34 MB

Git LFS Details

  • SHA256: 1cf825c533e47402cb842551c99ad509de032edddc772a1b7210fc6f44c12ea4
  • Pointer size: 132 Bytes
  • Size of remote file: 2.38 MB
app.py CHANGED
@@ -3,8 +3,7 @@ import gradio as gr
3
  from read import text_recognizer
4
  from model import Model
5
  from utils import CTCLabelConverter
6
- from kraken import binarization
7
- from kraken import pageseg as detection_model
8
  from PIL import ImageDraw
9
 
10
  """ vocab / character number configuration """
@@ -16,24 +15,28 @@ content = content+" "
16
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
17
  converter = CTCLabelConverter(content)
18
  recognition_model = Model(num_class=len(converter.character), device=device)
19
- modrecognition_modelel = recognition_model.to(device)
20
  recognition_model.load_state_dict(torch.load("best_norm_ED.pth", map_location=device))
21
  recognition_model.eval()
22
 
 
 
23
  examples = ["1.jpg","2.jpg","3.jpg"]
24
 
25
  input = gr.Image(type="pil",image_mode="RGB", label="Input Image")
26
 
27
  def predict(input):
28
  "Line Detection"
29
- bw_input = binarization.nlbin(input)
30
- bounding_boxes = detection_model.segment(bw_input)['boxes']
31
  bounding_boxes.sort(key=lambda x: x[1])
32
 
33
  "Draw the bounding boxes"
34
  draw = ImageDraw.Draw(input)
35
  for box in bounding_boxes:
36
- draw.rectangle(box, outline='red', width=3)
 
 
37
 
38
  "Crop the detected lines"
39
  cropped_images = []
 
3
  from read import text_recognizer
4
  from model import Model
5
  from utils import CTCLabelConverter
6
+ from ultralytics import YOLO
 
7
  from PIL import ImageDraw
8
 
9
  """ vocab / character number configuration """
 
15
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
16
  converter = CTCLabelConverter(content)
17
  recognition_model = Model(num_class=len(converter.character), device=device)
18
+ recognition_model = recognition_model.to(device)
19
  recognition_model.load_state_dict(torch.load("best_norm_ED.pth", map_location=device))
20
  recognition_model.eval()
21
 
22
+ detection_model = YOLO("yolov8m_UrduDoc.pt")
23
+
24
  examples = ["1.jpg","2.jpg","3.jpg"]
25
 
26
  input = gr.Image(type="pil",image_mode="RGB", label="Input Image")
27
 
28
  def predict(input):
29
  "Line Detection"
30
+ detection_results = detection_model.predict(source=input, conf=0.2, imgsz=1280, save=False, nms=True, device=device)
31
+ bounding_boxes = detection_results[0].boxes.xyxy.cpu().numpy().tolist()
32
  bounding_boxes.sort(key=lambda x: x[1])
33
 
34
  "Draw the bounding boxes"
35
  draw = ImageDraw.Draw(input)
36
  for box in bounding_boxes:
37
+ # draw rectangle outline with random color and width=5
38
+ from numpy import random
39
+ draw.rectangle(box, fill=None, outline=tuple(random.randint(0,255,3)), width=5)
40
 
41
  "Crop the detected lines"
42
  cropped_images = []
modules/cnn/__pycache__/unet.cpython-310.pyc CHANGED
Binary files a/modules/cnn/__pycache__/unet.cpython-310.pyc and b/modules/cnn/__pycache__/unet.cpython-310.pyc differ
 
requirements.txt CHANGED
@@ -8,6 +8,6 @@ pillow==10.2.0
8
  tqdm==4.66.1
9
  opencv-python==4.9.0.80
10
  opencv-contrib-python==4.9.0.80
11
- kraken==4.3.13
12
  gradio==4.16.0
13
  spaces==0.22.0
 
8
  tqdm==4.66.1
9
  opencv-python==4.9.0.80
10
  opencv-contrib-python==4.9.0.80
11
+ ultralytics==8.1.8
12
  gradio==4.16.0
13
  spaces==0.22.0