text_detection

Sleeping

narugo1992 committed on Oct 8, 2023

Commit

4f6e58b

•

1 Parent(s): d10a366

dev(narugo): add this example

Files changed (2) hide show

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ from detect import _ALL_MODELS, _DEFAULT_MODEL, detect_text
 def _gr_detect_text(image, model: str, threshold: float):
-    print(image)
     return detection_visualize(image, detect_text(image, model, threshold))


7
8
9	def _gr_detect_text(image, model: str, threshold: float):

10	return detection_visualize(image, detect_text(image, model, threshold))
11
12

detect.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os.path
 from functools import lru_cache
 import cv2
 import numpy as np
@@ -18,7 +19,7 @@ def _get_available_models():
 _ALL_MODELS = list(_get_available_models())
-_DEFAULT_MODEL = 'dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015'
 @lru_cache()
@@ -29,7 +30,7 @@ def _get_onnx_session(model):
     ))
-def detect_text(image: ImageTyping, model: str = _DEFAULT_MODEL, threshold: float = 0.05):
     origin_width, origin_height = width, height = image.size
     align = 32
     if width % align != 0:
@@ -52,17 +53,27 @@ def detect_text(image: ImageTyping, model: str = _DEFAULT_MODEL, threshold: floa
     heatmap = output_[0]
     heatmap = heatmap[:origin_height, :origin_width]
-    cnts = cv2.findContours((heatmap * 255.0).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
     bboxes = []
-    for c in cnts:
         x, y, w, h = cv2.boundingRect(c)
-        x0, y0 = x, y
-        x1, y1 = x + w, y + h
-        area = heatmap[y0:y1, x0:x1]
-        valid_area = area[area >= 1e-4]
-        score = valid_area.mean().item()
         if score >= threshold:
-            bboxes.append(((x0, y0, x1, y1), 'text', score))
     return bboxes

 import os.path
 from functools import lru_cache
+from typing import List, Tuple
 import cv2
 import numpy as np
 _ALL_MODELS = list(_get_available_models())
+_DEFAULT_MODEL = 'dbnetpp_resnet50_fpnc_1200e_icdar2015'
 @lru_cache()
     ))
+def _get_heatmap_of_text(image: ImageTyping, model: str) -> np.ndarray:
     origin_width, origin_height = width, height = image.size
     align = 32
     if width % align != 0:
     heatmap = output_[0]
     heatmap = heatmap[:origin_height, :origin_width]
+    return heatmap
+def _get_bounding_box_of_text(image: ImageTyping, model: str, threshold: float) \
+        -> List[Tuple[Tuple[int, int, int, int], float]]:
+    heatmap = _get_heatmap_of_text(image, model)
+    c_rets = cv2.findContours((heatmap * 255.0).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    contours = c_rets[0] if len(c_rets) == 2 else c_rets[1]
     bboxes = []
+    for c in contours:
         x, y, w, h = cv2.boundingRect(c)
+        x0, y0, x1, y1 = x, y, x + w, y + h
+        score = heatmap[y0:y1, x0:x1].mean().item()
         if score >= threshold:
+            bboxes.append(((x0, y0, x1, y1), score))
+    return bboxes
+def detect_text(image: ImageTyping, model: str = _DEFAULT_MODEL, threshold: float = 0.05):
+    bboxes = []
+    for (x0, y0, x1, y1), score in _get_bounding_box_of_text(image, model, threshold):
+        bboxes.append(((x0, y0, x1, y1), 'text', score))
     return bboxes