JustinLin610 committed on
Commit d5a6a2f
1 Parent(s): 28de30d

add modelscope

Files changed (1)
  1. app.py +39 -11
app.py CHANGED
@@ -8,6 +8,8 @@ os.system('cd fairseq;'
 os.system('cd ezocr;'
           'pip install .; cd ..')
 
+os.system('pip install "modelscope[cv]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html')
+
 import torch
 import numpy as np
 from fairseq import utils, tasks
@@ -21,6 +23,10 @@ from typing import List, Tuple
 import cv2
 from easyocrlite import ReaderLite
 import gradio as gr
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.outputs import OutputKeys
+from modelscope.preprocessors.image import load_image
 
 
 # Register refcoco task
@@ -114,7 +120,9 @@ def patch_resize_transform(patch_image_size=480, is_document=False):
     return _patch_resize_transform
 
 
-reader = ReaderLite(gpu=True)
+# reader = ReaderLite(gpu=True)
+ocr_detection = pipeline(Tasks.ocr_detection, model='damo/cv_resnet18_ocr-detection-line-level_damo')
+
 overrides={"eval_cider": False, "beam": 5, "max_len_b": 64, "patch_image_size": 480,
            "orig_patch_image_size": 224, "no_repeat_ngram_size": 0, "seed": 42}
 models, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
@@ -166,24 +174,44 @@ def apply_half(t):
 
 
 def ocr(img):
+    boxes = ocr_detection(img)[OutputKeys.POLYGONS]
+    image = cv2.imread(img)
     out_img = Image.open(img)
-    results = get_images(img, reader, text_confidence=0.7, text_threshold=0.4,
-                         link_threshold=0.43, slope_ths=0., add_margin=0.02)
-    box_list, image_list = zip(*results)
-    draw_boxes(out_img, box_list)
-
-    ocr_result = []
-    for i, (box, image) in enumerate(zip(box_list, image_list)):
-        image = Image.fromarray(image)
-        sample = construct_sample(task, image, cfg.task.patch_image_size)
+    ocr_result = list()
+    for i, box in enumerate(boxes):  # the detected boxes are quadrilaterals, so rectify each one into a rectangle with a perspective transform
+        post1 = box.reshape((4, 2)).astype(np.float32)
+        width = int(box[4] - box[0])
+        height = int(box[5] - box[1])
+        post2 = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
+        M = cv2.getPerspectiveTransform(post1, post2)
+        new_img = cv2.warpPerspective(image, M, (width, height))
+        new_img_pil = Image.fromarray(cv2.cvtColor(new_img, cv2.COLOR_BGR2RGB))
+        # run text recognition on the rectified crop
+        sample = construct_sample(task, new_img_pil, cfg.task.patch_image_size)
         sample = utils.move_to_cuda(sample) if use_cuda else sample
         sample = utils.apply_to_sample(apply_half, sample) if use_fp16 else sample
 
         with torch.no_grad():
             result, scores = eval_step(task, generator, models, sample)
         ocr_result.append([str(i+1), result[0]['ocr'].replace(' ', '')])
-
     result = pd.DataFrame(ocr_result, columns=['Box ID', 'Text'])
+    # results = get_images(img, reader, text_confidence=0.7, text_threshold=0.4,
+    #                      link_threshold=0.43, slope_ths=0., add_margin=0.02)
+    # box_list, image_list = zip(*results)
+    draw_boxes(out_img, boxes)
+    #
+    # ocr_result = []
+    # for i, (box, image) in enumerate(zip(box_list, image_list)):
+    #     image = Image.fromarray(image)
+    #     sample = construct_sample(task, image, cfg.task.patch_image_size)
+    #     sample = utils.move_to_cuda(sample) if use_cuda else sample
+    #     sample = utils.apply_to_sample(apply_half, sample) if use_fp16 else sample
+    #
+    #     with torch.no_grad():
+    #         result, scores = eval_step(task, generator, models, sample)
+    #     ocr_result.append([str(i+1), result[0]['ocr'].replace(' ', '')])
+    #
+    # result = pd.DataFrame(ocr_result, columns=['Box ID', 'Text'])
 
     return out_img, result
 
 
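For reference, the new detection stage can be exercised on its own. The sketch below is illustrative rather than part of the commit: it assumes "modelscope[cv]" is installed, and 'test.jpg' is a hypothetical input path; the model ID and the OutputKeys.POLYGONS access mirror the diff above.

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.outputs import OutputKeys

# Line-level text detector, same model ID as in the commit above.
ocr_detection = pipeline(Tasks.ocr_detection,
                         model='damo/cv_resnet18_ocr-detection-line-level_damo')

# 'test.jpg' is a hypothetical input path.
boxes = ocr_detection('test.jpg')[OutputKeys.POLYGONS]
for i, box in enumerate(boxes):
    # Each polygon is a flat array of eight values: the four (x, y)
    # corners of one detected text line.
    print(i + 1, box.reshape((4, 2)))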
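The loop in ocr() rectifies each detected quadrilateral before recognition, since the recognizer expects an axis-aligned crop. Below is a self-contained sketch of just that step; the polygon and the blank stand-in image are made up for illustration, and the width/height estimate assumes the corner order used above.

import cv2
import numpy as np

# Made-up quadrilateral in the detector's flat layout:
# x0, y0, x1, y1, x2, y2, x3, y3, clockwise from the top-left corner.
box = np.array([10, 20, 210, 24, 212, 60, 12, 56], dtype=np.float32)
image = np.full((100, 250, 3), 255, dtype=np.uint8)  # blank stand-in BGR image

post1 = box.reshape((4, 2))                    # source corners
width = int(box[4] - box[0])                   # x-span, corner 0 to corner 2
height = int(box[5] - box[1])                  # y-span, corner 0 to corner 2
post2 = np.float32([[0, 0], [width, 0], [width, height], [0, height]])

M = cv2.getPerspectiveTransform(post1, post2)  # 3x3 homography
crop = cv2.warpPerspective(image, M, (width, height))
print(crop.shape)  # (40, 202, 3): an axis-aligned text-line crop

This simple extent estimate works for near-axis-aligned lines; heavily rotated boxes would need the full quadrilateral extents instead.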