Spaces:
Runtime error
Runtime error
JustinLin610
committed on
Commit
•
d5a6a2f
1
Parent(s):
28de30d
add modelscope
Browse files
app.py
CHANGED
@@ -8,6 +8,8 @@ os.system('cd fairseq;'
|
|
8 |
os.system('cd ezocr;'
|
9 |
'pip install .; cd ..')
|
10 |
|
|
|
|
|
11 |
import torch
|
12 |
import numpy as np
|
13 |
from fairseq import utils, tasks
|
@@ -21,6 +23,10 @@ from typing import List, Tuple
|
|
21 |
import cv2
|
22 |
from easyocrlite import ReaderLite
|
23 |
import gradio as gr
|
|
|
|
|
|
|
|
|
24 |
|
25 |
|
26 |
# Register refcoco task
|
@@ -114,7 +120,9 @@ def patch_resize_transform(patch_image_size=480, is_document=False):
|
|
114 |
return _patch_resize_transform
|
115 |
|
116 |
|
117 |
-
reader = ReaderLite(gpu=True)
|
|
|
|
|
118 |
overrides={"eval_cider": False, "beam": 5, "max_len_b": 64, "patch_image_size": 480,
|
119 |
"orig_patch_image_size": 224, "no_repeat_ngram_size": 0, "seed": 42}
|
120 |
models, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
|
@@ -166,24 +174,44 @@ def apply_half(t):
|
|
166 |
|
167 |
|
168 |
def ocr(img):
|
|
|
|
|
169 |
out_img = Image.open(img)
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
|
|
|
|
179 |
sample = utils.move_to_cuda(sample) if use_cuda else sample
|
180 |
sample = utils.apply_to_sample(apply_half, sample) if use_fp16 else sample
|
181 |
|
182 |
with torch.no_grad():
|
183 |
result, scores = eval_step(task, generator, models, sample)
|
184 |
ocr_result.append([str(i+1), result[0]['ocr'].replace(' ', '')])
|
185 |
-
|
186 |
result = pd.DataFrame(ocr_result, columns=['Box ID', 'Text'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
|
188 |
return out_img, result
|
189 |
|
|
|
8 |
os.system('cd ezocr;'
|
9 |
'pip install .; cd ..')
|
10 |
|
11 |
+
os.system('pip install "modelscope[cv]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html')
|
12 |
+
|
13 |
import torch
|
14 |
import numpy as np
|
15 |
from fairseq import utils, tasks
|
|
|
23 |
import cv2
|
24 |
from easyocrlite import ReaderLite
|
25 |
import gradio as gr
|
26 |
+
from modelscope.pipelines import pipeline
|
27 |
+
from modelscope.utils.constant import Tasks
|
28 |
+
from modelscope.outputs import OutputKeys
|
29 |
+
from modelscope.preprocessors.image import load_image
|
30 |
|
31 |
|
32 |
# Register refcoco task
|
|
|
120 |
return _patch_resize_transform
|
121 |
|
122 |
|
123 |
+
# reader = ReaderLite(gpu=True)
|
124 |
+
ocr_detection = pipeline(Tasks.ocr_detection, model='damo/cv_resnet18_ocr-detection-line-level_damo')
|
125 |
+
|
126 |
overrides={"eval_cider": False, "beam": 5, "max_len_b": 64, "patch_image_size": 480,
|
127 |
"orig_patch_image_size": 224, "no_repeat_ngram_size": 0, "seed": 42}
|
128 |
models, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
|
|
|
174 |
|
175 |
|
176 |
def ocr(img):
    """Detect text regions in an image and recognize the text in each one.

    Each polygon returned by ``ocr_detection`` is warped to an upright
    rectangle, passed through the recognition model via ``eval_step``, and
    collected into a table.

    Args:
        img: Image location accepted by ``ocr_detection``, ``cv2.imread``
            and ``Image.open`` (all three are called with it).

    Returns:
        A tuple ``(out_img, result)`` where ``out_img`` is the PIL image
        after ``draw_boxes`` has been called on it, and ``result`` is a
        ``pandas.DataFrame`` with the columns ``'Box ID'`` and ``'Text'``.
    """
    boxes = ocr_detection(img)[OutputKeys.POLYGONS]
    image = cv2.imread(img)
    out_img = Image.open(img)

    ocr_result = []
    # enumerate() provides the running index used for the Box ID. Iterating
    # `boxes` directly and unpacking into (i, box) would try to split each
    # 8-value polygon into two names and raise ValueError.
    for i, box in enumerate(boxes):
        # Detections are quadrilaterals, so use a perspective transform to
        # turn each one into a rectangle before recognition.
        # NOTE(review): assumes the 8 values are (x, y) pairs ordered
        # top-left, top-right, bottom-right, bottom-left -- confirm against
        # the detector's output.
        src_quad = box.reshape((4, 2)).astype(np.float32)
        # cv2.warpPerspective requires a positive integer (width, height);
        # the raw differences are numpy scalars and may be zero or negative.
        width = max(int(box[4] - box[0]), 1)
        height = max(int(box[5] - box[1]), 1)
        dst_rect = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
        M = cv2.getPerspectiveTransform(src_quad, dst_rect)
        new_img = cv2.warpPerspective(image, M, (width, height))
        # cv2.imread yields BGR; convert to RGB before building the PIL image.
        new_img_pil = Image.fromarray(cv2.cvtColor(new_img, cv2.COLOR_BGR2RGB))

        # Run text recognition on the cropped region.
        sample = construct_sample(task, new_img_pil, cfg.task.patch_image_size)
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        sample = utils.apply_to_sample(apply_half, sample) if use_fp16 else sample

        with torch.no_grad():
            hypos, scores = eval_step(task, generator, models, sample)
        ocr_result.append([str(i + 1), hypos[0]['ocr'].replace(' ', '')])

    result = pd.DataFrame(ocr_result, columns=['Box ID', 'Text'])
    # NOTE(review): draw_boxes is defined elsewhere in the file; verify that
    # it accepts the polygon format returned by the detection pipeline.
    draw_boxes(out_img, boxes)

    return out_img, result
|
217 |
|