JustinLin610 committed on
Commit d5a6a2f
1 Parent(s): 28de30d

add modelscope

Files changed (1)
  1. app.py +39 -11
app.py CHANGED
@@ -8,6 +8,8 @@ os.system('cd fairseq;'
 os.system('cd ezocr;'
           'pip install .; cd ..')
 
+os.system('pip install "modelscope[cv]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html')
+
 import torch
 import numpy as np
 from fairseq import utils, tasks
@@ -21,6 +23,10 @@ from typing import List, Tuple
 import cv2
 from easyocrlite import ReaderLite
 import gradio as gr
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.outputs import OutputKeys
+from modelscope.preprocessors.image import load_image
 
 
 # Register refcoco task
@@ -114,7 +120,9 @@ def patch_resize_transform(patch_image_size=480, is_document=False):
     return _patch_resize_transform
 
 
-reader = ReaderLite(gpu=True)
+# reader = ReaderLite(gpu=True)
+ocr_detection = pipeline(Tasks.ocr_detection, model='damo/cv_resnet18_ocr-detection-line-level_damo')
+
 overrides={"eval_cider": False, "beam": 5, "max_len_b": 64, "patch_image_size": 480,
            "orig_patch_image_size": 224, "no_repeat_ngram_size": 0, "seed": 42}
 models, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
@@ -166,24 +174,44 @@ def apply_half(t):
 
 
 def ocr(img):
+    boxes = ocr_detection(img)[OutputKeys.POLYGONS]
+    image = cv2.imread(img)
     out_img = Image.open(img)
-    results = get_images(img, reader, text_confidence=0.7, text_threshold=0.4,
-                         link_threshold=0.43, slope_ths=0., add_margin=0.02)
-    box_list, image_list = zip(*results)
-    draw_boxes(out_img, box_list)
-
-    ocr_result = []
-    for i, (box, image) in enumerate(zip(box_list, image_list)):
-        image = Image.fromarray(image)
-        sample = construct_sample(task, image, cfg.task.patch_image_size)
+    ocr_result = list()
+    for i, box in enumerate(boxes):  # the detected boxes are quadrilaterals, so rectify each one into a rectangle with a perspective transform
+        post1 = box.reshape((4, 2)).astype(np.float32)
+        width = int(box[4] - box[0])
+        height = int(box[5] - box[1])
+        post2 = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
+        M = cv2.getPerspectiveTransform(post1, post2)
+        new_img = cv2.warpPerspective(image, M, (width, height))
+        new_img_pil = Image.fromarray(cv2.cvtColor(new_img, cv2.COLOR_BGR2RGB))
+        # run text recognition on the rectified crop
+        sample = construct_sample(task, new_img_pil, cfg.task.patch_image_size)
         sample = utils.move_to_cuda(sample) if use_cuda else sample
         sample = utils.apply_to_sample(apply_half, sample) if use_fp16 else sample
 
         with torch.no_grad():
             result, scores = eval_step(task, generator, models, sample)
         ocr_result.append([str(i+1), result[0]['ocr'].replace(' ', '')])
-
     result = pd.DataFrame(ocr_result, columns=['Box ID', 'Text'])
+    # results = get_images(img, reader, text_confidence=0.7, text_threshold=0.4,
+    #                      link_threshold=0.43, slope_ths=0., add_margin=0.02)
+    # box_list, image_list = zip(*results)
+    draw_boxes(out_img, boxes)
+    #
+    # ocr_result = []
+    # for i, (box, image) in enumerate(zip(box_list, image_list)):
+    #     image = Image.fromarray(image)
+    #     sample = construct_sample(task, image, cfg.task.patch_image_size)
+    #     sample = utils.move_to_cuda(sample) if use_cuda else sample
+    #     sample = utils.apply_to_sample(apply_half, sample) if use_fp16 else sample
+    #
+    #     with torch.no_grad():
+    #         result, scores = eval_step(task, generator, models, sample)
+    #     ocr_result.append([str(i+1), result[0]['ocr'].replace(' ', '')])
+    #
+    # result = pd.DataFrame(ocr_result, columns=['Box ID', 'Text'])
 
     return out_img, result
 
 
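For reference, the new detection stage can be exercised on its own. The sketch below is illustrative rather than part of the commit: it assumes "modelscope[cv]" is installed, and 'test.jpg' is a hypothetical input path; the model ID and the OutputKeys.POLYGONS access mirror the diff above.

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.outputs import OutputKeys

# Line-level text detector, same model ID as in the commit above.
ocr_detection = pipeline(Tasks.ocr_detection,
                         model='damo/cv_resnet18_ocr-detection-line-level_damo')

# 'test.jpg' is a hypothetical input path.
boxes = ocr_detection('test.jpg')[OutputKeys.POLYGONS]
for i, box in enumerate(boxes):
    # Each polygon is a flat array of eight values: the four (x, y)
    # corners of one detected text line.
    print(i + 1, box.reshape((4, 2)))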
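The loop in ocr() rectifies each detected quadrilateral before recognition, since the recognizer expects an axis-aligned crop. Below is a self-contained sketch of just that step; the polygon and the blank stand-in image are made up for illustration, and the width/height estimate assumes the corner order used above.

import cv2
import numpy as np

# Made-up quadrilateral in the detector's flat layout:
# x0, y0, x1, y1, x2, y2, x3, y3, clockwise from the top-left corner.
box = np.array([10, 20, 210, 24, 212, 60, 12, 56], dtype=np.float32)
image = np.full((100, 250, 3), 255, dtype=np.uint8)  # blank stand-in BGR image

post1 = box.reshape((4, 2))                    # source corners
width = int(box[4] - box[0])                   # x-span, corner 0 to corner 2
height = int(box[5] - box[1])                  # y-span, corner 0 to corner 2
post2 = np.float32([[0, 0], [width, 0], [width, height], [0, height]])

M = cv2.getPerspectiveTransform(post1, post2)  # 3x3 homography
crop = cv2.warpPerspective(image, M, (width, height))
print(crop.shape)  # (40, 202, 3): an axis-aligned text-line crop

This simple extent estimate works for near-axis-aligned lines; heavily rotated boxes would need the full quadrilateral extents instead.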