tianchiguaixia commited on
Commit
fbaa7e3
0 Parent(s):

Duplicate from tianchiguaixia/ocr

Browse files
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Ocr
3
+ emoji: 🐠
4
+ colorFrom: green
5
+ colorTo: green
6
+ sdk: streamlit
7
+ sdk_version: 1.21.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: tianchiguaixia/ocr
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # time: 2022/10/17 11:22
3
+ # file: AI-医学图片OCR.py
4
+
5
+
6
+ import streamlit as st
7
+
8
+ from ocr.ocr import detect, recognize
9
+ from ocr.utils import bytes_to_numpy
10
+ import pandas as pd
11
+
12
+ import os
13
+ import cv2
14
+ from paddleocr import PPStructure, draw_structure_result, save_structure_res
15
+
16
+
17
+ st.title("AI-医学图片OCR")
18
+
19
+
20
+
21
+ def convert_df(df):
22
+ # IMPORTANT: Cache the conversion to prevent computation on every rerun
23
+ return df.to_csv().encode("gbk")
24
+
25
+
26
+ # 上传图片
27
+ uploaded_file = st.sidebar.file_uploader(
28
+ '请选择一张图片', type=['png', 'jpg', 'jpeg'])
29
+ print('uploaded_file:', uploaded_file)
30
+ table_engine = PPStructure(show_log=True)
31
+ if uploaded_file is not None:
32
+ # To read file as bytes:
33
+ # content = cv2.imread(uploaded_file)
34
+ # st.write(content)
35
+ bytes_data = uploaded_file.getvalue()
36
+ # 转换格式
37
+ img = bytes_to_numpy(bytes_data, channels='RGB')
38
+ option_task = st.sidebar.radio('请选择要执行的任务', ('查看原图', '文本检测'))
39
+ if option_task == '查看原图':
40
+ st.image(img, caption='原图')
41
+ elif option_task == '文本检测':
42
+ im_show = detect(img)
43
+ st.image(im_show, caption='文本检测后的图片')
44
+
45
+ base_path = "streamlit_data"
46
+
47
+ path = os.path.exists(base_path + "/" + uploaded_file.name.split('.')[0])
48
+
49
+ if st.button('✨ 启动!'):
50
+ local_path = base_path + "/" + uploaded_file.name.split('.')[0]
51
+ result = table_engine(img)
52
+ save_structure_res(result, base_path, uploaded_file.name.split('.')[0])
53
+ with st.container():
54
+ with st.expander(label="json结果展示", expanded=False):
55
+ st.write(result)
56
+ for i in os.listdir(local_path):
57
+ if ".xlsx" in i:
58
+ df = pd.read_excel(os.path.join(local_path, i))
59
+ df = df.fillna("")
60
+ st.write(df)
61
+ csv = convert_df(df)
62
+ st.download_button(
63
+ label="Download data as csv",
64
+ data=csv,
65
+ file_name='large_df.csv',
66
+ mime='text/csv',
67
+ )
ocr/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # time: 2022/10/17 13:03
3
+ # file: __init__.py.py
4
+ # author: fengchenggang
5
+ # email: chenggang.feng@ashermed.com
ocr/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (158 Bytes). View file
 
ocr/__pycache__/ocr.cpython-38.pyc ADDED
Binary file (1.48 kB). View file
 
ocr/__pycache__/utils.cpython-38.pyc ADDED
Binary file (1.15 kB). View file
 
ocr/ocr.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # time: 2022/10/17 13:04
3
+ # file: ocr.py
4
+
5
+
6
+ from paddleocr import PaddleOCR
7
+ from ocr_utils import draw_ocr, draw_ocr_box_txt
8
+
9
+ ocr = PaddleOCR(lang='ch',use_angle_cls=True)
10
+
11
+
12
+ def detect(image):
13
+ """
14
+ 文本检测
15
+ args:
16
+ image(array): numpy格式的图片 'RGB'
17
+ return(array):
18
+ 检测后的图片 numpy格式 'RGB'
19
+ """
20
+ result = ocr.ocr(image, rec=False)
21
+ im_show = draw_ocr(image, result)
22
+
23
+ return im_show
24
+
25
+
26
+ def recognize(image, output_mode=0):
27
+ """
28
+ 文本识别
29
+ args:
30
+ image(array): numpy格式的图片 'RGB'
31
+ output_mode(int): 图片输出模式 [0|1]
32
+ return(array):
33
+ 识别后的图片 numpy格式 'RGB'
34
+ """
35
+ result = ocr.ocr(image)
36
+ boxes = [line[0] for line in result]
37
+ txts = [line[1][0] for line in result]
38
+ scores = [line[1][1] for line in result]
39
+ if output_mode == 0:
40
+ im_show = draw_ocr_box_txt(image, boxes, txts, scores)
41
+ elif output_mode == 1:
42
+ im_show = draw_ocr(image, boxes, txts, scores)
43
+
44
+ return im_show
45
+
ocr/utils.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # time: 2022/10/17 13:04
3
+ # file: utils.py
4
+
5
+
6
+ import cv2
7
+ import numpy as np
8
+
9
+
10
+ def bytes_to_numpy(image_bytes, channels='BGR'):
11
+ """
12
+ 图片格式转换 bytes -> numpy
13
+ args:
14
+ image_bytes(str): 图片的字节流
15
+ channels(str): 图片的格式 ['BGR'|'RGB']
16
+ return(array):
17
+ 转换后的图片
18
+ """
19
+ _image_np = np.frombuffer(image_bytes, dtype=np.uint8)
20
+ image_np = cv2.imdecode(_image_np, cv2.IMREAD_COLOR)
21
+ if channels == 'BGR':
22
+ return image_np
23
+ elif channels == 'RGB':
24
+ image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
25
+ return image_np
26
+
27
+
28
+ def numpy_to_pic(image_path, image_np, channels='BGR'):
29
+ """
30
+ 保存图片
31
+ args:
32
+ image_path(str): 图片路径
33
+ image_np(array): numpy格式的图片数据
34
+ channels(str): 图片的格式 ['BGR'|'RGB']
35
+ """
36
+ if channels == 'BGR':
37
+ cv2.imwrite(image_path, image_np)
38
+ elif channels == 'RGB':
39
+ image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
40
+ cv2.imwrite(image_path, image_np)
ocr_utils.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # time: 2022/10/17 13:25
3
+ # file: ocr_utils.py
4
+
5
+ import cv2
6
+ import math
7
+ import numpy as np
8
+
9
+ from PIL import Image, ImageDraw, ImageFont
10
+
11
+
12
+ def resize_img(img, input_size=600):
13
+ """
14
+ resize img and limit the longest side of the image to input_size
15
+ """
16
+ img = np.array(img)
17
+ im_shape = img.shape
18
+ im_size_max = np.max(im_shape[0:2])
19
+ im_scale = float(input_size) / float(im_size_max)
20
+ img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
21
+ return img
22
+
23
+
24
+ def draw_ocr(
25
+ image,
26
+ boxes,
27
+ txts=None,
28
+ scores=None,
29
+ drop_score=0.5,
30
+ font_path="./fonts/font.ttf"
31
+ ):
32
+ """
33
+ Visualize the results of OCR detection and recognition
34
+ args:
35
+ image(Image|array): RGB image
36
+ boxes(list): boxes with shape(N, 4, 2)
37
+ txts(list): the texts
38
+ scores(list): txxs corresponding scores
39
+ drop_score(float): only scores greater than drop_threshold will be visualized
40
+ font_path: the path of font which is used to draw text
41
+ return(array):
42
+ the visualized img
43
+ """
44
+ if scores is None:
45
+ scores = [1] * len(boxes)
46
+ box_num = len(boxes)
47
+ for i in range(box_num):
48
+ if scores is not None and (scores[i] < drop_score or math.isnan(scores[i])):
49
+ continue
50
+ box = np.reshape(np.array(boxes[i]), [-1, 1, 2]).astype(np.int64)
51
+ image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2)
52
+ if txts is not None:
53
+ img = np.array(resize_img(image, input_size=600))
54
+ txt_img = text_visual(
55
+ txts,
56
+ scores,
57
+ img_h=img.shape[0],
58
+ img_w=600,
59
+ threshold=drop_score,
60
+ font_path=font_path
61
+ )
62
+ img = np.concatenate([np.array(img), np.array(txt_img)], axis=1)
63
+ return img
64
+ return image
65
+
66
+
67
+ def draw_ocr_box_txt(
68
+ image,
69
+ boxes,
70
+ txts,
71
+ scores=None,
72
+ drop_score=0.5,
73
+ font_path="./fonts/font.ttf"
74
+ ):
75
+ image = Image.fromarray(image)
76
+ h, w = image.height, image.width
77
+ img_left = image.copy()
78
+ img_right = Image.new('RGB', (w, h), (255, 255, 255))
79
+
80
+ import random
81
+
82
+ random.seed(0)
83
+ draw_left = ImageDraw.Draw(img_left)
84
+ draw_right = ImageDraw.Draw(img_right)
85
+ for idx, (box, txt) in enumerate(zip(boxes, txts)):
86
+ if scores is not None and scores[idx] < drop_score:
87
+ continue
88
+ color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
89
+ draw_left.polygon(
90
+ [
91
+ box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],
92
+ box[2][1], box[3][0], box[3][1]
93
+ ],
94
+ fill=color)
95
+ draw_right.polygon(
96
+ [
97
+ box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],
98
+ box[2][1], box[3][0], box[3][1]
99
+ ],
100
+ outline=color)
101
+ box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][
102
+ 1])**2)
103
+ box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][
104
+ 1])**2)
105
+ if box_height > 2 * box_width:
106
+ font_size = max(int(box_width * 0.9), 10)
107
+ font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
108
+ cur_y = box[0][1]
109
+ for c in txt:
110
+ char_size = font.getsize(c)
111
+ draw_right.text((box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
112
+ cur_y += char_size[1]
113
+ else:
114
+ font_size = max(int(box_height * 0.8), 10)
115
+ font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
116
+ draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
117
+ img_left = Image.blend(image, img_left, 0.5)
118
+ img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
119
+ img_show.paste(img_left, (0, 0, w, h))
120
+ img_show.paste(img_right, (w, 0, w * 2, h))
121
+ return np.array(img_show)
122
+
123
+
124
+ def str_count(s):
125
+ """
126
+ Count the number of Chinese characters,
127
+ a single English character and a single number
128
+ equal to half the length of Chinese characters.
129
+ args:
130
+ s(string): the input of string
131
+ return(int):
132
+ the number of Chinese characters
133
+ """
134
+ import string
135
+ count_zh = count_pu = 0
136
+ s_len = len(s)
137
+ en_dg_count = 0
138
+ for c in s:
139
+ if c in string.ascii_letters or c.isdigit() or c.isspace():
140
+ en_dg_count += 1
141
+ elif c.isalpha():
142
+ count_zh += 1
143
+ else:
144
+ count_pu += 1
145
+ return s_len - math.ceil(en_dg_count / 2)
146
+
147
+
148
+ def text_visual(
149
+ texts,
150
+ scores,
151
+ img_h=400,
152
+ img_w=600,
153
+ threshold=0.,
154
+ font_path="./fonts/font.ttf"
155
+ ):
156
+ """
157
+ create new blank img and draw txt on it
158
+ args:
159
+ texts(list): the text will be draw
160
+ scores(list|None): corresponding score of each txt
161
+ img_h(int): the height of blank img
162
+ img_w(int): the width of blank img
163
+ font_path: the path of font which is used to draw text
164
+ return(array):
165
+ """
166
+ if scores is not None:
167
+ assert len(texts) == len(scores), "The number of txts and corresponding scores must match"
168
+
169
+ def create_blank_img():
170
+ blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255
171
+ blank_img[:, img_w - 1:] = 0
172
+ blank_img = Image.fromarray(blank_img).convert("RGB")
173
+ draw_txt = ImageDraw.Draw(blank_img)
174
+ return blank_img, draw_txt
175
+
176
+ blank_img, draw_txt = create_blank_img()
177
+
178
+ font_size = 20
179
+ txt_color = (0, 0, 0)
180
+ font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
181
+
182
+ gap = font_size + 5
183
+ txt_img_list = []
184
+ count, index = 1, 0
185
+ for idx, txt in enumerate(texts):
186
+ index += 1
187
+ if scores[idx] < threshold or math.isnan(scores[idx]):
188
+ index -= 1
189
+ continue
190
+ first_line = True
191
+ while str_count(txt) >= img_w // font_size - 4:
192
+ tmp = txt
193
+ txt = tmp[:img_w // font_size - 4]
194
+ if first_line:
195
+ new_txt = str(index) + ': ' + txt
196
+ first_line = False
197
+ else:
198
+ new_txt = ' ' + txt
199
+ draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
200
+ txt = tmp[img_w // font_size - 4:]
201
+ if count >= img_h // gap - 1:
202
+ txt_img_list.append(np.array(blank_img))
203
+ blank_img, draw_txt = create_blank_img()
204
+ count = 0
205
+ count += 1
206
+ if first_line:
207
+ new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx])
208
+ else:
209
+ new_txt = " " + txt + " " + '%.3f' % (scores[idx])
210
+ draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
211
+ # whether add new blank img or not
212
+ if count >= img_h // gap - 1 and idx + 1 < len(texts):
213
+ txt_img_list.append(np.array(blank_img))
214
+ blank_img, draw_txt = create_blank_img()
215
+ count = 0
216
+ count += 1
217
+ txt_img_list.append(np.array(blank_img))
218
+ if len(txt_img_list) == 1:
219
+ blank_img = np.array(txt_img_list[0])
220
+ else:
221
+ blank_img = np.concatenate(txt_img_list, axis=1)
222
+ return np.array(blank_img)
223
+
224
+
225
+ def base64_to_cv2(b64str):
226
+ import base64
227
+ data = base64.b64decode(b64str.encode('utf8'))
228
+ data = np.fromstring(data, np.uint8)
229
+ data = cv2.imdecode(data, cv2.IMREAD_COLOR)
230
+ return data
231
+
232
+
233
+ def draw_boxes(image, boxes, scores=None, drop_score=0.5):
234
+ if scores is None:
235
+ scores = [1] * len(boxes)
236
+ for (box, score) in zip(boxes, scores):
237
+ if score < drop_score:
238
+ continue
239
+ box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64)
240
+ image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2)
241
+ return image
242
+
243
+
244
+ def get_rotate_crop_image(img, points):
245
+ '''
246
+ img_height, img_width = img.shape[0:2]
247
+ left = int(np.min(points[:, 0]))
248
+ right = int(np.max(points[:, 0]))
249
+ top = int(np.min(points[:, 1]))
250
+ bottom = int(np.max(points[:, 1]))
251
+ img_crop = img[top:bottom, left:right, :].copy()
252
+ points[:, 0] = points[:, 0] - left
253
+ points[:, 1] = points[:, 1] - top
254
+ '''
255
+ assert len(points) == 4, "shape of points must be 4*2"
256
+ img_crop_width = int(
257
+ max(
258
+ np.linalg.norm(points[0] - points[1]),
259
+ np.linalg.norm(points[2] - points[3])))
260
+ img_crop_height = int(
261
+ max(
262
+ np.linalg.norm(points[0] - points[3]),
263
+ np.linalg.norm(points[1] - points[2])))
264
+ pts_std = np.float32([[0, 0], [img_crop_width, 0],
265
+ [img_crop_width, img_crop_height],
266
+ [0, img_crop_height]])
267
+ M = cv2.getPerspectiveTransform(points, pts_std)
268
+ dst_img = cv2.warpPerspective(
269
+ img,
270
+ M, (img_crop_width, img_crop_height),
271
+ borderMode=cv2.BORDER_REPLICATE,
272
+ flags=cv2.INTER_CUBIC
273
+ )
274
+ dst_img_height, dst_img_width = dst_img.shape[0:2]
275
+ if dst_img_height * 1.0 / dst_img_width >= 1.5:
276
+ dst_img = np.rot90(dst_img)
277
+ return dst_img
278
+
279
+
280
+ if __name__ == '__main__':
281
+ pass
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ paddleocr~=2.6
2
+ paddlepaddle
3
+ opencv-python==4.6.0.66
4
+ opencv-contrib-python==4.6.0.66
5
+ opencv-python-headless
6
+ numpy~=1.23.4
7
+ pillow~=8.0.1
8
+ streamlit~=1.12.2
9
+ pandas~=1.5.0