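# NOTE: the following lines are Hugging Face file-viewer residue that was
# captured together with the source; they are kept as comments so that the
# module remains valid Python.
# OCR / app.py
# Suevar's picture
# Update app.py
# 4215c80 verified
# raw
# history blame contribute delete
# No virus
# 11.5 kB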
# coding: utf-8
import cv2
import numpy as np
import tempfile
from pathlib import Path
import gradio as gr
import math
from paddleocr import PaddleOCR, draw_ocr
import re
from PIL import Image
def image_split(image, max_height=2000):
    """Split a tall image into horizontal strips of at most ``max_height`` rows.

    Args:
        image: Array-like image exposing ``.shape`` whose first two entries
            are (height, width) and supporting row slicing plus ``.copy()``
            (a NumPy array in this file).
        max_height: Height threshold. Images shorter than this are not split.

    Returns:
        A list of images. If the image height is below ``max_height`` the list
        contains the original object itself. Otherwise it contains
        ``ceil(height / max_height)`` independent copies whose heights differ
        by at most one row and which together cover every row of the input.

    Raises:
        ValueError: If ``max_height`` is not positive.
    """
    if max_height <= 0:
        raise ValueError("max_height must be a positive number of rows")

    h = image.shape[0]
    if h < max_height:
        return [image]

    num = int(math.ceil(h / max_height))
    sub_images = []
    for i in range(num):
        # Integer-proportional boundaries: the strips tile the whole image,
        # so no trailing rows are lost when ``h`` is not a multiple of ``num``
        # (the former ``int(h / num)`` strip height dropped the remainder).
        start_y = i * h // num
        end_y = (i + 1) * h // num
        # A plain copy replaces the former same-size cv2.resize call, which
        # produced an identical copy of the slice.
        sub_images.append(image[start_y:end_y].copy())
    return sub_images
def image_resize(image, target_width=541):
    """Resize an image to a fixed width while keeping its aspect ratio.

    Args:
        image: Image array whose ``shape[:2]`` is (height, width).
        target_width: Width of the returned image in pixels. Defaults to 541,
            the value that was previously hard-coded.

    Returns:
        The image resized with ``cv2.INTER_AREA`` interpolation to
        ``target_width`` columns and a proportionally scaled number of rows.
    """
    h, w = image.shape[:2]
    scale = target_width / float(w)
    # int() truncates, so an extremely wide image could yield a height of 0,
    # which cv2.resize rejects; keep at least one row.
    new_height = max(1, int(h * scale))
    return cv2.resize(image, (target_width, new_height), interpolation=cv2.INTER_AREA)
def msg_type(line_list, image, ori_width):
    """Extract the text, position and background colour of one OCR line.

    Args:
        line_list: One OCR line of the form ``[box, (text, score)]``. ``box``
            is a sequence of ``(x, y)`` points; only ``box[0]`` and ``box[3]``
            are read (presumably the top-left and bottom-left corners).
        image: Image array the colour is sampled from, indexed as
            ``image[y][x]``.
        ori_width: Width of the image the box coordinates refer to.

    Returns:
        ``(words, (x, y_mid), (y_min, y_max), color)`` where the coordinates
        are scaled into ``image`` space and ``color`` is ``image[y_mid][x]``.
        The sampling point is clamped to the image bounds. If ``line_list``
        does not have exactly two elements, every field is ``None``:
        ``(None, (None, None), (None, None), None)``.
    """
    if len(line_list) != 2:
        # Same 4-field shape as the success path so callers that unpack
        # ``words, (x, y_mid), (y_min, y_max), color`` do not raise.
        return None, (None, None), (None, None), None

    box = line_list[0]
    words = line_list[1][0]
    img_height, img_width = image.shape[:2]
    # Map coordinates from the original image onto ``image``; for the 541 px
    # wide images produced by image_resize this equals the former 541 / ori_width.
    resize_ratio = img_width / ori_width

    x = int(box[0][0] * resize_ratio)
    y_min = int(box[0][1] * resize_ratio)
    y_max = int(box[3][1] * resize_ratio)
    # Sample at the vertical centre of the line's left edge so that the
    # bubble background colour is read more reliably.
    y_mid = y_min + int((y_max - y_min) / 2)

    # Clamp the sampling point: rounding or a box touching the border must not
    # index outside the image (or wrap around through a negative index).
    x = min(max(x, 0), img_width - 1)
    y_mid = min(max(y_mid, 0), img_height - 1)

    color = image[y_mid][x]
    return words, (x, y_mid), (y_min, y_max), color
# Matches a whole recognized line that is only a chat timestamp:
#   [date prefix][day period]H:MM
# where the optional date prefix is "YYYY年M月D日", "M月D日", "昨天" or a weekday
# ("星期一" ... "星期日"), and the optional day period is one of
# 凌晨 / 上午 / 下午 / 晚上. This covers exactly the ten forms that were previously
# written as ten separate patterns, e.g. "24:24", "上午9:05", "2024年4月12日24:24",
# "昨天晚上8:00", "4月12日下午3:30", "星期一凌晨1:15".
_TIMESTAMP_PATTERN = re.compile(
    r'^(?:\d{4}年\d{1,2}月\d{1,2}日|\d{1,2}月\d{1,2}日|昨天|星期[一二三四五六日])?'
    r'(?:凌晨|上午|下午|晚上)?'
    r'\d{1,2}:\d{1,2}$'
)


def date_delete(words, line_text, line, dialog, res):
    """Tell whether an OCR line is just a timestamp that should be dropped.

    Args:
        words: Recognized text of the current line.
        line_text: Message text accumulated so far by the caller.
        line: The current OCR line entry.
        dialog: List of finished messages; mutated in place.
        res: Sequence of all OCR line entries being processed.

    Returns:
        ``True`` if ``words`` is a timestamp, ``False`` otherwise.

    Side effects:
        When ``words`` is a timestamp and ``line`` equals the last entry of
        ``res``, ``line_text`` is appended to ``dialog`` so that the pending
        message is not lost when the final line is skipped.
    """
    if not _TIMESTAMP_PATTERN.match(words):
        return False
    if line == res[-1]:
        dialog.append(line_text)
    return True
def read_txt_file(file_path):
    """Return the entire contents of the UTF-8 text file at ``file_path``."""
    return Path(file_path).read_text(encoding='utf-8')
def start_processing(input_path, output_path=None):
    """Run OCR on the submitted image and return the transcript text.

    Args:
        input_path: Value passed straight through to ``OCR_text``.
        output_path: Unused. It is optional because the UI wires this function
            with a single input, so a required second parameter would make
            the call fail with a ``TypeError``.

    Returns:
        Whatever ``OCR_text(input_path)`` returns.
    """
    return OCR_text(input_path)
# Literal strings that are discarded when a recognized line equals one of them
# (system notices, UI labels and similar non-message text).
_UI_NOISE_TEXTS = frozenset([
    '...', '我通过了你的朋友验证请求,现在', '我们可以开始聊天了', '以上是打招呼的内容', '三',
    '+', '你撤回了一条消息', 'HD', ':', '按住说话', '以下是新消息', '川',
])
# Voice-message labels: 1-3 digits followed by a double or single quote.
_VOICE_LABEL_PATTERN = re.compile(r'''^\d{1,3}["']$''')
# Lines recognized with a score below this value are ignored.
_MIN_OCR_SCORE = 0.75
# A background pixel counts as white when every channel exceeds this value.
_WHITE_CHANNEL_MIN = 150


def _get_ocr_engine():
    """Return the shared PaddleOCR engine, creating it on first use."""
    engine = getattr(_get_ocr_engine, '_engine', None)
    if engine is None:
        engine = PaddleOCR(use_angle_cls=True, lang="ch", ocr_version="PP-OCRv4")
        _get_ocr_engine._engine = engine
    return engine


def _is_ui_noise(words, score):
    """Tell whether a line is low-confidence, a known UI string or a voice label."""
    return (
        score < _MIN_OCR_SCORE
        or words in _UI_NOISE_TEXTS
        or bool(_VOICE_LABEL_PATTERN.match(words))
    )


def OCR_text(img):
    """Transcribe a chat screenshot into ``Talker1:`` / ``Talker2:`` lines.

    Args:
        img: Mapping with a ``"background"`` entry holding the image as an
            array of shape (height, width, channels); only the first three
            channels are used.

    Returns:
        One message per line, each terminated by ``'\\n'``. A message is
        prefixed ``Talker1:`` when the pixel sampled at the left edge of its
        first line is white (all channels > 150) and ``Talker2:`` otherwise.
        Consecutive OCR lines whose vertical gap is smaller than the line
        height are merged into one message. Returns ``''`` when no image is
        supplied or nothing usable is recognized.

    Notes:
        OCR runs on the full image, so the box coordinates are in full-image
        space; colours are therefore sampled from the resized full image.
        Sampling from separately split strips indexed outside the strip for
        tall screenshots and repeated every line once per strip.
    """
    background = img.get("background") if img else None
    if background is None:
        return ''

    full_image = background[:, :, :3]
    result = _get_ocr_engine().ocr(full_image, cls=True)

    ori_width = full_image.shape[1]
    image = image_resize(full_image)

    dialog = []
    line_text = ''
    y_max_last = 0
    for res in result or []:
        if not res:
            # No text detected on this page.
            continue
        last_line = res[-1]
        for line in res:
            is_last = line == last_line
            if len(line) != 2:
                # Not a [box, (text, score)] entry; nothing to extract.
                if is_last:
                    dialog.append(line_text)
                continue

            words, _point, (y_min, y_max), color = msg_type(line, image, ori_width)

            if date_delete(words, line_text, line, dialog, res):
                # date_delete has already flushed the pending message if this
                # was the last line.
                continue
            if _is_ui_noise(words, line[1][1]):
                if is_last:
                    dialog.append(line_text)
                continue

            # Colour heuristic (needs further testing): a white background is
            # attributed to Talker1, anything else to Talker2.
            is_white = (
                color[0] > _WHITE_CHANNEL_MIN
                and color[1] > _WHITE_CHANNEL_MIN
                and color[2] > _WHITE_CHANNEL_MIN
            )
            speaker = 'Talker1' if is_white else 'Talker2'

            if line_text and (y_min - y_max_last) < (y_max - y_min):
                # Close to the previous line: continuation of the same message.
                line_text = line_text + words
            else:
                if line_text:
                    dialog.append(line_text)
                line_text = speaker + ':' + words
            y_max_last = y_max

            if is_last:
                dialog.append(line_text)
        # The pending message was flushed on the last line of this page.
        line_text = ''

    # One message per output line; empty entries (flushes that happened before
    # any message text existed) are skipped.
    return ''.join(entry + '\n' for entry in dialog if entry)
if __name__ == '__main__':
    # Predefined inputs offered below the form for a quick try-out.
    sample_inputs = [['./image/3.png']]
    # Heading shown at the top of the tab ("chat screenshot to text").
    intro_markdown = "\n# OCR\n聊天截图转文字\n"

    with gr.Blocks() as demo:
        with gr.Tab("Module-聊天截图->文字"):
            gr.Markdown(intro_markdown)

            # Two columns side by side: input on the left, result on the right.
            with gr.Row():
                with gr.Column():
                    # Image upload / editing widget that feeds the OCR.
                    image_input = gr.ImageEditor(label="Input image")
                    with gr.Row():
                        run_button = gr.Button("Process")
                with gr.Column():
                    # Text box that displays the transcript.
                    transcript_box = gr.Text(label="Output")

            # Example gallery; caching is disabled.
            gr.Examples(
                sample_inputs,
                label="Examples",
                inputs=[image_input],
                outputs=[transcript_box],
                fn=start_processing,
                cache_examples=False,
            )

            # Clicking the button runs the OCR pipeline on the current image.
            run_button.click(
                start_processing,
                inputs=[image_input],
                outputs=[transcript_box],
            )

    demo.launch(share=True)