Spaces:

Suevar
/

OCR

Runtime error

App Files Files Community

OCR / app.py

Suevar

Update app.py

4215c80 verified 6 months ago

raw

history blame contribute delete

No virus

11.5 kB

	# coding: utf-8
	import cv2
	import numpy as np
	import tempfile
	from pathlib import Path
	import gradio as gr
	import math
	from paddleocr import PaddleOCR, draw_ocr
	import re
	from PIL import Image

	def image_split(image, max_height=2000):
	    """Split a tall image into horizontal strips of nearly equal height.

	    Args:
	        image: Array-like image exposing ``shape`` and row slicing
	            (rows on the first axis), e.g. a NumPy array.
	        max_height: Images at least this tall are split; no returned strip
	            is taller than this value. Defaults to 2000.

	    Returns:
	        A list of images ordered top to bottom. An image shorter than
	        ``max_height`` is returned unchanged as the only element; otherwise
	        the list holds independent copies of each strip, and the strips
	        together cover every row of the input exactly once.

	    Raises:
	        ValueError: If ``max_height`` is not positive.
	    """
	    if max_height <= 0:
	        raise ValueError("max_height must be a positive number of rows")

	    h = image.shape[0]
	    if h < max_height:
	        return [image]

	    num = int(math.ceil(h / max_height))
	    sub_images = []
	    for i in range(num):
	        # Proportional boundaries spread the remainder over the strips, so
	        # no trailing rows are lost when h is not a multiple of num.
	        start_y = (i * h) // num
	        end_y = ((i + 1) * h) // num
	        # A plain copy replaces the previous same-size cv2.resize call,
	        # which did not change the strip's dimensions.
	        sub_images.append(image[start_y:end_y].copy())
	    return sub_images

	def image_resize(image):
	    """Scale an image to a fixed width of 541 pixels, keeping its aspect ratio.

	    Args:
	        image: Image array whose ``shape`` starts with (height, width).

	    Returns:
	        The image resized with area interpolation to 541 pixels wide and a
	        proportionally scaled (truncated to int) height.
	    """
	    target_width = 541
	    src_height, src_width = image.shape[:2]
	    # Same factor on both axes preserves the aspect ratio.
	    scale = target_width / float(src_width)
	    new_size = (target_width, int(src_height * scale))
	    return cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)

	def msg_type(line_list, image, ori_width, target_width=541):
	    """Extract the text, position and background colour of one OCR line.

	    Args:
	        line_list: One OCR entry of the form ``[box, (text, score)]`` where
	            ``box`` is a sequence of ``(x, y)`` points; the first and fourth
	            points are read (presumably the top-left and bottom-left
	            corners - confirm against the OCR engine's output format).
	        image: The resized image the colour is sampled from, indexed as
	            ``image[row, column]``.
	        ori_width: Width of the image the OCR coordinates refer to.
	        target_width: Width of ``image`` after resizing; used to scale the
	            OCR coordinates. Defaults to 541.

	    Returns:
	        A 4-tuple ``(words, (x, y_mid), (y_min, y_max), color)``. ``x`` and
	        ``y_mid`` are the pixel actually sampled (clamped to the image),
	        ``y_min``/``y_max`` are the scaled vertical extent of the text, and
	        ``color`` is ``image[y_mid, x]``. When ``line_list`` does not have
	        exactly two elements, every field is ``None`` but the tuple keeps
	        the same four-slot shape so callers can always unpack it.
	    """
	    if len(line_list) != 2:
	        return None, (None, None), (None, None), None

	    resize_ratio = target_width / ori_width
	    box = line_list[0]
	    words = line_list[1][0]

	    x = int(box[0][0] * resize_ratio)
	    y_min = int(box[0][1] * resize_ratio)
	    y_max = int(box[3][1] * resize_ratio)
	    # Sample at the vertical centre of the text so the bubble background
	    # colour is read more reliably.
	    y_mid = y_min + int((y_max - y_min) / 2)

	    # Rounding or boxes touching the border can land one pixel outside the
	    # image; clamp so the lookup neither raises nor wraps around.
	    height, width = image.shape[:2]
	    x = min(max(x, 0), width - 1)
	    y_mid = min(max(y_mid, 0), height - 1)

	    color = image[y_mid][x]
	    return words, (x, y_mid), (y_min, y_max), color

	# A chat timestamp: an optional date prefix, an optional day period and a
	# clock time. Examples: "12:30", "上午9:05", "昨天晚上10:01",
	# "4月12日8:00", "2024年4月12日下午3:15", "星期三凌晨1:00".
	_TIMESTAMP_PATTERN = re.compile(
	    r"^(?:\d{4}年\d{1,2}月\d{1,2}日|\d{1,2}月\d{1,2}日|昨天|星期[一二三四五六日])?"
	    r"(?:凌晨|上午|下午|晚上)?"
	    r"\d{1,2}:\d{1,2}$"
	)


	def date_delete(words, line_text, line, dialog, res):
	    """Report whether an OCR line is a chat timestamp that should be skipped.

	    Args:
	        words: Recognized text of the current OCR line.
	        line_text: The message text accumulated so far.
	        line: The current OCR entry.
	        dialog: List of finished messages; mutated in place.
	        res: All OCR entries of the current page; ``res[-1]`` is compared
	            with ``line`` to detect the final entry.

	    Returns:
	        True if ``words`` is a timestamp, False otherwise. When a timestamp
	        is the last entry of ``res``, ``line_text`` is appended to
	        ``dialog`` first so the pending message is not lost.
	    """
	    if not _TIMESTAMP_PATTERN.match(words):
	        return False
	    if line == res[-1]:
	        dialog.append(line_text)
	    return True

	def read_txt_file(file_path):
	    """Return the whole content of a UTF-8 encoded text file as one string."""
	    with open(file_path, mode='r', encoding='utf-8') as handle:
	        return handle.read()

	def start_processing(input_path, output_path=None):
	    """Run OCR on the submitted image and return the recognized dialog text.

	    Args:
	        input_path: The value passed on to ``OCR_text``.
	        output_path: Unused. It is optional because the UI wires this
	            handler with a single input, so a required second parameter
	            would make every call fail with ``TypeError``.

	    Returns:
	        The text returned by ``OCR_text``.
	    """
	    return OCR_text(input_path)

	# Lazily created PaddleOCR engine, reused across calls so the models are
	# not reloaded for every request.
	_OCR_ENGINE = None

	# OCR lines with a confidence score below this value are ignored.
	_MIN_CONFIDENCE = 0.75

	# Chat-UI chrome and system notices that are not part of the conversation.
	_UI_NOISE = frozenset([
	    '...', '我通过了你的朋友验证请求，现在', '我们可以开始聊天了', '以上是打招呼的内容', '三',
	    '+', '你撤回了一条消息', 'HD', ':', '按住说话', '以下是新消息', '川',
	])

	# Voice-message duration labels such as 12" or 5'.
	_VOICE_LENGTH_PATTERN = re.compile(r"""^\d{1,3}["']$""")

	# A sampled pixel counts as white when all three channels exceed this value.
	_WHITE_THRESHOLD = 150


	def _get_ocr_engine():
	    """Return the shared PaddleOCR engine, creating it on first use."""
	    global _OCR_ENGINE
	    if _OCR_ENGINE is None:
	        _OCR_ENGINE = PaddleOCR(use_angle_cls=True, lang="ch", ocr_version="PP-OCRv4")
	    return _OCR_ENGINE


	def _is_noise(words, score):
	    """Return True for low-confidence lines, UI chrome and voice durations."""
	    return (
	        score < _MIN_CONFIDENCE
	        or words in _UI_NOISE
	        or bool(_VOICE_LENGTH_PATTERN.match(words))
	    )


	def _collect_dialog(res, image, ori_width, dialog):
	    """Append the chat messages found in one OCR page to ``dialog``."""
	    line_text = ''
	    y_max_last = 0
	    for line in res:
	        words, score = line[1][0], line[1][1]

	        if _is_noise(words, score):
	            # Even when the last line is skipped, the pending message
	            # still has to be flushed.
	            if line == res[-1]:
	                dialog.append(line_text)
	            continue

	        # date_delete flushes the pending message itself when the
	        # timestamp is the last line of the page.
	        if date_delete(words, line_text, line, dialog, res):
	            continue

	        _, _, (y_min, y_max), color = msg_type(line, image, ori_width)

	        # Speaker detection by bubble colour (needs further testing):
	        # a white background is treated as speaker 1, anything else as
	        # speaker 2.
	        is_white = all(channel > _WHITE_THRESHOLD for channel in color[:3])
	        prefix = 'Talker1：' if is_white else 'Talker2:'

	        if line_text == '':
	            line_text = prefix + words
	        elif (y_min - y_max_last) < (y_max - y_min):
	            # Gap smaller than one text height: continuation of the
	            # current message.
	            line_text += words
	        else:
	            dialog.append(line_text)
	            line_text = prefix + words
	        y_max_last = y_max

	        if line == res[-1]:
	            dialog.append(line_text)


	def OCR_text(img):
	    """Convert a chat screenshot into a speaker-labelled transcript.

	    Args:
	        img: Mapping whose ``"background"`` entry is an image array indexed
	            as ``[row, column, channel]``; only the first three channels
	            are used. ``None`` or a missing background yields an empty
	            result.

	    Returns:
	        One message per line, each prefixed with ``Talker1：`` or
	        ``Talker2:`` and terminated by a newline. Empty string when there
	        is no image or nothing was recognized.
	    """
	    if img is None or img.get("background") is None:
	        return ''

	    # Keep only the first three channels (drops an alpha channel if present).
	    page = img["background"][:, :, :3]
	    ocr = _get_ocr_engine()

	    dialog = []
	    for tile in image_split(page):
	        ori_width = tile.shape[1]
	        # Recognize each tile on its own so the box coordinates refer to
	        # the same pixels the bubble colour is sampled from.
	        result = ocr.ocr(tile, cls=True)
	        sample = image_resize(tile)[:, :, :3]
	        for res in result:
	            # Skip pages for which the engine reported no text.
	            if not res:
	                continue
	            _collect_dialog(res, sample, ori_width, dialog)

	    # One message per line; flushes of an empty pending message are dropped.
	    return ''.join(entry + '\n' for entry in dialog if entry)


	# Build the Gradio interface and launch it when the file is run as a script.
	if __name__ == '__main__':

	examples = [['./image/3.png',]]
	with gr.Blocks() as demo:
	with gr.Tab("Module-聊天截图->文字"):
	# Show a Markdown header that introduces what this tab does
	gr.Markdown(
	"""
	# OCR
	聊天截图转文字
	"""
	)
	# Row container: lays out its children horizontally
	with gr.Row():
	# Column container: stacks its children vertically
	with gr.Column():
	input_path = gr.ImageEditor(label="Input image") # Image input widget used to upload the screenshot

	with gr.Row():
	# Button that starts processing (the stop button below is disabled)
	start_button = gr.Button("Process")
	# stop_button = gr.Button("Stop")
	with gr.Column():
	output = gr.Text(label="Output") # Text widget that shows the recognized text
	# output_image = gr.Image(label="Output image")

	# Add examples to the GUI so users can pick a predefined input for a quick test
	gr.Examples(examples, label="Examples",
	inputs=[input_path],
	outputs=[output],
	fn=start_processing, # Function invoked when an example is processed
	cache_examples=False) # Disable example caching

	# Bind the button to the processing function; only the ImageEditor value is passed in
	start_button.click(start_processing, inputs=[input_path],
	outputs=[output])
	# stop_button.click(stop_processing)

	demo.launch(share=True)