# Quick OCR Copy — Streamlit demo: detect text regions in an uploaded image
# and offer one-click copy of each recognized text snippet.
| import streamlit as st | |
| from akaocr import TextEngine, BoxEngine | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
| import time | |
| from custom_component import st_copy_to_clipboard | |
# Initialize the OCR engines once at import time so Streamlit script reruns
# reuse the loaded models instead of reloading them on every interaction.
box_engine = BoxEngine()    # detector: image -> sequence of 4-point text boxes
text_engine = TextEngine()  # recognizer: list of crops -> results; result[0] is the text (see main)
def transform_image(image, box):
    """Crop a quadrilateral region from *image* and rectify it to a rectangle.

    Args:
        image: Source image as an H x W x C numpy array (OpenCV BGR order).
        box: Four (x, y) corner points, ordered top-left, top-right,
            bottom-right, bottom-left. Accepts a numpy array or any
            sequence convertible to one.

    Returns:
        The perspective-corrected crop as a numpy array. Crops much taller
        than wide (height/width >= 1.25) are rotated 90 degrees clockwise,
        on the assumption they contain vertical text — TODO confirm the
        recognizer expects horizontal lines.

    Raises:
        ValueError: If *box* does not contain exactly 4 points.
    """
    # Explicit check instead of `assert`: asserts are stripped under -O.
    if len(box) != 4:
        raise ValueError("Shape of points must be 4x2")
    # Convert up front so plain Python point lists work, not only ndarrays.
    box = np.array(box, dtype="float32")
    # Output size: the longer of each pair of opposite edges, so the
    # warp never shrinks the text along either axis.
    img_crop_width = int(
        max(
            np.linalg.norm(box[0] - box[1]),
            np.linalg.norm(box[2] - box[3])))
    img_crop_height = int(
        max(
            np.linalg.norm(box[0] - box[3]),
            np.linalg.norm(box[1] - box[2])))
    # Destination corners of the axis-aligned output rectangle.
    pts_std = np.float32([[0, 0],
                          [img_crop_width, 0],
                          [img_crop_width, img_crop_height],
                          [0, img_crop_height]])
    M = cv2.getPerspectiveTransform(box, pts_std)
    dst_img = cv2.warpPerspective(
        image,
        M, (img_crop_width, img_crop_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)
    img_height, img_width = dst_img.shape[0:2]
    # Tall, narrow crop: rotate 90 degrees clockwise (k=3 counter-clockwise
    # quarter turns) before recognition.
    if img_height / img_width >= 1.25:
        dst_img = np.rot90(dst_img, k=3)
    return dst_img
def two_pts(bounding_box):
    """Collapse a 4-point box into an axis-aligned (top-left, bottom-right) pair.

    Args:
        bounding_box: Iterable of (x, y) corner points.

    Returns:
        ((min_x, min_y), (max_x, max_y)) with each coordinate rounded to
        the nearest integer.
    """
    xs = [pt[0] for pt in bounding_box]
    ys = [pt[1] for pt in bounding_box]
    top_left = (round(min(xs)), round(min(ys)))
    bottom_right = (round(max(xs)), round(max(ys)))
    return top_left, bottom_right
def main():
    """Streamlit entry point: upload an image, run OCR, and render the
    image with a click-to-copy button over each detected text box."""
    st.set_page_config(
        page_title="Quick OCR Copy",
        page_icon=":flag-vn:",
        layout="wide"
    )
    uploaded_file = st.file_uploader(
        "Choose an image...", type=["jpg", "jpeg", "png"])
    # Guard clause: nothing to do until the user uploads a file.
    if uploaded_file is None:
        return
    # Decode the uploaded bytes into a BGR OpenCV image.
    file_bytes = np.asarray(
        bytearray(uploaded_file.read()), dtype=np.uint8)
    org_image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
    if org_image is None:
        # imdecode signals failure by returning None, not by raising.
        st.error("Could not decode the uploaded file as an image.")
        return
    # Text detection.
    _time_start = time.perf_counter()
    boxes = box_engine(org_image)
    st.write(
        f"Text detection took {time.perf_counter() - _time_start:.2f} seconds.")
    # Reverse once so the recognition crops and the button coordinates
    # below are built from the same ordering.
    ordered_boxes = boxes[::-1]
    # Crop and rectify each detected box for recognition.
    images = [transform_image(org_image, box) for box in ordered_boxes]
    # Text recognition.
    _time_start = time.perf_counter()
    texts = text_engine(images)
    st.write(
        f"Text recognition took {time.perf_counter() - _time_start:.2f} seconds.")
    # Convert BGR -> RGB PIL image for display in the custom component.
    output_image = Image.fromarray(
        cv2.cvtColor(org_image, cv2.COLOR_BGR2RGB))
    button_coords = [two_pts(box) for box in ordered_boxes]
    # Each recognition result is indexable; element 0 is the text string.
    text_list = [x[0] for x in texts]
    # Custom component: draws the image and a copy button per text box.
    st_copy_to_clipboard(
        image=output_image,
        button_coords=button_coords,
        text_list=text_list,
        before_copy_label="",
        after_copy_label=""
    )
# Script guard: launch the app only when executed directly (e.g. via
# `streamlit run`), not when imported as a module.
if __name__ == '__main__':
    main()