# Quick OCR Copy — Streamlit demo: detect text regions in an uploaded image
# and offer one-click copy of each recognized text snippet.
| import streamlit as st | |
| from akaocr import TextEngine, BoxEngine | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
| import time | |
| from custom_component import st_copy_to_clipboard | |
# Initialize the OCR engines once at import time so Streamlit script reruns
# reuse the loaded models instead of reloading them on every interaction.
box_engine = BoxEngine()    # detector: image -> sequence of 4-point text boxes
text_engine = TextEngine()  # recognizer: list of crops -> results; result[0] is the text (see main)
def transform_image(image, box):
    """Crop a quadrilateral region from *image* and rectify it to a rectangle.

    Args:
        image: Source image as an H x W x C numpy array (OpenCV BGR order).
        box: Four (x, y) corner points, ordered top-left, top-right,
            bottom-right, bottom-left. Accepts a numpy array or any
            sequence convertible to one.

    Returns:
        The perspective-corrected crop as a numpy array. Crops much taller
        than wide (height/width >= 1.25) are rotated 90 degrees clockwise,
        on the assumption they contain vertical text — TODO confirm the
        recognizer expects horizontal lines.

    Raises:
        ValueError: If *box* does not contain exactly 4 points.
    """
    # Explicit check instead of `assert`: asserts are stripped under -O.
    if len(box) != 4:
        raise ValueError("Shape of points must be 4x2")
    # Convert up front so plain Python point lists work, not only ndarrays.
    box = np.array(box, dtype="float32")
    # Output size: the longer of each pair of opposite edges, so the
    # warp never shrinks the text along either axis.
    img_crop_width = int(
        max(
            np.linalg.norm(box[0] - box[1]),
            np.linalg.norm(box[2] - box[3])))
    img_crop_height = int(
        max(
            np.linalg.norm(box[0] - box[3]),
            np.linalg.norm(box[1] - box[2])))
    # Destination corners of the axis-aligned output rectangle.
    pts_std = np.float32([[0, 0],
                          [img_crop_width, 0],
                          [img_crop_width, img_crop_height],
                          [0, img_crop_height]])
    M = cv2.getPerspectiveTransform(box, pts_std)
    dst_img = cv2.warpPerspective(
        image,
        M, (img_crop_width, img_crop_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)
    img_height, img_width = dst_img.shape[0:2]
    # Tall, narrow crop: rotate 90 degrees clockwise (k=3 counter-clockwise
    # quarter turns) before recognition.
    if img_height / img_width >= 1.25:
        dst_img = np.rot90(dst_img, k=3)
    return dst_img
def two_pts(bounding_box):
    """Collapse a 4-point box into an axis-aligned (top-left, bottom-right) pair.

    Args:
        bounding_box: Iterable of (x, y) corner points.

    Returns:
        ((min_x, min_y), (max_x, max_y)) with each coordinate rounded to
        the nearest integer.
    """
    xs = [pt[0] for pt in bounding_box]
    ys = [pt[1] for pt in bounding_box]
    top_left = (round(min(xs)), round(min(ys)))
    bottom_right = (round(max(xs)), round(max(ys)))
    return top_left, bottom_right
def main():
    """Streamlit entry point: upload an image, run OCR, and render the
    image with a click-to-copy button over each detected text box."""
    st.set_page_config(
        page_title="Quick OCR Copy",
        page_icon=":flag-vn:",
        layout="wide"
    )
    uploaded_file = st.file_uploader(
        "Choose an image...", type=["jpg", "jpeg", "png"])
    # Guard clause: nothing to do until the user uploads a file.
    if uploaded_file is None:
        return
    # Decode the uploaded bytes into a BGR OpenCV image.
    file_bytes = np.asarray(
        bytearray(uploaded_file.read()), dtype=np.uint8)
    org_image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
    if org_image is None:
        # imdecode signals failure by returning None, not by raising.
        st.error("Could not decode the uploaded file as an image.")
        return
    # Text detection.
    _time_start = time.perf_counter()
    boxes = box_engine(org_image)
    st.write(
        f"Text detection took {time.perf_counter() - _time_start:.2f} seconds.")
    # Reverse once so the recognition crops and the button coordinates
    # below are built from the same ordering.
    ordered_boxes = boxes[::-1]
    # Crop and rectify each detected box for recognition.
    images = [transform_image(org_image, box) for box in ordered_boxes]
    # Text recognition.
    _time_start = time.perf_counter()
    texts = text_engine(images)
    st.write(
        f"Text recognition took {time.perf_counter() - _time_start:.2f} seconds.")
    # Convert BGR -> RGB PIL image for display in the custom component.
    output_image = Image.fromarray(
        cv2.cvtColor(org_image, cv2.COLOR_BGR2RGB))
    button_coords = [two_pts(box) for box in ordered_boxes]
    # Each recognition result is indexable; element 0 is the text string.
    text_list = [x[0] for x in texts]
    # Custom component: draws the image and a copy button per text box.
    st_copy_to_clipboard(
        image=output_image,
        button_coords=button_coords,
        text_list=text_list,
        before_copy_label="",
        after_copy_label=""
    )
# Script guard: launch the app only when executed directly (e.g. via
# `streamlit run`), not when imported as a module.
if __name__ == '__main__':
    main()