Spaces:
Sleeping
Sleeping
import cv2 | |
import numpy as np | |
from itertools import groupby | |
def process_image(image, recognition_input_layer):
    """Resize and pad a one-line grayscale image to the recognition input size.

    The image is scaled so its height matches the model input height, then
    padded on the right (replicating the edge column) up to the model input
    width. If the scaled image would be wider than the model input, it is
    instead resized directly to the model's width and height, which compresses
    it horizontally rather than failing.

    Args:
        image: 2-D array (height, width). The model reads a single line of
            text at a time and the image must already be grayscale; no colour
            conversion is performed here.
        recognition_input_layer: Object whose ``shape`` unpacks as
            (batch, channels, height, width).

    Returns:
        The prepared image with two leading singleton axes added, i.e. shaped
        (1, 1, H, W_out) where W_out equals the model width.

    Raises:
        ValueError: If ``image`` is not two-dimensional.
    """
    if len(image.shape) != 2:
        raise ValueError(
            "process_image expects a 2-D grayscale image, "
            f"got an array with shape {tuple(image.shape)}"
        )
    image_height = image.shape[0]

    # B, C, H, W = batch size, number of channels, height, width.
    _, _, H, W = recognition_input_layer.shape

    # Scale by the height ratio so the aspect ratio is preserved.
    scale_ratio = H / image_height
    resized_image = cv2.resize(
        image, None, fx=scale_ratio, fy=scale_ratio, interpolation=cv2.INTER_AREA
    )

    pad_width = W - resized_image.shape[1]
    if pad_width < 0:
        # The line is too wide for the model input: np.pad cannot take a
        # negative amount, so fit the image to the exact input size instead.
        resized_image = cv2.resize(
            image, (int(W), int(H)), interpolation=cv2.INTER_AREA
        )
        pad_width = 0

    # Pad on the right only, repeating the last column.
    resized_image = np.pad(
        resized_image, ((0, 0), (0, pad_width)), mode="edge"
    )

    # Add batch and channel axes to match the network input layout.
    return resized_image[None, None, :, :]
def recognize(image, compiled_model, recognition_input_layer, recognition_output_layer, letters):
    """Run text recognition on one image and greedily decode the result.

    Args:
        image: Image passed to ``process_image`` for resizing and padding.
        compiled_model: Callable model; invoked with a one-element list holding
            the prepared input, and indexed by ``recognition_output_layer``.
        recognition_input_layer: Input layer description forwarded to
            ``process_image``.
        recognition_output_layer: Key selecting the predictions from the
            model result.
        letters: Indexable alphabet mapping a class index to its symbol.
            Index 0 is treated as the blank symbol and is never emitted.

    Returns:
        A list of decoded symbols, in order (not joined into a string).
    """
    input_image = process_image(image, recognition_input_layer)

    # Run inference on the model.
    predictions = compiled_model([input_image])[recognition_output_layer]

    # Drop singleton axes (the batch dimension).
    # NOTE(review): assumes the result is then (time steps, classes) - confirm
    # against the model's output layout.
    predictions = np.squeeze(predictions)

    # Most probable class index per time step.
    predictions_indexes = np.argmax(predictions, axis=1)

    # CTC greedy decoding: collapse runs of identical indexes to a single
    # entry, then drop blanks (index 0). Iterating the groupby keys directly
    # avoids building an object array of (key, grouper) pairs.
    return [
        letters[letter_index]
        for letter_index, _ in groupby(predictions_indexes)
        if letter_index != 0
    ]