"""Gradio demo that recognizes CAPTCHA images with a pretrained Keras CTC model."""

import gradio as gr
import tensorflow as tf
from huggingface_hub import from_pretrained_keras
from tensorflow import keras

# Maximum number of characters kept from a decoded prediction.
CAPTCHA_LENGTH = 4

# Inverse lookup: integer class index -> character. After `.upper()` and
# `set()`, the vocabulary is the digits 1-9 plus the uppercase letters A-Z.
num_to_char = keras.layers.StringLookup(
    vocabulary=sorted(
        set("abcdefghijklmnpqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ".upper())
    ),
    mask_token=None,
    invert=True,
)

model = from_pretrained_keras("wangxinhe/luogu-captcha-recognition", compile=False)

# Get the prediction model by extracting layers till the output layer.
# Only the first model input is used; the output is taken from "dense2".
prediction_model = keras.models.Model(
    model.input[0], model.get_layer(name="dense2").output
)
prediction_model.summary()


def ocr(img):
    """Return the text recognized in a single CAPTCHA image.

    Args:
        img: Image array as delivered by the Gradio ``Image`` input
            (presumably height x width x channels -- confirm against the
            Gradio version in use), or ``None`` when nothing was uploaded.

    Returns:
        The decoded text, at most ``CAPTCHA_LENGTH`` characters long. An
        empty string is returned when ``img`` is ``None``.
    """
    # Gradio passes None when the form is submitted without an image.
    if img is None:
        return ""

    # Convert to float32 in [0, 1] range
    img = tf.image.convert_image_dtype(img, tf.float32)
    # Transpose the image because we want the time
    # dimension to correspond to the width of the image.
    img = tf.transpose(img, perm=[1, 0, 2])

    # The model expects a batch dimension, so wrap the image in a batch of one.
    preds = prediction_model(tf.expand_dims(img, axis=0))

    # Use greedy search. For complex tasks, you can use beam search
    decoded = keras.backend.ctc_decode(
        preds, input_length=[preds.shape[1]], greedy=True
    )[0][0][:, :CAPTCHA_LENGTH]

    # ctc_decode pads short sequences with -1. Drop those entries so the
    # inverse lookup does not turn them into the literal "[UNK]" token.
    labels = decoded[0]
    labels = tf.boolean_mask(labels, labels >= 0)

    return tf.strings.reduce_join(num_to_char(labels)).numpy().decode("ascii")


iface = gr.Interface(
    fn=ocr,
    inputs=gr.Image(
        shape=(90, 35),
        source="upload",
        label="CAPTCHA image",
    ),
    outputs="textbox",
)
iface.launch()