wangxinhe's picture
Downgrade Gradio to v3 in order to use API endpoint
cfc019d verified
raw
history blame contribute delete
No virus
1.38 kB
import gradio as gr
import tensorflow as tf
from huggingface_hub import from_pretrained_keras
from tensorflow import keras
# Inverted lookup layer: turns integer class indices back into characters.
# The vocabulary is the digits 1-9 followed by the uppercase letters A-Z,
# in sorted order; no mask token is reserved.
num_to_char = keras.layers.StringLookup(
    vocabulary=list("123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
    invert=True,
    mask_token=None,
)
# Fetch the trained network from the Hugging Face Hub. It is only used for
# inference here, so it is loaded without being compiled.
model = from_pretrained_keras(
    "wangxinhe/luogu-captcha-recognition",
    compile=False,
)

# Build the inference-only model: it maps the first input of the loaded
# network to the output of its "dense2" layer.
prediction_model = keras.models.Model(
    inputs=model.input[0],
    outputs=model.get_layer(name="dense2").output,
)
prediction_model.summary()
def ocr(img):
    """Recognize the text shown in a CAPTCHA image.

    Args:
        img: Image array as delivered by the Gradio image input. The
            transpose below requires it to be rank 3; presumably
            (height, width, channels) with integer pixel values --
            confirm against the input component.

    Returns:
        The decoded text as a ``str``, built from at most the first four
        decoded labels, with CTC padding entries removed.
    """
    # Convert to float32 in [0, 1] range
    img = tf.image.convert_image_dtype(img, tf.float32)
    # Transpose the image because we want the time
    # dimension to correspond to the width of the image.
    img = tf.transpose(img, perm=[1, 0, 2])
    # The model works on batches, so add a leading batch axis of size 1.
    preds = prediction_model(tf.expand_dims(img, axis=0))
    # Use greedy search. For complex tasks, you can use beam search
    decoded = keras.backend.ctc_decode(
        preds, input_length=[preds.shape[1]], greedy=True
    )[0][0]
    # Keep the first four labels of the only sample in the batch.
    labels = decoded[0, :4]
    # ctc_decode pads short sequences with -1, which the inverted lookup
    # would render as its OOV token ("[UNK]"). Drop those entries so that
    # only real characters reach the returned text.
    labels = tf.boolean_mask(labels, labels >= 0)
    return tf.strings.reduce_join(num_to_char(labels)).numpy().decode("ascii")
# Upload-only image input; `shape` asks Gradio to crop/resize every
# submitted image to (90, 35) before it is handed to `ocr`.
captcha_input = gr.Image(
    label="CAPTCHA image",
    source="upload",
    shape=(90, 35),
)

# Wire the recognizer to a simple image -> textbox interface and serve it.
iface = gr.Interface(outputs="textbox", inputs=captcha_input, fn=ocr)
iface.launch()