"""Gradio demo that reads the text in a CAPTCHA image with a PARSeq model.

The model is loaded once at import time from the local file ``tensor.pt`` and
served through a ``gr.Interface`` whose single input is a PIL image and whose
single output is a textbox holding the decoded label.
"""

# Bootstrap step left disabled by the original author; it appears to be how
# ``tensor.pt`` was fetched -- confirm before re-enabling.
# import os
# os.system("curl -L -o tensor.pt https://seyarabata.com/btfo_by_24mb_model; sleep 3")

import gradio as gr
import torch
from PIL import Image
from strhub.data.module import SceneTextDataModule
# from strhub.models.utils import load_from_checkpoint, parse_model_args

# SECURITY: torch.load unpickles the file, and unpickling can execute
# arbitrary code. Only point this at a checkpoint obtained from a trusted
# source.
parseq = torch.load('tensor.pt', map_location=torch.device('cpu')).eval()

# Preprocessing pipeline built for the image size stored in the model's
# hyper-parameters.
img_transform = SceneTextDataModule.get_transform(parseq.hparams.img_size)

# Sample images offered in the UI; paths are relative to the working directory.
examples = [
    'show1.png',
    'show2.png',
    'show3.png',
    'show4.png',
    'show5.png',
]


def captcha_solver(img: Image.Image):
    """Return the text the model decodes from a single image.

    Args:
        img: PIL image supplied by the Gradio input component
            (declared with ``type="pil"``).

    Returns:
        The first entry of the decoded label sequence (presumably a ``str``;
        it is what the output textbox displays).
    """
    img = img.convert('RGB')
    # The transform yields one sample; unsqueeze(0) adds the leading batch
    # dimension before the model is called.
    batch = img_transform(img).unsqueeze(0)

    # Inference only: disabling autograd avoids building a gradient graph
    # that would never be used.
    with torch.no_grad():
        logits = parseq(batch)
        # Greedy decoding
        pred = logits.softmax(-1)
        label, confidence = parseq.tokenizer.decode(pred)

    print(label)
    print(confidence)
    return label[0]


# NOTE(review): gr.inputs / gr.outputs are the legacy Gradio namespaces --
# confirm they exist in the pinned Gradio version before upgrading it.
demo = gr.Interface(
    fn=captcha_solver,
    examples=examples,
    inputs=gr.inputs.Image(type="pil"),
    outputs=gr.outputs.Textbox(),
)

demo.launch()