"""Fetch the model assets, load the text-recognition model, build its transform.

Importing this module has side effects: it downloads ``blobzip.zip`` and
``tensor.pt`` with ``curl``, extracts the archive into the working directory
with ``unzip``, and loads the pickled model onto the CPU.
"""
import os
import pickle
import subprocess
from typing import Tuple

import gradio as gr


def _run(command, ok_codes=(0,)):
    """Run ``command`` (an argument list) without a shell.

    Raises:
        RuntimeError: if the exit status is not one of ``ok_codes``. Failing
            here is clearer than the import/unpickling error a missing or
            truncated asset would cause further down.
    """
    completed = subprocess.run(command)
    if completed.returncode not in ok_codes:
        raise RuntimeError(
            f"command failed with exit status {completed.returncode}: "
            f"{' '.join(command)}"
        )


# ``--fail`` makes curl exit non-zero on an HTTP error instead of saving the
# server's error page under the target file name.
_run(["curl", "--fail", "-L", "https://seyarabata.com/64628f9a546dd",
      "-o", "blobzip.zip"])
_run(["curl", "--fail", "-L", "https://seyarabata.com/646289aad2241",
      "-o", "tensor.pt"])
# ``-o`` overwrites files left by a previous run instead of stopping at an
# interactive prompt. Exit status 1 from unzip means "warnings only".
_run(["unzip", "-o", "blobzip.zip"], ok_codes=(0, 1))

# These imports deliberately stay below the extraction step.
# NOTE(review): presumably blobzip.zip unpacks the ``strhub`` package into the
# working directory -- confirm before hoisting them to the top of the file.
import strhub  # noqa: E402,F401
import torch  # noqa: E402
from PIL import Image  # noqa: E402,F401
from torchvision import transforms as T  # noqa: E402

print(f"Is CUDA available: {torch.cuda.is_available()}")


def get_transform(img_size: Tuple[int, ...], augment: bool = False, rotation: int = 0):
    """Build the preprocessing pipeline applied to an image before inference.

    Args:
        img_size: Target size handed to ``T.Resize``.
        augment: Accepted for backward compatibility; currently ignored
            (the augmentation step is disabled).
        rotation: Accepted for backward compatibility; currently ignored
            (the rotation step is disabled).

    Returns:
        A ``T.Compose`` that resizes with bicubic interpolation, converts the
        image to a tensor and normalizes it with mean 0.5 and std 0.5.
    """
    return T.Compose([
        T.Resize(img_size, T.InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(0.5, 0.5),
    ])


# Load model and image transforms.
#
# Presumably how tensor.pt was produced (kept from the original notes):
#   from strhub.models.parseq.system import PARSeq as ModelClass
#   parseq = ModelClass.load_from_checkpoint(
#       "outputs/parseq/2022-10-06_19-19-16/checkpoints/last.ckpt").eval()
#   torch.save(parseq, 'tensor.pt', pickle_protocol=pickle.HIGHEST_PROTOCOL)
#
# NOTE(review): torch.load unpickles the file, and tensor.pt was just fetched
# from a remote URL, so loading it can execute arbitrary code. Only run this
# against a source you trust.
# NOTE(review): recent PyTorch releases default to ``weights_only=True``, which
# rejects a whole pickled module -- confirm against the pinned torch version.
parseq = torch.load('tensor.pt', map_location=torch.device('cpu')).eval()
img_transform = get_transform(parseq.hparams.img_size, augment=True)
def captcha_solver(img):
    """Read the text in an image with the loaded model.

    Args:
        img: PIL image supplied by the Gradio image input, or ``None`` when
            nothing was submitted.

    Returns:
        The decoded label for the image, or an empty string when ``img`` is
        ``None``.
    """
    if img is None:
        # Gradio passes None when the form is submitted without an image.
        return ""

    batch = img_transform(img.convert('RGB')).unsqueeze(0)
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = parseq(batch)

    # Greedy decoding
    pred = logits.softmax(-1)
    labels, _confidence = parseq.tokenizer.decode(pred)
    return labels[0]


# Prefer the ``gr.inputs`` / ``gr.outputs`` namespaces the script was written
# against; fall back to the top-level components on Gradio releases that no
# longer ship those namespaces.
if hasattr(gr, "inputs") and hasattr(gr, "outputs"):
    _image_input = gr.inputs.Image(type="pil")
    _text_output = gr.outputs.Textbox()
else:
    _image_input = gr.Image(type="pil")
    _text_output = gr.Textbox()

demo = gr.Interface(fn=captcha_solver, inputs=_image_input, outputs=_text_output)
demo.launch()