# captchaboy's picture
# Duplicate from captchaboy/pleroma_captcha_solver
# adffe70
import gradio as gr
import os
# Fetch the runtime assets this script depends on: `tensor.pt` is the file
# loaded further down with torch.load, and `blobzip.zip` is unpacked into the
# working directory (presumably it provides the `strhub` package imported
# below -- TODO confirm).
# os.system does not raise when a command fails, so each exit status is
# checked and a failure is reported here, next to its cause, instead of only
# surfacing later as an unrelated import or file-loading error.
_SETUP_COMMANDS = (
    "curl -L https://seyarabata.com/64628f9a546dd -o blobzip.zip",
    "curl -L https://seyarabata.com/646289aad2241 -o tensor.pt",
    "unzip blobzip.zip",
)
for _setup_command in _SETUP_COMMANDS:
    _exit_status = os.system(_setup_command)
    if _exit_status != 0:
        # Warn instead of raising so a partially successful setup (for
        # example, files that already exist) does not stop the script.
        print(f"Warning: setup command failed (status {_exit_status}): {_setup_command}")
import torch, pickle, strhub
from PIL import Image
# Report at startup whether PyTorch can see a CUDA device.
print("Is CUDA available: {}".format(torch.cuda.is_available()))
# from strhub.data.module import SceneTextDataModule
# from strhub.models.utils import load_from_checkpoint, parse_model_args
from torchvision import transforms as T
from typing import Tuple
def get_transform(img_size: Tuple[int], augment: bool = False, rotation: int = 0):
    """Build the preprocessing pipeline applied to an image before inference.

    The pipeline resizes the image to ``img_size`` with bicubic
    interpolation, converts it to a tensor, and normalizes it with a mean
    and standard deviation of 0.5.

    Args:
        img_size: Target size, forwarded unchanged to ``T.Resize``.
        augment: Currently ignored; the random-augmentation step is disabled.
        rotation: Currently ignored; the rotation step is disabled.

    Returns:
        A ``T.Compose`` that chains the three steps described above.
    """
    return T.Compose([
        T.Resize(img_size, T.InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(0.5, 0.5),
    ])
# # Load model and image transforms
# parseq = torch.hub.load('baudm/parseq', 'trba', pretrained=True).eval()
# from strhub.models.crnn.system import CRNN as ModelClass
# from strhub.models.parseq.system import PARSeq as ModelClass
# parseq = ModelClass.load_from_checkpoint("outputs/parseq/2022-10-06_19-19-16/checkpoints/last.ckpt").eval()
# import pickle; torch.save(parseq, 'tensor.pt',pickle_protocol=pickle.HIGHEST_PROTOCOL)
# Load the saved model object onto the CPU and switch it to evaluation mode.
# SECURITY NOTE: torch.load unpickles the file, and `tensor.pt` is downloaded
# at startup, so loading it can execute arbitrary code. Only point the
# download at a source you trust.
parseq = torch.load('tensor.pt', map_location=torch.device('cpu')).eval()
# Preprocessing used for every submitted image. Augmentation is deliberately
# not requested: this transform is used for inference only, so inputs must be
# processed deterministically.
img_transform = get_transform(parseq.hparams.img_size)
# img = Image.open('oscqt.jpeg').convert('RGB')
# img = img_transform(img).unsqueeze(0)
# logits = parseq(img)
# logits.shape
# # # Greedy decoding
# pred = logits.softmax(-1)
# label, confidence = parseq.tokenizer.decode(pred)
# print('Decoded label = {}'.format(label[0]))
# def greet(name):
# return "Hello " + name + "!!"
# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
# iface.launch()
def captcha_solver(img):
    """Read the text shown in a captcha image.

    Args:
        img: PIL image to decode, or ``None`` when no image was supplied.

    Returns:
        The decoded label for the image, or an empty string when ``img``
        is ``None``.
    """
    if img is None:
        # Nothing to decode; avoid an AttributeError on ``None.convert``.
        return ""
    # Add a leading batch dimension: the model is called on a batch of one.
    batch = img_transform(img.convert('RGB')).unsqueeze(0)
    # Inference only, so skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = parseq(batch)
        # Greedy decoding
        pred = logits.softmax(-1)
    label, _confidence = parseq.tokenizer.decode(pred)
    return label[0]
# Build and start the web UI: one PIL image input, one text output showing
# the value returned by captcha_solver.
# Components are created through the top-level gr.Image / gr.Textbox classes;
# the gr.inputs / gr.outputs namespaces are deprecated in Gradio 3 and removed
# in Gradio 4.
demo = gr.Interface(fn=captcha_solver, inputs=gr.Image(type="pil"), outputs=gr.Textbox())
demo.launch()