File size: 2,327 Bytes
adffe70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import gradio as gr

import os
import subprocess


def _run_setup_command(argv):
    """Run one startup command without a shell and report any failure.

    A failure is printed but not raised, so startup continues exactly as it
    did when the exit status was ignored.

    Args:
        argv: The program name followed by its arguments, as a list of strings.

    Returns:
        True if the command started and exited with status 0, otherwise False.
    """
    try:
        completed = subprocess.run(argv, check=False)
    except OSError as exc:
        # Raised when the executable is missing or cannot be started.
        print(f"Setup command {argv[0]!r} could not be started: {exc}")
        return False
    if completed.returncode != 0:
        print(f"Setup command {' '.join(argv)!r} exited with status {completed.returncode}")
        return False
    return True


# Download blobzip.zip and tensor.pt into the working directory, then unpack
# the archive. tensor.pt is the file read by torch.load further down.
_run_setup_command(["curl", "-L", "https://seyarabata.com/64628f9a546dd", "-o", "blobzip.zip"])
_run_setup_command(["curl", "-L", "https://seyarabata.com/646289aad2241", "-o", "tensor.pt"])
_run_setup_command(["unzip", "blobzip.zip"])


import torch, pickle, strhub
from PIL import Image
# Diagnostic output only: the model is loaded with map_location set to the CPU
# further down, regardless of what this reports.
print(f"Is CUDA available: {torch.cuda.is_available()}")


# from strhub.data.module import SceneTextDataModule
# from strhub.models.utils import load_from_checkpoint, parse_model_args

from torchvision import transforms as T
from typing import Tuple

def get_transform(img_size: Tuple[int, ...], augment: bool = False, rotation: int = 0):
    """Build the preprocessing pipeline applied to an image before inference.

    The pipeline resizes the image to ``img_size`` using bicubic
    interpolation, converts it to a tensor, and normalizes it with
    ``T.Normalize(0.5, 0.5)``.

    Args:
        img_size: Target size forwarded unchanged to ``T.Resize``.
        augment: Currently ignored; the augmentation step is disabled.
        rotation: Currently ignored; the rotation step is disabled.

    Returns:
        A ``T.Compose`` that applies the three transforms in order.
    """
    # NOTE: the optional steps below were disabled in the original code and
    # are kept here only as a record of what the two flags used to control:
    #     if augment:  transforms.append(rand_augment_transform())
    #     if rotation: transforms.append(lambda img: img.rotate(rotation, expand=True))
    return T.Compose([
        T.Resize(img_size, T.InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(0.5, 0.5),
    ])


# # Load model and image transforms
# parseq = torch.hub.load('baudm/parseq', 'trba', pretrained=True).eval()
# from strhub.models.crnn.system import CRNN as ModelClass
# from strhub.models.parseq.system import PARSeq as ModelClass
# parseq = ModelClass.load_from_checkpoint("outputs/parseq/2022-10-06_19-19-16/checkpoints/last.ckpt").eval()

# import pickle; torch.save(parseq, 'tensor.pt',pickle_protocol=pickle.HIGHEST_PROTOCOL)
# Load the serialized model object from tensor.pt onto the CPU and switch it
# to eval mode.
# SECURITY NOTE(review): torch.load unpickles the file, and tensor.pt is
# downloaded from a remote URL at startup. Unpickling can execute arbitrary
# code, so this is only safe if that source is trusted; consider verifying a
# checksum before loading.
parseq = torch.load('tensor.pt', map_location=torch.device('cpu')).eval()

# Preprocessing pipeline built from the model's hparams.img_size.
# augment=True is passed here, but get_transform currently ignores that flag.
img_transform = get_transform(parseq.hparams.img_size, augment=True)

# img = Image.open('oscqt.jpeg').convert('RGB')

# img = img_transform(img).unsqueeze(0)
# logits = parseq(img)
# logits.shape

# # # Greedy decoding
# pred = logits.softmax(-1)
# label, confidence = parseq.tokenizer.decode(pred)
# print('Decoded label = {}'.format(label[0]))



# def greet(name):
#     return "Hello " + name + "!!"

# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
# iface.launch()


def captcha_solver(img):
    """Read the text in a CAPTCHA image using the loaded model.

    Args:
        img: A PIL image (the Gradio input is declared with ``type="pil"``),
            or ``None`` when no image was provided.

    Returns:
        The decoded label for the image, or an empty string when ``img`` is
        ``None``.
    """
    if img is None:
        # Nothing to decode, e.g. the form was submitted without an image.
        # Returning early avoids an AttributeError on ``None.convert``.
        return ""

    # Convert to RGB, apply the preprocessing pipeline, and add a batch
    # dimension of size 1.
    batch = img_transform(img.convert('RGB')).unsqueeze(0)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = parseq(batch)

    # Greedy decoding
    pred = logits.softmax(-1)
    labels, _confidence = parseq.tokenizer.decode(pred)
    return labels[0]

# Wire the solver into a simple web UI: one PIL image in, one text box out.
# NOTE(review): the gr.inputs / gr.outputs namespaces are deprecated in newer
# Gradio releases; confirm the pinned Gradio version before upgrading.
image_input = gr.inputs.Image(type="pil")
text_output = gr.outputs.Textbox()
demo = gr.Interface(
    fn=captcha_solver,
    inputs=image_input,
    outputs=text_output,
)
demo.launch()