# Captcha_Time / app.py
# GIGAParviz's picture
# Update app.py
# 7c9877c verified
import gradio as gr
import random
import os
from typing import Tuple , Dict
import time
import torch
from PIL import Image
import numpy as np
from torchvision import transforms
import torch.nn as nn
from torch.nn.functional import relu
import requests
from io import BytesIO
class GlobalAttention(nn.Module):
    """Scale a feature map by a learned single-channel gate.

    The gate is computed by a 1x1 convolution that collapses
    ``num_channels`` channels into one, followed by batch normalisation and
    a sigmoid. The resulting one-channel map is multiplied with the input,
    broadcasting over the channel dimension.
    """

    def __init__(self, num_channels):
        """Build the gating branch for inputs with ``num_channels`` channels."""
        super().__init__()
        # The layer order fixes the state-dict keys (attention.0.*, attention.1.*),
        # so it must not be changed.
        gate_layers = [
            nn.Conv2d(num_channels, 1, kernel_size=1),
            nn.BatchNorm2d(1),
            nn.Sigmoid(),
        ]
        self.attention = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Return ``x`` multiplied element-wise by its sigmoid gate."""
        return x * self.attention(x)
class ModelWithAttention(nn.Module):
    """Convolutional classifier for a single-channel character crop.

    Four conv/batch-norm/ReLU stages (64, 128, 256, 512 filters) are each
    followed by max pooling, then two fully connected layers with dropout,
    and a final linear layer with softmax over ``num_characters`` classes.

    ``fc1`` expects 16384 flattened features, i.e. 512 channels x 8 x 4.
    That is what a 64x64 input yields after three 2x2 pools and one 1x2
    pool, so other input sizes will fail at ``fc1``.
    """

    def __init__(self, num_characters):
        """Create all layers; ``num_characters`` is the number of output classes."""
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(3, 3), padding='same')
        self.bn1 = nn.BatchNorm2d(64)
        # Shared 2x2 pooling used after the first three conv stages.
        self.pool = nn.MaxPool2d(kernel_size=(2, 2))
        self.conv2 = nn.Conv2d(64, 128, kernel_size=(3, 3), padding='same')
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=(3, 3), padding='same')
        self.bn3 = nn.BatchNorm2d(256)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=(3, 3), padding='same')
        self.bn4 = nn.BatchNorm2d(512)
        # A (1, 2) kernel halves only the width after the last conv stage.
        self.pool2 = nn.MaxPool2d(kernel_size=(1, 2))
        # Not used in forward(), but it still registers parameters. It is kept
        # because removing it would change the state-dict keys.
        # NOTE(review): presumably the saved checkpoint contains these keys,
        # so dropping the module would break a strict load_state_dict - confirm.
        self.attention = GlobalAttention(512)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(16384, 512)
        self.bn5 = nn.BatchNorm1d(512)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 512)
        self.bn6 = nn.BatchNorm1d(512)
        self.dropout2 = nn.Dropout(0.75)
        self.output = nn.Linear(512, num_characters)
        # An explicit dim avoids the deprecated implicit-dimension behaviour
        # (and its warning on every call). For the 2-D (batch, classes) tensor
        # produced by self.output, dim=1 is the axis the implicit rule picked.
        self.sm = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities of shape ``(batch, num_characters)``.

        ``x`` must have one channel (``conv1`` takes a single input channel).
        """
        x = self.pool(relu(self.bn1(self.conv1(x))))
        x = self.pool(relu(self.bn2(self.conv2(x))))
        x = self.pool(relu(self.bn3(self.conv3(x))))
        x = self.pool2(relu(self.bn4(self.conv4(x))))
        # NOTE: self.attention is deliberately not applied here; the call was
        # already disabled in the original code.
        x = self.flatten(x)
        x = self.dropout1(relu(self.bn5(self.fc1(x))))
        x = self.dropout2(relu(self.bn6(self.fc2(x))))
        return self.sm(self.output(x))
# Inference runs on the CPU; the checkpoint is remapped to CPU when loaded.
device = "cpu"
path = "Captcha(Best).pt"
from torchvision import transforms

# Preprocessing applied to every digit crop: convert to a tensor, reduce to
# one channel, and resize to the 64x64 input the classifier is fed.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Grayscale(),
        transforms.Resize((64, 64)),
    ]
)

# Ten output classes; predict() uses the winning class index as the digit.
model = ModelWithAttention(10).to(device)
model.load_state_dict(
    torch.load(path, map_location=torch.device('cpu'))
)
# Crop boxes for the five digit positions, as
# (row_start, row_stop, col_start, col_stop) slices into the captcha array.
_DIGIT_BOXES = (
    (15, -5, 15, 27),
    (15, -5, 28, 40),
    (15, -5, 41, 53),
    (15, -5, 53, 65),
    (15, -5, 66, 78),
)

# Seconds to wait for a remote image before giving up.
_DOWNLOAD_TIMEOUT = 10


def _as_pixel_array(image) -> np.ndarray:
    """Return *image* as a numpy array the preprocessing pipeline can handle."""
    # The grayscale transform only accepts 1- or 3-channel input, so modes
    # such as RGBA or palette images are converted first. L and RGB images
    # are left untouched.
    if isinstance(image, Image.Image) and image.mode not in ("L", "RGB"):
        image = image.convert("RGB")
    return np.array(image)


def _read_digits(pixels: np.ndarray) -> str:
    """Classify each digit crop of *pixels* and return the digits as one string."""
    model.eval()
    digits = []
    # Gradients are never needed for inference.
    with torch.no_grad():
        for top, bottom, left, right in _DIGIT_BOXES:
            crop = Image.fromarray(pixels[top:bottom, left:right])
            batch = transform(crop).unsqueeze(0).to(device)
            digits.append(str(torch.argmax(model(batch)).item()))
    return "".join(digits)


def predict(img=None, link: str = None) -> Tuple[str, np.ndarray]:
    """Solve a five-digit captcha given as an image or as an image URL.

    Args:
        img: The captcha image (a PIL image or array-like). Takes precedence
            over ``link`` when both are given.
        link: URL of a captcha image. Ignored when empty or when ``img`` is
            provided.

    Returns:
        A tuple ``(answer, pixels)``: the predicted digits as a string and
        the captcha as a numpy array.

    Raises:
        ValueError: If neither an image nor a non-empty link is supplied.
        requests.HTTPError: If the download returns an error status.

    The crop boxes are fixed pixel coordinates reaching column 78, so the
    captcha is assumed to be at least that wide - TODO confirm against the
    data the model was trained on.
    """
    if img is not None:
        pixels = _as_pixel_array(img)
        return _read_digits(pixels), pixels

    # The UI textbox yields "" when left blank, so test for emptiness rather
    # than for None.
    url = str(link).strip() if link is not None else ""
    if url:
        # NOTE(security): the URL is user-supplied and is fetched server-side.
        # Restrict allowed hosts if this app is deployed where that matters.
        response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT)
        response.raise_for_status()
        pixels = _as_pixel_array(Image.open(BytesIO(response.content)))
        return _read_digits(pixels), pixels

    raise ValueError("Provide either a captcha image or a link to one.")
from pathlib import Path

# Collect the bundled example images shown beneath the interface.
# NOTE: this rebinds the module-level `path`, which earlier held the
# checkpoint filename.
path = "example"
list_paths = os.listdir(path)
list_path = [os.path.join(path, entry) for entry in list_paths]

import gradio as gr

title = "GIGA Captcha Solver"
description = "This Model can solve persian numbers Captcha easly"
article = "Created By A.M.Parviz <3"

# Build the Gradio UI: an image upload and a text box (image URL) feed
# predict(); its two return values go to the label and the image output.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil"),
        gr.Text(),
    ],
    outputs=[
        gr.Label(num_top_classes=10, label="Predictions"),
        gr.Image(),
    ],
    # Each example row fills the image input and leaves the link empty.
    examples=[[img_path, ""] for img_path in list_path],
    title=title,
    description=description,
    article=article,
)

# Pass share=True to launch() to expose a public link.
demo.launch()