# Source: Hugging Face Spaces - GIGAParviz / Captcha_Time (Space status: Sleeping)
# File size: 4,595 Bytes

import gradio as gr
import random
import os
from typing import Tuple , Dict
import time 
import torch
from PIL import Image
import numpy as np
from torchvision import transforms  
import torch.nn as nn
from torch.nn.functional import relu
import requests
from io import BytesIO


class GlobalAttention(nn.Module):
    """Gate a feature map with a learned single-channel mask.

    A 1x1 convolution reduces the ``num_channels`` input channels to one
    channel, which is batch-normalised and squashed by a sigmoid. The input
    is then multiplied by that mask, broadcast across its channels.
    """

    def __init__(self, num_channels):
        super().__init__()
        squeeze = nn.Conv2d(num_channels, 1, kernel_size=1)
        normalise = nn.BatchNorm2d(1)
        gate = nn.Sigmoid()
        # Layers stay positional so parameter names remain
        # ``attention.0.*`` and ``attention.1.*``.
        self.attention = nn.Sequential(squeeze, normalise, gate)

    def forward(self, x):
        """Return ``x`` multiplied element-wise by its attention mask."""
        mask = self.attention(x)
        return x * mask


class ModelWithAttention(nn.Module):
    """CNN classifier for one cropped, single-channel captcha character.

    Four conv / batch-norm / ReLU / max-pool stages are followed by two
    fully connected layers and a final linear layer with ``num_characters``
    outputs, turned into probabilities by a softmax.

    ``fc1`` takes 16384 flattened features, i.e. 512 channels x 8 x 4, which
    is what a ``(N, 1, 64, 64)`` input yields after three 2x2 pools and one
    (1, 2) pool.

    Args:
        num_characters: Number of output classes.
    """

    def __init__(self, num_characters):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(3, 3), padding='same')
        self.bn1 = nn.BatchNorm2d(64)
        # Shared by the first three stages; halves height and width.
        self.pool = nn.MaxPool2d(kernel_size=(2, 2))

        self.conv2 = nn.Conv2d(64, 128, kernel_size=(3, 3), padding='same')
        self.bn2 = nn.BatchNorm2d(128)

        self.conv3 = nn.Conv2d(128, 256, kernel_size=(3, 3), padding='same')
        self.bn3 = nn.BatchNorm2d(256)

        self.conv4 = nn.Conv2d(256, 512, kernel_size=(3, 3), padding='same')
        self.bn4 = nn.BatchNorm2d(512)
        # Last stage only halves the width.
        self.pool2 = nn.MaxPool2d(kernel_size=(1, 2))

        # Instantiated but not applied in forward().
        # NOTE(review): presumably kept so the state_dict keys match the
        # saved checkpoint - confirm before removing.
        self.attention = GlobalAttention(512)

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(16384, 512)
        self.bn5 = nn.BatchNorm1d(512)
        self.dropout1 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(512, 512)
        self.bn6 = nn.BatchNorm1d(512)
        self.dropout2 = nn.Dropout(0.75)

        self.output = nn.Linear(512, num_characters)

        # dim=1 is the class axis of the (N, num_characters) logits. It is
        # the axis the implicit form picked for 2-D input, stated explicitly
        # to avoid the implicit-dimension deprecation warning.
        self.sm = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class probabilities of shape ``(N, num_characters)``.

        Args:
            x: Input batch; must flatten to 16384 features after the
                convolutional stages (e.g. ``(N, 1, 64, 64)``).
        """
        x = self.pool(relu(self.bn1(self.conv1(x))))
        x = self.pool(relu(self.bn2(self.conv2(x))))
        x = self.pool(relu(self.bn3(self.conv3(x))))
        x = self.pool2(relu(self.bn4(self.conv4(x))))

        x = self.flatten(x)
        x = relu(self.bn5(self.fc1(x)))
        x = self.dropout1(x)

        x = relu(self.bn6(self.fc2(x)))
        x = self.dropout2(x)

        x = self.output(x)
        x = self.sm(x)
        return x



# All inference in this script runs on this device.
device = "cpu"
# Checkpoint file holding the trained weights (loaded as a state_dict).
path = "Captcha(Best).pt"


from torchvision import transforms
# Ten output classes; predict() uses the arg-max index as the digit.
model = ModelWithAttention(10).to(device)
# Map the checkpoint onto the configured device instead of a hard-coded one.
model.load_state_dict(torch.load(path, map_location=device))
# The model is only used for inference. Eval mode disables dropout and makes
# batch-norm use its running statistics (BatchNorm1d cannot normalise a
# single-sample batch in training mode).
model.eval()


# Preprocessing applied to every character crop: convert to a tensor,
# reduce to one channel, and resize to 64x64.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Grayscale(),
    transforms.Resize((64, 64)),
])


# Crop boxes as (row_start, row_stop, col_start, col_stop); one box per
# character slot of the captcha image.
_CHAR_BOXES = (
    (15, -5, 15, 27),
    (15, -5, 28, 40),
    (15, -5, 41, 53),
    (15, -5, 53, 65),
    (15, -5, 66, 78),
)

# Seconds to wait on the remote server before giving up on a download.
_DOWNLOAD_TIMEOUT = 10


def _read_captcha(pixels: np.ndarray) -> str:
    """Classify each character crop of ``pixels`` and join the predictions."""
    model.eval()
    digits = []
    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        for top, bottom, left, right in _CHAR_BOXES:
            crop = Image.fromarray(pixels[top:bottom, left:right])
            batch = transform(crop).unsqueeze(0).to(device)
            digits.append(str(torch.argmax(model(batch)).item()))
    return "".join(digits)


def predict(img=None, link: str = None) -> Tuple[str, np.ndarray]:
    """Solve a captcha given either an image or a URL pointing to one.

    When both are supplied the image takes precedence and the link is
    ignored.

    Args:
        img: Captcha image (anything ``np.array`` can convert, e.g. a PIL
            image), or ``None``.
        link: URL of a captcha image, used only when ``img`` is ``None``.
            ``None``, empty and whitespace-only values count as "no link".

    Returns:
        A ``(answer, pixels)`` tuple: the predicted characters as a string
        and the full captcha image as a NumPy array.

    Raises:
        ValueError: If neither an image nor a non-empty link is given.
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    if img is not None:
        pixels = np.array(img)
        return _read_captcha(pixels), pixels

    url = "" if link is None else str(link).strip()
    if not url:
        # Previously an empty link was sent straight to requests.get().
        raise ValueError("Provide a captcha image or a link to one.")

    # NOTE: the URL is user-supplied and fetched server-side; restrict the
    # allowed schemes/hosts if this endpoint is exposed to untrusted users.
    response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT)
    # Fail loudly instead of silently returning None on a bad status.
    response.raise_for_status()
    pixels = np.array(Image.open(BytesIO(response.content)))
    return _read_captcha(pixels), pixels



from pathlib import Path

# Directory holding the sample captcha images.
path = "example"

# os.listdir() returns entries in arbitrary order; sort them so the sample
# list is identical on every start.
list_paths = sorted(os.listdir(path))
# Each entry joined with the directory, ready to be opened as a file path.
list_path = [os.path.join(path, entry) for entry in list_paths]



import gradio as gr

# Text shown on the demo page.
title = "GIGA Captcha Solver"
description = "This Model can solve persian numbers Captcha easly"
article = "Created By A.M.Parviz <3"

# Inputs map positionally onto predict(img, link).
input_components = [gr.Image(type="pil"), gr.Text()]
# Outputs map positionally onto predict's (answer, image) return value.
output_components = [
    gr.Label(num_top_classes=10, label="Predictions"),
    gr.Image(),
]
# One example row per sample image; the link field is left empty.
example_rows = [[img_path, ""] for img_path in list_path]

# Build the Gradio demo around predict().
demo = gr.Interface(
    fn=predict,
    inputs=input_components,
    outputs=output_components,
    examples=example_rows,
    title=title,
    description=description,
    article=article,
)
# Launched with default settings; share=True is not passed.
demo.launch()