import os
from io import BytesIO
from typing import Tuple

import gradio as gr
import numpy as np
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.nn.functional import relu
from torchvision import transforms


class GlobalAttention(nn.Module):
    """Spatial attention: a 1x1 conv produces a sigmoid mask that rescales the input feature map."""

    def __init__(self, num_channels):
        super().__init__()
        self.attention = nn.Sequential(
            nn.Conv2d(num_channels, 1, kernel_size=1),
            nn.BatchNorm2d(1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        attention_weights = self.attention(x)
        return x * attention_weights


class ModelWithAttention(nn.Module):
    """CNN classifier for single CAPTCHA digits (1x64x64 grayscale crop -> num_characters classes)."""

    def __init__(self, num_characters):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(3, 3), padding='same')
        self.bn1 = nn.BatchNorm2d(64)
        self.pool = nn.MaxPool2d(kernel_size=(2, 2))
        self.conv2 = nn.Conv2d(64, 128, kernel_size=(3, 3), padding='same')
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=(3, 3), padding='same')
        self.bn3 = nn.BatchNorm2d(256)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=(3, 3), padding='same')
        self.bn4 = nn.BatchNorm2d(512)
        self.pool2 = nn.MaxPool2d(kernel_size=(1, 2))
        self.attention = GlobalAttention(512)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(16384, 512)  # 512 channels * 8 * 4 spatial size after the pooling stages
        self.bn5 = nn.BatchNorm1d(512)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 512)
        self.bn6 = nn.BatchNorm1d(512)
        self.dropout2 = nn.Dropout(0.75)
        self.output = nn.Linear(512, num_characters)
        self.sm = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.pool(relu(self.bn1(self.conv1(x))))
        x = self.pool(relu(self.bn2(self.conv2(x))))
        x = self.pool(relu(self.bn3(self.conv3(x))))
        x = self.pool2(relu(self.bn4(self.conv4(x))))
        # x = self.attention(x)  # attention branch kept but disabled
        x = self.flatten(x)
        x = relu(self.bn5(self.fc1(x)))
        x = self.dropout1(x)
        x = relu(self.bn6(self.fc2(x)))
        x = self.dropout2(x)
        x = self.output(x)
        x = self.sm(x)
        return x


device = "cpu"
weights_path = "Captcha(Best).pt"

model = ModelWithAttention(10).to(device)
model.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu')))
model.eval()

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Grayscale(),
    transforms.Resize((64, 64)),
])

# Fixed (top, bottom, left, right) pixel boxes of the five digits in the CAPTCHA image.
DIGIT_BOXES = [
    [15, -5, 15, 27],
    [15, -5, 28, 40],
    [15, -5, 41, 53],
    [15, -5, 53, 65],
    [15, -5, 66, 78],
]


def solve_digits(imgss: np.ndarray) -> str:
    """Crop each digit box, classify it, and concatenate the predicted digits."""
    answer = ""
    with torch.no_grad():
        for top, bottom, left, right in DIGIT_BOXES:
            crop = Image.fromarray(imgss[top:bottom, left:right])
            crop = transform(crop).unsqueeze(0).to(device)
            answer += str(torch.argmax(model(crop)).item())
    return answer


def predict(img: Image.Image = None, link: str = None) -> Tuple[str, np.ndarray]:
    """Solve a CAPTCHA from either an uploaded PIL image or a URL to download it from."""
    if img is not None:
        imgss = np.array(img)
        return solve_digits(imgss), imgss
    if link:
        response = requests.get(str(link))
        if response.status_code == 200:
            imgss = np.array(Image.open(BytesIO(response.content)))
            return solve_digits(imgss), imgss
    return "", None


# Collect the bundled example images for the Gradio demo.
example_dir = "example"
example_paths = [os.path.join(example_dir, name) for name in os.listdir(example_dir)]

title = "GIGA Captcha Solver"
description = "This model can solve Persian-number CAPTCHAs easily."
article = "Created By A.M.Parviz <3"

# Create the Gradio demo
demo = gr.Interface(
    fn=predict,
    inputs=[gr.Image(type="pil"), gr.Text()],
    outputs=[
        gr.Label(num_top_classes=10, label="Predictions"),
        gr.Image(),
    ],
    examples=[[img_path, ""] for img_path in example_paths],
    title=title,
    description=description,
    article=article,
)

demo.launch()  # share=True
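
# Minimal sanity-check sketch (kept commented out since demo.launch() above blocks):
# it calls predict() directly, bypassing the Gradio UI. The path "example/sample.png"
# is hypothetical; substitute any of the files in the "example" directory.
# sample = Image.open("example/sample.png")
# digits, raw = predict(img=sample)
# print("Predicted digits:", digits)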