|
--- |
|
license: apache-2.0 |
|
language: |
|
- en |
|
metrics: |
|
- accuracy |
|
library_name: adapter-transformers |
|
pipeline_tag: image-to-text |
|
--- |
|
# Model Card for Pixelated Captcha Digit Detection |
|
|
|
## Model Details |
|
|
|
- **License:** Apache-2.0 |
|
- **Developed by:** Saidi Souhaieb |
|
- **Finetuned from model:** YOLOv8 |
|
|
|
## Uses |
|
|
|
This model is designed to detect the digits in pixelated captcha images: it draws a bounding box around each digit and extracts the coordinates of every detection. |
|
|
|
## How to Get Started with the Model |
|
|
|
```python |
|
import torch |
|
import torch.nn as nn |
|
import torch.optim as optim |
|
from torch.utils.data import DataLoader |
|
import torchvision.transforms as transforms |
|
from torchvision.datasets import ImageFolder |
|
from tqdm import tqdm |
|
from PIL import Image |
|
import torch.nn.functional as F |
|
import os |
|
|
|
class CNN(nn.Module):
    """Small convolutional classifier for 32x32 RGB images.

    Three stages of 3x3 convolution + ReLU + 2x2 max-pooling
    (3 -> 16 -> 32 -> 64 channels) halve the spatial size three times, so a
    32x32 input reaches the classifier head as a 64 x 4 x 4 feature map.
    Two fully connected layers then produce one logit per class.

    The layer attribute names (``conv1`` ... ``fc2``) are the keys used by
    ``state_dict`` / ``load_state_dict``, so they must not be renamed if
    existing checkpoints are to keep loading.

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # 64 channels x 4 x 4 spatial positions: only valid for 32x32 inputs.
        self.fc1 = nn.Linear(64 * 4 * 4, 500)
        self.fc2 = nn.Linear(500, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Tensor of shape ``(batch, 3, 32, 32)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding raw logits.

        Raises:
            RuntimeError: If the input's spatial size is not 32x32, because
                the flattened features no longer match ``fc1``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten per sample (keep the batch dimension fixed). Reshaping with
        # view(-1, 64 * 4 * 4) would silently turn a wrongly sized image into
        # several bogus batch entries instead of raising an error.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)
|
|
|
# Preprocessing applied to every image before inference: resize to 32x32
# (the input size the CNN's first fully connected layer is dimensioned for),
# convert to a tensor, then normalize each RGB channel with mean 0.5 and
# std 0.5.
# NOTE(review): presumably this mirrors the preprocessing used in training;
# confirm before changing it.
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Adjust the size accordingly
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

model = CNN()
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; the freshly built model lives on the CPU anyway.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
model.load_state_dict(torch.load('models/99acc_model.pth', map_location="cpu"))
# The model is used for inference only, so switch it to evaluation mode.
model.eval()
|
|
|
def predict_number(folder_path):
    """Predict the class of every image found in ``folder_path``.

    Each directory entry is opened as an RGB image, preprocessed with the
    module-level ``transform``, and passed through the module-level
    ``model``. The index of the largest logit is taken as the prediction
    and is printed together with the file name.

    Args:
        folder_path: Directory whose entries are the images to classify.
            Every entry is opened as an image.

    Returns:
        A list with one predicted class index (int) per file, in sorted
        file-name order.
    """
    predict_numbers = []
    # os.listdir returns entries in arbitrary order; sort so the result
    # order is reproducible across runs and platforms.
    for file in sorted(os.listdir(folder_path)):
        # Open the file from the folder that was actually listed (the path
        # used to be hard-coded to "temp/", ignoring folder_path).
        image_path = os.path.join(folder_path, file)
        # The context manager closes the file handle; convert() returns an
        # independent in-memory copy that stays usable afterwards.
        with Image.open(image_path) as raw_image:
            input_image = raw_image.convert('RGB')

        # Preprocess and add a batch dimension.
        input_tensor = transform(input_image)
        input_batch = input_tensor.unsqueeze(0)

        # Perform inference without tracking gradients.
        with torch.no_grad():
            output = model(input_batch)

        # The predicted class is the index of the largest logit.
        _, predicted = torch.max(output, 1)
        label = predicted.item()

        print("Predicted class label:", label, "file", file)
        predict_numbers.append(label)

    return predict_numbers
|
|
|
``` |
|
|
|
## Training Details |
|
|
|
### Training Data |
|
|
|
[Pixel Digit Captcha Data](https://huggingface.co/datasets/Softy-lines/Pixel-Digit-Captcha-Data) |
|
|
|
## Model Card Authors |
|
|
|
[Saidi Souhaieb] |