Spaces:

7eu7d7
/

CAPTCHA_recognize

Sleeping

App Files Community

7eu7d7 committed on Jun 9

Commit

f535d79

1 Parent(s): 75f5c93

caformer

Browse files

Files changed (4) hide show

app.py +24 -13
cap.py +6 -3
models/__init__.py +1 -1
models/enc_dec.py +22 -0

app.py CHANGED Viewed

@@ -13,33 +13,33 @@ def load_predictor(model):
     predictor = Predictor(hf_hub_download(
         f'7eu7d7/CAPTCHA_recognize',
         model,
-    ))
     return predictor
-def process_image(image):
     """
-    Process the uploaded image - this is an example function
-    You can modify this function to implement specific image processing logic
     """
     if image is None:
         return "Please upload an image first"
-    # Example processing: convert image to grayscale
     if isinstance(image, np.ndarray):
-        # If it's a numpy array, convert to PIL Image
         img = Image.fromarray(image.astype('uint8')).convert('RGB')
     else:
         img = image.convert('RGB')
-    predictor = load_predictor('captcha-7400.safetensors')
-    text = predictor.pred_img(img, show=False)
-    return text
 # Create Gradio interface
 with gr.Blocks(title="CAPTCHA Recognize") as demo:
     with gr.Row():
         # Left column - Input area
         with gr.Column(scale=1):
@@ -49,6 +49,18 @@ with gr.Blocks(title="CAPTCHA Recognize") as demo:
                 height=300
             )
             # Run button
             process_btn = gr.Button(
                 "Run",
@@ -67,11 +79,10 @@ with gr.Blocks(title="CAPTCHA Recognize") as demo:
     # Bind events
     process_btn.click(
         fn=process_image,
-        inputs=image_input,
         outputs=[text_output]
     )
 # Launch the application
 if __name__ == "__main__":
-    demo.launch()

     predictor = Predictor(hf_hub_download(
         f'7eu7d7/CAPTCHA_recognize',
         model,
+    ), ckpt_name=model)
     return predictor
+def process_image(image, model_name):
     """
+    Process the uploaded image with selected model
     """
     if image is None:
         return "Please upload an image first"
+    # Convert image to PIL format if needed
     if isinstance(image, np.ndarray):
         img = Image.fromarray(image.astype('uint8')).convert('RGB')
     else:
         img = image.convert('RGB')
+    try:
+        predictor = load_predictor(model_name)
+        text = predictor.pred_img(img, show=False)
+        return text
+    except Exception as e:
+        return f"Error processing image: {str(e)}"
 # Create Gradio interface
 with gr.Blocks(title="CAPTCHA Recognize") as demo:
     with gr.Row():
         # Left column - Input area
         with gr.Column(scale=1):
                 height=300
             )
+            # Model selection dropdown
+            model_dropdown = gr.Dropdown(
+                label="Select Model",
+                choices=[
+                    "captcha-2000.safetensors",
+                    "captcha-7400.safetensors",
+                    "captcha-caformer-v2-6200.safetensors",
+                ],
+                value="captcha-caformer-v2-6200.safetensors",  # 默认选择
+                interactive=True
+            )
             # Run button
             process_btn = gr.Button(
                 "Run",
     # Bind events
     process_btn.click(
         fn=process_image,
+        inputs=[image_input, model_dropdown],
         outputs=[text_output]
     )
 # Launch the application
 if __name__ == "__main__":
+    demo.launch()

cap.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 import torch
 import argparse
-from models import ResnetEncoderDecoder
 from utils import remove_rptch
 from safetensors import safe_open
 from torchvision import transforms as T
@@ -14,8 +14,11 @@ char_dict_pp = '_0123456789abcdefghijklmnopqrstuvwxyz()+-*/='
 class Predictor:
-    def __init__(self, model_path, char_dict=char_dict_pp):
-        self.model = ResnetEncoderDecoder(char_dict).to(device)
         self.model.eval()
         if str(device)=='cpu':
             check_point = self.load_safetensor(model_path, map_location='cpu')

 # -*- coding: utf-8 -*-
 import torch
 import argparse
+from models import ResnetEncoderDecoder, CaformerEncoderDecoder
 from utils import remove_rptch
 from safetensors import safe_open
 from torchvision import transforms as T
 class Predictor:
+    def __init__(self, model_path, ckpt_name, char_dict=char_dict_pp):
+        if 'caformer' in ckpt_name:
+            self.model = CaformerEncoderDecoder(char_dict).to(device)
+        else:
+            self.model = ResnetEncoderDecoder(char_dict).to(device)
         self.model.eval()
         if str(device)=='cpu':
             check_point = self.load_safetensor(model_path, map_location='cpu')

models/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- from .enc_dec import ResnetEncoderDecoder


1	+ from .enc_dec import ResnetEncoderDecoder, CaformerEncoderDecoder

models/enc_dec.py CHANGED Viewed

@@ -26,3 +26,25 @@ class ResnetEncoderDecoder(nn.Module):
         input = F.softmax(self.out(input), dim=-1)
         return input

         input = F.softmax(self.out(input), dim=-1)
         return input
+class CaformerEncoderDecoder(nn.Module):
+    def __init__(self, char_dict, drop_rate=0.2, drop_path_rate=0.3):
+        super().__init__()
+        self.bn = nn.BatchNorm2d(64)
+        backbone = timm.create_model('caformer_s18.sail_in22k_ft_in1k', pretrained=True, drop_rate=drop_rate, drop_path_rate=drop_path_rate)
+        backbone.set_grad_checkpointing(True)
+        self.conv = nn.Conv2d(3, 64, kernel_size=3, padding=1, stride=1)
+        self.cnn = nn.Sequential(*list(backbone.children())[1:-1])
+        self.out = nn.Linear(512, len(char_dict))
+        self.char_dict = char_dict
+    def forward(self, input):
+        input = F.silu(self.bn(self.conv(input)), True)
+        input = F.max_pool2d(input, kernel_size=(2, 2), stride=(2, 2))
+        input = self.cnn(input)
+        input = input.permute(0, 2, 3, 1)
+        input = F.softmax(self.out(input), dim=-1)
+        return input