HarmonAICaptchaRecognition

Runtime error

App Files Files Community

docparser committed on Jul 19, 2023

Commit

1603bbc

•

1 Parent(s): 00c314d

Upload 8 files

Browse files

Files changed (8) hide show

00bAQwhAZU.jpg +0 -0
11JW29.png +0 -0
2a8486.jpg +0 -0
2nbcx.png +0 -0
8000.png +0 -0
app.py +75 -0
requirements.txt +5 -0
tokenizer_base.py +132 -0

00bAQwhAZU.jpg ADDED Viewed

11JW29.png ADDED Viewed

2a8486.jpg ADDED Viewed

2nbcx.png ADDED Viewed

8000.png ADDED Viewed

app.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import torch
+import onnx
+import onnxruntime as rt
+from torchvision import transforms as T
+from PIL import Image
+from tokenizer_base import Tokenizer
+import pathlib
+import os
+import gradio as gr
+from huggingface_hub import Repository
# Resolve every path relative to this file rather than the process working
# directory, so the clone target and the model path below always agree even
# when the app is started from another directory.
cwd = pathlib.Path(__file__).parent.resolve()
models_dir = os.path.join(cwd, "secret_models")

# Clone (or reuse) the model repository and bring it up to date.
# NOTE(review): `token=True` presumably makes huggingface_hub use the locally
# stored / environment-provided access token -- confirm for the pinned version.
repo = Repository(
    local_dir=models_dir,
    repo_type="model",
    clone_from="docparser/captcha",
    token=True,
)
repo.git_pull()

model_file = os.path.join(models_dir, "captcha.onnx")

# Size handed to `T.Resize` in `get_transform`.
img_size = (32, 128)

# NOTE(review): this is a *raw* string, so `\"` and `\\` keep their
# backslashes; the charset therefore contains three backslash characters
# (two more than a plain 94-character printable-ASCII set would). It is left
# byte-identical because token indices must match the exported model --
# confirm against the training configuration before "fixing" it.
charset = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
tokenizer_base = Tokenizer(charset)
def get_transform(img_size):
    """Build the preprocessing pipeline applied to every input image.

    Args:
        img_size: Target size passed straight to ``T.Resize``.

    Returns:
        A ``T.Compose`` that resizes with bicubic interpolation, converts the
        image to a tensor, and normalizes it with mean 0.5 and std 0.5.
    """
    resize = T.Resize(img_size, T.InterpolationMode.BICUBIC)
    to_tensor = T.ToTensor()
    normalize = T.Normalize(0.5, 0.5)
    return T.Compose([resize, to_tensor, normalize])
def to_numpy(tensor):
    """Return ``tensor`` as a NumPy array located on the CPU.

    Tensors that track gradients are detached first, since ``.numpy()``
    cannot be called on a tensor that requires grad.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()
def initialize_model(model_file):
    """Validate the ONNX model file and open an inference session for it.

    Args:
        model_file: Path to the ``.onnx`` model.

    Returns:
        A ``(transform, ort_session)`` pair: the image preprocessing pipeline
        built from the module-level ``img_size`` and the ONNX Runtime
        ``InferenceSession`` for ``model_file``.
    """
    # Load and structurally check the graph before creating the session, so a
    # broken export is reported by the ONNX checker.
    loaded_graph = onnx.load(model_file)
    onnx.checker.check_model(loaded_graph)
    session = rt.InferenceSession(model_file)

    preprocess = get_transform(img_size)
    return preprocess, session
def get_text(img_org):
    """Recognize the text shown in a captcha image.

    Args:
        img_org: A PIL image, or ``None`` when no image was supplied
            (e.g. the form was submitted empty).

    Returns:
        The decoded text for the image, or an empty string when
        ``img_org`` is ``None``.
    """
    # Without this guard an empty submission fails with AttributeError on
    # `None.convert`.
    if img_org is None:
        return ""

    # Preprocess. Model expects a batch of images with shape: (B, C, H, W)
    batch = transform(img_org.convert('RGB')).unsqueeze(0)

    # Compute the ONNX Runtime output prediction.
    input_name = ort_session.get_inputs()[0].name
    logits = ort_session.run(None, {input_name: to_numpy(batch)})[0]

    # The tokenizer decodes softmax probabilities, not raw logits.
    token_dists = torch.tensor(logits).softmax(-1)
    labels, _ = tokenizer_base.decode(token_dists)

    text = labels[0]  # single-image batch
    print(text)
    return text
# Build the preprocessing pipeline and the inference session once at start-up;
# `get_text` reads both as module-level globals.
transform, ort_session = initialize_model(model_file=model_file)

# `gr.Textbox()` replaces the legacy `gr.outputs.Textbox()`: the `gr.outputs`
# namespace is gone in newer Gradio releases, and the input side already uses
# the top-level component API (`gr.Image`).
gr.Interface(
    get_text,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    title="Text Captcha Reader",
    examples=["8000.png", "11JW29.png", "2a8486.jpg", "2nbcx.png"],
).launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+torch==1.11.0
+torchvision==0.12.0
+onnx==1.14.0
+onnxruntime==1.15.1
+Pillow==10.0.0

tokenizer_base.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import re
+from abc import ABC, abstractmethod
+from itertools import groupby
+from typing import List, Optional, Tuple
+import torch
+from torch import Tensor
+from torch.nn.utils.rnn import pad_sequence
class CharsetAdapter:
    """Case-folds labels so they match the casing of the target charset.

    If the charset is unchanged by ``str.lower()`` labels are lower-cased;
    otherwise, if it is unchanged by ``str.upper()`` labels are upper-cased;
    otherwise labels pass through untouched.
    """

    def __init__(self, target_charset) -> None:
        super().__init__()
        self.charset = target_charset
        # A charset without cased characters satisfies both checks; the
        # lowercase branch is tested first in __call__.
        self.lowercase_only = target_charset.lower() == target_charset
        self.uppercase_only = target_charset.upper() == target_charset

    def __call__(self, label):
        """Return ``label`` with the casing implied by the charset."""
        if self.lowercase_only:
            return label.lower()
        if self.uppercase_only:
            return label.upper()
        return label
class BaseTokenizer(ABC):
    """Shared vocabulary handling and greedy decoding for tokenizers.

    Subclasses provide ``encode`` and ``_filter``.
    """

    def __init__(self, charset: str, specials_first: tuple = (), specials_last: tuple = ()) -> None:
        # NOTE(review): `tuple(charset + '[UNK]')` splits '[UNK]' into the five
        # single characters '[', 'U', 'N', 'K', ']' instead of adding one
        # '[UNK]' token. Characters that also occur in `charset` then appear
        # twice in `_itos`, and `_stoi` keeps the LAST index for them. Kept
        # as-is because token indices may be tied to an already exported
        # model -- confirm before changing.
        vocabulary = tuple(charset + '[UNK]')
        self._itos = specials_first + vocabulary + specials_last
        self._stoi = dict(zip(self._itos, range(len(self._itos))))

    def __len__(self):
        """Number of entries in the id-to-token table."""
        return len(self._itos)

    def _tok2ids(self, tokens: str) -> List[int]:
        """Map each character of ``tokens`` to its id (KeyError if unknown)."""
        lookup = self._stoi
        return [lookup[ch] for ch in tokens]

    def _ids2tok(self, token_ids: List[int], join: bool = True) -> str:
        """Map ids back to tokens; returns a string if ``join`` else a list."""
        table = self._itos
        pieces = [table[idx] for idx in token_ids]
        if join:
            return ''.join(pieces)
        return pieces

    @abstractmethod
    def encode(self, labels: List[str], device: Optional[torch.device] = None) -> Tensor:
        """Encode a batch of labels to a representation suitable for the model.

        Args:
            labels: List of labels. Each can be of arbitrary length.
            device: Create tensor on this device.

        Returns:
            Batched tensor representation padded to the max label length. Shape: N, L
        """
        raise NotImplementedError

    @abstractmethod
    def _filter(self, probs: Tensor, ids: Tensor) -> Tuple[Tensor, List[int]]:
        """Internal method which performs the necessary filtering prior to decoding."""
        raise NotImplementedError

    def decode(self, token_dists: Tensor, raw: bool = False) -> Tuple[List[str], List[Tensor]]:
        """Decode a batch of token distributions.

        Args:
            token_dists: softmax probabilities over the token distribution. Shape: N, L, C
            raw: return unprocessed labels (will return list of list of strings)

        Returns:
            list of string labels (arbitrary length) and
            their corresponding sequence probabilities as a list of Tensors
        """
        decoded = []
        confidences = []
        for dist in token_dists:
            # Greedy selection: best class and its probability per position.
            probs, ids = dist.max(-1)
            if raw:
                tokens = self._ids2tok(ids, False)
            else:
                probs, ids = self._filter(probs, ids)
                tokens = self._ids2tok(ids, True)
            decoded.append(tokens)
            confidences.append(probs)
        return decoded, confidences
class Tokenizer(BaseTokenizer):
    """Tokenizer with EOS placed first and BOS/PAD placed last in the vocabulary."""

    BOS = '[B]'
    EOS = '[E]'
    PAD = '[P]'

    def __init__(self, charset: str) -> None:
        leading = (self.EOS,)
        trailing = (self.BOS, self.PAD)
        super().__init__(charset, leading, trailing)
        # Cache the ids of the special tokens for encode/_filter.
        self.eos_id = self._stoi[self.EOS]
        self.bos_id = self._stoi[self.BOS]
        self.pad_id = self._stoi[self.PAD]

    def encode(self, labels: List[str], device: Optional[torch.device] = None) -> Tensor:
        """Encode labels as ``[BOS] + ids + [EOS]`` rows, padded with PAD.

        Args:
            labels: List of labels. Each can be of arbitrary length.
            device: Create tensors on this device.

        Returns:
            Batch-first long tensor padded to the longest row.
        """
        rows = []
        for label in labels:
            framed = [self.bos_id] + self._tok2ids(label) + [self.eos_id]
            rows.append(torch.as_tensor(framed, dtype=torch.long, device=device))
        return pad_sequence(rows, batch_first=True, padding_value=self.pad_id)

    def _filter(self, probs: Tensor, ids: Tensor) -> Tuple[Tensor, List[int]]:
        """Cut the prediction at the first EOS.

        Returns the probabilities up to and including the EOS position (if
        one exists) and the ids strictly before it.
        """
        id_list = ids.tolist()
        if self.eos_id in id_list:
            cut = id_list.index(self.eos_id)
        else:
            cut = len(id_list)  # No EOS predicted: nothing to truncate.
        return probs[:cut + 1], id_list[:cut]
class CTCTokenizer(BaseTokenizer):
    """Tokenizer for CTC-style outputs, with the BLANK token at index 0."""

    BLANK = '[B]'

    def __init__(self, charset: str) -> None:
        # BLANK uses index == 0 by default
        super().__init__(charset, specials_first=(self.BLANK,))
        self.blank_id = self._stoi[self.BLANK]

    def encode(self, labels: List[str], device: Optional[torch.device] = None) -> Tensor:
        """Encode labels as id rows padded with the BLANK id.

        Args:
            labels: List of labels. Each can be of arbitrary length.
            device: Create tensors on this device.

        Returns:
            Batch-first long tensor padded to the longest label.
        """
        # We use a padded representation since we don't want to use CUDNN's CTC implementation
        batch = [torch.as_tensor(self._tok2ids(y), dtype=torch.long, device=device) for y in labels]
        return pad_sequence(batch, batch_first=True, padding_value=self.blank_id)

    def _filter(self, probs: Tensor, ids: Tensor) -> Tuple[Tensor, List[int]]:
        """Best-path decoding: collapse repeated ids, then drop BLANKs.

        An empty ``ids`` sequence yields an empty id list (the previous
        ``list(zip(*groupby(...)))[0]`` form raised IndexError for it).
        """
        # Collapse each run of consecutive duplicates to a single id.
        collapsed = [token_id for token_id, _ in groupby(ids.tolist())]
        kept = [token_id for token_id in collapsed if token_id != self.blank_id]
        # `probs` is just pass-through since all positions are considered part of the path
        return probs, kept