baudm committed on
Commit
1f4ffb7
·
1 Parent(s): 4df8f52

Add app, config, and data

Browse files
README.md CHANGED
@@ -5,6 +5,7 @@ colorFrom: red
5
  colorTo: purple
6
  sdk: gradio
7
  sdk_version: 3.1.0
 
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
5
  colorTo: purple
6
  sdk: gradio
7
  sdk_version: 3.1.0
8
+ python_version: 3.9.13
9
  app_file: app.py
10
  pinned: false
11
  license: apache-2.0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Scene Text Recognition Model Hub
2
+ # Copyright 2022 Darwin Bautista
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # https://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from pathlib import Path
17
+
18
+ import torch
19
+ from torchvision import transforms as T
20
+
21
+ import gradio as gr
22
+
23
+
24
class App:
    """Callable backend of the demo: loads STR models on demand and reads text from images.

    An instance is used directly as the click handler of the UI buttons, so
    ``__call__`` receives the selected model name and the submitted image.
    """

    # Title used for the demo page.
    title = 'Scene Text Recognition with Permuted Autoregressive Sequence Models'
    # Entry-point names requested from the 'baudm/parseq' torch.hub repository.
    models = ['parseq', 'parseq_tiny', 'abinet', 'crnn', 'trba', 'vitstr']

    def __init__(self):
        """Create an empty model cache and the image preprocessing pipeline."""
        # Maps model name -> loaded model, so each model is fetched at most once.
        self._model_cache = {}
        # Resize to 32x128 (bicubic), convert to a tensor, then normalize with
        # mean 0.5 and std 0.5.
        self._preprocess = T.Compose([
            T.Resize((32, 128), T.InterpolationMode.BICUBIC),
            T.ToTensor(),
            T.Normalize(0.5, 0.5)
        ])

    def _get_model(self, name):
        """Return the model called *name*, loading it through torch.hub on first use.

        Args:
            name: Entry-point name in the 'baudm/parseq' hub repository.

        Returns:
            The cached model object, switched to eval mode and frozen.
        """
        try:
            return self._model_cache[name]
        except KeyError:
            pass
        model = torch.hub.load('baudm/parseq', name, pretrained=True).eval()
        # NOTE(review): freeze() is provided by the hub model; presumably it
        # disables gradient tracking on the parameters -- confirm upstream.
        model.freeze()
        self._model_cache[name] = model
        return model

    def __call__(self, model_name, image):
        """Recognize the text in *image* using the model called *model_name*.

        Args:
            model_name: Name of the model to use (one of ``models``).
            image: Object exposing ``convert('RGB')`` (the UI is configured to
                send PIL images), or ``None`` when nothing was submitted.

        Returns:
            The decoded label for the image, or an empty string when *image*
            is ``None``.
        """
        # The handler can fire with no uploaded image / empty canvas; there is
        # nothing to read then, so skip model loading instead of crashing.
        if image is None:
            return ''
        model = self._get_model(model_name)
        batch = self._preprocess(image.convert('RGB')).unsqueeze(0)
        # Inference only: make sure no autograd graph is built for the forward pass.
        with torch.no_grad():
            # Greedy decoding
            pred = model(batch).softmax(-1)
        # decode() also returns confidences, which the demo does not display.
        label, _ = model.tokenizer.decode(pred)
        return label[0]
52
+
53
+
54
def main():
    """Build the Gradio Blocks demo around an ``App`` instance and launch it."""
    app = App()

    # Example images are looked up in a 'demo_images' folder next to this file.
    # Directory listings come back in arbitrary order, so sort the paths to
    # keep the example gallery stable across machines and restarts.
    examples_dir = Path(__file__).parent.joinpath('demo_images')
    example_paths = sorted(str(p) for p in examples_dir.glob('*.*'))

    with gr.Blocks(analytics_enabled=False, title=app.title) as demo:
        # The Markdown text is kept flush-left so it does not rely on any
        # dedenting being applied to the literal.
        gr.Markdown("""
<div align="center">

# Scene Text Recognition with<br/>Permuted Autoregressive Sequence Models
[![GitHub](https://img.shields.io/badge/baudm-parseq-blue?logo=github)](https://github.com/baudm/parseq)

</div>

To use this interactive demo for PARSeq and reproduced models:
1. Select which model you want to use.
2. Upload your own image, choose from the examples below, or draw on the canvas.
3. Read the given image or drawing.
""")
        model_name = gr.Radio(app.models, value=app.models[0], label='Select STR model to use')
        with gr.Row():
            image_upload = gr.Image(type='pil', source='upload', label='Image')
            image_canvas = gr.Image(type='pil', source='canvas', label='Drawing')
        with gr.Row():
            read_upload = gr.Button('Read Image')
            read_canvas = gr.Button('Read Drawing')

        output = gr.Textbox(max_lines=1, label='Model output')

        # Only offer an example gallery when there is at least one demo image.
        if example_paths:
            gr.Examples(example_paths, inputs=image_upload)

        # Both buttons share the same handler and output box; they differ only
        # in which image component feeds the model.
        read_upload.click(app, inputs=[model_name, image_upload], outputs=output)
        read_canvas.click(app, inputs=[model_name, image_canvas], outputs=output)

    demo.launch()


if __name__ == '__main__':
    main()
demo_images/art-01107.jpg ADDED
demo_images/coco-1166773.jpg ADDED
demo_images/cute-184.jpg ADDED
demo_images/ic13_word_256.png ADDED
demo_images/ic15_word_26.png ADDED
demo_images/uber-27491.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Gradio
2
+ torch
3
+ torchtext
4
+ torchvision
5
+ torchmetrics==0.6.2
6
+ timm==0.4.12
7
+ nltk
8
+ git+https://github.com/baudm/parseq.git