Mattral committed on
Commit
b302612
1 Parent(s): 18cb291

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +223 -0
app.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
from typing import Tuple

import cv2
import easyocr  # OCR engine used by the Super_Model path
import numpy as np
import streamlit as st
from path import Path
from PIL import Image
from streamlit_drawable_canvas import st_canvas

from app.dataloader_iam import Batch
from app.model import Model, DecoderType
from app.preprocessor import Preprocessor
# Set page config at the very beginning (only executed once)
st.set_page_config(
    page_title="HTR App",
    page_icon=":pencil:",
    layout="centered",
    initial_sidebar_state="auto",
)

ms = st.session_state
if "themes" not in ms:
    # Each named entry ("light"/"dark") stores the config option values that
    # are applied when switching AWAY from that theme, plus the emoji shown
    # on the toggle button while that theme is active.
    # BUGFIX: the "light" entry listed "theme.textColor" twice; the duplicate
    # key was silently dropped by Python, so only one is kept here.
    ms.themes = {
        "current_theme": "light",
        "refreshed": True,

        "light": {
            "theme.base": "dark",
            "theme.backgroundColor": "black",
            "theme.primaryColor": "#c98bdb",
            "theme.secondaryBackgroundColor": "#5591f5",
            "theme.textColor": "white",
            "button_face": "🌜",
        },

        "dark": {
            "theme.base": "light",
            "theme.backgroundColor": "white",
            "theme.primaryColor": "#5591f5",
            "theme.secondaryBackgroundColor": "#82E1D7",
            "theme.textColor": "#0a1464",
            "button_face": "🌞",
        },
    }
+
44
def ChangeTheme():
    """Switch the Streamlit theme between light and dark.

    Each entry in ``ms.themes`` holds the config values of the *opposite*
    theme, so the entry named after the currently active theme is the one
    applied here. Marks the state as not-refreshed so the caller triggers
    a rerun, then flips the recorded theme name.
    """
    active = ms.themes["current_theme"]
    target = ms.themes["light"] if active == "light" else ms.themes["dark"]
    for option, value in target.items():
        # Only "theme.*" entries are real config options; skip "button_face".
        if option.startswith("theme"):
            st._config.set_option(option, value)

    ms.themes["refreshed"] = False
    # Flip light <-> dark; any other value is left untouched.
    ms.themes["current_theme"] = {"dark": "light", "light": "dark"}.get(active, active)
# The toggle button shows the emoji stored for the currently active theme.
face_key = "light" if ms.themes["current_theme"] == "light" else "dark"
btn_face = ms.themes[face_key]["button_face"]
st.button(btn_face, on_click=ChangeTheme)

# ChangeTheme leaves "refreshed" False; rerun once so the new config applies.
if not ms.themes["refreshed"]:
    ms.themes["refreshed"] = True
    st.rerun()
def get_img_size(line_mode: bool = False) -> Tuple[int, int]:
    """Return the (width, height) the input image is resized to.

    Height is fixed for the network (32 px, see ``get_img_height``);
    width depends on the model: 256 for the line model, 128 for the
    single-word model.
    """
    width = 256 if line_mode else 128
    return width, 32  # fixed network input height (same value as get_img_height)
def get_img_height() -> int:
    """Fixed input-image height (in pixels) expected by the Neural Network."""
    return 32
def infer(line_mode: bool, model: Model, fn_img: Path) -> list:
    """Recognize text in the image at ``fn_img`` using a pretrained model.

    Args:
        line_mode: True for the text-line model, False for the word model
            (controls the preprocessing target size).
        model: loaded ``Model`` instance to run inference with.
        fn_img: path of the image file to read (grayscale).

    Returns:
        ``[recognized, probability]`` exactly as produced by
        ``model.infer_batch`` (annotation fixed: this never returned None).

    Raises:
        FileNotFoundError: if the image cannot be read. (Replaces the former
            ``assert``, which is stripped under ``python -O``.)
    """
    img = cv2.imread(fn_img, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None instead of raising on a bad path/format.
        raise FileNotFoundError(f"Could not read image: {fn_img}")

    preprocessor = Preprocessor(get_img_size(line_mode), dynamic_width=True, padding=16)
    img = preprocessor.process_img(img)

    batch = Batch([img], None, 1)
    recognized, probability = model.infer_batch(batch, True)
    return [recognized, probability]
def infer_super_model(image_path) -> Tuple[list, list]:
    """Recognize text with EasyOCR (the "Super_Model" option).

    Args:
        image_path: image source accepted by ``easyocr.Reader.readtext``
            (file path or numpy array).

    Returns:
        ``(recognized_texts, probabilities)`` — two lists aligned by index
        (annotation fixed: this never returned None).
    """
    # NOTE(review): the Reader is re-created on every call, which reloads the
    # model weights — consider caching it (e.g. st.cache_resource) if slow.
    reader = easyocr.Reader(['en'])  # Initialize EasyOCR reader (English)
    result = reader.readtext(image_path)
    # Each detection is (bbox, text, confidence).
    recognized_texts = [detection[1] for detection in result]
    probabilities = [detection[2] for detection in result]
    return recognized_texts, probabilities
def main():
    """Streamlit entry point: draw or upload an image and recognize its text.

    Offers three back-ends:
      * Single_Model / Line_Model — project HTR models (require a decoder choice)
      * Super_Model — EasyOCR, the most robust option for English
    """
    st.title('Extract text from Image Demo')

    st.markdown("""
    Streamlit Web Interface for Handwritten Text Recognition (HTR), Optical Character Recognition (OCR)
    implemented with TensorFlow and trained on the IAM off-line HTR dataset.
    The model takes images of single words or text lines (multiple words) as input and outputs the recognized text.
    """, unsafe_allow_html=True)

    st.markdown("""
    Predictions can be made using one of two models:
    - Single_Model (Trained on Single Word Images)
    - Line_Model (Trained on Text Line Images)
    - Super_Model ( Most Robust Option for English )
    - Burmese (Link)
    """, unsafe_allow_html=True)

    st.subheader('Select a Model, Choose the Arguments and Draw in the box below or Upload an Image to obtain a prediction.')

    # Selectors for the model and decoder
    modelSelect = st.selectbox("Select a Model", ['Single_Model', 'Line_Model', 'Super_Model'])

    # The decoder choice only applies to the project HTR models, not EasyOCR.
    if modelSelect != 'Super_Model':
        decoderSelect = st.selectbox("Select a Decoder", ['Bestpath', 'Beamsearch', 'Wordbeamsearch'])

    # Mappings (dictionaries) for the model and decoder. Assigns the directory
    # or the DecoderType of the selected option.
    modelMapping = {
        "Single_Model": '../model/word-model',
        "Line_Model": '../model/line-model'
    }

    decoderMapping = {
        'Bestpath': DecoderType.BestPath,
        'Beamsearch': DecoderType.BeamSearch,
        'Wordbeamsearch': DecoderType.WordBeamSearch
    }

    # Slider for pencil width
    strokeWidth = st.slider("Stroke Width: ", 1, 25, 6)

    # Canvas for user input. Background color must be white (#FFFFFF) or else
    # text will not be properly recognised.
    inputDrawn = st_canvas(
        fill_color="rgba(255, 165, 0, 0.3)",
        stroke_width=strokeWidth,
        update_streamlit=True,
        background_image=None,
        height=200,
        width=400,
        drawing_mode='freedraw',
        key="canvas",
        background_color='#FFFFFF'
    )

    # Buffer for user input (images uploaded from the user's device)
    inputBuffer = st.file_uploader("Upload an Image", type=["png"])

    # Inference Button
    inferBool = st.button("Recognize Text")
    if not inferBool:
        return

    if modelSelect == 'Super_Model':
        inputArray = None

        # Uploaded file takes priority over the canvas for the Super_Model.
        if inputBuffer is not None:
            with Image.open(inputBuffer).convert('RGB') as img:
                inputArray = np.array(img)
        elif inputDrawn.image_data is not None:
            # Canvas data is RGBA; EasyOCR expects RGB.
            inputArray = cv2.cvtColor(np.array(inputDrawn.image_data, dtype=np.uint8), cv2.COLOR_RGBA2RGB)

        if inputArray is None:
            st.write("No image data found. Please upload an image or draw on the canvas.")
            return

        reader = easyocr.Reader(['en'])  # Assuming English language; adjust as necessary
        results = reader.readtext(inputArray)

        # Display every detection with its confidence score.
        all_text = ''
        for (bbox, text, prob) in results:
            all_text += f'{text} (confidence: {prob:.2f})\n'

        st.write("**Recognized Texts and their Confidence Scores:**")
        st.text(all_text)
    # Handle the project HTR models. (BUGFIX: dropped the redundant
    # `inferBool == True` re-check — we are already inside the button branch.)
    elif inputDrawn.image_data is not None or inputBuffer is not None:
        # Uploaded file wins over canvas when both are present
        # (matches the original assignment order).
        if inputDrawn.image_data is not None:
            inputArray = np.array(inputDrawn.image_data)
        if inputBuffer is not None:
            inputArray = np.array(Image.open(inputBuffer))

        # BUGFIX: only canvas input is RGBA; forcing mode 'RGBA' crashed on
        # 3-channel uploaded PNGs. Let PIL infer the mode from the array shape.
        inputImage = Image.fromarray(inputArray.astype('uint8'))
        inputImage.save('userInput.png')

        # Obtain the model directory and the decoder type from their mappings.
        modelDir = modelMapping[modelSelect]
        decoderType = decoderMapping[decoderSelect]

        # BUGFIX: close the character-list file instead of leaking the handle.
        with open(modelDir + "/charList.txt") as char_file:
            char_list = list(char_file.read())

        # Call the model and display the Best Candidate and its probability.
        model = Model(char_list, modelDir, decoderType, must_restore=True)
        inferedText = infer(modelDir == '../model/line-model', model, 'userInput.png')

        st.write("**Best Candidate: **", inferedText[0][0])
        st.write("**Probability: **", str(inferedText[1][0]*100) + "%")

if __name__ == "__main__":
    main()