Tanaanan committed on
Commit
1848c0a
1 Parent(s): 133e2f9

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +390 -0
app.py ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import easyocr as ocr #OCR
2
+ import streamlit as st #Web App
3
+ from PIL import Image, ImageOps #Image Processing
4
+ import time
5
+ from unittest import result
6
+ import editdistance
7
+ from pythainlp.util import isthai
8
+ import numpy as np
9
+
10
# Page header: the app title, then a one-line Thai description saying the app
# detects the full name and the ID number on a Thai national ID card.
st.title("Thai-Identification Card (OCR) Webapp.")
st.markdown("ระบบตรวจจับข้อมูลจากบัตรประชาชน (ชื่อ-นามสกุล , เลขบัตรประชาชน).")
st.markdown("")  # empty markdown element between the description and the selector

# Mode selector: `page` holds whichever label the user picked and is compared
# against the "Detection" label by the page dispatch at the end of the script.
pages_name = ['Detection (ตรวจจับ)', 'Example image (ภาพตัวอย่าง)']
page = st.radio('Select option mode :', pages_name)
19
+
20
+
21
+
22
+
23
def _cache_model(func):
    """Wrap *func* with the Streamlit cache that suits an unhashable model object.

    Newer Streamlit releases provide ``st.cache_resource`` for objects such as
    ML models. On releases that only have the legacy ``st.cache``, the output
    mutation check is switched off so Streamlit does not try to hash the whole
    OCR reader every time the cached value is handed out.
    """
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        return cache_resource(func)
    return st.cache(allow_output_mutation=True)(func)


@_cache_model
def load_model():
    """Create the EasyOCR reader (English only), storing model files in the working directory.

    Returns:
        The ``ocr.Reader`` instance; the cache decorator lets reruns of the
        script reuse it instead of building it again.
    """
    return ocr.Reader(['en'], model_storage_directory='.')


reader = load_model()  # shared OCR reader used by the functions below
29
+
30
+ #set default size as 1280 x 1280
31
def img_resize(input_path, img_size):  # padding
    """Return the image scaled to fit inside an ``img_size`` x ``img_size`` square.

    The picture is rotated according to its EXIF orientation, scaled so that
    its longer side equals ``img_size`` (aspect ratio kept) and pasted centred
    on a new RGB canvas, whose uncovered area stays at the canvas default.

    Args:
        input_path: Anything ``Image.open`` accepts; callers in this file pass
            both a path string and an uploaded file object.
        img_size: Side length of the square output, in pixels.

    Returns:
        The padded ``Image``. An input that already measures
        ``img_size`` x ``img_size`` is returned without resizing or padding.
    """
    im = Image.open(input_path)
    im = ImageOps.exif_transpose(im)  # fix image rotating

    # Compare against the requested size rather than a fixed 1280 so the
    # shortcut is also right when a caller asks for another size.
    if im.size == (img_size, img_size):
        return im

    scale = float(img_size) / max(im.size)  # im.size is (width, height)
    # Keep every side at least 1 px: a very elongated image would otherwise
    # round down to 0 and make resize() fail.
    new_size = tuple(max(1, int(side * scale)) for side in im.size)
    # LANCZOS is the filter formerly exposed as ANTIALIAS, a name that newer
    # Pillow releases no longer provide.
    im = im.resize(new_size, Image.LANCZOS)

    canvas = Image.new("RGB", (img_size, img_size))
    offset = ((img_size - new_size[0]) // 2, (img_size - new_size[1]) // 2)
    canvas.paste(im, offset)
    return canvas
49
+
50
def Get_OCR(input_image):
    """Run the OCR reader on an image and write the raw text list and the elapsed time.

    Args:
        input_image: Anything ``Image.open`` accepts.

    Nothing is returned; the results are written to the Streamlit page.
    """
    picture = Image.open(input_image)  # read image
    with st.spinner("On working... "):
        started = time.perf_counter()
        detections = reader.readtext(np.array(picture))
        # Element 1 of every readtext entry is collected as the text.
        st.write([detection[1] for detection in detections])
        finished = time.perf_counter()
        st.write('time taken to run: {:.2f} sec'.format(finished - started))
67
+
68
+
69
# Side length of the square image handed to the OCR reader.
_OCR_SIZE = 1280
# Symbols stripped from every OCR token.
_NOISE_CHARS = "#{}=/@$—|%-()¥[]‘:;"
# One-pass clean-up table: "!" is a misread "l", "," a misread ".", while
# blanks and noise symbols are deleted.
_TOKEN_TABLE = str.maketrans(
    {"!": "l", ",": ".", " ": None, **dict.fromkeys(_NOISE_CHARS)}
)
# Field labels printed on the card in front of the English name parts.
_NAME_KEYS = ("name", "lastname")
# Titles with the largest edit distance accepted when repairing a misread
# token of the same length. Each limit is below the title's length, so a token
# that shares nothing with the title can never be rewritten.
_TITLE_LIMITS = (("mr.", 2), ("mrs.", 3), ("master", 3), ("miss", 3))
# Token lengths of an ID number read as five separate groups (13 digits).
_ID_GROUPS = (1, 4, 5, 2, 1)


def _read_tokens(file_path):
    """Open the image, pad it to the OCR size and return the recognised strings."""
    img = ImageOps.exif_transpose(Image.open(file_path))  # fix image rotating
    if img.size != (_OCR_SIZE, _OCR_SIZE):
        scale = float(_OCR_SIZE) / max(img.size)  # img.size is (width, height)
        # At least 1 px per side, otherwise resize() fails on elongated images.
        new_size = tuple(max(1, int(side * scale)) for side in img.size)
        canvas = Image.new("RGB", (_OCR_SIZE, _OCR_SIZE))
        # LANCZOS is the filter formerly exposed as ANTIALIAS, a name that
        # newer Pillow releases no longer provide.
        canvas.paste(
            img.resize(new_size, Image.LANCZOS),
            ((_OCR_SIZE - new_size[0]) // 2, (_OCR_SIZE - new_size[1]) // 2),
        )
        img = canvas
    # Element 1 of every readtext entry is collected as the text.
    return [entry[1] for entry in reader.readtext(np.array(img))]


def _clean_tokens(tokens):
    """Normalise tokens (fix "!"/",", drop blanks and symbols, lowercase).

    Tokens that end up empty are dropped so they cannot break the positional
    checks that run afterwards.
    """
    cleaned = (tok.translate(_TOKEN_TABLE).lower() for tok in tokens)
    return [tok for tok in cleaned if tok]


def _find_id_number(tokens):
    """Return the 13-digit ID number found in *tokens*, or the string "None"."""
    # 1. Normal check: five consecutive tokens of 1, 4, 5, 2 and 1 digits.
    width = len(_ID_GROUPS)
    for start in range(len(tokens) - width + 1):
        window = tokens[start:start + width]
        joined = "".join(window)
        if tuple(map(len, window)) == _ID_GROUPS and joined.isnumeric():
            return joined

    # 2. Fallback: any run of consecutive numeric tokens totalling 13 digits.
    #    The run start uses None (not 0) as "no run", so a run beginning at
    #    index 0 keeps its first chunk. The total is tested right after each
    #    token, so a run ending on the last token is found too.
    run_start = None
    digits = 0
    for idx, tok in enumerate(tokens):
        if not tok.isnumeric():
            run_start, digits = None, 0
            continue
        if run_start is None:
            run_start = idx
        digits += len(tok)
        if digits == 13:
            return "".join(tokens[run_start:idx + 1])
    return "None"


def _repair_keywords(tokens):
    """Rewrite misread field labels and at most one misread title, in place."""
    # 1. Field labels: only repaired when "name" and "lastname" are not both
    #    present already.
    if not all(key in tokens for key in _NAME_KEYS):
        for key in _NAME_KEYS:
            for idx, tok in enumerate(tokens):
                if editdistance.eval(key, tok) <= 2:
                    tokens[idx] = key

    # 2. Title: nothing to repair when one was read correctly.
    if any(title in tokens for title, _ in _TITLE_LIMITS):
        return tokens
    for title, limit in _TITLE_LIMITS:
        for idx, tok in enumerate(tokens):
            # Skip the field labels themselves and tokens of another length.
            if tok in _NAME_KEYS or len(tok) != len(title):
                continue
            if editdistance.eval(title, tok) <= limit:
                tokens[idx] = title
                return tokens  # only the first match is rewritten
    return tokens


def _name_window(tokens):
    """Return the slice of up to six tokens expected to hold the English name.

    Example window: ['name', 'master', 'tanaanan', 'lastname', 'chalermpan'].
    The first anchor found, in priority order, decides where the window
    starts; when a keyword occurs more than once its first position is used.
    """
    anchors = (
        (("name",), 0),          # window starts on the label
        (("lastname",), 3),      # ... three tokens before it
        (("mr.", "master"), 1),  # ... one token before the title
        (("miss", "mrs."), 1),
    )
    for labels, lead in anchors:
        pos = next((i for i, tok in enumerate(tokens) if tok in labels), None)
        if pos is not None:
            # Clamp the start: a negative start would wrap around to the end
            # of the list instead of beginning at the first token.
            return tokens[max(0, pos - lead):pos - lead + 6]
    print("Can't find eng name!!")
    return []


def _strip_title_prefix(tokens):
    """Cut a title glued to the front of a token, e.g. "missjate" -> "jate".

    Titles are tried longest variant first ("mrs." before "mr.") and the first
    hit wins, so a later title cannot overwrite an earlier match. Empty
    strings left behind are removed.
    """
    for title in ("mrs.", "mr.", "master", "miss"):
        size = len(title)
        for idx, tok in enumerate(tokens):
            head = tok[:size]
            if tok in _NAME_KEYS or len(head) != size:
                continue
            # Distance 0 is an exact prefix; distance 1 allows one misread char.
            if editdistance.eval(head, title) <= 1:
                tokens[idx] = tok[size:]
                return [t for t in tokens if t]
    return [t for t in tokens if t]


def _drop_noise(tokens):
    """Drop tokens of one or two characters and purely numeric tokens."""
    return [tok for tok in tokens if len(tok) > 2 and not tok.isnumeric()]


def _extract_name(tokens):
    """Build ``{"name": ..., "lastname": ...}`` from the name window tokens.

    A value that cannot be found stays the string "None".
    """
    info = {"name": "None", "lastname": "None"}
    # Stand-alone titles carry no name information; drop the first of each.
    for title in ('mr.', 'mrs.', 'master', 'miss', 'mrs', 'mr'):
        if title in tokens:
            tokens.remove(title)

    # The value is the token right after its label; a label sitting on the
    # last position has no value, hence the [:-1].
    value_pos = {}
    for idx, tok in enumerate(tokens[:-1]):
        if tok in _NAME_KEYS:
            info[tok] = tokens[idx + 1]
            value_pos[tok] = idx + 1

    # If only one label was found, assume the layout
    # name, <name value>, lastname, <lastname value> and step over the slot
    # of the unreadable label.
    if "name" in value_pos and "lastname" not in value_pos:
        guess = value_pos["name"] + 2
        if guess < len(tokens):
            info["lastname"] = tokens[guess]
    elif "lastname" in value_pos and "name" not in value_pos:
        guess = value_pos["lastname"] - 2
        if guess >= 0:  # a negative index would wrap to the end of the list
            info["name"] = tokens[guess]

    # Remove "." left over from titles such as "mr." and "mrs.".
    return {field: value.replace(".", "") for field, value in info.items()}


def Get_Idcard_detail(file_path):
    """Extract the ID number and the English name from an ID-card image and show them.

    Pipeline: OCR the padded image, keep the tokens that ``isthai`` does not
    classify as Thai, clean them, look for the 13-digit ID number, repair
    misread keywords, cut out the tokens around the name and derive the first
    and last name from them.

    Args:
        file_path: Anything ``Image.open`` accepts; callers in this file pass
            both a path string and an uploaded file object.

    Returns:
        None. The two result dictionaries (``{"id_num": ...}`` and
        ``{"name": ..., "lastname": ...}``) are written to the Streamlit page;
        a value that was not found is the string "None".
    """
    tokens = [text for text in _read_tokens(file_path) if not isthai(text)]
    tokens = _clean_tokens(tokens)

    # The ID number is searched in the full cleaned token list, before the
    # name-specific steps rewrite or cut it.
    id_num = {"id_num": _find_id_number(tokens)}

    tokens = _repair_keywords(tokens)
    tokens = _name_window(tokens)
    tokens = _strip_title_prefix(tokens)
    tokens = _drop_noise(tokens)

    st.success("Process Completed!.....")
    st.write(id_num)
    st.write(_extract_name(tokens))
348
+
349
+
350
def _show_and_process(image):
    """Show the padded preview of *image*, run the extraction and report the elapsed time."""
    st.image(img_resize(image, 1280))
    with st.spinner("On working..."):
        started = time.perf_counter()
        Get_Idcard_detail(image)
        finished = time.perf_counter()
        st.write('time taken to run: {:.2f} sec'.format(finished - started))


# NOTE(review): SOURCE lost its indentation; the disclaimer in the detection
# branch is assumed to be shown whether or not a file has been uploaded.
if page == "Detection (ตรวจจับ)":
    # image uploader
    image = st.file_uploader(label = "upload Idcard image here.. OwO",type=['png','jpg','jpeg'])

    if image is None:
        # Nothing uploaded yet: show the placeholder picture.
        st.write("## Waiting for image.. U w U")
        st.image('spy-x-family-anya-heh-anime.jpg')
    else:
        _show_and_process(image)

    st.warning("""ระบบไม่มีการเก็บข้อมูลบัตรประชาชนจากผู้ใช้งาน ใช้ในการศึกษา และ เป็นแนวทางในการพัฒนาต่อเท่านั้น \n
(No data kept in this system used for education and development guildlines only)""")
else:
    # Example page: run the same steps on the bundled sample card.
    image = "./id_ex.jpg"
    _show_and_process(image)

    st.warning("""ภาพตัวอย่างบัตรประชาชนเป็นภาพสังเคราะห์ที่ได้มาจาก [AI for Thai.](https://aiforthai.in.th/files/iappIDcr-front-ex.jpg) \n
Example Identification Card is synthetic image from [AI for Thai.](https://aiforthai.in.th/files/iappIDcr-front-ex.jpg)""")


st.caption("Developed by Tanaanan .M mjsalyjoh@gmail.com")