Tanaanan commited on
Commit
f13aee5
1 Parent(s): ec5d63d

Delete full.py

Browse files
Files changed (1) hide show
  1. full.py +0 -448
full.py DELETED
@@ -1,448 +0,0 @@
1
- import streamlit as st #Web App
2
- from PIL import Image, ImageOps #Image Processing
3
- import time
4
- from unittest import result
5
- from pythainlp.util import isthai
6
- import numpy as np
7
- import easyocr as ocr #OCR
8
- import editdistance
9
- from fastbook import *
10
- from fastai.vision import *
11
- from glob import glob
12
- from pathlib import Path
13
- from sklearn.metrics import precision_recall_fscore_support, accuracy_score, roc_auc_score
14
-
15
-
16
-
17
-
18
# Sidebar branding: logo image followed by the application title.
st.sidebar.image("./logo.png")

st.sidebar.header("ATK-OCR classification (AOC) Webapp.")

# Page selector shown in the sidebar; `choice` holds the selected entry and is
# compared against "Detection" / "About" further down in the script.
activities = ["Detection", "About"]
choice = st.sidebar.selectbox("Select option..",activities)
25
-
26
-
27
-
28
-
29
-
30
- #set default size as 1280 x 1280
31
def img_resize(input_path, img_size):
    """Open an image and letterbox it onto a square RGB canvas.

    The image is rotated according to its EXIF orientation, scaled so that its
    longer side equals ``img_size`` (aspect ratio preserved) and pasted centred
    on a new ``img_size`` x ``img_size`` RGB canvas.

    Args:
        input_path: Anything ``PIL.Image.open`` accepts (path or file object).
        img_size: Side length, in pixels, of the square output.

    Returns:
        The padded ``PIL.Image``. An input that is already exactly
        ``img_size`` x ``img_size`` is returned as opened (no new canvas).
    """
    desired_size = img_size
    im = Image.open(input_path)
    im = ImageOps.exif_transpose(im)  # honour the EXIF orientation tag
    width, height = im.size

    # Compare against the requested size, not a hard-coded 1280.
    if width == desired_size and height == desired_size:
        return im

    ratio = float(desired_size) / max(width, height)
    # Clamp to 1 px so extreme aspect ratios never produce a zero-sized side.
    new_size = tuple(max(1, int(side * ratio)) for side in (width, height))
    # Image.ANTIALIAS was an alias of LANCZOS and no longer exists in Pillow 10.
    im = im.resize(new_size, Image.LANCZOS)

    new_im = Image.new("RGB", (desired_size, desired_size))
    offset = ((desired_size - new_size[0]) // 2,
              (desired_size - new_size[1]) // 2)
    new_im.paste(im, offset)
    return new_im
49
-
50
-
51
# Path of the exported classifier that get_detection() queries.
checkpoint_path = "./ATK Efficientb_7 FastAI(96%).pkl"

# NOTE(review): load_learner presumably comes from the fastbook/fastai star
# imports at the top of the file - confirm.
learn_inf = load_learner(checkpoint_path)
# Underlying model switched to eval mode; `model` is not referenced again in
# the visible part of this file.
model = learn_inf.model.eval()
55
-
56
-
57
-
58
-
59
def get_detection(img_path):
    """Classify an uploaded ATK image and show the result in the Streamlit app.

    Args:
        img_path: Object exposing ``getvalue()`` that returns the image bytes
            (the value returned by ``st.file_uploader``).

    Returns:
        None. The outcome is rendered with ``st.error`` (positive),
        ``st.success`` (negative) or ``st.warning`` (unexpected label).
    """
    bytes_data = img_path.getvalue()  # uploaded file -> raw bytes
    # NOTE(review): pred is presumably fastai's (label, index, probabilities)
    # triple, with probabilities ordered negative, positive - confirm.
    pred = learn_inf.predict(bytes_data)
    label = pred[0]

    message = "Result : {} with {}% confidence"
    if label == "1_Positive":
        confidence = round(float(pred[2][1] * 100), 2)
        st.error(message.format("Positive", confidence))
    elif label == "0_Negative":
        confidence = round(float(pred[2][0] * 100), 2)
        st.success(message.format("Negative", confidence))
    else:
        # Previously an unknown label produced no output at all.
        st.warning("Result : unrecognised model label {!r}".format(label))
69
-
70
-
71
-
72
# allow_output_mutation=True stops st.cache from hashing the returned reader on
# every rerun to detect mutation; the reader is a large model object that is
# meant to be shared as-is.
@st.cache(allow_output_mutation=True)
def load_model():
    """Create the EasyOCR reader (English only, models stored in the CWD)."""
    reader = ocr.Reader(['en'], model_storage_directory='.')
    return reader


reader = load_model()  # shared OCR reader used by Get_Idcard_detail
78
-
79
def Get_Idcard_detail(file_path):
    """Run OCR on an ID-card image and collect the recognised text.

    This first stage letterboxes the image to 1280 x 1280 and reads it with
    the module-level EasyOCR ``reader``. The statements that follow in this
    function clean ``raw_data`` and display the extracted ID number and name.

    Args:
        file_path: Path or file object accepted by ``PIL.Image.open``.
    """
    # Reuse the shared letterboxing helper instead of repeating its logic
    # (the inline copy also relied on the removed Image.ANTIALIAS constant).
    padded = img_resize(file_path, 1280)

    # Named ocr_result so the module-level import `result` is not shadowed.
    ocr_result = reader.readtext(np.array(padded))

    # Element 1 of every OCR entry is kept as the recognised string.
    raw_data = [entry[1] for entry in ocr_result]
110
-
111
-
112
-
113
def get_english(raw_list):
    """Return, in order, the entries of raw_list that isthai() rejects."""
    return [text for text in raw_list if not isthai(text)]
124
-
125
- raw_data = get_english(raw_data)
126
-
127
-
128
def Clear_syntax(raw_list):
    """Normalise OCR tokens in place and drop the ones that carry no text.

    Every token has "!" replaced by "l" and "," by "." (common OCR
    confusions), spaces and punctuation symbols removed, and is lower-cased.
    Tokens that are empty afterwards are removed from the list.

    Args:
        raw_list: List of strings; it is modified in place.

    Returns:
        The same list object, cleaned.
    """
    symbols = "#{}=/@$—|%-()¥[]‘:;"
    # One translation table replaces the chained replace() calls.
    table = str.maketrans({
        "!": "l",
        ",": ".",
        " ": None,
        **dict.fromkeys(symbols),
    })

    cleaned = (token.translate(table).lower() for token in raw_list)
    # Slice assignment keeps the caller's list object. Empty leftovers are
    # dropped so they cannot split runs of adjacent tokens in later steps.
    raw_list[:] = [token for token in cleaned if token]
    return raw_list
146
-
147
- raw_data = Clear_syntax(raw_data)
148
-
149
-
150
def get_idnum(raw_list):
    """Find a 13-digit ID number in a list of OCR tokens.

    Two strategies are tried in order:

    1. Five consecutive numeric tokens of lengths 1, 4, 5, 2 and 1 (the
       grouping printed on the card).
    2. Any run of consecutive numeric tokens whose lengths add up to exactly
       13, tried from every start position.

    Args:
        raw_list: List of cleaned token strings.

    Returns:
        ``{"id_num": <13 digits>}``, or ``{"id_num": "None"}`` if nothing
        matches.
    """
    id_num = {"id_num": "None"}
    groups = (1, 4, 5, 2, 1)
    total_digits = sum(groups)  # 13

    # 1. Printed grouping. The range bound replaces the former bare except
    #    that swallowed IndexError near the end of the list.
    for start in range(len(raw_list) - len(groups) + 1):
        window = raw_list[start:start + len(groups)]
        joined = "".join(window)
        if tuple(len(tok) for tok in window) == groups and joined.isnumeric():
            id_num["id_num"] = joined
            return id_num

    # 2. Fallback. Scanning from every start position lets a run beginning at
    #    index 0, a run ending on the last token, or a run preceded by stray
    #    digits all be found.
    for start in range(len(raw_list)):
        digits = 0
        end = start
        for end in range(start, len(raw_list)):
            token = raw_list[end]
            if not token.isnumeric():
                break
            digits += len(token)
            if digits >= total_digits:
                break
        if digits == total_digits:
            id_num["id_num"] = "".join(raw_list[start:end + 1])
            return id_num

    return id_num
184
-
185
- id_num = (get_idnum(raw_data))
186
-
187
- #Complete list name check
188
def list_name_check(raw_list):
    """Repair misread "name"/"lastname" labels and one misread title in place.

    Step 1: unless both "name" and "lastname" are already present, every token
    within edit distance 2 of one of them is overwritten with that label.

    Step 2: unless an exact title ("mr.", "mrs.", "master", "miss") is already
    present, the first token that has the same length as a title and is within
    edit distance 3 of it is overwritten with that title. Tokens equal to
    "name" or "lastname" are never treated as titles.

    Args:
        raw_list: List of cleaned, lower-cased tokens; modified in place.

    Returns:
        The same list object.
    """
    sum_list = raw_list
    name_key = ['name', 'lastname']

    # 1. label repair
    if not ("name" in sum_list and "lastname" in sum_list):
        for key in name_key:
            for j, token in enumerate(sum_list):
                if editdistance.eval(key, token) <= 2:
                    sum_list[j] = key

    # 2. title repair (at most one replacement)
    gender_key = ["mr.", "mrs.", 'master', 'miss']
    if any(token in gender_key for token in sum_list):
        # An exact title is already there: nothing to repair, and a fuzzy
        # match could overwrite a real name of the same length.
        return sum_list

    for key in gender_key:
        for j, token in enumerate(sum_list):
            # The skip test looks at the token under inspection; it used to
            # index the list with the title-key index by mistake.
            if token in name_key:
                continue
            if len(token) == len(key) and editdistance.eval(key, token) <= 3:
                sum_list[j] = key
                return sum_list

    return sum_list
233
-
234
- raw_data = list_name_check(raw_data)
235
-
236
- #get_eng_name
237
def get_engname(raw_list):
    """Cut out the slice of tokens expected to hold the English name.

    The slice is anchored on the first occurrence of, in priority order,
    "name", "lastname", a male title ("mr."/"master") or a female title
    ("miss"/"mrs."), e.g.
    ``['name', 'master', 'tanaanan', 'lastname', 'chalermpan']``.

    Args:
        raw_list: List of cleaned, lower-cased tokens.

    Returns:
        A new list of up to six tokens, or ``[]`` (after printing a notice)
        when no anchor is found.
    """
    def first_index(*keys):
        # Index of the first token equal to any of keys, else None.
        return next((i for i, tok in enumerate(raw_list) if tok in keys), None)

    # (anchor position, tokens kept before it, tokens kept from it onwards)
    anchors = (
        (first_index("name"), 0, 6),  # first "name" wins if it occurs twice
        (first_index("lastname"), 3, 3),
        (first_index("mr.", "master"), 1, 5),
        (first_index("miss", "mrs."), 1, 5),
    )
    for pos, before, after in anchors:
        if pos is not None:
            # Clamp at 0: a negative start would wrap around the list end and
            # return the wrong (usually empty) slice.
            return raw_list[max(0, pos - before):pos + after]

    print("Can't find eng name!!")
    return []
270
-
271
- raw_data = get_engname(raw_data)
272
-
273
-
274
-
275
-
276
def split_genkey(raw_list):
    """Strip a title glued to the front of a name, e.g. "missjate" -> "jate".

    Titles are tried in the order "mrs.", "mr.", "master", "miss". A token
    matches when its prefix of the title's length equals the title or is
    within edit distance 1 of it; "name" and "lastname" never match. Only the
    first match is stripped. Empty strings are then removed from the list.

    Args:
        raw_list: List of cleaned, lower-cased tokens; modified in place.

    Returns:
        The same list object.
    """
    data = raw_list
    keys = ['mrs.', 'mr.', 'master', 'miss']

    def find_title():
        # (token index, title length) of the first match, else None.
        for key_val in keys:
            width = len(key_val)
            for pos, each_text in enumerate(data):
                if each_text in ("name", "lastname"):
                    continue
                prefix = each_text[:width]
                if prefix == key_val or (
                    len(prefix) == width
                    and editdistance.eval(prefix, key_val) <= 1
                ):
                    return pos, width
        return None

    # Returning on the first hit stops a later, shorter title from overriding
    # it ("mrs.jane" used to be re-matched by "mr." and cut to ".jane"). Using
    # None rather than 0 as "not found" lets a match at index 0 be stripped.
    hit = find_title()
    if hit is not None:
        pos, width = hit
        data[pos] = data[pos][width:]

    data[:] = [token for token in data if token != '']
    return data
300
-
301
- raw_data = split_genkey(raw_data)
302
-
303
-
304
def clean_name_data(raw_list):
    """Remove, in place, tokens of two characters or fewer and numeric tokens.

    Args:
        raw_list: List of token strings; modified in place.

    Returns:
        The same list object with only the surviving tokens, order preserved.
    """
    # A single filtering pass replaces removing elements while index-iterating
    # (which relied on IndexError to terminate).
    raw_list[:] = [
        token for token in raw_list
        if len(token) > 2 and not token.isnumeric()
    ]
    return raw_list
312
-
313
- raw_data = clean_name_data(raw_data)
314
-
315
-
316
def name_sum(raw_list):
    """Extract the first name and last name from the cleaned token list.

    Title tokens are ignored. The token after "name" is taken as the first
    name and the token after "lastname" as the last name. If only one of the
    two is found, the other is guessed from the usual layout
    ``name, <first>, lastname, <last>``: two positions after the first name,
    or two positions before the last name. Dots are removed from both values.

    Args:
        raw_list: List of cleaned, lower-cased tokens (not modified).

    Returns:
        ``{"name": ..., "lastname": ...}``; a missing value is the string
        ``"None"``.
    """
    info = {"name": "None",
            "lastname": "None"}
    titles = {'mr.', 'mrs.', 'master', 'miss', 'mrs', 'mr'}
    keywords = ("name", "lastname")

    # Work on a filtered copy so the caller's list is left untouched.
    tokens = [token for token in raw_list if token not in titles]

    def value_at(index):
        # Token at index if it exists and is not itself a label. The explicit
        # lower bound stops negative indexes wrapping to the end of the list.
        if 0 <= index < len(tokens) and tokens[index] not in keywords:
            return tokens[index]
        return None

    value_pos = {}  # label -> index of the value found for it
    for i, token in enumerate(tokens):
        if token in keywords:
            value = value_at(i + 1)
            if value is not None:
                info[token] = value
                value_pos[token] = i + 1

    # Only one of the two found: look one slot past the missing label.
    if info["name"] != "None" and info["lastname"] == "None":
        guess = value_at(value_pos["name"] + 2)
        if guess is not None:
            info["lastname"] = guess
    elif info["lastname"] != "None" and info["name"] == "None":
        guess = value_at(value_pos["lastname"] - 2)
        if guess is not None:
            info["name"] = guess

    # Drop dots left over from titles such as "mr." / "mrs."
    info["name"] = info["name"].replace(".", "")
    info["lastname"] = info["lastname"].replace(".", "")

    return info
354
-
355
- st.subheader("Process Completed!.....")
356
- st.write(id_num)
357
- st.write(name_sum(raw_data))
358
-
359
-
360
-
361
-
362
-
363
-
364
if choice == "Detection":

    st.title("ATK-OCR classification (AOC) Webapp.")
    st.subheader(" Antigen test kit + Identification Card detector.")

    pages_name = ['ATK + Idcard Detect', 'ATK Detect', 'Idcard Detect']
    page = st.radio('Select option mode :', pages_name)

    # Single uploader shared by all three modes.
    image = st.file_uploader(label = "upload ATK + Idcard img here.. OwO",type=['png','jpg','jpeg'])

    if image is None:
        st.write("## Waiting for image..")
        st.image('atk_idcard.jpeg')
    else:
        new_img = img_resize(image, 1280)

        # mode -> (spinner caption, processing steps run in order)
        modes = {
            "ATK + Idcard Detect": ("🤖 ATK + Idcard Working... ",
                                    (Get_Idcard_detail, get_detection)),
            "ATK Detect": ("🤖 ATK Working... ", (get_detection,)),
            "Idcard Detect": ("🤖 Idcard Working... ", (Get_Idcard_detail,)),
        }

        if page in modes:
            caption, steps = modes[page]
            st.image(new_img)
            with st.spinner(caption):
                t1 = time.perf_counter()
                for step in steps:
                    step(image)
                t2 = time.perf_counter()
                st.write('time taken to run: {:.2f} sec'.format(t2-t1))
408
-
409
- elif choice =='About' :
410
- st.header("About...")
411
- st.subheader("AOC คืออะไร ?")
412
- st.write("- เป็นระบบที่สามารถคัดกรองผลตรวจเชื้อของ COVID-19 ได้ผ่าน ที่ตรวจ ATK (Antigen Test Kit) ควบคู่กับบัตรประชาชน จากรูปภาพได้โดยอัตโนมัติ")
413
-
414
- st.subheader("AOC ทำอะไรได้บ้าง ?")
415
- st.write("- ตรวจจับผลตรวจ ATK (Obj detection) [debugging in progress]")
416
- st.write("- ตรวจจับชื่อ-นามสกุล (OCR)")
417
- st.write("- ตรวจจับเลขบัตรประชาชน (OCR)")
418
-
419
- st.subheader("AOC ดีกว่ายังไง ?")
420
- st.write("จากผลที่ได้จากการเปรียบเทียบกันระหว่าง model (AOC) กับ คน (Baseline) จำนวน 30 ภาพ / คน ได้ผลดังนี้")
421
- st.image("./acc_table.png")
422
- st.write("จากผลที่ได้สรุปได้ว่า ส่วนที่ผ่าน Baseline และมีประสิทธิภาพดีกว่าคัดกรองด้วยคนคือ ผลตรวจ ATK ได้ผลที่ 100 %, เลขบัตรประชน ได้ผลที่ 100 % และ ความเร็วในการคัดกรอง ได้ผลที่ 4.84 วินาที ซึ่งมีความเร็วมากกว่า 81% เมื่อเทียบกับค���ดกรองด้วยคน ถือว่ามีประสิทธิภาพที่สูงมากในการคัดกรอง และ มีประสิทธิภาพมากกว่าการคัดแยกด้วยมนุษย์")
423
- st.write("** ความเร็วที่โมเดลทำได้อาจไม่ตรงตามที่ deploy บนเว็บ เนื่องจากในเว็บ ไม่มี GPU ในการประมวลผลอาจทำให้โมเดลใช้เวลาในการประมวลที่นานกว่าตอนใช้ GPU")
424
-
425
-
426
- st.subheader("คำแนะนำในการใช้งาน")
427
- st.write("- ในการใช้งานให้ถ่ายรูปภาพบัตรประชาชนในแนวตั้งเท่านั้น เนื่องจากถ้าเป็นแนวอื่นอาจทำให้การตรวจจับคลาดเคลื่อนเอาได้")#3
428
- st.write("- ภาพไม่ควรมีแสงที่สว่างมากเกืนไป และ มืดเกินไป มิฉะนั้นอาจทำให้การตรวจจับคลาดเคลื่อนเอาได้")#4
429
- st.write("- ภาพไม่ควรที่จะอยู่ไกลเกินไป และ ควรมีความชัด มิฉะนั้นอาจทำให้การตรวจจับคลาดเคลื่อน หรือ ไม่สามารถตรวจจับได้")#5
430
-
431
- st.subheader("รายละเอียดเพิ่มเติม")
432
- st.write('[Medium blog](https://medium.com/@mjsalyjoh/atk-ocr-classification-aoc-%E0%B8%A3%E0%B8%B0%E0%B8%9A%E0%B8%9A%E0%B8%84%E0%B8%B1%E0%B8%94%E0%B8%81%E0%B8%A3%E0%B8%AD%E0%B8%87%E0%B8%9C%E0%B8%A5%E0%B8%95%E0%B8%A3%E0%B8%A7%E0%B8%88-atk-%E0%B9%81%E0%B8%A5%E0%B8%B0-%E0%B8%9A%E0%B8%B1%E0%B8%95%E0%B8%A3%E0%B8%9B%E0%B8%A3%E0%B8%B0%E0%B8%8A%E0%B8%B2%E0%B8%8A%E0%B8%99-fa32a8d47599)')
433
- st.write('[Github Link](https://github.com/Tanaanan/AOC_ATK_OCR_Classification)')
434
-
435
- st.warning("** ระบบ ATK ตอนนี้ใช้เป็น Image classification อยู่เนื่องจาก Object detection ยังมีปัญหาในการ deploy on cloud.. (กำลังอยู่ในขั้นตอน debug!)")
436
-
437
-
438
# Sidebar footer, rendered on every page: link to extra test images.
st.sidebar.subheader('More image for test..')
st.sidebar.write('[Github img test set.](https://github.com/Tanaanan/AOC_ATK_OCR_Classification/tree/main/test_set(img))')

# Feedback form link, separated by a horizontal rule.
st.sidebar.markdown('---')
st.sidebar.subheader('Recomend / Issues report..')
st.sidebar.write('[Google form](https://forms.gle/zYpYFKcTpBoFGxN58)')


# Author credit and contact address.
st.sidebar.markdown('---')
st.sidebar.subheader('Made by Tanaanan .M')
st.sidebar.write("Contact : mjsalyjoh@gmail.com")