Loren committed on
Commit
ca7fcaf
•
1 Parent(s): 7919715

Use streamlit_option_menu

Browse files
Home.py CHANGED
@@ -1,19 +1,17 @@
1
  import streamlit as st
 
 
2
 
3
- st.set_page_config(page_title='OCR Comparator', layout ="wide")
4
- st.image('ocr.png')
 
 
 
5
 
6
- st.write("")
 
 
 
7
 
8
- st.markdown('''#### OCR, or Optical Character Recognition, is a computer vision task, \
9
- which includes the detection of text areas, and the recognition of characters.''')
10
- st.write("")
11
- st.write("")
12
-
13
- st.markdown("##### This app allows you to compare, from a given image, the results of different solutions:")
14
- st.markdown("##### *EasyOcr, PaddleOCR, MMOCR, Tesseract*")
15
- st.write("")
16
- st.write("")
17
- st.markdown("πŸ‘ˆ Select the **About** page from the sidebar for information on how the app works")
18
-
19
- st.markdown("πŸ‘ˆ or directly select the **App** page")
 
1
  import streamlit as st
2
+ from multipage import MultiPage
3
+ from app_pages import home, about, ocr_comparator
4
 
5
+ app = MultiPage()
6
+ st.set_page_config(
7
+ page_title='OCR Comparator', layout ="wide",
8
+ initial_sidebar_state="expanded",
9
+ )
10
 
11
+ # Add all your application here
12
+ app.add_page("Home", "house", home.app)
13
+ app.add_page("About", "info-circle", about.app)
14
+ app.add_page("App", "cast", ocr_comparator.app)
15
 
16
+ # The main app
17
+ app.run()
 
 
 
 
 
 
 
 
 
 
__pycache__/multipage.cpython-37.pyc ADDED
Binary file (2.65 kB). View file
 
app_pages/__pycache__/about.cpython-37.pyc ADDED
Binary file (2.02 kB). View file
 
app_pages/__pycache__/home.cpython-37.pyc ADDED
Binary file (889 Bytes). View file
 
app_pages/__pycache__/ocr_comparator.cpython-37.pyc ADDED
Binary file (46.5 kB). View file
 
app_pages/about.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ def app():
4
+ st.title("OCR solutions comparator")
5
+
6
+ st.write("")
7
+ st.write("")
8
+ st.write("")
9
+
10
+ st.markdown("##### This app allows you to compare, from a given picture, the results of different solutions:")
11
+ st.markdown("##### *EasyOcr, PaddleOCR, MMOCR, Tesseract*")
12
+ st.write("")
13
+ st.write("")
14
+
15
+ st.markdown(''' The 1st step is to choose the language for the text recognition (not all solutions \
16
+ support the same languages), and then choose the picture to consider. It is possible to upload a file, \
17
+ to take a picture, or to use a demo file. \
18
+ It is then possible to change the default values for the text area detection process, \
19
+ before launching the detection task for each solution.''')
20
+ st.write("")
21
+
22
+ st.markdown(''' The different results are then presented. The 2nd step is to choose one of these \
23
+ detection results, in order to carry out the text recognition process there. It is also possible to change \
24
+ the default settings for each solution.''')
25
+ st.write("")
26
+
27
+ st.markdown("###### The recognition results appear in 2 formats:")
28
+ st.markdown(''' - a visual format resumes the initial image, replacing the detected areas with \
29
+ the recognized text. The background is + or - strongly colored in green according to the \
30
+ confidence level of the recognition.
31
+ A slider allows you to change the font size, another \
32
+ allows you to modify the confidence threshold above which the text color changes: if it is at \
33
+ 70% for example, then all the texts with a confidence threshold higher or equal to 70 will appear \
34
+ in white, in black otherwise.''')
35
+
36
+ st.markdown(" - a detailed format presents the results in a table, for each text box detected. \
37
+ It is possible to download this results in a local csv file.")
app_pages/home.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ def app():
4
+ st.image('ocr.png')
5
+
6
+ st.write("")
7
+
8
+ st.markdown('''#### OCR, or Optical Character Recognition, is a computer vision task, \
9
+ which includes the detection of text areas, and the recognition of characters.''')
10
+ st.write("")
11
+ st.write("")
12
+
13
+ st.markdown("##### This app allows you to compare, from a given image, the results of different solutions:")
14
+ st.markdown("##### *EasyOcr, PaddleOCR, MMOCR, Tesseract*")
15
+ st.write("")
16
+ st.write("")
17
+ st.markdown("πŸ‘ˆ Select the **About** page from the sidebar for information on how the app works")
18
+
19
+ st.markdown("πŸ‘ˆ or directly select the **App** page")
app_pages/img_demo_1.jpg ADDED
app_pages/img_demo_2.jpg ADDED
app_pages/ocr.png ADDED
pages/App.py β†’ app_pages/ocr_comparator.py RENAMED
@@ -929,491 +929,482 @@ def raz():
929
  ###################################################################################################
930
  ## MAIN
931
  ###################################################################################################
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
932
 
933
- ##----------- Initializations ---------------------------------------------------------------------
934
- #print("PID : ", os.getpid())
935
- st.set_page_config(page_title='OCR Comparator', layout ="wide")
936
- st.markdown("""
937
- <style>
938
- section[data-testid="stSidebar"] {
939
- width: 5rem;
940
- }
941
- </style>
942
- """,unsafe_allow_html=True)
943
-
944
-
945
- st.title("OCR solutions comparator")
946
- st.markdown("##### *EasyOCR, PPOCR, MMOCR, Tesseract*")
947
- #st.markdown("#### PID : " + str(os.getpid()))
948
-
949
- # Initializations
950
- with st.spinner("Initializations in progress ..."):
951
- reader_type_list, reader_type_dict, list_dict_lang, \
952
- cols_size, dict_back_colors, fig_colorscale = initializations()
953
- img_demo_1, img_demo_2 = get_demo()
954
-
955
- ##----------- Choose language & image -------------------------------------------------------------
956
- st.markdown("#### Choose languages for the text recognition:")
957
- lang_col = st.columns(4)
958
- easyocr_key_lang = lang_col[0].selectbox(reader_type_list[0]+" :", list_dict_lang[0].keys(), 26)
959
- easyocr_lang = list_dict_lang[0][easyocr_key_lang]
960
- ppocr_key_lang = lang_col[1].selectbox(reader_type_list[1]+" :", list_dict_lang[1].keys(), 22)
961
- ppocr_lang = list_dict_lang[1][ppocr_key_lang]
962
- mmocr_key_lang = lang_col[2].selectbox(reader_type_list[2]+" :", list_dict_lang[2].keys(), 0)
963
- mmocr_lang = list_dict_lang[2][mmocr_key_lang]
964
- tesserocr_key_lang = lang_col[3].selectbox(reader_type_list[3]+" :", list_dict_lang[3].keys(), 35)
965
- tesserocr_lang = list_dict_lang[3][tesserocr_key_lang]
966
-
967
- st.markdown("#### Choose picture:")
968
- cols_pict = st.columns([1, 2])
969
- img_typ = cols_pict[0].radio("", ['Upload file', 'Take a picture', 'Use a demo file'], \
970
- index=0, on_change=raz)
971
-
972
- if img_typ == 'Upload file':
973
- image_file = cols_pict[1].file_uploader("Upload a file:", type=["jpg","jpeg"], on_change=raz)
974
- if img_typ == 'Take a picture':
975
- image_file = cols_pict[1].camera_input("Take a picture:", on_change=raz)
976
- if img_typ == 'Use a demo file':
977
- with st.expander('Choose a demo file:', expanded=True):
978
- demo_used = st.radio('', ['File 1', 'File 2'], index=0, \
979
- horizontal=True, on_change=raz)
980
- cols_demo = st.columns([1, 2])
981
- cols_demo[0].markdown('###### File 1')
982
- cols_demo[0].image(img_demo_1, width=150)
983
- cols_demo[1].markdown('###### File 2')
984
- cols_demo[1].image(img_demo_2, width=300)
985
- if demo_used == 'File 1':
986
- image_file = 'img_demo_1.jpg'
987
- else:
988
- image_file = 'img_demo_2.jpg'
989
-
990
- ##----------- Process input image -----------------------------------------------------------------
991
- if image_file is not None:
992
- image_path, image_orig, image_cv2 = load_image(image_file)
993
- list_images = [image_orig, image_cv2]
994
-
995
- ##----------- Form with original image & hyperparameters for detectors ----------------------------
996
- with st.form("form1"):
997
- col1, col2 = st.columns(2, ) #gap="medium")
998
- col1.markdown("##### Original image")
999
- col1.image(list_images[0], width=500, use_column_width=True)
1000
- col2.markdown("##### Hyperparameters values for detection")
1001
-
1002
- with col2.expander("Choose detection hyperparameters for " + reader_type_list[0], \
1003
- expanded=False):
1004
- t0_min_size = st.slider("min_size", 1, 20, 10, step=1, \
1005
- help="min_size (int, default = 10) - Filter text box smaller than \
1006
- minimum value in pixel")
1007
- t0_text_threshold = st.slider("text_threshold", 0.1, 1., 0.7, step=0.1, \
1008
- help="text_threshold (float, default = 0.7) - Text confidence threshold")
1009
- t0_low_text = st.slider("low_text", 0.1, 1., 0.4, step=0.1, \
1010
- help="low_text (float, default = 0.4) - Text low-bound score")
1011
- t0_link_threshold = st.slider("link_threshold", 0.1, 1., 0.4, step=0.1, \
1012
- help="link_threshold (float, default = 0.4) - Link confidence threshold")
1013
- t0_canvas_size = st.slider("canvas_size", 2000, 5000, 2560, step=10, \
1014
- help='''canvas_size (int, default = 2560) \n
1015
- Maximum e size. Image bigger than this value will be resized down''')
1016
- t0_mag_ratio = st.slider("mag_ratio", 0.1, 5., 1., step=0.1, \
1017
- help="mag_ratio (float, default = 1) - Image magnification ratio")
1018
- t0_slope_ths = st.slider("slope_ths", 0.01, 1., 0.1, step=0.01, \
1019
- help='''slope_ths (float, default = 0.1) - Maximum slope \
1020
- (delta y/delta x) to considered merging. \n
1021
- Low valuans tiled boxes will not be merged.''')
1022
- t0_ycenter_ths = st.slider("ycenter_ths", 0.1, 1., 0.5, step=0.1, \
1023
- help='''ycenter_ths (float, default = 0.5) - Maximum shift in y direction. \n
1024
- Boxes wiifferent level should not be merged.''')
1025
- t0_height_ths = st.slider("height_ths", 0.1, 1., 0.5, step=0.1, \
1026
- help='''height_ths (float, default = 0.5) - Maximum different in box height. \n
1027
- Boxes wiery different text size should not be merged.''')
1028
- t0_width_ths = st.slider("width_ths", 0.1, 1., 0.5, step=0.1, \
1029
- help="width_ths (float, default = 0.5) - Maximum horizontal \
1030
- distance to merge boxes.")
1031
- t0_add_margin = st.slider("add_margin", 0.1, 1., 0.1, step=0.1, \
1032
- help='''add_margin (float, default = 0.1) - \
1033
- Extend bounding boxes in all direction by certain value. \n
1034
- This is rtant for language with complex script (E.g. Thai).''')
1035
- t0_optimal_num_chars = st.slider("optimal_num_chars", None, 100, None, step=10, \
1036
- help="optimal_num_chars (int, default = None) - If specified, bounding boxes \
1037
- with estimated number of characters near this value are returned first.")
1038
-
1039
- with col2.expander("Choose detection hyperparameters for " + reader_type_list[1], \
1040
- expanded=False):
1041
- t1_det_algorithm = st.selectbox('det_algorithm', ['DB'], \
1042
- help='Type of detection algorithm selected. (default = DB)')
1043
- t1_det_max_side_len = st.slider('det_max_side_len', 500, 2000, 960, step=10, \
1044
- help='''The maximum size of the long side of the image. (default = 960)\n
1045
- Limit thximum image height and width.\n
1046
- When theg side exceeds this value, the long side will be resized to this size, and the short side \
1047
- will be ed proportionally.''')
1048
- t1_det_db_thresh = st.slider('det_db_thresh', 0.1, 1., 0.3, step=0.1, \
1049
- help='''Binarization threshold value of DB output map. (default = 0.3) \n
1050
- Used to er the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result.''')
1051
- t1_det_db_box_thresh = st.slider('det_db_box_thresh', 0.1, 1., 0.6, step=0.1, \
1052
- help='''The threshold value of the DB output box. (default = 0.6) \n
1053
- DB post-essing filter box threshold, if there is a missing box detected, it can be reduced as appropriate. \n
1054
- Boxes sclower than this value will be discard.''')
1055
- t1_det_db_unclip_ratio = st.slider('det_db_unclip_ratio', 1., 3.0, 1.6, step=0.1, \
1056
- help='''The expanded ratio of DB output box. (default = 1.6) \n
1057
- Indicatee compactness of the text box, the smaller the value, the closer the text box to the text.''')
1058
- t1_det_east_score_thresh = st.slider('det_east_cover_thresh', 0.1, 1., 0.8, step=0.1, \
1059
- help="Binarization threshold value of EAST output map. (default = 0.8)")
1060
- t1_det_east_cover_thresh = st.slider('det_east_cover_thresh', 0.1, 1., 0.1, step=0.1, \
1061
- help='''The threshold value of the EAST output box. (default = 0.1) \n
1062
- Boxes sclower than this value will be discarded.''')
1063
- t1_det_east_nms_thresh = st.slider('det_east_nms_thresh', 0.1, 1., 0.2, step=0.1, \
1064
- help="The NMS threshold value of EAST model output box. (default = 0.2)")
1065
- t1_det_db_score_mode = st.selectbox('det_db_score_mode', ['fast', 'slow'], \
1066
- help='''slow: use polygon box to calculate bbox score, fast: use rectangle box \
1067
- to calculate. (default = fast) \n
1068
- Use rectlar box to calculate faster, and polygonal box more accurate for curved text area.''')
1069
-
1070
- with col2.expander("Choose detection hyperparameters for " + reader_type_list[2], \
1071
- expanded=False):
1072
- t2_det = st.selectbox('det', ['DB_r18','DB_r50','DBPP_r50','DRRG','FCE_IC15', \
1073
- 'FCE_CTW_DCNv2','MaskRCNN_CTW','MaskRCNN_IC15', \
1074
- 'MaskRCNN_IC17', 'PANet_CTW','PANet_IC15','PS_CTW',\
1075
- 'PS_IC15','Tesseract','TextSnake'], 10, \
1076
- help='Text detection algorithm. (default = PANet_IC15)')
1077
- st.write("###### *More about text detection models* πŸ‘‰ \
1078
- [here](https://mmocr.readthedocs.io/en/latest/textdet_models.html)")
1079
- t2_merge_xdist = st.slider('merge_xdist', 1, 50, 20, step=1, \
1080
- help='The maximum x-axis distance to merge boxes. (defaut=20)')
1081
-
1082
- with col2.expander("Choose detection hyperparameters for " + reader_type_list[3], \
1083
- expanded=False):
1084
- t3_psm = st.selectbox('Page segmentation mode (psm)', \
1085
- [' - Default', \
1086
- ' 4 Assume a single column of text of variable sizes', \
1087
- ' 5 Assume a single uniform block of vertically aligned text', \
1088
- ' 6 Assume a single uniform block of text', \
1089
- ' 7 Treat the image as a single text line', \
1090
- ' 8 Treat the image as a single word', \
1091
- ' 9 Treat the image as a single word in a circle', \
1092
- '10 Treat the image as a single character', \
1093
- '11 Sparse text. Find as much text as possible in no \
1094
- particular order', \
1095
- '13 Raw line. Treat the image as a single text line, \
1096
- bypassing hacks that are Tesseract-specific'])
1097
- t3_oem = st.selectbox('OCR engine mode', ['0 Legacy engine only', \
1098
- '1 Neural nets LSTM engine only', \
1099
- '2 Legacy + LSTM engines', \
1100
- '3 Default, based on what is available'], 3)
1101
- t3_whitelist = st.text_input('Limit tesseract to recognize only this characters :', \
1102
- placeholder='Limit tesseract to recognize only this characters', \
1103
- help='Example for numbers only : 0123456789')
1104
-
1105
- color_hex = col2.color_picker('Set a color for box outlines:', '#004C99')
1106
- color_part = color_hex.lstrip('#')
1107
- color = tuple(int(color_part[i:i+2], 16) for i in (0, 2, 4))
1108
-
1109
- submit_detect = st.form_submit_button("Launch detection")
1110
-
1111
- ##----------- Process text detection --------------------------------------------------------------
1112
- if submit_detect:
1113
- # Process text detection
1114
-
1115
- if t0_optimal_num_chars == 0:
1116
- t0_optimal_num_chars = None
1117
-
1118
- # Construct the config Tesseract parameter
1119
- t3_config = ''
1120
- psm = t3_psm[:2]
1121
- if psm != ' -':
1122
- t3_config += '--psm ' + psm.strip()
1123
- oem = t3_oem[:1]
1124
- if oem != '3':
1125
- t3_config += ' --oem ' + oem
1126
- if t3_whitelist != '':
1127
- t3_config += ' -c tessedit_char_whitelist=' + t3_whitelist
1128
-
1129
- list_params_det = \
1130
- [[easyocr_lang, \
1131
- {'min_size': t0_min_size, 'text_threshold': t0_text_threshold, \
1132
- 'low_text': t0_low_text, 'link_threshold': t0_link_threshold, \
1133
- 'canvas_size': t0_canvas_size, 'mag_ratio': t0_mag_ratio, \
1134
- 'slope_ths': t0_slope_ths, 'ycenter_ths': t0_ycenter_ths, \
1135
- 'height_ths': t0_height_ths, 'width_ths': t0_width_ths, \
1136
- 'add_margin': t0_add_margin, 'optimal_num_chars': t0_optimal_num_chars \
1137
- }], \
1138
- [ppocr_lang, \
1139
- {'det_algorithm': t1_det_algorithm, 'det_max_side_len': t1_det_max_side_len, \
1140
- 'det_db_thresh': t1_det_db_thresh, 'det_db_box_thresh': t1_det_db_box_thresh, \
1141
- 'det_db_unclip_ratio': t1_det_db_unclip_ratio, \
1142
- 'det_east_score_thresh': t1_det_east_score_thresh, \
1143
- 'det_east_cover_thresh': t1_det_east_cover_thresh, \
1144
- 'det_east_nms_thresh': t1_det_east_nms_thresh, \
1145
- 'det_db_score_mode': t1_det_db_score_mode}],
1146
- [mmocr_lang, {'det': t2_det, 'merge_xdist': t2_merge_xdist}],
1147
- [tesserocr_lang, {'lang': tesserocr_lang, 'config': t3_config}]
1148
- ]
1149
-
1150
- show_info1 = st.empty()
1151
- show_info1.info("Readers initializations in progress (it may take a while) ...")
1152
- list_readers = init_readers(list_params_det)
1153
-
1154
- show_info1.info("Text detection in progress ...")
1155
- list_images, list_coordinates = process_detect(image_path, list_images, list_readers, \
1156
- list_params_det, color)
1157
- show_info1.empty()
1158
-
1159
- # Clear previous recognition results
1160
- st.session_state.df_results = pd.DataFrame([])
1161
-
1162
- st.session_state.list_readers = list_readers
1163
- st.session_state.list_coordinates = list_coordinates
1164
- st.session_state.list_images = list_images
1165
- st.session_state.list_params_det = list_params_det
1166
-
1167
- if 'columns_size' not in st.session_state:
1168
- st.session_state.columns_size = [2] + [1 for x in reader_type_list[1:]]
1169
- if 'column_width' not in st.session_state:
1170
- st.session_state.column_width = [500] + [400 for x in reader_type_list[1:]]
1171
- if 'columns_color' not in st.session_state:
1172
- st.session_state.columns_color = ["rgb(228,26,28)"] + \
1173
- ["rgb(0,0,0)" for x in reader_type_list[1:]]
1174
-
1175
- if st.session_state.list_coordinates:
1176
- list_coordinates = st.session_state.list_coordinates
1177
- list_images = st.session_state.list_images
1178
- list_readers = st.session_state.list_readers
1179
- list_params_det = st.session_state.list_params_det
1180
-
1181
- ##----------- Text detection results --------------------------------------------------------------
1182
- st.subheader("Text detection")
1183
- show_detect = st.empty()
1184
- list_ok_detect = []
1185
- with show_detect.container():
1186
- columns = st.columns(st.session_state.columns_size, ) #gap='medium')
1187
- for no_col, col in enumerate(columns):
1188
- column_title = '<p style="font-size: 20px;color:' + \
1189
- st.session_state.columns_color[no_col] + \
1190
- ';">Detection with ' + reader_type_list[no_col]+ '</p>'
1191
- col.markdown(column_title, unsafe_allow_html=True)
1192
- if isinstance(list_images[no_col+2], PIL.Image.Image):
1193
- col.image(list_images[no_col+2], width=st.session_state.column_width[no_col], \
1194
- use_column_width=True)
1195
- list_ok_detect.append(reader_type_list[no_col])
1196
- else:
1197
- col.write(list_images[no_col+2], use_column_width=True)
1198
-
1199
- st.subheader("Text recognition")
1200
 
1201
- st.markdown("##### Using detection performed above by:")
1202
- st.radio('Choose the detecter:', list_ok_detect, key='detect_reader', \
1203
- horizontal=True, on_change=highlight)
 
 
 
1204
 
1205
- ##----------- Form with hyperparameters for recognition -----------------------
1206
- st.markdown("##### Hyperparameters values for recognition:")
1207
- with st.form("form2"):
1208
- with st.expander("Choose recognition hyperparameters for " + reader_type_list[0], \
1209
  expanded=False):
1210
- t0_decoder = st.selectbox('decoder', ['greedy', 'beamsearch', 'wordbeamsearch'], \
1211
- help="decoder (string, default = 'greedy') - options are 'greedy', \
1212
- 'beamsearch' and 'wordbeamsearch.")
1213
- t0_beamWidth = st.slider('beamWidth', 2, 20, 5, step=1, \
1214
- help="beamWidth (int, default = 5) - How many beam to keep when decoder = \
1215
- 'beamsearch' or 'wordbeamsearch'.")
1216
- t0_batch_size = st.slider('batch_size', 1, 10, 1, step=1, \
1217
- help="batch_size (int, default = 1) - batch_size>1 will make EasyOCR faster \
1218
- but use more memory.")
1219
- t0_workers = st.slider('workers', 0, 10, 0, step=1, \
1220
- help="workers (int, default = 0) - Number thread used in of dataloader.")
1221
- t0_allowlist = st.text_input('allowlist', value="", max_chars=None, \
1222
- placeholder='Force EasyOCR to recognize only this subset of characters', \
1223
- help='''allowlist (string) - Force EasyOCR to recognize only subset of characters.\n
1224
- Usefor specific problem (E.g. license plate, etc.)''')
1225
- t0_blocklist = st.text_input('blocklist', value="", max_chars=None, \
1226
- placeholder='Block subset of character (will be ignored if allowlist is given)', \
1227
- help='''blocklist (string) - Block subset of character. This argument will be \
1228
- ignored if allowlist is given.''')
1229
- t0_detail = st.radio('detail', [0, 1], 1, horizontal=True, \
1230
- help="detail (int, default = 1) - Set this to 0 for simple output")
1231
- t0_paragraph = st.radio('paragraph', [True, False], 1, horizontal=True, \
1232
- help='paragraph (bool, default = False) - Combine result into paragraph')
1233
- t0_contrast_ths = st.slider('contrast_ths', 0.05, 1., 0.1, step=0.01, \
1234
- help='''contrast_ths (float, default = 0.1) - Text box with contrast lower than \
1235
- this value will be passed into model 2 times.\n
1236
- Firs with original image and second with contrast adjusted to 'adjust_contrast' value.\n
1237
- The with more confident level will be returned as a result.''')
1238
- t0_adjust_contrast = st.slider('adjust_contrast', 0.1, 1., 0.5, step=0.1, \
1239
- help = 'adjust_contrast (float, default = 0.5) - target contrast level for low \
1240
- contrast text box')
1241
-
1242
- with st.expander("Choose recognition hyperparameters for " + reader_type_list[1], \
 
 
 
1243
  expanded=False):
1244
- t1_rec_algorithm = st.selectbox('rec_algorithm', ['CRNN', 'SVTR_LCNet'], 0, \
1245
- help="Type of recognition algorithm selected. (default=CRNN)")
1246
- t1_rec_batch_num = st.slider('rec_batch_num', 1, 50, step=1, \
1247
- help="When performing recognition, the batchsize of forward images. \
1248
- (default=30)")
1249
- t1_max_text_length = st.slider('max_text_length', 3, 250, 25, step=1, \
1250
- help="The maximum text length that the recognition algorithm can recognize. \
1251
- (default=25)")
1252
- t1_use_space_char = st.radio('use_space_char', [True, False], 0, horizontal=True, \
1253
- help="Whether to recognize spaces. (default=TRUE)")
1254
- t1_drop_score = st.slider('drop_score', 0., 1., 0.25, step=.05, \
1255
- help="Filter the output by score (from the recognition model), and those \
1256
- below this score will not be returned. (default=0.5)")
1257
-
1258
- with st.expander("Choose recognition hyperparameters for " + reader_type_list[2], \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1259
  expanded=False):
1260
- t2_recog = st.selectbox('recog', ['ABINet','CRNN','CRNN_TPS','MASTER', \
1261
- 'NRTR_1/16-1/8','NRTR_1/8-1/4','RobustScanner','SAR','SAR_CN', \
1262
- 'SATRN','SATRN_sm','SEG','Tesseract'], 7, \
1263
- help='Text recognition algorithm. (default = SAR)')
1264
- st.write("###### *More about text recognition models* πŸ‘‰ \
1265
- [here](https://mmocr.readthedocs.io/en/latest/textrecog_models.html)")
1266
-
1267
- with st.expander("Choose recognition hyperparameters for " + reader_type_list[3], \
 
 
 
1268
  expanded=False):
1269
- t3r_psm = st.selectbox('Page segmentation mode (psm)', \
1270
- [' - Default', \
1271
- ' 4 Assume a single column of text of variable sizes', \
1272
- ' 5 Assume a single uniform block of vertically aligned \
1273
- text', \
1274
- ' 6 Assume a single uniform block of text', \
1275
- ' 7 Treat the image as a single text line', \
1276
- ' 8 Treat the image as a single word', \
1277
- ' 9 Treat the image as a single word in a circle', \
1278
- '10 Treat the image as a single character', \
1279
- '11 Sparse text. Find as much text as possible in no \
1280
  particular order', \
1281
- '13 Raw line. Treat the image as a single text line, \
1282
  bypassing hacks that are Tesseract-specific'])
1283
- t3r_oem = st.selectbox('OCR engine mode', ['0 Legacy engine only', \
1284
- '1 Neural nets LSTM engine only', \
1285
- '2 Legacy + LSTM engines', \
1286
- '3 Default, based on what is available'], 3)
1287
- t3r_whitelist = st.text_input('Limit tesseract to recognize only this \
1288
- characters :', \
1289
- placeholder='Limit tesseract to recognize only this characters', \
1290
- help='Example for numbers only : 0123456789')
1291
-
1292
- submit_reco = st.form_submit_button("Launch recognition")
1293
-
1294
- if submit_reco:
1295
- process_detect.clear()
1296
- ##----------- Process recognition ------------------------------------------
1297
- reader_ind = reader_type_dict[st.session_state.detect_reader]
1298
- list_boxes = list_coordinates[reader_ind]
 
 
 
 
1299
 
1300
  # Construct the config Tesseract parameter
1301
- t3r_config = ''
1302
- psm = t3r_psm[:2]
1303
  if psm != ' -':
1304
- t3r_config += '--psm ' + psm.strip()
1305
- oem = t3r_oem[:1]
1306
  if oem != '3':
1307
- t3r_config += ' --oem ' + oem
1308
- if t3r_whitelist != '':
1309
- t3r_config += ' -c tessedit_char_whitelist=' + t3r_whitelist
1310
-
1311
- list_params_rec = \
1312
- [{'decoder': t0_decoder, 'beamWidth': t0_beamWidth, \
1313
- 'batch_size': t0_batch_size, 'workers': t0_workers, \
1314
- 'allowlist': t0_allowlist, 'blocklist': t0_blocklist, \
1315
- 'detail': t0_detail, 'paragraph': t0_paragraph, \
1316
- 'contrast_ths': t0_contrast_ths, 'adjust_contrast': t0_adjust_contrast
1317
- },
1318
- { **list_params_det[1][1], **{'rec_algorithm': t1_rec_algorithm, \
1319
- 'rec_batch_num': t1_rec_batch_num, 'max_text_length': t1_max_text_length, \
1320
- 'use_space_char': t1_use_space_char, 'drop_score': t1_drop_score}, \
1321
- **{'lang': list_params_det[1][0]}
1322
- },
1323
- {'recog': t2_recog},
1324
- {'lang': tesserocr_lang, 'config': t3r_config}
 
 
 
 
 
1325
  ]
1326
 
1327
- show_info2 = st.empty()
1328
-
1329
- with show_info2.container():
1330
- st.info("Text recognition in progress ...")
1331
- df_results, df_results_tesseract, list_reco_status = \
1332
- process_recog(list_readers, list_images[1], list_boxes, list_params_rec)
1333
- show_info2.empty()
1334
-
1335
- st.session_state.df_results = df_results
1336
- st.session_state.list_boxes = list_boxes
1337
- st.session_state.df_results_tesseract = df_results_tesseract
1338
- st.session_state.list_reco_status = list_reco_status
1339
-
1340
- if 'df_results' in st.session_state:
1341
- if not st.session_state.df_results.empty:
1342
- ##----------- Show recognition results ------------------------------------------------------------
1343
- results_cols = st.session_state.df_results.columns
1344
- list_col_text = np.arange(1, len(cols_size), 2)
1345
- list_col_confid = np.arange(2, len(cols_size), 2)
1346
-
1347
- dict_draw_reco = {'in_image': st.session_state.list_images[1], \
1348
- 'in_boxes_coordinates': st.session_state.list_boxes, \
1349
- 'in_list_texts': [st.session_state.df_results[x].to_list() \
1350
- for x in results_cols[list_col_text]], \
1351
- 'in_list_confid': [st.session_state.df_results[x].to_list() \
1352
- for x in results_cols[list_col_confid]], \
1353
- 'in_dict_back_colors': dict_back_colors, \
1354
- 'in_df_results_tesseract' : st.session_state.df_results_tesseract, \
1355
- 'in_reader_type_list': reader_type_list
1356
- }
1357
- show_reco = st.empty()
1358
-
1359
- with st.form("form3"):
1360
- st.plotly_chart(fig_colorscale, use_container_width=True)
1361
-
1362
- col_font, col_threshold = st.columns(2)
1363
-
1364
- col_font.slider('Font scale', 1, 7, 1, step=1, key="font_scale_sld")
1365
- col_threshold.slider('% confidence threshold for text color change', 40, 100, 64, \
1366
- step=1, key="conf_threshold_sld")
1367
- col_threshold.write("(text color is black below this % confidence threshold, \
1368
- and white above)")
1369
-
1370
- draw_reco_images(**dict_draw_reco)
1371
-
1372
- submit_resize = st.form_submit_button("Refresh")
1373
-
1374
- if submit_resize:
1375
- draw_reco_images(**dict_draw_reco, \
1376
- in_font_scale=st.session_state.font_scale_sld, \
1377
- in_conf_threshold=st.session_state.conf_threshold_sld)
1378
-
1379
- st.subheader("Recognition details")
1380
- with st.expander("Detailed areas for EasyOCR, PPOCR, MMOCR", expanded=True):
1381
- cols = st.columns(cols_size)
1382
- cols[0].markdown('#### Detected area')
1383
- for i in range(1, (len(reader_type_list)-1)*2, 2):
1384
- cols[i].markdown('#### with ' + reader_type_list[i//2])
1385
-
1386
- for row in st.session_state.df_results.itertuples():
1387
- #cols = st.columns(1 + len(reader_type_list)*2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1388
  cols = st.columns(cols_size)
1389
- cols[0].image(row.cropped_image, width=150)
1390
- for ind_col in range(1, len(cols), 2):
1391
- cols[ind_col].write(getattr(row, results_cols[ind_col]))
1392
- cols[ind_col+1].write("("+str( \
1393
- getattr(row, results_cols[ind_col+1]))+"%)")
1394
-
1395
- st.download_button(
1396
- label="Download results as CSV file",
1397
- data=convert_df(st.session_state.df_results),
1398
- file_name='OCR_comparator_results.csv',
1399
- mime='text/csv',
1400
- )
1401
-
1402
- if not st.session_state.df_results_tesseract.empty:
1403
- with st.expander("Detailed areas for Tesseract", expanded=False):
1404
- cols = st.columns([2,2,1])
1405
  cols[0].markdown('#### Detected area')
1406
- cols[1].markdown('#### with Tesseract')
1407
-
1408
- for row in st.session_state.df_results_tesseract.itertuples():
1409
- cols = st.columns([2,2,1])
1410
- cols[0].image(row.cropped, width=150)
1411
- cols[1].write(getattr(row, 'text'))
1412
- cols[2].write("("+str(getattr(row, 'conf'))+"%)")
 
 
 
 
1413
 
1414
  st.download_button(
1415
- label="Download Tesseract results as CSV file",
1416
  data=convert_df(st.session_state.df_results),
1417
- file_name='OCR_comparator_Tesseract_results.csv',
1418
  mime='text/csv',
1419
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
929
  ###################################################################################################
930
  ## MAIN
931
  ###################################################################################################
932
+ def app():
933
+ ##----------- Initializations ---------------------------------------------------------------------
934
+ #print("PID : ", os.getpid())
935
+
936
+ st.title("OCR solutions comparator")
937
+ st.markdown("##### *EasyOCR, PPOCR, MMOCR, Tesseract*")
938
+ #st.markdown("#### PID : " + str(os.getpid()))
939
+
940
+ # Initializations
941
+ with st.spinner("Initializations in progress ..."):
942
+ reader_type_list, reader_type_dict, list_dict_lang, \
943
+ cols_size, dict_back_colors, fig_colorscale = initializations()
944
+ img_demo_1, img_demo_2 = get_demo()
945
+
946
+ ##----------- Choose language & image -------------------------------------------------------------
947
+ st.markdown("#### Choose languages for the text recognition:")
948
+ lang_col = st.columns(4)
949
+ easyocr_key_lang = lang_col[0].selectbox(reader_type_list[0]+" :", list_dict_lang[0].keys(), 26)
950
+ easyocr_lang = list_dict_lang[0][easyocr_key_lang]
951
+ ppocr_key_lang = lang_col[1].selectbox(reader_type_list[1]+" :", list_dict_lang[1].keys(), 22)
952
+ ppocr_lang = list_dict_lang[1][ppocr_key_lang]
953
+ mmocr_key_lang = lang_col[2].selectbox(reader_type_list[2]+" :", list_dict_lang[2].keys(), 0)
954
+ mmocr_lang = list_dict_lang[2][mmocr_key_lang]
955
+ tesserocr_key_lang = lang_col[3].selectbox(reader_type_list[3]+" :", list_dict_lang[3].keys(), 35)
956
+ tesserocr_lang = list_dict_lang[3][tesserocr_key_lang]
957
+
958
+ st.markdown("#### Choose picture:")
959
+ cols_pict = st.columns([1, 2])
960
+ img_typ = cols_pict[0].radio("", ['Upload file', 'Take a picture', 'Use a demo file'], \
961
+ index=0, on_change=raz)
962
+
963
+ if img_typ == 'Upload file':
964
+ image_file = cols_pict[1].file_uploader("Upload a file:", type=["jpg","jpeg"], on_change=raz)
965
+ if img_typ == 'Take a picture':
966
+ image_file = cols_pict[1].camera_input("Take a picture:", on_change=raz)
967
+ if img_typ == 'Use a demo file':
968
+ with st.expander('Choose a demo file:', expanded=True):
969
+ demo_used = st.radio('', ['File 1', 'File 2'], index=0, \
970
+ horizontal=True, on_change=raz)
971
+ cols_demo = st.columns([1, 2])
972
+ cols_demo[0].markdown('###### File 1')
973
+ cols_demo[0].image(img_demo_1, width=150)
974
+ cols_demo[1].markdown('###### File 2')
975
+ cols_demo[1].image(img_demo_2, width=300)
976
+ if demo_used == 'File 1':
977
+ image_file = 'img_demo_1.jpg'
978
+ else:
979
+ image_file = 'img_demo_2.jpg'
980
 
981
+ ##----------- Process input image -----------------------------------------------------------------
982
+ if image_file is not None:
983
+ image_path, image_orig, image_cv2 = load_image(image_file)
984
+ list_images = [image_orig, image_cv2]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
985
 
986
+ ##----------- Form with original image & hyperparameters for detectors ----------------------------
987
+ with st.form("form1"):
988
+ col1, col2 = st.columns(2, ) #gap="medium")
989
+ col1.markdown("##### Original image")
990
+ col1.image(list_images[0], width=500, use_column_width=True)
991
+ col2.markdown("##### Hyperparameters values for detection")
992
 
993
+ with col2.expander("Choose detection hyperparameters for " + reader_type_list[0], \
 
 
 
994
  expanded=False):
995
+ t0_min_size = st.slider("min_size", 1, 20, 10, step=1, \
996
+ help="min_size (int, default = 10) - Filter text box smaller than \
997
+ minimum value in pixel")
998
+ t0_text_threshold = st.slider("text_threshold", 0.1, 1., 0.7, step=0.1, \
999
+ help="text_threshold (float, default = 0.7) - Text confidence threshold")
1000
+ t0_low_text = st.slider("low_text", 0.1, 1., 0.4, step=0.1, \
1001
+ help="low_text (float, default = 0.4) - Text low-bound score")
1002
+ t0_link_threshold = st.slider("link_threshold", 0.1, 1., 0.4, step=0.1, \
1003
+ help="link_threshold (float, default = 0.4) - Link confidence threshold")
1004
+ t0_canvas_size = st.slider("canvas_size", 2000, 5000, 2560, step=10, \
1005
+ help='''canvas_size (int, default = 2560) \n
1006
+ Maximum e size. Image bigger than this value will be resized down''')
1007
+ t0_mag_ratio = st.slider("mag_ratio", 0.1, 5., 1., step=0.1, \
1008
+ help="mag_ratio (float, default = 1) - Image magnification ratio")
1009
+ t0_slope_ths = st.slider("slope_ths", 0.01, 1., 0.1, step=0.01, \
1010
+ help='''slope_ths (float, default = 0.1) - Maximum slope \
1011
+ (delta y/delta x) to considered merging. \n
1012
+ Low valuans tiled boxes will not be merged.''')
1013
+ t0_ycenter_ths = st.slider("ycenter_ths", 0.1, 1., 0.5, step=0.1, \
1014
+ help='''ycenter_ths (float, default = 0.5) - Maximum shift in y direction. \n
1015
+ Boxes wiifferent level should not be merged.''')
1016
+ t0_height_ths = st.slider("height_ths", 0.1, 1., 0.5, step=0.1, \
1017
+ help='''height_ths (float, default = 0.5) - Maximum different in box height. \n
1018
+ Boxes wiery different text size should not be merged.''')
1019
+ t0_width_ths = st.slider("width_ths", 0.1, 1., 0.5, step=0.1, \
1020
+ help="width_ths (float, default = 0.5) - Maximum horizontal \
1021
+ distance to merge boxes.")
1022
+ t0_add_margin = st.slider("add_margin", 0.1, 1., 0.1, step=0.1, \
1023
+ help='''add_margin (float, default = 0.1) - \
1024
+ Extend bounding boxes in all direction by certain value. \n
1025
+ This is rtant for language with complex script (E.g. Thai).''')
1026
+ t0_optimal_num_chars = st.slider("optimal_num_chars", None, 100, None, step=10, \
1027
+ help="optimal_num_chars (int, default = None) - If specified, bounding boxes \
1028
+ with estimated number of characters near this value are returned first.")
1029
+
1030
+ with col2.expander("Choose detection hyperparameters for " + reader_type_list[1], \
1031
  expanded=False):
1032
+ t1_det_algorithm = st.selectbox('det_algorithm', ['DB'], \
1033
+ help='Type of detection algorithm selected. (default = DB)')
1034
+ t1_det_max_side_len = st.slider('det_max_side_len', 500, 2000, 960, step=10, \
1035
+ help='''The maximum size of the long side of the image. (default = 960)\n
1036
+ Limit thximum image height and width.\n
1037
+ When theg side exceeds this value, the long side will be resized to this size, and the short side \
1038
+ will be ed proportionally.''')
1039
+ t1_det_db_thresh = st.slider('det_db_thresh', 0.1, 1., 0.3, step=0.1, \
1040
+ help='''Binarization threshold value of DB output map. (default = 0.3) \n
1041
+ Used to er the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result.''')
1042
+ t1_det_db_box_thresh = st.slider('det_db_box_thresh', 0.1, 1., 0.6, step=0.1, \
1043
+ help='''The threshold value of the DB output box. (default = 0.6) \n
1044
+ DB post-essing filter box threshold, if there is a missing box detected, it can be reduced as appropriate. \n
1045
+ Boxes sclower than this value will be discard.''')
1046
+ t1_det_db_unclip_ratio = st.slider('det_db_unclip_ratio', 1., 3.0, 1.6, step=0.1, \
1047
+ help='''The expanded ratio of DB output box. (default = 1.6) \n
1048
+ Indicatee compactness of the text box, the smaller the value, the closer the text box to the text.''')
1049
+ t1_det_east_score_thresh = st.slider('det_east_cover_thresh', 0.1, 1., 0.8, step=0.1, \
1050
+ help="Binarization threshold value of EAST output map. (default = 0.8)")
1051
+ t1_det_east_cover_thresh = st.slider('det_east_cover_thresh', 0.1, 1., 0.1, step=0.1, \
1052
+ help='''The threshold value of the EAST output box. (default = 0.1) \n
1053
+ Boxes sclower than this value will be discarded.''')
1054
+ t1_det_east_nms_thresh = st.slider('det_east_nms_thresh', 0.1, 1., 0.2, step=0.1, \
1055
+ help="The NMS threshold value of EAST model output box. (default = 0.2)")
1056
+ t1_det_db_score_mode = st.selectbox('det_db_score_mode', ['fast', 'slow'], \
1057
+ help='''slow: use polygon box to calculate bbox score, fast: use rectangle box \
1058
+ to calculate. (default = fast) \n
1059
+ Use rectlar box to calculate faster, and polygonal box more accurate for curved text area.''')
1060
+
1061
+ with col2.expander("Choose detection hyperparameters for " + reader_type_list[2], \
1062
  expanded=False):
1063
+ t2_det = st.selectbox('det', ['DB_r18','DB_r50','DBPP_r50','DRRG','FCE_IC15', \
1064
+ 'FCE_CTW_DCNv2','MaskRCNN_CTW','MaskRCNN_IC15', \
1065
+ 'MaskRCNN_IC17', 'PANet_CTW','PANet_IC15','PS_CTW',\
1066
+ 'PS_IC15','Tesseract','TextSnake'], 10, \
1067
+ help='Text detection algorithm. (default = PANet_IC15)')
1068
+ st.write("###### *More about text detection models* πŸ‘‰ \
1069
+ [here](https://mmocr.readthedocs.io/en/latest/textdet_models.html)")
1070
+ t2_merge_xdist = st.slider('merge_xdist', 1, 50, 20, step=1, \
1071
+ help='The maximum x-axis distance to merge boxes. (defaut=20)')
1072
+
1073
+ with col2.expander("Choose detection hyperparameters for " + reader_type_list[3], \
1074
  expanded=False):
1075
+ t3_psm = st.selectbox('Page segmentation mode (psm)', \
1076
+ [' - Default', \
1077
+ ' 4 Assume a single column of text of variable sizes', \
1078
+ ' 5 Assume a single uniform block of vertically aligned text', \
1079
+ ' 6 Assume a single uniform block of text', \
1080
+ ' 7 Treat the image as a single text line', \
1081
+ ' 8 Treat the image as a single word', \
1082
+ ' 9 Treat the image as a single word in a circle', \
1083
+ '10 Treat the image as a single character', \
1084
+ '11 Sparse text. Find as much text as possible in no \
 
1085
  particular order', \
1086
+ '13 Raw line. Treat the image as a single text line, \
1087
  bypassing hacks that are Tesseract-specific'])
1088
+ t3_oem = st.selectbox('OCR engine mode', ['0 Legacy engine only', \
1089
+ '1 Neural nets LSTM engine only', \
1090
+ '2 Legacy + LSTM engines', \
1091
+ '3 Default, based on what is available'], 3)
1092
+ t3_whitelist = st.text_input('Limit tesseract to recognize only this characters :', \
1093
+ placeholder='Limit tesseract to recognize only this characters', \
1094
+ help='Example for numbers only : 0123456789')
1095
+
1096
+ color_hex = col2.color_picker('Set a color for box outlines:', '#004C99')
1097
+ color_part = color_hex.lstrip('#')
1098
+ color = tuple(int(color_part[i:i+2], 16) for i in (0, 2, 4))
1099
+
1100
+ submit_detect = st.form_submit_button("Launch detection")
1101
+
1102
+ ##----------- Process text detection --------------------------------------------------------------
1103
+ if submit_detect:
1104
+ # Process text detection
1105
+
1106
+ if t0_optimal_num_chars == 0:
1107
+ t0_optimal_num_chars = None
1108
 
1109
  # Construct the config Tesseract parameter
1110
+ t3_config = ''
1111
+ psm = t3_psm[:2]
1112
  if psm != ' -':
1113
+ t3_config += '--psm ' + psm.strip()
1114
+ oem = t3_oem[:1]
1115
  if oem != '3':
1116
+ t3_config += ' --oem ' + oem
1117
+ if t3_whitelist != '':
1118
+ t3_config += ' -c tessedit_char_whitelist=' + t3_whitelist
1119
+
1120
+ list_params_det = \
1121
+ [[easyocr_lang, \
1122
+ {'min_size': t0_min_size, 'text_threshold': t0_text_threshold, \
1123
+ 'low_text': t0_low_text, 'link_threshold': t0_link_threshold, \
1124
+ 'canvas_size': t0_canvas_size, 'mag_ratio': t0_mag_ratio, \
1125
+ 'slope_ths': t0_slope_ths, 'ycenter_ths': t0_ycenter_ths, \
1126
+ 'height_ths': t0_height_ths, 'width_ths': t0_width_ths, \
1127
+ 'add_margin': t0_add_margin, 'optimal_num_chars': t0_optimal_num_chars \
1128
+ }], \
1129
+ [ppocr_lang, \
1130
+ {'det_algorithm': t1_det_algorithm, 'det_max_side_len': t1_det_max_side_len, \
1131
+ 'det_db_thresh': t1_det_db_thresh, 'det_db_box_thresh': t1_det_db_box_thresh, \
1132
+ 'det_db_unclip_ratio': t1_det_db_unclip_ratio, \
1133
+ 'det_east_score_thresh': t1_det_east_score_thresh, \
1134
+ 'det_east_cover_thresh': t1_det_east_cover_thresh, \
1135
+ 'det_east_nms_thresh': t1_det_east_nms_thresh, \
1136
+ 'det_db_score_mode': t1_det_db_score_mode}],
1137
+ [mmocr_lang, {'det': t2_det, 'merge_xdist': t2_merge_xdist}],
1138
+ [tesserocr_lang, {'lang': tesserocr_lang, 'config': t3_config}]
1139
  ]
1140
 
1141
+ show_info1 = st.empty()
1142
+ show_info1.info("Readers initializations in progress (it may take a while) ...")
1143
+ list_readers = init_readers(list_params_det)
1144
+
1145
+ show_info1.info("Text detection in progress ...")
1146
+ list_images, list_coordinates = process_detect(image_path, list_images, list_readers, \
1147
+ list_params_det, color)
1148
+ show_info1.empty()
1149
+
1150
+ # Clear previous recognition results
1151
+ st.session_state.df_results = pd.DataFrame([])
1152
+
1153
+ st.session_state.list_readers = list_readers
1154
+ st.session_state.list_coordinates = list_coordinates
1155
+ st.session_state.list_images = list_images
1156
+ st.session_state.list_params_det = list_params_det
1157
+
1158
+ if 'columns_size' not in st.session_state:
1159
+ st.session_state.columns_size = [2] + [1 for x in reader_type_list[1:]]
1160
+ if 'column_width' not in st.session_state:
1161
+ st.session_state.column_width = [500] + [400 for x in reader_type_list[1:]]
1162
+ if 'columns_color' not in st.session_state:
1163
+ st.session_state.columns_color = ["rgb(228,26,28)"] + \
1164
+ ["rgb(0,0,0)" for x in reader_type_list[1:]]
1165
+
1166
+ if st.session_state.list_coordinates:
1167
+ list_coordinates = st.session_state.list_coordinates
1168
+ list_images = st.session_state.list_images
1169
+ list_readers = st.session_state.list_readers
1170
+ list_params_det = st.session_state.list_params_det
1171
+
1172
+ ##----------- Text detection results --------------------------------------------------------------
1173
+ st.subheader("Text detection")
1174
+ show_detect = st.empty()
1175
+ list_ok_detect = []
1176
+ with show_detect.container():
1177
+ columns = st.columns(st.session_state.columns_size, ) #gap='medium')
1178
+ for no_col, col in enumerate(columns):
1179
+ column_title = '<p style="font-size: 20px;color:' + \
1180
+ st.session_state.columns_color[no_col] + \
1181
+ ';">Detection with ' + reader_type_list[no_col]+ '</p>'
1182
+ col.markdown(column_title, unsafe_allow_html=True)
1183
+ if isinstance(list_images[no_col+2], PIL.Image.Image):
1184
+ col.image(list_images[no_col+2], width=st.session_state.column_width[no_col], \
1185
+ use_column_width=True)
1186
+ list_ok_detect.append(reader_type_list[no_col])
1187
+ else:
1188
+ col.write(list_images[no_col+2], use_column_width=True)
1189
+
1190
+ st.subheader("Text recognition")
1191
+
1192
+ st.markdown("##### Using detection performed above by:")
1193
+ st.radio('Choose the detecter:', list_ok_detect, key='detect_reader', \
1194
+ horizontal=True, on_change=highlight)
1195
+
1196
+ ##----------- Form with hyperparameters for recognition -----------------------
1197
+ st.markdown("##### Hyperparameters values for recognition:")
1198
+ with st.form("form2"):
1199
+ with st.expander("Choose recognition hyperparameters for " + reader_type_list[0], \
1200
+ expanded=False):
1201
+ t0_decoder = st.selectbox('decoder', ['greedy', 'beamsearch', 'wordbeamsearch'], \
1202
+ help="decoder (string, default = 'greedy') - options are 'greedy', \
1203
+ 'beamsearch' and 'wordbeamsearch.")
1204
+ t0_beamWidth = st.slider('beamWidth', 2, 20, 5, step=1, \
1205
+ help="beamWidth (int, default = 5) - How many beam to keep when decoder = \
1206
+ 'beamsearch' or 'wordbeamsearch'.")
1207
+ t0_batch_size = st.slider('batch_size', 1, 10, 1, step=1, \
1208
+ help="batch_size (int, default = 1) - batch_size>1 will make EasyOCR faster \
1209
+ but use more memory.")
1210
+ t0_workers = st.slider('workers', 0, 10, 0, step=1, \
1211
+ help="workers (int, default = 0) - Number thread used in of dataloader.")
1212
+ t0_allowlist = st.text_input('allowlist', value="", max_chars=None, \
1213
+ placeholder='Force EasyOCR to recognize only this subset of characters', \
1214
+ help='''allowlist (string) - Force EasyOCR to recognize only subset of characters.\n
1215
+ Usefor specific problem (E.g. license plate, etc.)''')
1216
+ t0_blocklist = st.text_input('blocklist', value="", max_chars=None, \
1217
+ placeholder='Block subset of character (will be ignored if allowlist is given)', \
1218
+ help='''blocklist (string) - Block subset of character. This argument will be \
1219
+ ignored if allowlist is given.''')
1220
+ t0_detail = st.radio('detail', [0, 1], 1, horizontal=True, \
1221
+ help="detail (int, default = 1) - Set this to 0 for simple output")
1222
+ t0_paragraph = st.radio('paragraph', [True, False], 1, horizontal=True, \
1223
+ help='paragraph (bool, default = False) - Combine result into paragraph')
1224
+ t0_contrast_ths = st.slider('contrast_ths', 0.05, 1., 0.1, step=0.01, \
1225
+ help='''contrast_ths (float, default = 0.1) - Text box with contrast lower than \
1226
+ this value will be passed into model 2 times.\n
1227
+ Firs with original image and second with contrast adjusted to 'adjust_contrast' value.\n
1228
+ The with more confident level will be returned as a result.''')
1229
+ t0_adjust_contrast = st.slider('adjust_contrast', 0.1, 1., 0.5, step=0.1, \
1230
+ help = 'adjust_contrast (float, default = 0.5) - target contrast level for low \
1231
+ contrast text box')
1232
+
1233
+ with st.expander("Choose recognition hyperparameters for " + reader_type_list[1], \
1234
+ expanded=False):
1235
+ t1_rec_algorithm = st.selectbox('rec_algorithm', ['CRNN', 'SVTR_LCNet'], 0, \
1236
+ help="Type of recognition algorithm selected. (default=CRNN)")
1237
+ t1_rec_batch_num = st.slider('rec_batch_num', 1, 50, step=1, \
1238
+ help="When performing recognition, the batchsize of forward images. \
1239
+ (default=30)")
1240
+ t1_max_text_length = st.slider('max_text_length', 3, 250, 25, step=1, \
1241
+ help="The maximum text length that the recognition algorithm can recognize. \
1242
+ (default=25)")
1243
+ t1_use_space_char = st.radio('use_space_char', [True, False], 0, horizontal=True, \
1244
+ help="Whether to recognize spaces. (default=TRUE)")
1245
+ t1_drop_score = st.slider('drop_score', 0., 1., 0.25, step=.05, \
1246
+ help="Filter the output by score (from the recognition model), and those \
1247
+ below this score will not be returned. (default=0.5)")
1248
+
1249
+ with st.expander("Choose recognition hyperparameters for " + reader_type_list[2], \
1250
+ expanded=False):
1251
+ t2_recog = st.selectbox('recog', ['ABINet','CRNN','CRNN_TPS','MASTER', \
1252
+ 'NRTR_1/16-1/8','NRTR_1/8-1/4','RobustScanner','SAR','SAR_CN', \
1253
+ 'SATRN','SATRN_sm','SEG','Tesseract'], 7, \
1254
+ help='Text recognition algorithm. (default = SAR)')
1255
+ st.write("###### *More about text recognition models* πŸ‘‰ \
1256
+ [here](https://mmocr.readthedocs.io/en/latest/textrecog_models.html)")
1257
+
1258
+ with st.expander("Choose recognition hyperparameters for " + reader_type_list[3], \
1259
+ expanded=False):
1260
+ t3r_psm = st.selectbox('Page segmentation mode (psm)', \
1261
+ [' - Default', \
1262
+ ' 4 Assume a single column of text of variable sizes', \
1263
+ ' 5 Assume a single uniform block of vertically aligned \
1264
+ text', \
1265
+ ' 6 Assume a single uniform block of text', \
1266
+ ' 7 Treat the image as a single text line', \
1267
+ ' 8 Treat the image as a single word', \
1268
+ ' 9 Treat the image as a single word in a circle', \
1269
+ '10 Treat the image as a single character', \
1270
+ '11 Sparse text. Find as much text as possible in no \
1271
+ particular order', \
1272
+ '13 Raw line. Treat the image as a single text line, \
1273
+ bypassing hacks that are Tesseract-specific'])
1274
+ t3r_oem = st.selectbox('OCR engine mode', ['0 Legacy engine only', \
1275
+ '1 Neural nets LSTM engine only', \
1276
+ '2 Legacy + LSTM engines', \
1277
+ '3 Default, based on what is available'], 3)
1278
+ t3r_whitelist = st.text_input('Limit tesseract to recognize only this \
1279
+ characters :', \
1280
+ placeholder='Limit tesseract to recognize only this characters', \
1281
+ help='Example for numbers only : 0123456789')
1282
+
1283
+ submit_reco = st.form_submit_button("Launch recognition")
1284
+
1285
+ if submit_reco:
1286
+ process_detect.clear()
1287
+ ##----------- Process recognition ------------------------------------------
1288
+ reader_ind = reader_type_dict[st.session_state.detect_reader]
1289
+ list_boxes = list_coordinates[reader_ind]
1290
+
1291
+ # Construct the config Tesseract parameter
1292
+ t3r_config = ''
1293
+ psm = t3r_psm[:2]
1294
+ if psm != ' -':
1295
+ t3r_config += '--psm ' + psm.strip()
1296
+ oem = t3r_oem[:1]
1297
+ if oem != '3':
1298
+ t3r_config += ' --oem ' + oem
1299
+ if t3r_whitelist != '':
1300
+ t3r_config += ' -c tessedit_char_whitelist=' + t3r_whitelist
1301
+
1302
+ list_params_rec = \
1303
+ [{'decoder': t0_decoder, 'beamWidth': t0_beamWidth, \
1304
+ 'batch_size': t0_batch_size, 'workers': t0_workers, \
1305
+ 'allowlist': t0_allowlist, 'blocklist': t0_blocklist, \
1306
+ 'detail': t0_detail, 'paragraph': t0_paragraph, \
1307
+ 'contrast_ths': t0_contrast_ths, 'adjust_contrast': t0_adjust_contrast
1308
+ },
1309
+ { **list_params_det[1][1], **{'rec_algorithm': t1_rec_algorithm, \
1310
+ 'rec_batch_num': t1_rec_batch_num, 'max_text_length': t1_max_text_length, \
1311
+ 'use_space_char': t1_use_space_char, 'drop_score': t1_drop_score}, \
1312
+ **{'lang': list_params_det[1][0]}
1313
+ },
1314
+ {'recog': t2_recog},
1315
+ {'lang': tesserocr_lang, 'config': t3r_config}
1316
+ ]
1317
+
1318
+ show_info2 = st.empty()
1319
+
1320
+ with show_info2.container():
1321
+ st.info("Text recognition in progress ...")
1322
+ df_results, df_results_tesseract, list_reco_status = \
1323
+ process_recog(list_readers, list_images[1], list_boxes, list_params_rec)
1324
+ show_info2.empty()
1325
+
1326
+ st.session_state.df_results = df_results
1327
+ st.session_state.list_boxes = list_boxes
1328
+ st.session_state.df_results_tesseract = df_results_tesseract
1329
+ st.session_state.list_reco_status = list_reco_status
1330
+
1331
+ if 'df_results' in st.session_state:
1332
+ if not st.session_state.df_results.empty:
1333
+ ##----------- Show recognition results ------------------------------------------------------------
1334
+ results_cols = st.session_state.df_results.columns
1335
+ list_col_text = np.arange(1, len(cols_size), 2)
1336
+ list_col_confid = np.arange(2, len(cols_size), 2)
1337
+
1338
+ dict_draw_reco = {'in_image': st.session_state.list_images[1], \
1339
+ 'in_boxes_coordinates': st.session_state.list_boxes, \
1340
+ 'in_list_texts': [st.session_state.df_results[x].to_list() \
1341
+ for x in results_cols[list_col_text]], \
1342
+ 'in_list_confid': [st.session_state.df_results[x].to_list() \
1343
+ for x in results_cols[list_col_confid]], \
1344
+ 'in_dict_back_colors': dict_back_colors, \
1345
+ 'in_df_results_tesseract' : st.session_state.df_results_tesseract, \
1346
+ 'in_reader_type_list': reader_type_list
1347
+ }
1348
+ show_reco = st.empty()
1349
+
1350
+ with st.form("form3"):
1351
+ st.plotly_chart(fig_colorscale, use_container_width=True)
1352
+
1353
+ col_font, col_threshold = st.columns(2)
1354
+
1355
+ col_font.slider('Font scale', 1, 7, 1, step=1, key="font_scale_sld")
1356
+ col_threshold.slider('% confidence threshold for text color change', 40, 100, 64, \
1357
+ step=1, key="conf_threshold_sld")
1358
+ col_threshold.write("(text color is black below this % confidence threshold, \
1359
+ and white above)")
1360
+
1361
+ draw_reco_images(**dict_draw_reco)
1362
+
1363
+ submit_resize = st.form_submit_button("Refresh")
1364
+
1365
+ if submit_resize:
1366
+ draw_reco_images(**dict_draw_reco, \
1367
+ in_font_scale=st.session_state.font_scale_sld, \
1368
+ in_conf_threshold=st.session_state.conf_threshold_sld)
1369
+
1370
+ st.subheader("Recognition details")
1371
+ with st.expander("Detailed areas for EasyOCR, PPOCR, MMOCR", expanded=True):
1372
  cols = st.columns(cols_size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1373
  cols[0].markdown('#### Detected area')
1374
+ for i in range(1, (len(reader_type_list)-1)*2, 2):
1375
+ cols[i].markdown('#### with ' + reader_type_list[i//2])
1376
+
1377
+ for row in st.session_state.df_results.itertuples():
1378
+ #cols = st.columns(1 + len(reader_type_list)*2)
1379
+ cols = st.columns(cols_size)
1380
+ cols[0].image(row.cropped_image, width=150)
1381
+ for ind_col in range(1, len(cols), 2):
1382
+ cols[ind_col].write(getattr(row, results_cols[ind_col]))
1383
+ cols[ind_col+1].write("("+str( \
1384
+ getattr(row, results_cols[ind_col+1]))+"%)")
1385
 
1386
  st.download_button(
1387
+ label="Download results as CSV file",
1388
  data=convert_df(st.session_state.df_results),
1389
+ file_name='OCR_comparator_results.csv',
1390
  mime='text/csv',
1391
  )
1392
+
1393
+ if not st.session_state.df_results_tesseract.empty:
1394
+ with st.expander("Detailed areas for Tesseract", expanded=False):
1395
+ cols = st.columns([2,2,1])
1396
+ cols[0].markdown('#### Detected area')
1397
+ cols[1].markdown('#### with Tesseract')
1398
+
1399
+ for row in st.session_state.df_results_tesseract.itertuples():
1400
+ cols = st.columns([2,2,1])
1401
+ cols[0].image(row.cropped, width=150)
1402
+ cols[1].write(getattr(row, 'text'))
1403
+ cols[2].write("("+str(getattr(row, 'conf'))+"%)")
1404
+
1405
+ st.download_button(
1406
+ label="Download Tesseract results as CSV file",
1407
+ data=convert_df(st.session_state.df_results),
1408
+ file_name='OCR_comparator_Tesseract_results.csv',
1409
+ mime='text/csv',
1410
+ )
multipage.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This file is the framework for generating multiple Streamlit applications
3
+ through an object oriented framework.
4
+
5
+ Source: https://huggingface.co/spaces/deepset/wikipedia-assistant/tree/main
6
+ """
7
+
8
+ # Import necessary libraries
9
+ import streamlit as st
10
+ from streamlit_option_menu import option_menu
11
+
12
+
13
+ # Define the multipage class to manage the multiple apps in our program
14
+ class MultiPage:
15
+ """Framework for combining multiple streamlit applications."""
16
+
17
+ def __init__(self) -> None:
18
+ """Constructor class to generate a list which will store all our applications as an instance variable."""
19
+ self.pages = []
20
+
21
+ def add_page(self, title, icon, func) -> None:
22
+ """Class Method to Add pages to the project
23
+
24
+ Args:
25
+ title ([str]): The title of page which we are adding to the list of apps
26
+
27
+ func: Python function to render this page in Streamlit
28
+ """
29
+
30
+ self.pages.append(
31
+ {
32
+ "title": title,
33
+ "icon": icon,
34
+ "function": func
35
+ }
36
+ )
37
+
38
+ def run(self):
39
+ # Drodown to select the page to run
40
+ st.markdown("""
41
+ <style>
42
+ section[data-testid="stSidebar"] > div:first-of-type {
43
+ background-color: var(--secondary-background-color);
44
+ background: var(--secondary-background-color);
45
+ width: 250px;
46
+ padding: 4rem 0;
47
+ box-shadow: -2rem 0px 2rem 2rem rgba(0,0,0,0.16);
48
+ }
49
+ section[aria-expanded="true"] > div:nth-of-type(2) {
50
+ display: none;
51
+ }
52
+ .main > div:first-of-type {
53
+ padding: 1rem 0;
54
+ }
55
+ </style>
56
+ """, unsafe_allow_html=True)
57
+
58
+ with st.sidebar:
59
+ selected = option_menu("OCR Comparator",
60
+ [page["title"] for page in self.pages],
61
+ icons=[page["icon"] for page in self.pages],
62
+ menu_icon="app-indicator", default_index=0)
63
+
64
+ # Run the selected page
65
+ for index, item in enumerate(self.pages):
66
+ if item["title"] == selected:
67
+ self.pages[index]["function"]()
68
+ break
pages/About.py DELETED
@@ -1,37 +0,0 @@
1
- import streamlit as st
2
-
3
- st.set_page_config(page_title='OCR Comparator', layout ="wide")
4
- st.title("OCR solutions comparator")
5
-
6
- st.write("")
7
- st.write("")
8
- st.write("")
9
-
10
- st.markdown("##### This app allows you to compare, from a given picture, the results of different solutions:")
11
- st.markdown("##### *EasyOcr, PaddleOCR, MMOCR, Tesseract*")
12
- st.write("")
13
- st.write("")
14
-
15
- st.markdown(''' The 1st step is to choose the language for the text recognition (not all solutions \
16
- support the same languages), and then choose the picture to consider. It is possible to upload a file, \
17
- to take a picture, or to use a demo file. \
18
- It is then possible to change the default values for the text area detection process, \
19
- before launching the detection task for each solution.''')
20
- st.write("")
21
-
22
- st.markdown(''' The different results are then presented. The 2nd step is to choose one of these \
23
- detection results, in order to carry out the text recognition process there. It is also possible to change \
24
- the default settings for each solution.''')
25
- st.write("")
26
-
27
- st.markdown("###### The recognition results appear in 2 formats:")
28
- st.markdown(''' - a visual format resumes the initial image, replacing the detected areas with \
29
- the recognized text. The background is + or - strongly colored in green according to the \
30
- confidence level of the recognition.
31
- A slider allows you to change the font size, another \
32
- allows you to modify the confidence threshold above which the text color changes: if it is at \
33
- 70% for example, then all the texts with a confidence threshold higher or equal to 70 will appear \
34
- in white, in black otherwise.''')
35
-
36
- st.markdown(" - a detailed format presents the results in a table, for each text box detected. \
37
- It is possible to download this results in a local csv file.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -13,4 +13,5 @@ paddlepaddle==2.3.2
13
  mycolorpy==1.5.1
14
  plotly==5.10.0
15
  plotly-express==0.4.1
16
- pytesseract==0.3.10
 
 
13
  mycolorpy==1.5.1
14
  plotly==5.10.0
15
  plotly-express==0.4.1
16
+ pytesseract==0.3.10
17
+ streamlit_option_menu