Loren committed on
Commit
409fb43
1 Parent(s): 09378f5

Update app_pages/ocr_comparator.py

Browse files
Files changed (1) hide show
  1. app_pages/ocr_comparator.py +39 -39
app_pages/ocr_comparator.py CHANGED
@@ -620,8 +620,8 @@ def app():
620
  # Recognize with Tesseract
621
  with st.spinner('Tesseract Text recognition in progress ...'):
622
  out_df_results_tesseract, status_tesseract = \
623
- #tesserocr_recog(in_image_cv, in_list_dict_params[3], len(list_cropped_images))
624
  tesserocr_recog(in_image_cv, in_list_dict_params[2], len(list_cropped_images))
 
625
  ##
626
 
627
  # Create results data frame
@@ -757,44 +757,44 @@ def app():
757
  #
758
  # return out_list_text_mmocr, out_list_confidence_mmocr, out_status
759
  #
760
- ####
761
- #@st.experimental_memo(suppress_st_warning=True, show_spinner=False)
762
- #def tesserocr_recog(in_img, in_params, in_nb_images):
763
- # """Recognition with Tesseract
764
- #
765
- # Args:
766
- # in_image_cv (matrix) : original image
767
- # in_params (dict) : parameters for recognition
768
- # in_nb_images : nb cropped images (used for progress bar)
769
- #
770
- # Returns:
771
- # Pandas data frame : recognition results
772
- # string/Exception : recognition status
773
- # """
774
- # ## ------- Tesseract Text recognition
775
- # step = 3*in_nb_images # fourth recognition process
776
- # nb_steps = 4 * in_nb_images
777
- # progress_bar = st.progress(step/nb_steps)
778
- #
779
- # try:
780
- # out_df_result = pytesseract.image_to_data(in_img, **in_params,output_type=Output.DATAFRAME)
781
- #
782
- # out_df_result['box'] = out_df_result.apply(lambda d: [[d['left'], d['top']], \
783
- # [d['left'] + d['width'], d['top']], \
784
- # [d['left']+d['width'], d['top']+d['height']], \
785
- # [d['left'], d['top'] + d['height']], \
786
- # ], axis=1)
787
- # out_df_result['cropped'] = out_df_result['box'].apply(lambda b: cropped_1box(b, in_img))
788
- # out_df_result = out_df_result[(out_df_result.word_num > 0) & (out_df_result.text != ' ')] \
789
- # .reset_index(drop=True)
790
- # out_status = 'OK'
791
- # except Exception as e:
792
- # out_df_result = pd.DataFrame([])
793
- # out_status = e
794
- #
795
- # progress_bar.progress(1.)
796
- #
797
- # return out_df_result, out_status
798
 
799
  ###
800
  def draw_reco_images(in_image, in_boxes_coordinates, in_list_texts, in_list_confid, \
 
620
  # Recognize with Tesseract
621
  with st.spinner('Tesseract Text recognition in progress ...'):
622
  out_df_results_tesseract, status_tesseract = \
 
623
  tesserocr_recog(in_image_cv, in_list_dict_params[2], len(list_cropped_images))
624
+ #tesserocr_recog(in_image_cv, in_list_dict_params[3], len(list_cropped_images))
625
  ##
626
 
627
  # Create results data frame
 
757
  #
758
  # return out_list_text_mmocr, out_list_confidence_mmocr, out_status
759
  #
760
+ ###
761
+ @st.experimental_memo(suppress_st_warning=True, show_spinner=False)
762
+ def tesserocr_recog(in_img, in_params, in_nb_images):
763
+ """Recognition with Tesseract
764
+
765
+ Args:
766
+ in_image_cv (matrix) : original image
767
+ in_params (dict) : parameters for recognition
768
+ in_nb_images : nb cropped images (used for progress bar)
769
+
770
+ Returns:
771
+ Pandas data frame : recognition results
772
+ string/Exception : recognition status
773
+ """
774
+ ## ------- Tesseract Text recognition
775
+ step = 3*in_nb_images # fourth recognition process
776
+ nb_steps = 4 * in_nb_images
777
+ progress_bar = st.progress(step/nb_steps)
778
+
779
+ try:
780
+ out_df_result = pytesseract.image_to_data(in_img, **in_params,output_type=Output.DATAFRAME)
781
+
782
+ out_df_result['box'] = out_df_result.apply(lambda d: [[d['left'], d['top']], \
783
+ [d['left'] + d['width'], d['top']], \
784
+ [d['left']+d['width'], d['top']+d['height']], \
785
+ [d['left'], d['top'] + d['height']], \
786
+ ], axis=1)
787
+ out_df_result['cropped'] = out_df_result['box'].apply(lambda b: cropped_1box(b, in_img))
788
+ out_df_result = out_df_result[(out_df_result.word_num > 0) & (out_df_result.text != ' ')] \
789
+ .reset_index(drop=True)
790
+ out_status = 'OK'
791
+ except Exception as e:
792
+ out_df_result = pd.DataFrame([])
793
+ out_status = e
794
+
795
+ progress_bar.progress(1.)
796
+
797
+ return out_df_result, out_status
798
 
799
  ###
800
  def draw_reco_images(in_image, in_boxes_coordinates, in_list_texts, in_list_confid, \