from paddleocr import PaddleOCR

class OCR:
    '''
    Thin wrapper around a PaddleOCR engine built from locally saved models.

    The detection model, recognition model and character dictionary paths
    are fixed in ``__init__``; the engine itself is created once and kept
    in ``self.ocr``.
    '''

    def __init__(self):
        # Locations of the saved models and the character dictionary that
        # are handed to PaddleOCR in ``paddleocr()``.
        self.model_det = 'saved_model/det'
        self.model_rec = 'saved_model/rec'
        self.char_dict = 'saved_model/char_dict.txt'
        # Build the engine once so every ``ocr_image`` call reuses it.
        self.ocr = self.paddleocr()

    def paddleocr(self):
        '''
        Build a PaddleOCR engine from the paths stored on this instance.
        Returns:
            PaddleOCR: A newly constructed engine.
        '''
        return PaddleOCR(
            det_model_dir=self.model_det,
            det_db_box_thresh=0.6,
            rec_model_dir=self.model_rec,
            rec_image_shape='3, 32, 100',
            rec_char_dict_path=self.char_dict,
            use_angle_cls=True,
            show_log=False,
        )

    def ocr_image(self, image):
        '''
        Run the OCR engine on an image.
        Args:
            image: Input forwarded unchanged to ``PaddleOCR.ocr``.
        Returns:
            Whatever ``PaddleOCR.ocr`` returns for ``image``.
        '''
        return self.ocr.ocr(image)

    @staticmethod
    def extract_output(result, sorted_boxes=True):
        '''
        Extract the output of paddleocr.
        Args:
            result(list): The output of paddleocr. Every entry is indexed
                as ``entry[0]`` (box, a sequence of points),
                ``entry[1][0]`` (text) and ``entry[1][1]`` (score).
                ``None`` is treated as "nothing detected".
            sorted_boxes(bool): Sort the entries in ascending order of the
                x coordinate of each box's last point. Entries with equal
                x keep their original relative order.
        Returns:
            boxes(list): The list of boxes.
            txts(list): The list of texts.
            scores(list): The list of scores.
        '''
        if result is None:
            return [], [], []

        if sorted_boxes:
            # A stable keyed sort keeps every entry exactly once. Looking
            # positions up with ``list.index`` returned the first match for
            # every tie, duplicating one entry and dropping the others.
            # NOTE(review): the last box point is presumably the
            # bottom-left corner, i.e. roughly xmin -- confirm against the
            # PaddleOCR point ordering.
            result = sorted(result, key=lambda entry: entry[0][-1][0])

        boxes = [entry[0] for entry in result]
        txts = [entry[1][0] for entry in result]
        scores = [entry[1][1] for entry in result]
        return boxes, txts, scores