import pandas as pd
import gradio as gr
from cnocr import CnOcr
from PIL import Image
from pix2tex.cli import LatexOCR
from paddleocr import PPStructure  # ,draw_structure_result,save_structure_res


def table_text(img):
    """Extract a table from an image with PP-Structure and return it as a DataFrame."""
    table_engine = PPStructure(layout=False, show_log=True)
    result = table_engine(img)
    # PP-Structure returns the recognized table as HTML; parse it into a DataFrame.
    im_show = pd.read_html(result[0]['res']['html'])[0]
    # print(im_show)
    return im_show


def pix_text(x):
    """Convert an image of a formula to a LaTeX string with pix2tex."""
    x = Image.fromarray(x)
    model = LatexOCR()
    out = model(x)
    out = '$' + out + '$'
    # out = re.compile(r'\left').sub('', out)
    # out = re.compile(r'\right').sub('', out)
    return out


def merge_strings(lst):
    """Group OCR lines into sentences, splitting after '。', '!' or '.'."""
    indices = []
    for num, line in enumerate(lst):
        if line[-1] == '。' or line[-1] == '!' or line[-1] == '.':
            indices.append(num + 1)
    groups = [' '.join(lst[i:j]) for i, j in zip([0] + indices, indices + [None])]
    return groups


def ocr_image(x, det_model, rec_model):
    """Run CnOCR with the chosen detection/recognition models and return the merged text."""
    ocr = CnOcr(det_model_name=det_model, rec_model_name=rec_model)
    out = ocr.ocr(x)
    out = list(filter(None, [i['text'] for i in out]))
    out = merge_strings(out)
    text = ' '.join(out)
    return text


with gr.Blocks() as demo:
    gr.Markdown("Product identification")

    with gr.Tab("Product text extraction"):
        with gr.Row():
            det_model_text = gr.Dropdown(
                ["db_shufflenet_v2", "db_shufflenet_v2_tiny", "db_mobilenet_v3",
                 "db_resnet34", "db_resnet18", "ch_PP-OCRv3_det",
                 "ch_PP-OCRv2_det", "en_PP-OCRv3_det"],
                value='ch_PP-OCRv3_det', interactive=True, label="Detection model")
            rec_model_text = gr.Dropdown(
                ["densenet_lite_114-fc", "densenet_lite_134-fc", "densenet_lite_136-fc",
                 "densenet_lite_134-gru", "densenet_lite_136-gru", "ch_PP-OCRv3",
                 "ch_ppocr_mobile_v2.0", "en_PP-OCRv3", "en_number_mobile_v2.0",
                 "chinese_cht_PP-OCRv3"],
                value="densenet_lite_136-gru", label="Recognition model")
        with gr.Row():
            with gr.Column(min_width=300):
                image_input = gr.Image(label='Product to be recognized')
            with gr.Column(scale=1, min_width=300):
                image_output = gr.Textbox(lines=8, label='Identification results').style(show_copy_button=True)
        with gr.Row():
            clear_button = gr.ClearButton(value='Clear')
            image_button = gr.Button("Recognize")

    with gr.Tab("Product table extraction"):
        with gr.Row():
            table_input = gr.Image(label='Product')
            # table_output = gr.File()
            table_output = gr.Dataframe(datatype=["str", "number"], row_count=5, col_count=5, label='Result')
        with gr.Row():
            clear_button3 = gr.ClearButton(value='Clear')
            table_button = gr.Button("Extract")

    image_button.click(ocr_image, inputs=[image_input, det_model_text, rec_model_text], outputs=image_output)
    clear_button.add([image_input, image_output])
    table_button.click(table_text, inputs=table_input, outputs=table_output)
    clear_button3.add([table_input, table_output])

demo.launch()
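# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original app): pix_text() above converts a
# formula image to a LaTeX string with pix2tex, but it is never wired into the
# interface. The commented-out lines below show how a third tab *could* expose
# it, mirroring the two existing tabs; the tab name and the component/variable
# names (formula_input, formula_output, clear_button2, formula_button) are
# assumptions, not taken from the original code. To try it, paste these lines
# inside the `with gr.Blocks() as demo:` block above, before demo.launch().
#
#     with gr.Tab("Formula extraction"):
#         with gr.Row():
#             formula_input = gr.Image(label='Formula to be recognized')
#             formula_output = gr.Textbox(lines=4, label='LaTeX result')
#         with gr.Row():
#             clear_button2 = gr.ClearButton(value='Clear')
#             formula_button = gr.Button("Recognize")
#
#     formula_button.click(pix_text, inputs=formula_input, outputs=formula_output)
#     clear_button2.add([formula_input, formula_output])
# ---------------------------------------------------------------------------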