"""Spelling Tester: OCR a spelling list from an image and read it aloud."""

import asyncio
import re

import edge_tts
import gradio as gr
from paddleocr import PaddleOCR, draw_ocr

def image2Text(image:str, langChoice:str):
    ocr = PaddleOCR(use_angle_cls=True, lang=langChoice)  # need to run only once to download and load model into memory
    img_path = image
    result = ocr.ocr(img_path, cls=True)
    text = ""
    for idx in range(len(result)):
        res = result[idx]
        for line in res:
            import re
            # remove pinyin if it's Chinese
            if langChoice=="ch":
                #t = re.sub('[a-z0-9.]', '', line[1][0])
                t = re.sub('[a-z]', '', line[1][0])
                t = re.sub('[0-9]\.', '', t)
                t = t.replace(" ", "")
                t = t.replace("（）", "")
                t = t.replace("（)", "")
                t = t.replace("( )", "")
                t = t.replace("()", "")            
                if t!="":
                    text +=((t) + "\n")             
            else:
                t = line[1][0]
                t = re.sub('Term [0-9] Spelling', '', t)
                t = re.sub('Page [0-9]', '', t)
                if t!="":
                    text += (t + "\n")
    return text

    
def text2Voice(recognized_text, langChoice:str):
    async def amain() -> None:
        """Main function"""
        communicate = edge_tts.Communicate(TEXT, VOICE)
        await communicate.save(OUTPUT_FILE)
    TEXT = recognized_text
    if langChoice == "ch":
        VOICE = "zh-CN-YunxiaNeural"
    else:
        VOICE = "en-GB-LibbyNeural"
    OUTPUT_FILE = "voice.mp3"
    #await amain()
    asyncio.run(amain())
    return "voice.mp3"
    
with gr.Blocks() as demo:
    gr.HTML("""<h1 align="center">Spelling Tester</h1>""")

    examples = ['SpellingList3.jpg']    

    with gr.Row():
        with gr.Column(scale=1):
            upload_image = gr.Image(height=400,width=400, value = "SpellingTest8.jpg")
            #langDrop = gr.Dropdown(
            #    ["ch", "en"], label="Language", info="specify the language will help to increase accuracy.", value = "ch"
            #),            
            langChoice = gr.Textbox(value="ch", label="Select lanaguage: 'ch' for Chinese, 'en' for English")
        with gr.Column(scale=3):
            recognized_text = gr.Textbox(show_label=False, placeholder="spelling list", lines=15)
    toText = gr.Button("Convert image to text")

    generated_voice = gr.Audio()
    toVoice = gr.Button("Convert text to voice", variant="primary")

    
    toText.click(
        image2Text,
        [upload_image, langChoice],
        [recognized_text],
        #show_progress=True,

    )
    toVoice.click(text2Voice, [recognized_text, langChoice], [generated_voice])

    #searchBtn.click(search, inputs=[user_input], outputs=[search_result], show_progress=True)
demo.queue().launch(share=False, inbrowser=True)