import asyncio
import re

import edge_tts
import gradio as gr
from paddleocr import PaddleOCR, draw_ocr


def image2Text(image, langChoice: str):
    """Run PaddleOCR over the uploaded image and return the recognised text, one entry per line."""
    # Instantiating PaddleOCR downloads the model on first use and loads it into memory.
    ocr = PaddleOCR(use_angle_cls=True, lang=langChoice)
    # `image` arrives from gr.Image as a numpy array; PaddleOCR accepts arrays as well as file paths.
    result = ocr.ocr(image, cls=True)
    text = ""
    for res in result:
        for line in res:
            if langChoice == "ch":
                # Remove pinyin annotations and item numbering if the list is Chinese.
                #t = re.sub('[a-z0-9.]', '', line[1][0])
                t = re.sub('[a-z]', '', line[1][0])
                t = re.sub(r'[0-9]\.', '', t)
                t = t.replace(" ", "")
                # Drop empty bracket pairs left behind after the cleanup.
                t = t.replace("()", "")
                t = t.replace("( )", "")
            else:
                t = line[1][0]
                # Strip worksheet headers such as "Term 1 Spelling" and "Page 2".
                t = re.sub('Term [0-9] Spelling', '', t)
                t = re.sub('Page [0-9]', '', t)
            if t != "":
                text += t + "\n"
    return text


def text2Voice(recognized_text, langChoice: str):
    """Convert the recognised text to speech with edge-tts and save it as an MP3 file."""
    # Pick a neural voice that matches the recognised language.
    if langChoice == "ch":
        voice = "zh-CN-YunxiaNeural"
    else:
        voice = "en-GB-LibbyNeural"
    output_file = "voice.mp3"

    async def amain() -> None:
        """Synthesise the text and write the audio to disk."""
        communicate = edge_tts.Communicate(recognized_text, voice)
        await communicate.save(output_file)

    asyncio.run(amain())
    return output_file


with gr.Blocks() as demo:
    gr.HTML("""

        <h1>Spelling Tester</h1>
    """)

    examples = ['SpellingList3.jpg']
    with gr.Row():
        with gr.Column(scale=1):
            upload_image = gr.Image(height=400, width=400, value="SpellingTest8.jpg")
            #langDrop = gr.Dropdown(
            #    ["ch", "en"], label="Language", info="Specifying the language helps to increase accuracy.", value="ch"
            #)
            langChoice = gr.Textbox(value="ch", label="Select language: 'ch' for Chinese, 'en' for English")
        with gr.Column(scale=3):
            recognized_text = gr.Textbox(show_label=False, placeholder="spelling list", lines=15)
            toText = gr.Button("Convert image to text")
            generated_voice = gr.Audio()
            toVoice = gr.Button("Convert text to voice", variant="primary")

    toText.click(
        image2Text,
        [upload_image, langChoice],
        [recognized_text],
        #show_progress=True,
    )
    toVoice.click(text2Voice, [recognized_text, langChoice], [generated_voice])

demo.queue().launch(share=False, inbrowser=True)