Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	| import gradio as gr | |
| from paddleocr import PaddleOCR, draw_ocr | |
| import asyncio | |
| import edge_tts | |
# PaddleOCR engines keyed by language code. Building an engine loads (and on
# first use downloads) its models, so each language is built once per process
# and reused by later calls instead of being rebuilt on every request.
_OCR_ENGINES = {}


def _get_ocr_engine(lang):
    """Return the PaddleOCR engine for *lang*, creating and caching it on first use."""
    engine = _OCR_ENGINES.get(lang)
    if engine is None:
        # A failing constructor (e.g. unsupported language) raises before the
        # assignment below, so nothing broken is ever cached.
        engine = PaddleOCR(use_angle_cls=True, lang=lang)
        _OCR_ENGINES[lang] = engine
    return engine


def _clean_recognized_line(raw, langChoice):
    """Strip spelling-sheet boilerplate from one recognized line of text."""
    import re

    if langChoice == "ch":
        # Remove pinyin: drops every lowercase ASCII letter.
        cleaned = re.sub(r'[a-z]', '', raw)
        # Remove list numbering such as "3." (a single digit followed by a dot).
        cleaned = re.sub(r'[0-9]\.', '', cleaned)
        cleaned = cleaned.replace(" ", "")
        # Drop the empty brackets left behind once the pinyin inside them is gone.
        # The sequence is applied in order and kept exactly as it was, because
        # repeated passes also collapse nested pairs such as "(())".
        # NOTE(review): the repeated entries may originally have been distinct
        # bracket variants (e.g. full-width) - confirm against the real file.
        for leftover in ("()", "()", "( )", "()"):
            cleaned = cleaned.replace(leftover, "")
        return cleaned

    # Any other language: only strip the sheet's header/footer captions.
    cleaned = re.sub(r'Term [0-9] Spelling', '', raw)
    return re.sub(r'Page [0-9]', '', cleaned)


def image2Text(image:str, langChoice:str):
    """Run OCR on *image* and return the cleaned text, one entry per line.

    Args:
        image: The image to recognize; handed unchanged to ``PaddleOCR.ocr``.
            NOTE(review): annotated as a path, but the value is whatever the
            Gradio image component supplies - confirm.
        langChoice: PaddleOCR language code. ``"ch"`` additionally removes
            pinyin, list numbering, spaces and empty brackets; every other
            value only removes the "Term N Spelling" / "Page N" captions.

    Returns:
        The kept lines, each followed by a newline, or ``""`` when nothing
        was recognized or every line was cleaned away.
    """
    ocr = _get_ocr_engine(langChoice)
    result = ocr.ocr(image, cls=True)

    kept = []
    # Some PaddleOCR releases report "no text found" as None (for the whole
    # result or for a single page) rather than an empty list; treat both as empty.
    for page in result or []:
        if not page:
            continue
        for line in page:
            # line[1][0] is read as the recognized string of this entry.
            cleaned = _clean_recognized_line(line[1][0], langChoice)
            if cleaned != "":
                kept.append(cleaned + "\n")
    return "".join(kept)
def text2Voice(recognized_text, langChoice:str):
    """Speak *recognized_text* through edge-tts and return the saved audio path.

    Args:
        recognized_text: The text handed to ``edge_tts.Communicate``.
        langChoice: ``"ch"`` selects the ``zh-CN-YunxiaNeural`` voice; any
            other value selects ``en-GB-LibbyNeural``.

    Returns:
        The string ``"voice.mp3"``, the fixed path the audio is saved to.
        Every call writes to this same path.
    """
    output_path = "voice.mp3"
    voice_name = "zh-CN-YunxiaNeural" if langChoice == "ch" else "en-GB-LibbyNeural"

    async def synthesize() -> None:
        """Create the edge-tts request and save its audio to the output path."""
        speaker = edge_tts.Communicate(recognized_text, voice_name)
        await speaker.save(output_path)

    # This function is synchronous, so drive the coroutine to completion here.
    asyncio.run(synthesize())
    return output_path
# Gradio UI: upload a spelling sheet, OCR it into an editable text box, then
# read the text aloud.
# NOTE(review): the original indentation was lost, so the nesting below
# (which widgets sit in which column) is reconstructed - confirm the layout.
with gr.Blocks() as demo:
    gr.HTML("""<h1 align="center">Spelling Tester</h1>""")
    # NOTE(review): defined but never attached to any component.
    examples = ['SpellingList3.jpg']
    with gr.Row():
        with gr.Column(scale=1):
            # Input image, pre-filled with a bundled sample sheet.
            upload_image = gr.Image(height=400, width=400, value="SpellingTest8.jpg")
            # Free-text language code; passed to both callbacks below.
            langChoice = gr.Textbox(value="ch", label="Select language: 'ch' for Chinese, 'en' for English")
        with gr.Column(scale=3):
            # OCR output; also the input of the text-to-voice step.
            recognized_text = gr.Textbox(show_label=False, placeholder="spelling list", lines=15)
            toText = gr.Button("Convert image to text")
            generated_voice = gr.Audio()
            toVoice = gr.Button("Convert text to voice", variant="primary")

    # Wire the buttons: image + language -> text, then text + language -> audio.
    toText.click(
        image2Text,
        [upload_image, langChoice],
        [recognized_text],
    )
    toVoice.click(text2Voice, [recognized_text, langChoice], [generated_voice])

demo.queue().launch(share=False, inbrowser=True)