# Spelling Tester — Gradio app: OCR a spelling-list image with PaddleOCR,
# then read the recognized words aloud via edge-tts.
import asyncio
import re

import edge_tts
import gradio as gr
from paddleocr import PaddleOCR, draw_ocr
def image2Text(image: str, langChoice: str) -> str:
    """Run OCR on an image and return the recognized text, one entry per line.

    Args:
        image: Path to the image file to recognize.
        langChoice: PaddleOCR language code ("ch" for Chinese, "en" for English).

    Returns:
        Recognized text, newline-terminated per OCR line. For Chinese input,
        pinyin letters, list numbering, spaces, and empty bracket pairs are
        stripped; otherwise known worksheet headers ("Term N Spelling",
        "Page N") are removed. Empty results after cleanup are dropped.
    """
    # Loading the model is expensive: PaddleOCR downloads/loads it on first use.
    ocr = PaddleOCR(use_angle_cls=True, lang=langChoice)
    result = ocr.ocr(image, cls=True)

    # Compile cleanup patterns once, outside the per-line loop.
    is_chinese = langChoice == "ch"
    if is_chinese:
        pinyin_re = re.compile(r"[a-z]")
        numbering_re = re.compile(r"[0-9]\.")
    else:
        term_re = re.compile(r"Term [0-9] Spelling")
        page_re = re.compile(r"Page [0-9]")

    kept = []
    for res in result:
        if not res:  # PaddleOCR can yield None/empty for pages with no detections
            continue
        for line in res:
            t = line[1][0]  # (box, (text, confidence)) -> text
            if is_chinese:
                # Strip pinyin annotations and list numbering from Chinese text.
                t = pinyin_re.sub("", t)
                t = numbering_re.sub("", t)
                t = t.replace(" ", "")
                # Remove leftover empty bracket pairs.
                # NOTE(review): the original repeated the "()" replacement three
                # times — the duplicates were likely full-width pairs ("（）")
                # lost in transcoding; confirm against the original file.
                t = t.replace("()", "")
                t = t.replace("( )", "")
            else:
                # Drop boilerplate headers from English spelling worksheets.
                t = term_re.sub("", t)
                t = page_re.sub("", t)
            if t:
                kept.append(t)
    # join() avoids the quadratic "text +=" accumulation of the original.
    return "".join(t + "\n" for t in kept)
def text2Voice(recognized_text, langChoice: str) -> str:
    """Synthesize speech for *recognized_text* with edge-tts and save an MP3.

    Args:
        recognized_text: The text to speak.
        langChoice: "ch" selects a Mandarin voice; any other value selects a
            British English voice.

    Returns:
        Path of the generated audio file ("voice.mp3").
    """
    output_file = "voice.mp3"
    voice = "zh-CN-YunxiaNeural" if langChoice == "ch" else "en-GB-LibbyNeural"

    # Pass values in explicitly instead of the original's late-binding closure
    # over ALL-CAPS locals assigned after the coroutine was defined.
    async def _speak(text: str, voice_name: str, path: str) -> None:
        # edge-tts is async-only; save() performs the request and file write.
        communicate = edge_tts.Communicate(text, voice_name)
        await communicate.save(path)

    asyncio.run(_speak(recognized_text, voice, output_file))
    return output_file
# Build and launch the Gradio UI: image + language in, text and audio out.
with gr.Blocks() as demo:
    gr.HTML("""<h1 align="center">Spelling Tester</h1>""")
    # NOTE(review): `examples` is unused in this view; kept in case another
    # chunk of the file references it — confirm and remove if dead.
    examples = ['SpellingList3.jpg']
    with gr.Row():
        with gr.Column(scale=1):
            upload_image = gr.Image(height=400, width=400, value="SpellingTest8.jpg")
            # A free-text language code; a Dropdown would be stricter but this
            # preserves the existing interface to image2Text/text2Voice.
            langChoice = gr.Textbox(
                value="ch",
                label="Select language: 'ch' for Chinese, 'en' for English",
            )
        with gr.Column(scale=3):
            recognized_text = gr.Textbox(show_label=False, placeholder="spelling list", lines=15)
            toText = gr.Button("Convert image to text")
            generated_voice = gr.Audio()
            toVoice = gr.Button("Convert text to voice", variant="primary")
    # Wire the buttons: OCR fills the textbox; TTS fills the audio player.
    toText.click(image2Text, [upload_image, langChoice], [recognized_text])
    toVoice.click(text2Voice, [recognized_text, langChoice], [generated_voice])
demo.queue().launch(share=False, inbrowser=True)