Spaces:

teatwots
/

stt

Runtime error

App Files Files Community

stt / app.py

teatwots

Create app.py

23c65ce verified about 1 month ago

raw

history blame contribute delete

No virus

2.82 kB

	#@markdown Output: Accuracy Score

	import gradio as gr
	import speech_recognition as sr
	from Levenshtein import ratio
	import tempfile
	import numpy as np
	import soundfile as sf
	import pandas as pd

	# Practice sentences offered in the UI, kept in a one-column DataFrame.
	# The list is meant to run from easier sentences to harder ones.
	data = dict(
	    Sentences=[
	        "A stitch in time saves nine.",
	        "To be or not to be, that is the question.",
	        "Five cats were living in safe caves.",
	        "Hives give shelter to bees in large caves.",
	        "His decision to plant a rose was amazing.",
	        "She sells sea shells by the sea shore.",
	        "The colorful parrot likes rolling berries.",
	        "Time flies like an arrow; fruit flies like a banana.",
	        "Good things come to those who wait.",
	        "All human beings are born free and equal in dignity and rights.",
	    ],
	)
	df = pd.DataFrame(data=data)

	def transcribe_audio(file_info):
	    """Transcribe an audio clip to text with Google's web speech recognizer.

	    Args:
	        file_info: Audio payload indexed as ``file_info[0]`` (sample rate in
	            Hz) and ``file_info[1]`` (sample array), which is the layout
	            ``gr.Audio(type="numpy")`` hands to callbacks. ``None`` means no
	            audio was supplied.

	    Returns:
	        The recognized text on success. On failure a human-readable message
	        is returned instead of raising: "No audio provided",
	        "Could not understand audio", or "Could not request results; <err>".
	    """
	    import os

	    # Nothing to transcribe; report it instead of failing on file_info[1].
	    if file_info is None:
	        return "No audio provided"

	    # Write the WAV with the clip's own sample rate. Assuming a fixed
	    # 44100 Hz mislabels clips recorded at any other rate, which changes
	    # their speed and pitch and hurts recognition.
	    sample_rate = file_info[0]
	    samples = file_info[1]

	    recognizer = sr.Recognizer()
	    # A path inside a temporary directory can be written and then reopened
	    # by name on every platform; reopening a still-open NamedTemporaryFile
	    # by name is not portable (it fails on Windows).
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        wav_path = os.path.join(tmp_dir, "input.wav")
	        sf.write(wav_path, data=samples, samplerate=sample_rate, format='WAV')
	        with sr.AudioFile(wav_path) as source:
	            audio_data = recognizer.record(source)

	    try:
	        return recognizer.recognize_google(audio_data)
	    except sr.UnknownValueError:
	        # The recognizer could not make out any speech in the clip.
	        return "Could not understand audio"
	    except sr.RequestError as e:
	        # The recognition request itself failed.
	        return f"Could not request results; {e}"

	def pronunciation_correction(expected_text, file_info):
	    """Score how closely a spoken recording matches the expected sentence.

	    The recording is transcribed with ``transcribe_audio`` and compared to
	    ``expected_text`` (case-insensitively) using the Levenshtein ``ratio``.

	    Args:
	        expected_text: Sentence the user was asked to read. ``None`` or an
	            empty string means no sentence has been selected.
	        file_info: Audio payload forwarded unchanged to ``transcribe_audio``;
	            ``None`` means no audio was supplied.

	    Returns:
	        A ``(feedback, description)`` tuple. ``feedback`` is a message for the
	        user and ``description`` is the similarity formatted to two decimals
	        as a string. When no comparison can be made (missing input or failed
	        transcription) ``feedback`` explains why and ``description`` is
	        ``"0.00"``.
	    """
	    no_score = "0.00"

	    # Guard missing inputs up front; otherwise .lower() on None or indexing
	    # a None audio payload raises inside the UI callback.
	    if not expected_text:
	        return "Please select a sentence first.", no_score
	    if file_info is None:
	        return "Please upload or record audio first.", no_score

	    user_spoken_text = transcribe_audio(file_info)

	    # transcribe_audio reports failures as plain strings. Scoring those
	    # strings against the sentence would give a meaningless number, so
	    # surface the failure message instead.
	    transcription_failed = (
	        user_spoken_text == "Could not understand audio"
	        or user_spoken_text.startswith("Could not request results")
	    )
	    if transcription_failed:
	        return user_spoken_text, no_score

	    similarity = ratio(expected_text.lower(), user_spoken_text.lower())
	    description = f"{similarity:.2f}"

	    if similarity >= 0.9:
	        feedback = "Excellent pronunciation!"
	    elif similarity >= 0.7:
	        feedback = "Good pronunciation!"
	    elif similarity >= 0.5:
	        feedback = "Needs improvement."
	    else:
	        feedback = "Poor pronunciation, try to focus more on clarity."

	    return feedback, description

	# Build the Gradio UI: choose a sentence, supply audio, then score the reading.
	# NOTE(review): this listing has lost its indentation, so which components sit
	# inside gr.Row() versus directly under gr.Blocks() cannot be read off here —
	# confirm against the original file before re-indenting.
	with gr.Blocks() as app:
	with gr.Row():
	# Dropdown choices are the sentences stored in df['Sentences'].
	sentence_dropdown = gr.Dropdown(choices=df['Sentences'].tolist(), label="Select a Sentence")
	# Non-interactive textbox that shows the currently selected sentence.
	selected_sentence_output = gr.Textbox(label="Selected Text", interactive=False)
	# Audio is requested in numpy form; transcribe_audio reads the samples from
	# index 1 of the value it receives.
	audio_input = gr.Audio(label="Upload Audio File", type="numpy")
	check_pronunciation_button = gr.Button("Check Pronunciation")
	# Output widgets for the two values returned by pronunciation_correction.
	pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback")
	pronunciation_score = gr.Number(label="Pronunciation Accuracy Score: 0 (No Match) ~ 1 (Perfect)")

	# Copy the dropdown's value into the "Selected Text" box whenever it changes.
	sentence_dropdown.change(lambda x: x, inputs=sentence_dropdown, outputs=selected_sentence_output)
	# On click, call pronunciation_correction(selected sentence, audio) and send
	# its (feedback, score) result to the feedback textbox and score field.
	check_pronunciation_button.click(
	pronunciation_correction,
	inputs=[sentence_dropdown, audio_input],
	outputs=[pronunciation_feedback, pronunciation_score]
	)

	# Start the app with Gradio's debug mode enabled.
	app.launch(debug=True)