Spaces:

NLPV
/

ReadingTestHindi

Sleeping

App Files Files Community

ReadingTestHindi / app.py

NLPV

Update app.py

0b42505 verified about 1 month ago

raw

history blame contribute delete

5.32 kB

	import gradio as gr
	from gtts import gTTS
	import tempfile
	import os
	import torch
	from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
	import torchaudio
	import difflib
	import pandas as pd
	from Levenshtein import distance as lev_distance

	# Load AI4Bharat Hindi model & processor (public model on Hugging Face).
	# Both objects are created once at module level; transcribe_audio reads them
	# as globals instead of reloading them on every request.
	MODEL_NAME = "ai4bharat/indicwav2vec-hindi"
	processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
	model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)

	def play_text(text):
	    """Synthesize Hindi speech for ``text`` and return the path of the MP3 file.

	    The audio is produced with gTTS (``lang='hi'``, ``slow=False``) and saved
	    to a freshly created temporary ``.mp3`` file. The file is deliberately
	    left on disk so the caller can play it back afterwards.

	    Args:
	        text: The text to speak.

	    Returns:
	        The path of the generated MP3 file, or ``None`` when ``text`` is
	        empty or whitespace-only (there is nothing to synthesize).
	    """
	    # Nothing to speak: skip the TTS request and avoid creating a stray file.
	    if not text or not text.strip():
	        return None
	    tts = gTTS(text=text, lang='hi', slow=False)
	    # Only a unique path is needed from the temp file. Closing the handle
	    # before saving avoids leaking a descriptor per call and lets the file
	    # be reopened by name on platforms that forbid a second open.
	    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
	        mp3_path = temp_file.name
	    try:
	        tts.save(mp3_path)
	    except Exception:
	        # Synthesis failed: do not leave an empty placeholder file behind.
	        os.remove(mp3_path)
	        raise
	    return mp3_path

	def get_error_type(asr_word, correct_word):
	    """Classify the mismatch between a recognized word and the expected word.

	    Args:
	        asr_word: The word found in the transcription (falsy if absent).
	        correct_word: The word from the reference text (falsy if absent).

	    Returns:
	        One of the labels ``"Missing word"``, ``"Extra word"``,
	        ``"Spelling mistake"`` (edit distance of at most 2),
	        ``"Phonetic/Matra error"`` (the words share at least one character)
	        or ``"Substitution/Distorted"`` (no character in common).
	    """
	    if asr_word and correct_word:
	        # Both sides are present: grade how far apart the two words are.
	        if lev_distance(asr_word, correct_word) <= 2:
	            label = "Spelling mistake"
	        elif set(asr_word).isdisjoint(correct_word):
	            label = "Substitution/Distorted"
	        else:
	            label = "Phonetic/Matra error"
	    elif asr_word:
	        # Something was recognized but nothing was expected at this position.
	        label = "Extra word"
	    else:
	        # Nothing was recognized (this also covers both words being empty).
	        label = "Missing word"
	    return label

	def compare_hindi_sentences(expected, transcribed):
	    """List the word-level differences between a transcription and its reference.

	    Both strings are split on whitespace and aligned with
	    ``difflib.SequenceMatcher`` (transcription first, reference second).

	    Args:
	        expected: The reference text.
	        transcribed: The recognized text.

	    Returns:
	        A list of ``(asr_word, correct_word, error_type)`` tuples, one per
	        mismatching position, in alignment order. Matching words produce no
	        entry. An absent word on either side is the empty string.
	    """
	    reference = expected.strip().split()
	    hypothesis = transcribed.strip().split()
	    opcodes = difflib.SequenceMatcher(None, hypothesis, reference).get_opcodes()
	    mismatches = []
	    for tag, h_lo, h_hi, r_lo, r_hi in opcodes:
	        if tag == "insert":
	            # Words only present in the reference were not read.
	            mismatches.extend(
	                ("", word, "Missing word") for word in reference[r_lo:r_hi]
	            )
	        elif tag == "delete":
	            # Words only present in the transcription were added by the reader.
	            mismatches.extend(
	                (word, "", "Extra word") for word in hypothesis[h_lo:h_hi]
	            )
	        elif tag == "replace":
	            heard = hypothesis[h_lo:h_hi]
	            wanted = reference[r_lo:r_hi]
	            # Pad the shorter side with "" so the words pair up position by
	            # position; the unmatched tail is then classified as missing/extra.
	            span = max(len(heard), len(wanted))
	            heard += [""] * (span - len(heard))
	            wanted += [""] * (span - len(wanted))
	            for asr_word, correct_word in zip(heard, wanted):
	                mismatches.append(
	                    (asr_word, correct_word, get_error_type(asr_word, correct_word))
	                )
	    return mismatches

	def calculate_accuracy(expected, transcribed):
	    """Return the percentage of reference words matched by the transcription.

	    Both strings are split on whitespace and aligned with
	    ``difflib.SequenceMatcher``; every reference word inside an ``equal``
	    span counts as correct.

	    Args:
	        expected: The reference text.
	        transcribed: The recognized text.

	    Returns:
	        The share of matched reference words as a percentage rounded to two
	        decimals, or ``0`` when the reference contains no words.
	    """
	    reference = expected.strip().split()
	    hypothesis = transcribed.strip().split()
	    if not reference:
	        return 0
	    opcodes = difflib.SequenceMatcher(None, hypothesis, reference).get_opcodes()
	    matched = sum(
	        r_hi - r_lo for tag, _, _, r_lo, r_hi in opcodes if tag == 'equal'
	    )
	    return round((matched / len(reference)) * 100, 2)

	def transcribe_audio(audio_path, original_text):
	    """Transcribe a recording and score it against the text that was read.

	    The audio is loaded with torchaudio, mixed down to one channel, resampled
	    to 16 kHz when needed and peak-normalized, then decoded greedily (argmax
	    over the CTC logits) with the module-level ``processor`` and ``model``.
	    The transcription is compared with ``original_text`` to build the error
	    table, the speaking speed and the reading accuracy.

	    Args:
	        audio_path: Path of the audio file to analyse. ``None`` or an empty
	            string is treated as "no recording supplied".
	        original_text: The text the speaker was asked to read.

	    Returns:
	        A ``(result, df_errors)`` tuple. On success ``result`` is a dict with
	        the transcription, the speaking speed in words per second and the
	        accuracy percentage, and ``df_errors`` is a DataFrame with one row
	        per word-level mismatch. On failure ``result`` is
	        ``{"error": message}`` and ``df_errors`` is an empty DataFrame with
	        the same columns. No exception is propagated to the caller.
	    """
	    error_columns = ["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"]
	    target_rate = 16000
	    # No recording was supplied: answer with a readable message instead of
	    # whatever the audio loader would raise for a missing path.
	    if not audio_path:
	        return (
	            {"error": "No audio provided. Please upload or record audio first."},
	            pd.DataFrame(columns=error_columns),
	        )
	    try:
	        waveform, sample_rate = torchaudio.load(audio_path)
	        # Average the channels so the model receives a single-channel signal.
	        if waveform.shape[0] > 1:
	            waveform = waveform.mean(dim=0, keepdim=True)
	        if sample_rate != target_rate:
	            transform = torchaudio.transforms.Resample(
	                orig_freq=sample_rate, new_freq=target_rate
	            )
	            waveform = transform(waveform)
	        # Peak-normalize, but leave an all-zero (silent) signal untouched:
	        # dividing by a zero peak would fill the waveform with NaN.
	        peak = waveform.abs().max()
	        if peak > 0:
	            waveform = waveform / peak
	        input_values = processor(
	            waveform.squeeze().numpy(),
	            sampling_rate=target_rate,
	            return_tensors="pt",
	        ).input_values
	        with torch.no_grad():
	            logits = model(input_values).logits
	        predicted_ids = torch.argmax(logits, dim=-1)
	        transcription = processor.decode(predicted_ids[0]).strip()
	        # Error analysis
	        errors = compare_hindi_sentences(original_text, transcription)
	        df_errors = pd.DataFrame(errors, columns=error_columns)
	        # Speaking speed: recognized words per second of (resampled) audio.
	        transcribed_words = transcription.strip().split()
	        duration = waveform.shape[1] / target_rate
	        speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
	        # Accuracy
	        accuracy = calculate_accuracy(original_text, transcription)
	        result = {
	            "📝 Transcribed Text": transcription,
	            "⏱️ Speaking Speed (words/sec)": speed,
	            "✅ Reading Accuracy (%)": accuracy
	        }
	        return result, df_errors
	    except Exception as e:
	        # UI boundary: report the failure in the results panel rather than
	        # letting the request crash.
	        return {"error": str(e)}, pd.DataFrame(columns=error_columns)

	# Gradio UI: a text-to-speech helper for the passage, followed by the
	# recording checker.
	# NOTE(review): indentation was flattened in this copy of the file, so which
	# statements sit inside the gr.Blocks() / gr.Row() bodies cannot be read off
	# here - restore the nesting from the original app.py before running.
	with gr.Blocks() as app:
	gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App (AI4Bharat IndicWav2Vec)")
	with gr.Row():
	input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
	play_button = gr.Button("🔊 Listen to Text")
	audio_output = gr.Audio(label="Text-to-Speech Output", type="filepath")
	# Clicking the button passes the textbox content to play_text and hands the
	# returned file path to the audio player.
	play_button.click(play_text, inputs=input_text, outputs=audio_output)

	gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
	audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
	submit_button = gr.Button("✅ Submit Recording for Checking")
	output = gr.JSON(label="Results")
	error_table = gr.Dataframe(label="गलती तालिका (Error Table)")
	# transcribe_audio receives (audio path, reference text) and returns
	# (result dict, error DataFrame), which map onto the JSON panel and the
	# table in that order.
	submit_button.click(
	transcribe_audio,
	inputs=[audio_input, input_text],
	outputs=[output, error_table]
	)

	# Start the Gradio app.
	app.launch()