File size: 2,406 Bytes
b63476f
fb93a17
 
d6fa022
 
 
 
f5d520a
fb93a17
d6fa022
 
fb93a17
d6fa022
 
 
 
fb93a17
f5d520a
 
 
d6fa022
fb93a17
f5d520a
 
d6fa022
f5d520a
 
 
2cf982a
 
 
 
d6fa022
 
f5d520a
 
 
2cf982a
d6fa022
 
 
bf2d620
d6fa022
 
fb93a17
d6fa022
 
2cf982a
d6fa022
 
 
fb93a17
d6fa022
 
 
 
 
 
c994feb
 
d6fa022
 
2a0284a
d6fa022
2a0284a
d6fa022
 
 
5995a5d
bf2d620
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import difflib
import os
import subprocess
import sys
import tempfile
import time
import unicodedata

import gradio as gr
import speech_recognition as sr
from faster_whisper import WhisperModel
from gtts import gTTS

# Function to play the text (optional)
def play_text(text):
    """Synthesise ``text`` as Hindi speech and open it in the OS default player.

    The audio is produced with gTTS, written to a temporary ``.mp3`` file
    (which is intentionally left on disk so the external player can read it)
    and handed to the operating system's default handler for that file type.
    Playback happens on the machine running this process.

    Args:
        text: The text to read aloud.

    Returns:
        A short status message for the user.
    """
    # gTTS refuses empty input; answer with a friendly message instead.
    if not text or not text.strip():
        return "⚠️ Please enter some Hindi text first."

    tts = gTTS(text=text, lang='hi', slow=False)

    # Only the unique file name is needed. Closing the handle right away
    # avoids leaking a file descriptor; gTTS reopens the path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        audio_path = temp_file.name
    tts.save(audio_path)

    # Launch the default player without going through a shell, so paths
    # containing spaces or shell metacharacters are handled correctly.
    if sys.platform.startswith("win"):
        os.startfile(audio_path)
    else:
        opener = "open" if sys.platform == "darwin" else "xdg-open"
        try:
            subprocess.Popen(
                [opener, audio_path],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except OSError:
            return f"⚠️ Audio saved to {audio_path}, but no audio player could be started."

    return "✅ Text is being read out. Please listen and read it yourself."

# Shared Whisper model, created a single time at import so that individual
# requests do not pay the loading cost. "medium" is a drop-in alternative to
# "small" when better accuracy is wanted.
model = WhisperModel(
    "small",
    compute_type="float32",
)

def _split_words(text):
    """Split ``text`` into words, keeping combining marks attached to letters.

    Python's ``\\w`` does not match Unicode combining marks, so a regex such
    as ``\\w+`` cuts Devanagari words apart at every vowel sign or virama.
    Here letters, marks and numbers (Unicode categories L*, M*, N*) count as
    word characters; format characters such as ZWJ/ZWNJ are dropped without
    breaking the word; everything else acts as a separator.
    """
    # NFC so that precomposed and decomposed spellings compare equal.
    normalized = unicodedata.normalize("NFC", text)
    pieces = []
    for ch in normalized:
        category = unicodedata.category(ch)
        if category[0] in "LMN":
            pieces.append(ch)
        elif category != "Cf":
            pieces.append(" ")
    return "".join(pieces).split()


def transcribe_audio(audio, original_text):
    """Transcribe a recording and score it against the reference text.

    Args:
        audio: Path of the recorded/uploaded audio file passed to
            ``model.transcribe``; falsy when nothing was provided.
        original_text: The text the speaker was supposed to read.

    Returns:
        A dict with the transcription, a word-level similarity percentage
        and the speaking speed in words per second, or ``{"error": message}``
        when the input is missing or transcription fails.
    """
    if not audio:
        return {"error": "No audio received. Please upload or record your voice first."}

    original_words = _split_words(original_text or "")
    if not original_words:
        return {"error": "Please enter the text you are reading before submitting."}

    try:
        segments, info = model.transcribe(audio, language='hi')
        # The segments are consumed inside the try block so that failures
        # raised while iterating them are reported as well.
        transcription = " ".join(segment.text.strip() for segment in segments)
    except Exception as e:
        # UI boundary: surface the failure in the results panel.
        return {"error": str(e)}

    transcribed_words = _split_words(transcription)

    # autojunk=False: the default heuristic ignores frequent words once a
    # sequence has 200+ items, which would skew the score on long passages.
    matcher = difflib.SequenceMatcher(
        None, original_words, transcribed_words, autojunk=False
    )
    accuracy = round(matcher.ratio() * 100, 2)

    # Speaking speed (approximate); guard against a zero/unknown duration.
    duration = info.duration
    speed = round(len(transcribed_words) / duration, 2) if duration else 0.0

    return {
        "📝 Transcribed Text": transcription,
        "🎯 Accuracy (%)": accuracy,
        "⏱️ Speaking Speed (words/sec)": speed
    }


# Gradio App
with gr.Blocks() as app:
    gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App")
    
    with gr.Row():
        input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
        play_button = gr.Button("🔊 Listen to Text")

    # play_text returns a status message; give it a component to land in,
    # otherwise the message is discarded and the user gets no feedback.
    play_status = gr.Textbox(label="Status", interactive=False)

    play_button.click(play_text, inputs=[input_text], outputs=[play_status])

    gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
    audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
    
    submit_button = gr.Button("✅ Submit Recording for Checking")
    
    output = gr.JSON(label="Results")
    
    submit_button.click(transcribe_audio, inputs=[audio_input, input_text], outputs=[output])

# Launch the app only when executed as a script, so that importing this
# module (e.g. for reuse of its functions) does not start a web server.
if __name__ == "__main__":
    app.launch()