# ENGLISH-Speaking-Scoring

# Running

# File size: 19,263 Bytes

import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
import time
import os
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
import nltk
from nltk.tokenize import word_tokenize
import re

# Download necessary NLTK data.
#
# Newer NLTK releases look the tokenizer and tagger up under the
# "punkt_tab" / "averaged_perceptron_tagger_eng" ids, while older releases
# use the legacy ids. Fetching both keeps word_tokenize() and pos_tag()
# usable on either, instead of silently dropping to the regex fallbacks.
_NLTK_RESOURCES = (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
)
try:
    # Make the download more reliable by specifying the download directory.
    nltk_data_dir = '/home/user/nltk_data'
    os.makedirs(nltk_data_dir, exist_ok=True)

    # Register the directory *before* downloading so that resources which
    # did arrive are still found if a later download raises.
    if nltk_data_dir not in nltk.data.path:
        nltk.data.path.insert(0, nltk_data_dir)

    for _resource in _NLTK_RESOURCES:
        nltk.download(_resource, download_dir=nltk_data_dir)
except Exception as e:
    print(f"NLTK download issue: {e}")
    # Fallback: let NLTK choose its default download location.
    for _resource in _NLTK_RESOURCES:
        nltk.download(_resource)

# Load every model once at import time. A failure in any single load leaves
# MODELS_LOADED False so request handlers can report it instead of crashing.
_COLA_ROBERTA_CHECKPOINT = "textattack/roberta-base-CoLA"

try:
    # Speech-to-text (Whisper)
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-large-v3",
    )

    # Grammar acceptability classifier (CoLA): model and tokenizer are
    # loaded explicitly and then wrapped in a pipeline.
    cola_model = AutoModelForSequenceClassification.from_pretrained(_COLA_ROBERTA_CHECKPOINT)
    cola_tokenizer = AutoTokenizer.from_pretrained(_COLA_ROBERTA_CHECKPOINT)
    grammar_pipeline = pipeline(
        "text-classification",
        model=cola_model,
        tokenizer=cola_tokenizer,
    )

    # Grammar correction (T5)
    correction_pipeline = pipeline(
        "text2text-generation",
        model="vennify/t5-base-grammar-correction",
    )

    # Sentiment analysis
    sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )

    # Fluency analysis (a second CoLA classifier, BERT based)
    fluency_pipeline = pipeline(
        "text-classification",
        model="textattack/bert-base-uncased-CoLA",
    )
except Exception as e:
    print(f"Error loading models: {e}")
    # Record the failure so process_audio can short-circuit.
    MODELS_LOADED = False
else:
    # Every pipeline above was created successfully.
    MODELS_LOADED = True

# Common English filler words to detect
FILLER_WORDS = ["um", "uh", "like", "you know", "actually", "basically", "literally", 
                "sort of", "kind of", "i mean", "so", "well", "right", "okay", "yeah"]

def count_filler_words(text):
    """Count filler words and phrases in ``text``.

    Matching is case-insensitive and anchored on word boundaries, so
    "so" does not match inside "solution".

    Args:
        text: The transcription to scan.

    Returns:
        A ``(count, ratio)`` tuple. ``count`` is the number of filler
        occurrences; ``ratio`` is ``count`` divided by the number of
        whitespace-separated words, capped at 1.0 (0.0 for empty text).
    """
    lowered = text.lower()
    # re.escape keeps the pattern literal even if a filler containing regex
    # metacharacters is ever added to FILLER_WORDS.
    count = sum(
        len(re.findall(rf"\b{re.escape(filler)}\b", lowered))
        for filler in FILLER_WORDS
    )
    total_words = len(lowered.split())
    if not total_words:
        return count, 0.0
    # Word boundaries can find several fillers inside one whitespace token
    # (e.g. "so-so"); cap the ratio so callers computing ``1 - ratio`` never
    # receive a value outside [0, 1].
    return count, min(count / total_words, 1.0)

def calculate_speaking_rate(text, duration):
    """Return the speaking pace of ``text`` in words per minute.

    ``duration`` is the clip length in seconds. A zero or negative duration
    yields 0 instead of dividing by it.
    """
    # Words are whitespace-separated tokens; per-second rate scaled to minutes.
    return 0 if duration <= 0 else (len(text.split()) / duration) * 60

def analyze_vocabulary_richness(text):
    """Measure lexical diversity and part-of-speech usage of ``text``.

    Args:
        text: The transcription to analyse (lower-cased before tokenizing).

    Returns:
        A ``(richness, pos_counts)`` tuple. ``richness`` is unique words
        divided by total words; ``pos_counts`` maps each POS tag to the
        number of words carrying it. ``(0, {})`` is returned when the text
        contains no words. If POS tagging fails, ``pos_counts`` is the
        simplified ``{"WORD": total, "UNIQUE": unique}`` summary.
    """
    lowered = text.lower()
    try:
        tokens = word_tokenize(lowered)
    except LookupError:
        # Tokenizer data is unavailable: fall back to a plain regex split.
        tokens = re.findall(r'\b\w+\b', lowered)

    # word_tokenize emits punctuation marks as separate tokens. They are not
    # vocabulary, so only tokens containing a letter or digit count as words;
    # this also makes the main path agree with the regex fallback.
    is_word = [any(ch.isalnum() for ch in token) for token in tokens]
    words = [token for token, keep in zip(tokens, is_word) if keep]

    if not words:
        return 0, {}

    # Vocabulary richness (unique words / total words)
    unique_words = set(words)
    richness = len(unique_words) / len(words)

    try:
        # Tag the full token stream (punctuation included, so the tagger
        # sees sentence context) but tally the tags of real words only.
        pos_counts = {}
        for (_, tag), keep in zip(nltk.pos_tag(tokens), is_word):
            if keep:
                pos_counts[tag] = pos_counts.get(tag, 0) + 1
    except Exception:
        # Return a simplified count if POS tagging fails.
        pos_counts = {"WORD": len(words), "UNIQUE": len(unique_words)}

    return richness, pos_counts

def analyze_sentence_complexity(text):
    """Summarise sentence length in ``text``.

    Returns ``(average words per sentence, spread)`` where the spread is the
    standard deviation of the per-sentence word counts (0 when there is only
    one sentence). Text without any sentence yields ``(0, 0)``.
    """
    try:
        # Runs of '.', '!' or '?' end a sentence; drop empty fragments.
        fragments = (piece.strip() for piece in re.split(r'[.!?]+', text))
        sentences = [fragment for fragment in fragments if fragment]

        if len(sentences) == 0:
            return 0, 0

        lengths = [len(sentence.split()) for sentence in sentences]
        mean_length = sum(lengths) / len(sentences)

        # A spread only makes sense with at least two sentences.
        if len(sentences) > 1:
            spread = np.std(lengths)
        else:
            spread = 0

        return mean_length, spread
    except Exception:
        # Last resort: total words divided by the number of terminator
        # characters (at least 1), with no spread information.
        total_words = len(text.split())
        terminators = text.count('.') + text.count('!') + text.count('?')
        return total_words / max(1, terminators), 0

# Whole-word match for a positive grammar verdict. The word boundary rejects
# "unacceptable", which a plain substring test would wrongly accept.
# "label_1" covers classifiers that emit generic LABEL_<n> names.
# NOTE(review): assumes index 1 is the "acceptable" class, as in GLUE CoLA --
# confirm against the grammar checkpoint's config.
_ACCEPTABLE_GRAMMAR_RE = re.compile(r'\b(?:acceptable|label_1)\b')


def create_detailed_feedback(transcription, grammar_score, corrected_text, 
                            sentiment, fluency, filler_ratio, speaking_rate, 
                            vocabulary_richness, avg_words_per_sentence):
    """Create detailed feedback based on all metrics.

    Args:
        transcription: Recognised text (accepted for interface
            compatibility; not used in the feedback).
        grammar_score: Display string that contains the classifier label,
            e.g. ``"acceptable (0.93)"``.
        corrected_text: Grammar-corrected text (accepted for interface
            compatibility; not used in the feedback).
        sentiment: Sentiment label; ``"POSITIVE"`` earns positive feedback.
        fluency: Fluency score; values above 0.7 count as fluent.
        filler_ratio: Share of filler words; above 0.1 is flagged.
        speaking_rate: Words per minute; 120-160 is treated as ideal.
        vocabulary_richness: Unique/total word ratio.
        avg_words_per_sentence: Mean sentence length; 10-20 is treated
            as ideal.

    Returns:
        One feedback line per metric, joined with newlines.
    """
    feedback = []
    
    # Grammar feedback
    if _ACCEPTABLE_GRAMMAR_RE.search(grammar_score.lower()):
        feedback.append("✅ Your grammar is good!")
    else:
        feedback.append("❗ Your grammar needs improvement. Check the corrections provided.")
    
    # Fluency feedback
    if fluency > 0.7:
        feedback.append("✅ Your speech flows naturally.")
    else:
        feedback.append("❗ Work on making your speech more fluid and natural.")
    
    # Filler words feedback
    if filler_ratio > 0.1:
        feedback.append(f"❗ You used too many filler words ({filler_ratio:.1%} of your words).")
    else:
        feedback.append("✅ Good job minimizing filler words!")
    
    # Speaking rate feedback
    if 120 <= speaking_rate <= 160:
        feedback.append(f"✅ Your speaking pace is good ({speaking_rate:.0f} words/min).")
    elif speaking_rate < 120:
        feedback.append(f"❗ Try speaking a bit faster ({speaking_rate:.0f} words/min is slower than ideal).")
    else:
        feedback.append(f"❗ Try speaking a bit slower ({speaking_rate:.0f} words/min is faster than ideal).")
    
    # Vocabulary feedback
    if vocabulary_richness > 0.6:
        feedback.append("✅ Excellent vocabulary diversity!")
    elif vocabulary_richness > 0.4:
        feedback.append("✅ Good vocabulary usage.")
    else:
        feedback.append("❗ Try using more varied vocabulary.")
    
    # Sentence complexity feedback
    if 10 <= avg_words_per_sentence <= 20:
        feedback.append("✅ Good sentence structure and length.")
    elif avg_words_per_sentence < 10:
        feedback.append("❗ Try using more complex sentences occasionally.")
    else:
        feedback.append("❗ Your sentences are quite long. Consider varying your sentence length.")
    
    # Overall sentiment feedback
    if sentiment == "POSITIVE":
        feedback.append("✅ Your tone is positive and engaging.")
    else:
        feedback.append("ℹ️ Your tone is neutral/negative. Consider if this matches your intent.")
    
    return "\n".join(feedback)

# Classifier labels treated as "grammatical / fluent". "label_1" covers
# checkpoints that emit generic LABEL_<n> names instead of readable ones.
# NOTE(review): assumes index 1 is the "acceptable" class, as in GLUE CoLA --
# confirm against the checkpoints' configs.
_ACCEPTABLE_LABELS = frozenset({"acceptable", "label_1"})


def _is_acceptable_label(label):
    """Return True when a CoLA-style classifier label means 'acceptable'."""
    return str(label).strip().lower() in _ACCEPTABLE_LABELS


def _estimate_duration(audio):
    """Return the clip length in seconds for a file path or an audio tuple."""
    if isinstance(audio, str):
        # File path: measure with librosa, else estimate from the file size.
        try:
            import librosa
            y, sr = librosa.load(audio, sr=None)
            return librosa.get_duration(y=y, sr=sr)
        except Exception as e:
            print(f"Error getting duration: {e}")
            try:
                # Rough estimate: 16kHz, 16-bit audio is about 32KB per second
                return os.path.getsize(audio) / 32000
            except Exception:
                return 10  # Default to 10 seconds if we can't determine

    try:
        if isinstance(audio, tuple) and len(audio) > 1:
            first, second = audio[0], audio[1]
            # NOTE(review): accept both (sample_rate, samples) and
            # (samples, sample_rate); the scalar element is the rate.
            if np.isscalar(first):
                sample_rate, samples = first, second
            else:
                samples, sample_rate = first, second
        else:
            samples, sample_rate = audio[0], 16000  # Default sample rate
        return len(samples) / sample_rate if sample_rate > 0 else 0
    except Exception:
        return 10  # Default duration


def _build_metrics_chart(scores):
    """Draw a radar chart from ``(category, value)`` pairs and return the figure."""
    # Build the Figure directly instead of through pyplot: pyplot keeps every
    # figure registered until it is closed, which leaks one per request.
    from matplotlib.figure import Figure

    labels = [name for name, _ in scores]
    # Keep every spoke inside the chart's 0..1 radial range.
    values = [min(1.0, max(0.0, float(value))) for _, value in scores]

    # One spoke per category, then repeat the first point to close the polygon.
    angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()
    closed_angles = angles + angles[:1]
    closed_values = values + values[:1]

    fig = Figure(figsize=(10, 6))
    ax = fig.add_subplot(111, polar=True)
    ax.plot(closed_angles, closed_values, linewidth=2, linestyle='solid')
    ax.fill(closed_angles, closed_values, alpha=0.25)
    ax.set_ylim(0, 1)
    ax.set_yticklabels([])
    ax.set_xticks(angles)
    ax.set_xticklabels(labels)
    ax.grid(True)
    ax.set_title('Speaking Performance Metrics', size=15, color='navy', y=1.1)
    return fig


def _build_error_chart():
    """Return a placeholder figure shown when the metrics chart cannot be drawn."""
    from matplotlib.figure import Figure

    fig = Figure(figsize=(6, 3))
    ax = fig.add_subplot(111)
    ax.text(0.5, 0.5, "Error creating visualization", 
            horizontalalignment='center', verticalalignment='center')
    ax.axis('off')
    return fig


def process_audio(audio):
    """Transcribe an audio clip and score the speaker's English.

    Args:
        audio: A path to an audio file, an audio tuple, or ``None``.

    Returns:
        A 6-tuple ``(transcription, grammar_score, corrected_text,
        feedback, figure, detailed_analysis_markdown)``. On failure the
        text slots carry user-facing error messages and the figure is
        ``None``; the function does not raise.
    """
    if audio is None:
        return "No audio provided.", "", "", "", None, ""
    
    start_time = time.time()
    
    # Check if models loaded properly (a missing flag counts as loaded).
    if not globals().get('MODELS_LOADED', True):
        return ("Models failed to load. Please check the logs for details.", 
                "Error", "Error", "Unable to process audio due to model loading issues.", 
                None, "## Error\nThe required models couldn't be loaded. Please check the system configuration.")
    
    try:
        duration = _estimate_duration(audio)
        
        # Step 1: Transcription
        try:
            transcription_result = asr_pipeline(audio)
            transcription = transcription_result["text"]
        except Exception as e:
            print(f"Transcription error: {e}")
            return ("Error in speech recognition. Please try again.", 
                    "Error", "Error", "There was an error processing your audio.", 
                    None, f"## Error\nError in speech recognition: {str(e)[:100]}...")
        
        if not transcription or transcription.strip() == "":
            return ("No speech detected. Please speak louder or check your microphone.", 
                    "N/A", "N/A", "No speech detected in the audio.", 
                    None, "## No Speech Detected\nPlease try recording again with clearer speech.")
        
        # Step 2: Grammar Scoring
        try:
            score_output = grammar_pipeline(transcription)[0]
            label = score_output["label"]
            confidence = score_output["score"]
            grammar_score = f"{label} ({confidence:.2f})"
        except Exception as e:
            print(f"Grammar scoring error: {e}")
            label = "UNKNOWN"
            confidence = 0.5
            grammar_score = "Could not analyze grammar"
        
        # Step 3: Grammar Correction
        try:
            corrected = correction_pipeline(transcription, max_length=128)[0]["generated_text"]
        except Exception as e:
            print(f"Grammar correction error: {e}")
            corrected = transcription
        
        # Step 4: Sentiment Analysis
        try:
            sentiment_result = sentiment_pipeline(transcription)[0]
            sentiment = sentiment_result["label"]
            sentiment_score = sentiment_result["score"]
        except Exception as e:
            print(f"Sentiment analysis error: {e}")
            sentiment = "NEUTRAL"
            sentiment_score = 0.5
        
        # Step 5: Fluency Analysis
        try:
            fluency_result = fluency_pipeline(transcription)[0]
            # The classifier reports confidence in its *predicted* class;
            # flip it when that class is the negative one.
            if _is_acceptable_label(fluency_result["label"]):
                fluency_score = fluency_result["score"]
            else:
                fluency_score = 1 - fluency_result["score"]
        except Exception as e:
            print(f"Fluency analysis error: {e}")
            fluency_score = 0.5
        
        # Step 6: Filler Words Analysis
        try:
            filler_count, filler_ratio = count_filler_words(transcription)
        except Exception as e:
            print(f"Filler word analysis error: {e}")
            filler_count, filler_ratio = 0, 0
        
        # Step 7: Speaking Rate
        try:
            speaking_rate = calculate_speaking_rate(transcription, duration)
        except Exception as e:
            print(f"Speaking rate calculation error: {e}")
            speaking_rate = 0
        
        # Step 8: Vocabulary Richness
        try:
            vocab_richness, pos_counts = analyze_vocabulary_richness(transcription)
        except Exception as e:
            print(f"Vocabulary analysis error: {e}")
            vocab_richness, pos_counts = 0.5, {"N/A": 1}
        
        # Step 9: Sentence Complexity
        try:
            avg_words, sentence_variation = analyze_sentence_complexity(transcription)
        except Exception as e:
            print(f"Sentence complexity analysis error: {e}")
            avg_words, sentence_variation = 0, 0
        
        # Create feedback
        try:
            feedback = create_detailed_feedback(
                transcription, grammar_score, corrected, sentiment,
                fluency_score, filler_ratio, speaking_rate, vocab_richness, avg_words
            )
        except Exception as e:
            print(f"Feedback creation error: {e}")
            feedback = "Error generating detailed feedback."
        
        # Create metrics visualization
        try:
            # Normalize scores between 0 and 1
            grammar_norm = confidence if _is_acceptable_label(label) else 1 - confidence
            speaking_rate_norm = max(0, min(1, 1 - abs((speaking_rate - 140) / 100)))  # Optimal around 140 wpm
            
            fig = _build_metrics_chart([
                ('Grammar', grammar_norm),
                ('Fluency', fluency_score),
                ('Vocabulary', vocab_richness),
                ('Speaking Rate', speaking_rate_norm),
                ('Clarity', 1 - filler_ratio),  # Lower filler ratio is better
            ])
        except Exception as e:
            print(f"Visualization error: {e}")
            fig = _build_error_chart()
        
        # Create detailed analysis text
        processing_time = time.time() - start_time
        try:
            # Show the five most frequent tags.
            pos_counts_str = ', '.join([f"{k}: {v}" for k, v in sorted(pos_counts.items(), key=lambda x: x[1], reverse=True)[:5]])
        except Exception:
            pos_counts_str = "N/A"
            
        detailed_analysis = f"""
        ## Detailed Speech Analysis

        **Processing Time:** {processing_time:.2f} seconds
        **Audio Duration:** {duration:.2f} seconds

        ### Metrics:
        - **Grammar Score:** {confidence:.2f} ({label})
        - **Fluency Score:** {fluency_score:.2f}
        - **Speaking Rate:** {speaking_rate:.1f} words per minute
        - **Vocabulary Richness:** {vocab_richness:.2f} (higher is better)
        - **Filler Words:** {filler_count} occurrences ({filler_ratio:.1%} of speech)
        - **Avg Words Per Sentence:** {avg_words:.1f}
        - **Sentiment:** {sentiment} ({sentiment_score:.2f})

        ### Word Types Used:
        {pos_counts_str}
        """
        
        return transcription, grammar_score, corrected, feedback, fig, detailed_analysis
        
    except Exception as e:
        print(f"Unexpected error in process_audio: {e}")
        return ("An unexpected error occurred during processing.", 
                "Error", "Error", "There was an unexpected error processing your audio.", 
                None, f"## Unexpected Error\n\nAn error occurred: {str(e)[:200]}...")


# Visual theme: the Soft base palette plus a few style overrides for the
# primary button and for block titles, borders and shadows.
_theme_overrides = {
    "button_primary_background_fill": "*primary_500",
    "button_primary_background_fill_hover": "*primary_600",
    "button_primary_text_color": "white",
    "block_title_text_weight": "600",
    "block_border_width": "2px",
    "block_shadow": "0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1)",
}
theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo").set(**_theme_overrides)

# Build the Gradio interface. The inline CSS styles the HTML snippets below;
# of the classes defined here, only "header", "tips-box" and "footer" are
# referenced in this layout.
with gr.Blocks(theme=theme, css="""
    .container { max-width: 1000px; margin: auto; }
    .header { text-align: center; margin-bottom: 20px; }
    .header h1 { color: #1e40af; font-size: 2.5rem; }
    .header p { color: #6b7280; font-size: 1.1rem; }
    .footer { text-align: center; margin-top: 30px; color: #6b7280; }
    .tips-box { background-color: #f0f9ff; border-radius: 10px; padding: 15px; margin: 10px 0; }
    .score-card { border: 2px solid #dbeafe; border-radius: 10px; padding: 10px; }
""") as demo:
    # Page header
    gr.HTML("""
    <div class="header">
        <h1>🎙️ Advanced ENGLISH Speaking Assessment</h1>
        <p>Record or upload your speech to receive comprehensive feedback on your English speaking skills</p>
    </div>
    """)
    
    # Input row: audio source, collapsible tips and the submit button.
    with gr.Row():
        with gr.Column():
            # type="filepath" means process_audio receives a path string.
            audio_input = gr.Audio(
                sources=["microphone", "upload"], 
                type="filepath", 
                label="🎤 Speak or Upload Audio"
            )
            
            with gr.Accordion("Speaking Tips", open=False):
                gr.HTML("""
                <div class="tips-box">
                    <h4>Tips for Better Results:</h4>
                    <ul>
                        <li>Speak clearly and at a moderate pace</li>
                        <li>Minimize background noise</li>
                        <li>Try to speak for at least 20-30 seconds</li>
                        <li>Avoid filler words like "um", "uh", "like"</li>
                        <li>Practice with both prepared and impromptu topics</li>
                    </ul>
                </div>
                """)
            
            submit_btn = gr.Button("Analyze Speech", variant="primary")
        
    # Text results: transcription, corrected text and grammar score.
    with gr.Row():
        with gr.Column():
            transcription_output = gr.Textbox(label="📝 Transcription", lines=3)
            corrected_output = gr.Textbox(label="✍️ Grammar Correction", lines=3)
            grammar_score_output = gr.Textbox(label="✅ Grammar Score")
        
    # Metrics chart next to the written feedback.
    with gr.Row():
        with gr.Column():
            metrics_chart = gr.Plot(label="Performance Metrics")
        with gr.Column():
            feedback_output = gr.Textbox(label="💬 Feedback", lines=8)
    
    # Collapsed by default; filled with the Markdown report from process_audio.
    with gr.Accordion("Detailed Analysis", open=False):
        detailed_analysis = gr.Markdown()
    
    # Page footer / disclaimer
    gr.HTML("""
    <div class="footer">
        <p>This tool provides an assessment of your spoken English. For professional evaluation, consult a qualified language instructor.</p>
    </div>
    """)
    
    # The outputs list must stay in the same order as the tuple returned by
    # process_audio: (transcription, grammar score, corrected text, feedback,
    # figure, detailed analysis).
    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[
            transcription_output,
            grammar_score_output, 
            corrected_output,
            feedback_output,
            metrics_chart,
            detailed_analysis
        ]
    )

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()