|
import os
import re
import time

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
from matplotlib.figure import Figure
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
|
|
|
|
# NLTK resources used by the vocabulary analysis (tokenizer + POS tagger).
# Both the legacy ids and the ids used by newer NLTK releases are requested;
# an id a given NLTK version does not know is reported by nltk.download and
# skipped.  NOTE(review): confirm the ids against the pinned NLTK version.
_NLTK_RESOURCES = (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
)

try:
    nltk_data_dir = '/home/user/nltk_data'
    os.makedirs(nltk_data_dir, exist_ok=True)

    # Register the directory first so the resources are found even when one
    # of the downloads below fails part-way through.
    if nltk_data_dir not in nltk.data.path:
        nltk.data.path.insert(0, nltk_data_dir)

    for _resource in _NLTK_RESOURCES:
        nltk.download(_resource, download_dir=nltk_data_dir)
except Exception as e:
    print(f"NLTK download issue: {e}")

    # Fall back to NLTK's default download location.  A failure here must not
    # stop the app from starting: the analysis code has a regex fallback for
    # missing tokenizer data.
    for _resource in _NLTK_RESOURCES:
        try:
            nltk.download(_resource)
        except Exception as fallback_error:
            print(f"NLTK download issue: {fallback_error}")
|
|
|
|
|
# Load every model once at import time.  MODELS_LOADED records whether all of
# them are available; process_audio checks it before touching any pipeline.
try:
    # Speech-to-text.  chunk_length_s makes the pipeline split long recordings
    # into 30-second windows, so clips longer than a single Whisper window can
    # still be transcribed in full.
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-large-v3",
        chunk_length_s=30,
    )

    # Grammatical-acceptability classifier (CoLA).
    cola_model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-CoLA")
    cola_tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-CoLA")
    grammar_pipeline = pipeline("text-classification", model=cola_model, tokenizer=cola_tokenizer)

    # Grammar correction (text-to-text).
    correction_pipeline = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")

    # Sentiment of the transcription.
    sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

    # Second CoLA classifier, used as a fluency proxy.
    fluency_pipeline = pipeline("text-classification", model="textattack/bert-base-uncased-CoLA")

    MODELS_LOADED = True
except Exception as e:
    # Keep the module importable so the UI can report the problem.
    print(f"Error loading models: {e}")

    MODELS_LOADED = False
|
|
|
|
|
# Words and phrases counted as fillers (matched case-insensitively).
FILLER_WORDS = ["um", "uh", "like", "you know", "actually", "basically", "literally",
                "sort of", "kind of", "i mean", "so", "well", "right", "okay", "yeah"]


def count_filler_words(text):
    """Count filler words in the text.

    Args:
        text: The transcription to scan.  Empty or ``None`` input is treated
            as containing no words.

    Returns:
        A ``(count, ratio)`` tuple: the number of filler occurrences and that
        number divided by the number of whitespace-separated words (the
        divisor is at least 1, so the ratio is 0.0 for empty input).
    """
    if not text:
        return 0, 0.0

    lowered = text.lower()
    count = 0
    for filler in FILLER_WORDS:
        # Escape each part so an entry containing regex metacharacters is
        # matched literally, and allow any run of whitespace between the
        # parts of a multi-word filler ("you  know" still counts).
        phrase = r'\s+'.join(re.escape(part) for part in filler.split())
        count += len(re.findall(r'\b' + phrase + r'\b', lowered))

    return count, count / max(len(lowered.split()), 1)
|
|
|
def calculate_speaking_rate(text, duration):
    """Return the speaking rate of ``text`` in words per minute.

    ``duration`` is the clip length in seconds.  A non-positive duration
    yields 0 instead of dividing by zero.
    """
    if duration <= 0:
        return 0

    words_per_second = len(text.split()) / duration
    return words_per_second * 60
|
|
|
def analyze_vocabulary_richness(text):
    """Measure lexical diversity and part-of-speech usage of ``text``.

    Returns:
        A ``(richness, pos_counts)`` tuple.  ``richness`` is the number of
        distinct words divided by the total number of words (0 when the text
        contains no words).  ``pos_counts`` maps each POS tag to how many
        words carry it; when tagging fails it holds the total and unique word
        counts under the keys ``"WORD"`` and ``"UNIQUE"`` instead.
    """
    def is_word(token):
        # Punctuation-only tokens ("," ".", "''") are not vocabulary.
        return any(ch.isalnum() for ch in token)

    try:
        tokens = word_tokenize(text.lower())
    except LookupError:
        # Tokenizer data is not installed: fall back to a plain regex split.
        tokens = re.findall(r'\b\w+\b', text.lower())

    # word_tokenize emits punctuation as separate tokens; counting them as
    # words would skew the ratio and disagree with the regex fallback.
    words = [token for token in tokens if is_word(token)]
    if not words:
        return 0, {}

    unique_words = set(words)
    richness = len(unique_words) / len(words)

    try:
        # Tag the full token stream so the tagger still sees punctuation as
        # context, but only count the tags of real words.
        pos_counts = {}
        for token, tag in nltk.pos_tag(tokens):
            if is_word(token):
                pos_counts[tag] = pos_counts.get(tag, 0) + 1
    except Exception:
        # Tagger data missing or tagging failed: report simple counts.
        pos_counts = {"WORD": len(words), "UNIQUE": len(unique_words)}

    return richness, pos_counts
|
|
|
def analyze_sentence_complexity(text):
    """Summarise sentence length in ``text``.

    Sentences are delimited by runs of ``.``, ``!`` or ``?``.

    Returns:
        ``(average words per sentence, standard deviation of the sentence
        lengths)``.  The deviation is 0 for a single sentence, and the result
        is ``(0, 0)`` when no sentence is found.  If the main computation
        raises, a rough average based on counting terminators is returned
        with a deviation of 0.
    """
    try:
        fragments = (piece.strip() for piece in re.split(r'[.!?]+', text))
        lengths = [len(fragment.split()) for fragment in fragments if fragment]

        if not lengths:
            return 0, 0

        mean_length = sum(lengths) / len(lengths)
        spread = 0 if len(lengths) <= 1 else np.std(lengths)

        return mean_length, spread
    except Exception:
        # Crude estimate: total words divided by the number of terminators.
        total_words = len(text.split())
        terminators = sum(text.count(mark) for mark in '.!?')

        return total_words / max(1, terminators), 0
|
|
|
def create_detailed_feedback(transcription, grammar_score, corrected_text,
                             sentiment, fluency, filler_ratio, speaking_rate,
                             vocabulary_richness, avg_words_per_sentence):
    """Create detailed feedback based on all metrics.

    Args:
        transcription: The recognised text (not used by the current rules).
        grammar_score: Text of the form ``"<label> (<confidence>)"``, or any
            other message when grammar could not be scored.
        corrected_text: The corrected transcription (not used by the current
            rules).
        sentiment: Sentiment label; ``"POSITIVE"`` is treated as positive.
        fluency: Fluency score; above 0.7 counts as fluent.
        filler_ratio: Filler words per word; above 0.1 is flagged.
        speaking_rate: Words per minute; 120-160 is treated as ideal.
        vocabulary_richness: Unique-to-total word ratio.
        avg_words_per_sentence: Mean sentence length; 10-20 is treated as
            ideal.

    Returns:
        One feedback sentence per metric, joined with newlines.
    """
    feedback = []

    # Compare the whole label, not a substring: "unacceptable" contains
    # "acceptable".  "label_1" is accepted as well because classifiers without
    # named labels report LABEL_0/LABEL_1.
    # NOTE(review): assumes LABEL_1 means "acceptable" - confirm against the
    # grammar model's config.
    grammar_label = grammar_score.split("(", 1)[0].strip().lower()
    if grammar_label in ("acceptable", "label_1"):
        feedback.append("✅ Your grammar is good!")
    else:
        feedback.append("❌ Your grammar needs improvement. Check the corrections provided.")

    if fluency > 0.7:
        feedback.append("✅ Your speech flows naturally.")
    else:
        feedback.append("❌ Work on making your speech more fluid and natural.")

    if filler_ratio > 0.1:
        feedback.append(f"❌ You used too many filler words ({filler_ratio:.1%} of your words).")
    else:
        feedback.append("✅ Good job minimizing filler words!")

    if 120 <= speaking_rate <= 160:
        feedback.append(f"✅ Your speaking pace is good ({speaking_rate:.0f} words/min).")
    elif speaking_rate < 120:
        feedback.append(f"❌ Try speaking a bit faster ({speaking_rate:.0f} words/min is slower than ideal).")
    else:
        feedback.append(f"❌ Try speaking a bit slower ({speaking_rate:.0f} words/min is faster than ideal).")

    if vocabulary_richness > 0.6:
        feedback.append("✅ Excellent vocabulary diversity!")
    elif vocabulary_richness > 0.4:
        feedback.append("✅ Good vocabulary usage.")
    else:
        feedback.append("❌ Try using more varied vocabulary.")

    if 10 <= avg_words_per_sentence <= 20:
        feedback.append("✅ Good sentence structure and length.")
    elif avg_words_per_sentence < 10:
        feedback.append("❌ Try using more complex sentences occasionally.")
    else:
        feedback.append("❌ Your sentences are quite long. Consider varying your sentence length.")

    if sentiment == "POSITIVE":
        feedback.append("✅ Your tone is positive and engaging.")
    else:
        feedback.append("ℹ️ Your tone is neutral/negative. Consider if this matches your intent.")

    return "\n".join(feedback)
|
|
|
def _is_acceptable_label(label):
    """Return True when a CoLA-style classifier label means "acceptable"."""
    # "label_1" covers classifiers that report generic LABEL_0/LABEL_1 names.
    # NOTE(review): assumes LABEL_1 is the acceptable class - confirm against
    # the model configs.
    return str(label).strip().lower() in ("acceptable", "label_1")


def _split_audio_tuple(audio):
    """Return ``(sample_rate, samples)`` from a two-element audio tuple."""
    first, second = audio[0], audio[1]
    # Whichever element is a scalar is taken as the sample rate, so both the
    # (rate, data) and the (data, rate) orderings are understood.
    if np.ndim(first) == 0:
        return first, second
    return second, first


def _estimate_duration(audio):
    """Best-effort clip length in seconds for a file path or in-memory audio."""
    if isinstance(audio, str):
        try:
            import librosa

            y, sr = librosa.load(audio, sr=None)
            return librosa.get_duration(y=y, sr=sr)
        except Exception as e:
            print(f"Error getting duration: {e}")
            try:
                # Rough guess from the file size at 32000 bytes per second.
                # NOTE(review): presumably 16 kHz 16-bit mono PCM - confirm.
                return os.path.getsize(audio) / 32000
            except OSError:
                return 10  # arbitrary placeholder when nothing is known

    try:
        if isinstance(audio, tuple) and len(audio) > 1:
            sample_rate, samples = _split_audio_tuple(audio)
        else:
            sample_rate, samples = 16000, audio[0]
        return len(samples) / sample_rate if sample_rate > 0 else 0
    except (TypeError, IndexError, KeyError, ValueError):
        return 10  # arbitrary placeholder when nothing is known


def _prepare_asr_input(audio):
    """Convert in-memory tuple audio to the dict form the ASR pipeline accepts."""
    if not (isinstance(audio, tuple) and len(audio) > 1):
        return audio

    sample_rate, samples = _split_audio_tuple(audio)
    samples = np.asarray(samples)
    if samples.ndim > 1:
        # NOTE(review): assumes (samples, channels) layout - mix down to mono.
        samples = samples.mean(axis=1)
    if np.issubdtype(samples.dtype, np.integer):
        # Scale integer PCM into the [-1, 1] float range.
        samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    else:
        samples = samples.astype(np.float32)
    return {"sampling_rate": int(sample_rate), "raw": samples}


def _run_step(description, fallback, func, *args, **kwargs):
    """Run one analysis step; on failure log it and return ``fallback``."""
    try:
        return func(*args, **kwargs)
    except Exception as e:
        print(f"{description} error: {e}")
        return fallback


def _build_metrics_chart(categories, values):
    """Draw the metrics as a closed radar chart and return the figure."""
    angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
    clipped = [max(0.0, min(1.0, float(value))) for value in values]

    # Figure is created directly instead of through pyplot, so nothing is
    # registered in pyplot's global figure list and figures do not pile up
    # across requests.
    fig = Figure(figsize=(10, 6))
    ax = fig.add_subplot(111, polar=True)

    # Repeat the first point so the outline closes; angles and values must
    # have the same length.
    closed_angles = angles + angles[:1]
    closed_values = clipped + clipped[:1]
    ax.plot(closed_angles, closed_values, linewidth=2, linestyle='solid')
    ax.fill(closed_angles, closed_values, alpha=0.25)
    ax.set_ylim(0, 1)
    ax.set_yticklabels([])
    ax.set_xticks(angles)
    ax.set_xticklabels(categories)
    ax.grid(True)
    ax.set_title('Speaking Performance Metrics', size=15, color='navy', y=1.1)
    return fig


def _build_error_chart():
    """Return a small placeholder figure shown when the chart cannot be drawn."""
    fig = Figure(figsize=(6, 3))
    ax = fig.add_subplot(111)
    ax.text(0.5, 0.5, "Error creating visualization",
            horizontalalignment='center', verticalalignment='center')
    ax.axis('off')
    return fig


def process_audio(audio):
    """Transcribe a recording and score it on several speaking metrics.

    Args:
        audio: A file path, a two-element tuple of sample rate and samples,
            or ``None``.

    Returns:
        A 6-tuple ``(transcription, grammar_score, corrected_text, feedback,
        figure, detailed_analysis_markdown)``.  On failure the text slots
        carry error messages and the figure is ``None``.  Each analysis step
        is best-effort: a failing step is logged and replaced by a neutral
        default rather than aborting the request.
    """
    if audio is None:
        return "No audio provided.", "", "", "", None, ""

    start_time = time.time()

    if not globals().get("MODELS_LOADED", True):
        return ("Models failed to load. Please check the logs for details.",
                "Error", "Error", "Unable to process audio due to model loading issues.",
                None, "## Error\nThe required models couldn't be loaded. Please check the system configuration.")

    try:
        duration = _estimate_duration(audio)

        try:
            transcription = asr_pipeline(_prepare_asr_input(audio))["text"]
        except Exception as e:
            print(f"Transcription error: {e}")
            return ("Error in speech recognition. Please try again.",
                    "Error", "Error", "There was an error processing your audio.",
                    None, f"## Error\nError in speech recognition: {str(e)[:100]}...")

        if not transcription or not transcription.strip():
            return ("No speech detected. Please speak louder or check your microphone.",
                    "N/A", "N/A", "No speech detected in the audio.",
                    None, "## No Speech Detected\nPlease try recording again with clearer speech.")

        def score_grammar():
            result = grammar_pipeline(transcription)[0]
            return result["label"], result["score"], f"{result['label']} ({result['score']:.2f})"

        def score_sentiment():
            result = sentiment_pipeline(transcription)[0]
            return result["label"], result["score"]

        def score_fluency():
            result = fluency_pipeline(transcription)[0]
            # Express the score as "probability of being acceptable".
            if _is_acceptable_label(result["label"]):
                return result["score"]
            return 1 - result["score"]

        label, confidence, grammar_score = _run_step(
            "Grammar scoring", ("UNKNOWN", 0.5, "Could not analyze grammar"), score_grammar)
        corrected = _run_step(
            "Grammar correction", transcription,
            lambda: correction_pipeline(transcription, max_length=128)[0]["generated_text"])
        sentiment, sentiment_score = _run_step(
            "Sentiment analysis", ("NEUTRAL", 0.5), score_sentiment)
        fluency_score = _run_step("Fluency analysis", 0.5, score_fluency)
        filler_count, filler_ratio = _run_step(
            "Filler word analysis", (0, 0), count_filler_words, transcription)
        speaking_rate = _run_step(
            "Speaking rate calculation", 0, calculate_speaking_rate, transcription, duration)
        vocab_richness, pos_counts = _run_step(
            "Vocabulary analysis", (0.5, {"N/A": 1}), analyze_vocabulary_richness, transcription)
        avg_words, sentence_variation = _run_step(
            "Sentence complexity analysis", (0, 0), analyze_sentence_complexity, transcription)
        feedback = _run_step(
            "Feedback creation", "Error generating detailed feedback.",
            create_detailed_feedback,
            transcription, grammar_score, corrected, sentiment,
            fluency_score, filler_ratio, speaking_rate, vocab_richness, avg_words)

        try:
            categories = ['Grammar', 'Fluency', 'Vocabulary', 'Speaking Rate', 'Clarity']
            grammar_norm = confidence if _is_acceptable_label(label) else 1 - confidence
            # 140 wpm scores 1.0, falling linearly to 0 at 100 wpm away from it.
            speaking_rate_norm = max(0, min(1, 1 - abs((speaking_rate - 140) / 100)))
            values = [
                grammar_norm,
                fluency_score,
                vocab_richness,
                speaking_rate_norm,
                1 - filler_ratio,
            ]
            fig = _build_metrics_chart(categories, values)
        except Exception as e:
            print(f"Visualization error: {e}")
            fig = _build_error_chart()

        processing_time = time.time() - start_time

        try:
            top_tags = sorted(pos_counts.items(), key=lambda item: item[1], reverse=True)[:5]
            pos_counts_str = ', '.join(f"{tag}: {total}" for tag, total in top_tags)
        except (AttributeError, TypeError):
            pos_counts_str = "N/A"

        # Built line by line so no source indentation leaks into the Markdown.
        analysis_markdown = "\n".join([
            "",
            "## Detailed Speech Analysis",
            "",
            f"**Processing Time:** {processing_time:.2f} seconds",
            f"**Audio Duration:** {duration:.2f} seconds",
            "",
            "### Metrics:",
            f"- **Grammar Score:** {confidence:.2f} ({label})",
            f"- **Fluency Score:** {fluency_score:.2f}",
            f"- **Speaking Rate:** {speaking_rate:.1f} words per minute",
            f"- **Vocabulary Richness:** {vocab_richness:.2f} (higher is better)",
            f"- **Filler Words:** {filler_count} occurrences ({filler_ratio:.1%} of speech)",
            f"- **Avg Words Per Sentence:** {avg_words:.1f}",
            f"- **Sentiment:** {sentiment} ({sentiment_score:.2f})",
            "",
            "### Word Types Used:",
            pos_counts_str,
            "",
        ])

        return transcription, grammar_score, corrected, feedback, fig, analysis_markdown

    except Exception as e:
        print(f"Unexpected error in process_audio: {e}")
        return ("An unexpected error occurred during processing.",
                "Error", "Error", "There was an unexpected error processing your audio.",
                None, f"## Unexpected Error\n\nAn error occurred: {str(e)[:200]}...")
|
|
|
|
|
|
|
# Visual overrides applied on top of Gradio's Soft theme.
_theme_overrides = {
    "button_primary_background_fill": "*primary_500",
    "button_primary_background_fill_hover": "*primary_600",
    "button_primary_text_color": "white",
    "block_title_text_weight": "600",
    "block_border_width": "2px",
    "block_shadow": "0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1)",
}

# Blue/indigo Soft theme used by the interface.
_base_theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")
theme = _base_theme.set(**_theme_overrides)
|
|
|
# Interface definition: recording/upload input on top, then the transcription
# and grammar outputs, the metrics chart next to the feedback text, and a
# collapsible detailed analysis.
# NOTE(review): the nesting below is reconstructed from the statements, not
# from the original indentation - confirm the layout against the running app.
with gr.Blocks(theme=theme, css="""
    .container { max-width: 1000px; margin: auto; }
    .header { text-align: center; margin-bottom: 20px; }
    .header h1 { color: #1e40af; font-size: 2.5rem; }
    .header p { color: #6b7280; font-size: 1.1rem; }
    .footer { text-align: center; margin-top: 30px; color: #6b7280; }
    .tips-box { background-color: #f0f9ff; border-radius: 10px; padding: 15px; margin: 10px 0; }
    .score-card { border: 2px solid #dbeafe; border-radius: 10px; padding: 10px; }
""") as demo:
    gr.HTML("""
    <div class="header">
        <h1>🎙️ Advanced ENGLISH Speaking Assessment</h1>
        <p>Record or upload your speech to receive comprehensive feedback on your English speaking skills</p>
    </div>
    """)

    with gr.Row():
        with gr.Column():
            # type="filepath": process_audio receives the path of the clip.
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="🎤 Speak or Upload Audio"
            )

            with gr.Accordion("Speaking Tips", open=False):
                gr.HTML("""
                <div class="tips-box">
                    <h4>Tips for Better Results:</h4>
                    <ul>
                        <li>Speak clearly and at a moderate pace</li>
                        <li>Minimize background noise</li>
                        <li>Try to speak for at least 20-30 seconds</li>
                        <li>Avoid filler words like "um", "uh", "like"</li>
                        <li>Practice with both prepared and impromptu topics</li>
                    </ul>
                </div>
                """)

            submit_btn = gr.Button("Analyze Speech", variant="primary")

    with gr.Row():
        with gr.Column():
            transcription_output = gr.Textbox(label="📝 Transcription", lines=3)
            corrected_output = gr.Textbox(label="✏️ Grammar Correction", lines=3)
            grammar_score_output = gr.Textbox(label="✅ Grammar Score")

    with gr.Row():
        with gr.Column():
            metrics_chart = gr.Plot(label="Performance Metrics")
        with gr.Column():
            feedback_output = gr.Textbox(label="💬 Feedback", lines=8)

    with gr.Accordion("Detailed Analysis", open=False):
        detailed_analysis = gr.Markdown()

    gr.HTML("""
    <div class="footer">
        <p>This tool provides an assessment of your spoken English. For professional evaluation, consult a qualified language instructor.</p>
    </div>
    """)

    # The outputs are listed in the order of process_audio's return tuple.
    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[
            transcription_output,
            grammar_score_output,
            corrected_output,
            feedback_output,
            metrics_chart,
            detailed_analysis
        ]
    )
|
|
|
# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()