Fast_api / fluency /fluency.py
mulasagg's picture
Add application file
8ad2ab3
import spacy
from typing import List, Dict
def calc_srs(wpm: float, filler_count: int, long_pause_count: int, pitch_variation: float) -> float:
    """
    Speech Rate Stability (SRS): reflects the consistency of the speaker's pace and rhythm.

    The score blends two components:
    - Words-per-minute consistency (45%): closeness of ``wpm`` to an ideal of 150 WPM.
    - Absence of sudden speed shifts (55%): the average of three penalties
      (filler words, long pauses, pitch variation), each saturating at 1.0.

    Args:
        wpm (float): Words per minute
        filler_count (int): Number of filler words ("um", "uh", etc.)
        long_pause_count (int): Number of pauses longer than 1 second
        pitch_variation (float): Standard deviation of pitch in semitones

    Returns:
        float: SRS score between 0.0 and 100.0 (always a float, including at the bounds)
    """
    ideal_wpm = 150
    max_wpm_deviation = 30

    # Deviation is capped, so the consistency term ranges from 100 (on target)
    # down to roughly 50 (30 or more WPM away from the ideal); it can never go
    # below zero and needs no extra clamp.
    wpm_deviation = min(max_wpm_deviation, abs(wpm - ideal_wpm))
    wpm_consistency = 100 - (wpm_deviation * 1.67)

    # Each penalty lives in [0, 1]. Flooring the raw input at 0 prevents an
    # invalid negative count from turning into a "bonus" that cancels out the
    # other penalties.
    filler_penalty = min(max(filler_count, 0) / 10, 1.0)
    pause_penalty = min(max(long_pause_count, 0) / 5, 1.0)
    pitch_penalty = min(max(pitch_variation, 0) / 3.0, 1.0)  # High variation -> unstable

    # Average the penalties and invert: 100 means no sudden shifts at all.
    stability = (1 - ((filler_penalty + pause_penalty + pitch_penalty) / 3)) * 100

    srs = (0.45 * wpm_consistency) + (0.55 * stability)

    # Float bounds keep the return type a float even when the score is clamped
    # (floating-point rounding can push a perfect score slightly above 100).
    return min(100.0, max(0.0, srs))
def calculate_pas(transcript: str, segments: List[Dict], filler_count: int, duration: float) -> Dict[str, float]:
    """
    Calculate the Pause Appropriateness Score (PAS) and its components.

    PAS = 0.4 * NPP + 0.6 * AFW, where:
    - AFW (Avoidance of Filler Words) drops linearly from 100 at a filler
      rate of 0 to 0 at a filler rate of 10% of all words.
    - NPP (Natural Pause Placement) is the percentage of pauses longer than
      0.5 seconds that come right after a segment ending in punctuation.
      Leading silence before the first segment always counts as a pause but
      never as a natural one. With no pauses at all, NPP is 100.

    Args:
        transcript (str): Full transcript text
        segments (List[Dict]): List of transcript segments; each one is read
            through its "text", "start" and "end" keys
        filler_count (int): Number of filler words detected
        duration (float): Total duration of audio in seconds

    Returns:
        Dict[str, float]: Dictionary with NPP, AFW, and PAS scores

    Raises:
        ValueError: If the transcript or segments are empty, the duration is
            not positive, or the transcript contains no words.
    """
    if not transcript or not segments or duration <= 0:
        raise ValueError("Transcript, segments, and duration must be valid")

    # A whitespace-only transcript passes the check above but has no words.
    total_words = len(transcript.split())
    if total_words == 0:
        raise ValueError("No words found in transcript")

    # Avoidance of Filler Words (AFW): every 1% of filler words costs 10
    # points; the clamp covers rates of 10% and above (score 0) as well as a
    # non-positive filler count (score 100).
    filler_rate = filler_count / total_words
    afw = max(0.0, min(100.0, 100.0 - (filler_rate * 1000)))

    # Natural Pause Placement (NPP)
    pause_threshold = 0.5  # seconds of silence that count as a pause
    mid_speech_marks = (".", "!", "?", ",")  # a comma is a natural place to pause mid-speech
    closing_marks = (".", "!", "?")  # trailing silence is only natural after a finished sentence

    total_pauses = 0
    natural_pauses = 0

    # Gaps between consecutive segments.
    for current, following in zip(segments, segments[1:]):
        if following["start"] - current["end"] > pause_threshold:
            total_pauses += 1
            if current["text"].strip().endswith(mid_speech_marks):
                natural_pauses += 1

    # Silence before the first segment: counted as a pause, never as natural.
    if segments[0]["start"] > pause_threshold:
        total_pauses += 1

    # Silence after the last segment.
    last_segment = segments[-1]
    if duration - last_segment["end"] > pause_threshold:
        total_pauses += 1
        if last_segment["text"].strip().endswith(closing_marks):
            natural_pauses += 1

    npp = 100.0 if total_pauses == 0 else (natural_pauses / total_pauses) * 100.0

    # Final PAS
    pas = (0.4 * npp) + (0.6 * afw)

    return {
        "NPP": npp,
        "AFW": afw,
        "PAS": pas
    }
def calculate_fluency(srs: float, pas: float) -> Dict[str, float]:
    """
    Combine Speech Rate Stability and Pause Appropriateness into one fluency score.

    Args:
        srs (float): Speech Rate Stability score (0-100)
        pas (float): Pause Appropriateness Score (0-100)

    Returns:
        Dict[str, float]: The overall fluency score under "score", plus the
        weighted share each input contributed under "SRS_contribution" and
        "PAS_contribution"
    """
    # Both components carry the same weight, so the score is their plain average.
    srs_share = 0.5 * srs
    pas_share = 0.5 * pas

    result = {"score": srs_share + pas_share}
    result["SRS_contribution"] = srs_share
    result["PAS_contribution"] = pas_share
    return result
def get_fluency_insight(fluency_score: float) -> str:
    """
    Translate a fluency score into a short piece of feedback.

    Args:
        fluency_score (float): The calculated fluency score (0-100)

    Returns:
        str: Feedback text for the band the score falls into
    """
    # (lowest score in the band, feedback) pairs, ordered from best to worst
    # so that the first band the score reaches is the one that applies.
    feedback_bands = (
        (85, "Excellent fluency with very consistent pacing and natural pauses. Speech flows effortlessly."),
        (70, "Good fluency with generally stable speech rate and appropriate pauses. Some minor inconsistencies."),
        (50, "Moderate fluency with occasional disruptions in speech flow. Consider working on pace stability and pause placement."),
        (30, "Below average fluency with noticeable disruptions. Focus on reducing filler words and maintaining consistent pace."),
    )
    for band_floor, feedback in feedback_bands:
        if fluency_score >= band_floor:
            return feedback

    # Anything below the lowest band.
    return "Speech fluency needs significant improvement. Work on maintaining consistent pace, reducing long pauses, and eliminating filler words."