# File size: 5,323 Bytes
# 8ad2ab3
import spacy
from typing import List, Dict
def calc_srs(wpm, filler_count, long_pause_count, pitch_variation):
    """
    Speech Rate Stability (SRS): Reflects the consistency of the speaker's pace and rhythm.

    The score is a weighted blend of two components:
        - Words per Minute Consistency (45%): how close ``wpm`` is to 150 WPM.
          The deviation is capped at 30 WPM, so this component never drops
          below roughly 50.
        - Absence of Sudden Speed Shifts (55%): 100 minus the average of three
          penalties (fillers, long pauses, pitch variation), each in [0, 1].

    Args:
        wpm (float): Words per minute
        filler_count (int): Number of filler words ("um", "uh", etc.)
        long_pause_count (int): Number of pauses longer than 1 second
        pitch_variation (float): Standard deviation of pitch in semitones

    Returns:
        float: SRS score between 0-100
    """
    ideal_wpm = 150
    wpm_deviation = min(30, abs(wpm - ideal_wpm))  # Cap at 30 WPM deviation
    # At the capped deviation this is 100 - 30 * 1.67 = 49.9.
    wpm_consistency = max(0, 100 - (wpm_deviation * 1.67))

    # Each penalty saturates at 1.0 (10 fillers, 5 long pauses, 3 semitones).
    # The lower clamp at 0.0 keeps a negative input from acting as a bonus
    # that would push the stability component above 100.
    filler_penalty = max(0.0, min(filler_count / 10, 1.0))
    pause_penalty = max(0.0, min(long_pause_count / 5, 1.0))
    pitch_penalty = max(0.0, min(pitch_variation / 3.0, 1.0))  # High variation -> unstable

    # Combine into absence of sudden shifts
    mean_penalty = (filler_penalty + pause_penalty + pitch_penalty) / 3
    stability = (1 - mean_penalty) * 100

    # Final SRS score, clamped to guard against floating-point overshoot
    srs = (0.45 * wpm_consistency) + (0.55 * stability)
    return min(100, max(0, srs))
def calculate_pas(transcript: str, segments: List[Dict], filler_count: int, duration: float) -> Dict[str, float]:
    """
    Calculate the Pause Appropriateness Score (PAS) and its components.

    PAS = 0.4 * NPP + 0.6 * AFW, where:
        - AFW (Avoidance of Filler Words) falls linearly from 100 at a filler
          rate of 0 to 0 at a filler rate of 10% of all words.
        - NPP (Natural Pause Placement) is the percentage of pauses longer
          than 0.5 s that follow a segment ending in punctuation. It is 100
          when there are no such pauses.

    Args:
        transcript (str): Full transcript text
        segments (List[Dict]): List of transcript segments; each is read via
            its "text", "start" and "end" keys
        filler_count (int): Number of filler words detected
        duration (float): Total duration of audio in seconds

    Returns:
        Dict[str, float]: Dictionary with NPP, AFW, and PAS scores

    Raises:
        ValueError: If the transcript or segments are empty, the duration is
            not positive, or the transcript contains no words.
    """
    if not transcript or not segments or duration <= 0:
        raise ValueError("Transcript, segments, and duration must be valid")

    total_words = len(transcript.split())
    if total_words == 0:
        raise ValueError("No words found in transcript")

    # Calculate Avoidance of Filler Words (AFW)
    filler_rate = filler_count / total_words
    if filler_rate >= 0.10:
        afw = 0.0
    elif filler_rate <= 0.0:
        afw = 100.0
    else:
        afw = max(0.0, min(100.0, 100.0 - (filler_rate * 1000)))

    # Calculate Natural Pause Placement (NPP)
    total_pauses = 0
    natural_pauses = 0

    # Gaps between consecutive segments: natural when the segment before the
    # gap ends with sentence punctuation or a comma.
    for current, following in zip(segments, segments[1:]):
        if following["start"] - current["end"] > 0.5:
            total_pauses += 1
            if current["text"].strip().endswith((".", "!", "?", ",")):
                natural_pauses += 1

    # Leading silence counts as a pause but is never counted as natural.
    if segments[0]["start"] > 0.5:
        total_pauses += 1

    # Trailing silence is natural only after sentence-final punctuation
    # (a trailing comma does not qualify here).
    if duration - segments[-1]["end"] > 0.5:
        total_pauses += 1
        if segments[-1]["text"].strip().endswith((".", "!", "?")):
            natural_pauses += 1

    npp = 100.0 if total_pauses == 0 else (natural_pauses / total_pauses) * 100.0

    # Calculate final PAS
    pas = (0.4 * npp) + (0.6 * afw)

    return {
        "NPP": npp,
        "AFW": afw,
        "PAS": pas
    }
def calculate_fluency(srs: float, pas: float, *, srs_weight: float = 0.5) -> Dict[str, float]:
    """
    Calculate fluency score based on Speech Rate Stability and Pause Appropriateness Score.

    The score is a weighted sum of the two inputs; the weights always add up
    to 1, and the default gives each input equal weight.

    Args:
        srs (float): Speech Rate Stability score (0-100)
        pas (float): Pause Appropriateness Score (0-100)
        srs_weight (float): Share of the score taken from ``srs``, between 0
            and 1. ``pas`` receives the remainder. Defaults to 0.5.

    Returns:
        Dict[str, float]: Dictionary with fluency score (0-100) and component contributions

    Raises:
        ValueError: If ``srs_weight`` is outside [0, 1].
    """
    if not 0.0 <= srs_weight <= 1.0:
        raise ValueError("srs_weight must be between 0 and 1")

    # Compute each contribution once so the reported parts always add up to
    # the reported score.
    srs_contribution = srs_weight * srs
    pas_contribution = (1.0 - srs_weight) * pas

    return {
        "score": srs_contribution + pas_contribution,
        "SRS_contribution": srs_contribution,
        "PAS_contribution": pas_contribution
    }
def get_fluency_insight(fluency_score: float) -> str:
    """
    Generate insight text based on the fluency score.

    The score is matched against descending thresholds (85, 70, 50, 30) and
    the message of the first threshold it reaches is returned. Anything below
    30 gets the "needs significant improvement" message.

    Args:
        fluency_score (float): The calculated fluency score (0-100)

    Returns:
        str: Insight text explaining the score
    """
    # Ordered from the highest threshold down; the first match wins.
    tiers = (
        (85, "Excellent fluency with very consistent pacing and natural pauses. Speech flows effortlessly."),
        (70, "Good fluency with generally stable speech rate and appropriate pauses. Some minor inconsistencies."),
        (50, "Moderate fluency with occasional disruptions in speech flow. Consider working on pace stability and pause placement."),
        (30, "Below average fluency with noticeable disruptions. Focus on reducing filler words and maintaining consistent pace."),
    )
    for minimum, insight in tiers:
        if fluency_score >= minimum:
            return insight
    return "Speech fluency needs significant improvement. Work on maintaining consistent pace, reducing long pauses, and eliminating filler words."