File size: 5,323 Bytes
8ad2ab3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149


import spacy
from typing import List, Dict

def calc_srs(wpm, filler_count, long_pause_count, pitch_variation):
    """
    Speech Rate Stability (SRS): reflects the consistency of the speaker's pace and rhythm.

    The score blends two components:
        - Words-per-minute consistency (weight 0.45): distance of ``wpm`` from an
          ideal pace of 150, capped at a 30 WPM deviation.
        - Absence of sudden shifts (weight 0.55): 100 minus the averaged filler,
          long-pause and pitch penalties, each of which saturates at 1.0.

    Args:
        wpm (float): Words per minute
        filler_count (int): Number of filler words ("um", "uh", etc.)
        long_pause_count (int): Number of long pauses, as counted by the caller
        pitch_variation (float): Pitch variation measure supplied by the caller

    Returns:
        float: SRS score between 0-100
    """
    ideal_wpm = 150
    # Only the first 30 WPM of deviation is penalised, so this component
    # never drops below 100 - 30 * 1.67 = 49.9.
    wpm_deviation = min(30, abs(wpm - ideal_wpm))
    wpm_consistency = max(0, 100 - (wpm_deviation * 1.67))

    # Each penalty is normalised to [0, 1]: it saturates at 10 fillers,
    # 5 long pauses and a pitch variation of 3.0. The lower clamp keeps
    # invalid negative inputs from acting as a bonus that would lift the
    # stability component above 100 and hide a poor pace score.
    filler_penalty = min(max(filler_count / 10, 0.0), 1.0)
    pause_penalty = min(max(long_pause_count / 5, 0.0), 1.0)
    pitch_penalty = min(max(pitch_variation / 3.0, 0.0), 1.0)

    # Average the three penalties and invert into a 0-100 stability score.
    stability = (1 - ((filler_penalty + pause_penalty + pitch_penalty) / 3)) * 100

    # Weighted blend, clamped to the documented 0-100 range.
    SRS = (0.45 * wpm_consistency) + (0.55 * stability)
    return min(100, max(0, SRS))


def calculate_pas(transcript: str, segments: List[Dict], filler_count: int, duration: float) -> Dict[str, float]:
    """
    Calculate the Pause Appropriateness Score (PAS) and its components.

    PAS = 0.4 * NPP + 0.6 * AFW, where:
        - AFW (Avoidance of Filler Words) falls linearly from 100 at a filler
          rate of 0 to 0 at a filler rate of 10% of the whitespace-separated
          words in ``transcript``.
        - NPP (Natural Pause Placement) is the percentage of pauses longer
          than 0.5 s that directly follow a segment ending in punctuation.
          It is 100 when no such pause exists.

    Args:
        transcript (str): Full transcript text
        segments (List[Dict]): Transcript segments; each must provide the keys
            "text", "start" and "end"
        filler_count (int): Number of filler words detected
        duration (float): Total duration of audio in seconds

    Returns:
        Dict[str, float]: Dictionary with NPP, AFW, and PAS scores

    Raises:
        ValueError: If the transcript or segments are empty, the duration is
            not positive, or the transcript contains no words.
    """
    if not transcript or not segments or duration <= 0:
        raise ValueError("Transcript, segments, and duration must be valid")

    # A whitespace-only transcript passes the check above but has no words.
    total_words = len(transcript.split())
    if total_words == 0:
        raise ValueError("No words found in transcript")

    # Avoidance of Filler Words (AFW): every 1% of filler words costs 10 points.
    filler_rate = filler_count / total_words
    afw = max(0.0, min(100.0, 100.0 - (filler_rate * 1000)))

    # Natural Pause Placement (NPP)
    segment_texts = [seg["text"].strip() for seg in segments]
    segment_starts = [seg["start"] for seg in segments]
    segment_ends = [seg["end"] for seg in segments]

    total_pauses = 0
    natural_pauses = 0

    # Gaps between consecutive segments: a pause is natural when the segment
    # before it ends with sentence punctuation or a comma.
    for text, end, next_start in zip(segment_texts, segment_ends, segment_starts[1:]):
        if next_start - end > 0.5:
            total_pauses += 1
            if text.endswith((".", "!", "?", ",")):
                natural_pauses += 1

    # Leading silence before the first segment is counted as a pause.
    # NOTE(review): it can never be classified as natural, so any leading
    # silence over 0.5 s lowers NPP - confirm this is intended.
    if segment_starts[0] > 0.5:
        total_pauses += 1

    # Trailing silence is natural only after sentence-final punctuation
    # (a comma does not qualify here).
    if duration - segment_ends[-1] > 0.5:
        total_pauses += 1
        if segment_texts[-1].endswith((".", "!", "?")):
            natural_pauses += 1

    npp = 100.0 if total_pauses == 0 else (natural_pauses / total_pauses) * 100.0

    # Final PAS: filler avoidance is weighted more heavily than pause placement.
    pas = (0.4 * npp) + (0.6 * afw)

    return {
        "NPP": npp,
        "AFW": afw,
        "PAS": pas
    }


def calculate_fluency(srs: float, pas: float) -> Dict[str, float]:
    """
    Combine Speech Rate Stability and Pause Appropriateness into a fluency score.

    Both inputs are weighted equally (0.5 each); the overall score is the sum
    of the two weighted contributions.

    Args:
        srs (float): Speech Rate Stability score (0-100)
        pas (float): Pause Appropriateness Score (0-100)

    Returns:
        Dict[str, float]: The overall "score" plus the weighted
        "SRS_contribution" and "PAS_contribution" that add up to it
    """
    component_weight = 0.5  # SRS and PAS share the score evenly

    srs_share = component_weight * srs
    pas_share = component_weight * pas

    return {
        "score": srs_share + pas_share,
        "SRS_contribution": srs_share,
        "PAS_contribution": pas_share,
    }


def get_fluency_insight(fluency_score: float) -> str:
    """
    Map a fluency score to a short piece of feedback text.

    The score is compared against descending lower bounds (85, 70, 50, 30);
    the first band whose bound the score meets or exceeds supplies the text,
    and anything lower receives the "needs significant improvement" message.

    Args:
        fluency_score (float): The calculated fluency score (0-100)

    Returns:
        str: Insight text explaining the score
    """
    # (inclusive lower bound, message), ordered from best band to worst.
    bands = (
        (85, "Excellent fluency with very consistent pacing and natural pauses. Speech flows effortlessly."),
        (70, "Good fluency with generally stable speech rate and appropriate pauses. Some minor inconsistencies."),
        (50, "Moderate fluency with occasional disruptions in speech flow. Consider working on pace stability and pause placement."),
        (30, "Below average fluency with noticeable disruptions. Focus on reducing filler words and maintaining consistent pace."),
    )

    for lower_bound, message in bands:
        if fluency_score >= lower_bound:
            return message

    return "Speech fluency needs significant improvement. Work on maintaining consistent pace, reducing long pauses, and eliminating filler words."