Spaces:

cruvss
/

Fast_api

Running

File size: 5,323 Bytes

8ad2ab3



import spacy
from typing import List, Dict

def calc_srs(wpm, filler_count, long_pause_count, pitch_variation):
    """
    Score Speech Rate Stability (SRS): how steady the speaker's pace and rhythm are.

    The result blends two components:
        - Pace consistency (weight 0.45): starts at 100 and loses 1.67 points
          per WPM of distance from a 150 WPM target. The distance is capped
          at 30 WPM, so this component never drops below roughly 50.
        - Absence of sudden shifts (weight 0.55): 100 minus the mean of three
          penalties, each of which saturates at 1.0 (10 filler words,
          5 long pauses, a pitch variation of 3.0).

    Args:
        wpm (float): Words per minute
        filler_count (int): Number of filler words ("um", "uh", etc.)
        long_pause_count (int): Number of pauses longer than 1 second
        pitch_variation (float): Standard deviation of pitch in semitones

    Returns:
        float: SRS score clamped to the 0-100 range
    """
    target_wpm = 150
    deviation_cap = 30
    points_per_wpm = 1.67

    # Pace component: distance from the target pace, capped so that extreme
    # speeds cannot push this component below ~50.
    capped_deviation = min(deviation_cap, abs(wpm - target_wpm))
    pace_score = max(0, 100 - (capped_deviation * points_per_wpm))

    # Each disruption signal is scaled to a 0-1 penalty that saturates at 1.0.
    fillers = min(filler_count / 10, 1.0)
    pauses = min(long_pause_count / 5, 1.0)
    pitch = min(pitch_variation / 3.0, 1.0)  # high variation counts as unstable

    # Average the penalties and invert so that 100 means "no sudden shifts".
    mean_penalty = (fillers + pauses + pitch) / 3
    smoothness_score = (1 - mean_penalty) * 100

    # Weighted blend, clamped to the documented range.
    blended = (0.45 * pace_score) + (0.55 * smoothness_score)
    return min(100, max(0, blended))


def calculate_pas(transcript: str, segments: List[Dict], filler_count: int, duration: float) -> Dict[str, float]:
    """
    Calculate the Pause Appropriateness Score (PAS) and its components.

    PAS = 0.4 * NPP + 0.6 * AFW, where:
        - AFW (Avoidance of Filler Words) falls linearly from 100 at a filler
          rate of 0 to 0 once fillers reach 10% of the whitespace-separated
          words in the transcript.
        - NPP (Natural Pause Placement) is the percentage of pauses longer
          than 0.5 seconds that directly follow a segment ending in
          punctuation. It is 100 when no such pauses exist.

    Only whitespace tokenization and segment-final punctuation are used, so
    no NLP model is loaded here.

    Args:
        transcript (str): Full transcript text
        segments (List[Dict]): List of transcript segments; each must provide
            "text", "start" and "end" keys (times in seconds)
        filler_count (int): Number of filler words detected
        duration (float): Total duration of audio in seconds

    Returns:
        Dict[str, float]: Dictionary with NPP, AFW, and PAS scores

    Raises:
        ValueError: If the transcript or segments are empty, the duration is
            not positive, or the transcript contains no words.
    """
    if not transcript or not segments or duration <= 0:
        raise ValueError("Transcript, segments, and duration must be valid")

    total_words = len(transcript.split())
    if total_words == 0:
        raise ValueError("No words found in transcript")

    # Avoidance of Filler Words (AFW): lose 10 points per 1% filler rate,
    # clamped so rates at or above 10% score 0 and non-positive rates score 100.
    filler_rate = filler_count / total_words
    afw = max(0.0, min(100.0, 100.0 - (filler_rate * 1000)))

    # Natural Pause Placement (NPP)
    min_pause = 0.5  # gaps at or below this many seconds are not pauses
    mid_speech_marks = (".", "!", "?", ",")  # a comma is a natural place to pause
    end_of_speech_marks = (".", "!", "?")  # trailing silence should follow a full stop

    segment_texts = [seg["text"].strip() for seg in segments]
    segment_starts = [seg["start"] for seg in segments]
    segment_ends = [seg["end"] for seg in segments]

    total_pauses = 0
    natural_pauses = 0

    # Gaps between each segment and the one that follows it.
    for text, end, next_start in zip(segment_texts, segment_ends, segment_starts[1:]):
        if next_start - end > min_pause:
            total_pauses += 1
            if text.endswith(mid_speech_marks):
                natural_pauses += 1

    # Silence before the first segment has no preceding text to inspect, so it
    # is counted as a pause but never as a natural one.
    # NOTE(review): this lowers NPP for any recording with leading silence;
    # confirm that is intended.
    if segment_starts[0] > min_pause:
        total_pauses += 1

    # Silence after the last segment is natural only after sentence-final
    # punctuation.
    if duration - segment_ends[-1] > min_pause:
        total_pauses += 1
        if segment_texts[-1].endswith(end_of_speech_marks):
            natural_pauses += 1

    npp = 100.0 if total_pauses == 0 else (natural_pauses / total_pauses) * 100.0

    # Final PAS: filler avoidance is weighted above pause placement.
    pas = (0.4 * npp) + (0.6 * afw)

    return {
        "NPP": npp,
        "AFW": afw,
        "PAS": pas,
    }


def calculate_fluency(srs: float, pas: float) -> Dict[str, float]:
    """
    Combine Speech Rate Stability and Pause Appropriateness into a fluency score.

    Both inputs carry the same weight (0.5 each); the overall score is the
    sum of the two weighted contributions.

    Args:
        srs (float): Speech Rate Stability score (0-100)
        pas (float): Pause Appropriateness Score (0-100)

    Returns:
        Dict[str, float]: The overall "score" plus the weighted
            "SRS_contribution" and "PAS_contribution" that add up to it
    """
    # Compute each weighted share once and reuse it for the total.
    srs_share = 0.5 * srs
    pas_share = 0.5 * pas

    result = {"score": srs_share + pas_share}
    result["SRS_contribution"] = srs_share
    result["PAS_contribution"] = pas_share
    return result


def get_fluency_insight(fluency_score: float) -> str:
    """
    Map a fluency score to a short piece of feedback text.

    Args:
        fluency_score (float): The calculated fluency score (0-100)

    Returns:
        str: Feedback for the highest band whose lower bound the score
            reaches (85, 70, 50, 30), or the lowest-band message otherwise
    """
    # Bands are listed from the highest lower bound down; the first match wins.
    bands = (
        (85, "Excellent fluency with very consistent pacing and natural pauses. Speech flows effortlessly."),
        (70, "Good fluency with generally stable speech rate and appropriate pauses. Some minor inconsistencies."),
        (50, "Moderate fluency with occasional disruptions in speech flow. Consider working on pace stability and pause placement."),
        (30, "Below average fluency with noticeable disruptions. Focus on reducing filler words and maintaining consistent pace."),
    )
    for lower_bound, message in bands:
        if fluency_score >= lower_bound:
            return message

    # Anything below the lowest band.
    return "Speech fluency needs significant improvement. Work on maintaining consistent pace, reducing long pauses, and eliminating filler words."