"""
Compute Voice Clarity Score from audio file
"""

import librosa
import numpy as np
from typing import Dict, Any
from .vcs import calculate_voice_clarity_score, get_clarity_insight

def compute_voice_clarity_score(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Compute Voice Clarity Score and its components from a speech sample.

    Args:
        file_path (str): Path to the audio file.
        whisper_model: Loaded OpenAI Whisper model (the dict-style output of
            transcribe() used below is Whisper-specific; a faster-whisper
            model would need a small adapter).

    Returns:
        dict: A dictionary containing Voice Clarity Score and component scores.
    """
    # Transcribe audio
    result = whisper_model.transcribe(file_path, word_timestamps=False, fp16=False)
    transcript = result.get("text", "").strip()
    segments = result.get("segments", [])

    # Validate early
    if not transcript or not segments:
        raise ValueError("Empty transcript or segments from Whisper.")

    # Load audio
    y, sr = librosa.load(file_path, sr=None)
    duration = len(y) / sr if sr else 0.0
    if duration <= 0:
        raise ValueError("Audio duration invalid or zero.")
    
    # Calculate Voice Clarity Score
    clarity_result = calculate_voice_clarity_score(y, sr, segments)
    
    # Add word count and duration info for reference
    word_count = len(transcript.split())
    clarity_result["components"]["word_count"] = word_count
    clarity_result["components"]["duration"] = duration
    
    return clarity_result

def analyze_voice_quality(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Comprehensive voice quality analysis including clarity.
    
    Args:
        file_path (str): Path to the audio file
        whisper_model: Transcription model
        
    Returns:
        Dict[str, Any]: Complete voice quality analysis
    """
    # Get Voice Clarity Score
    clarity_results = compute_voice_clarity_score(file_path, whisper_model)
    vcs = clarity_results["VCS"]
    
    # Package the clarity score into the combined results
    combined_results = {
        "VCS": vcs,
    }
    
    return combined_results

# Names exported on `from <module> import *`
__all__ = ['compute_voice_clarity_score', 'analyze_voice_quality']
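
# A minimal usage sketch, not part of the module API. It assumes the
# openai-whisper package is installed and that "sample.wav" is a hypothetical
# path to a local recording. Because this module uses a relative import
# (.vcs), run it as part of its package, e.g. `python -m <package>.<module>`.
if __name__ == "__main__":
    import whisper  # openai-whisper

    # Load a small general-purpose model; any Whisper model size works here.
    model = whisper.load_model("base")

    # Compute the clarity score and print the score and its components.
    result = compute_voice_clarity_score("sample.wav", model)
    print("VCS:", result["VCS"])
    print("Components:", result["components"])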