Spaces:

cruvss
/

Fast_api

Running

File size: 2,177 Bytes

8ad2ab3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aef3b1e
8ad2ab3

"""
Compute the Voice Clarity Score (VCS) from an audio file.
"""

import librosa
import numpy as np
from typing import Dict, Any
from .vcs import calculate_voice_clarity_score, get_clarity_insight

def compute_voice_clarity_score(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Transcribe a recording, score its clarity, and attach basic statistics.

    Args:
        file_path (str): Path to the audio file. It is handed both to the
            transcription model and to ``librosa.load``.
        whisper_model: Object exposing
            ``transcribe(file_path, word_timestamps=..., fp16=...)`` whose
            return value supports ``.get("text")`` and ``.get("segments")``
            (e.g. an openai-whisper model).

    Returns:
        dict: The result of ``calculate_voice_clarity_score`` with two extra
        entries written into its ``"components"`` mapping: ``"word_count"``
        (whitespace-separated words in the transcript) and ``"duration"``
        (number of samples divided by the sampling rate).

    Raises:
        ValueError: If the stripped transcript or the segment list is empty,
            or if the computed audio duration is not positive.
    """
    # Step 1: speech-to-text.
    transcription = whisper_model.transcribe(
        file_path, word_timestamps=False, fp16=False
    )
    spoken_text = transcription.get("text", "").strip()
    speech_segments = transcription.get("segments", [])

    # Bail out before the audio is decoded if there is nothing to score.
    if not (spoken_text and speech_segments):
        raise ValueError("Empty transcript or segments from Whisper.")

    # Step 2: decode the waveform; sr=None asks librosa not to resample.
    samples, sample_rate = librosa.load(file_path, sr=None)
    if sample_rate:
        seconds = len(samples) / sample_rate
    else:
        # A falsy sampling rate would divide by zero; treat it as no audio.
        seconds = 0.0
    if seconds <= 0:
        raise ValueError("Audio duration invalid or zero.")

    # Step 3: delegate the actual scoring to the vcs module.
    scores = calculate_voice_clarity_score(samples, sample_rate, speech_segments)

    # Step 4: record reference statistics next to the component scores.
    components = scores["components"]
    components["word_count"] = len(spoken_text.split())
    components["duration"] = seconds

    return scores

def analyze_voice_quality(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Run the clarity pipeline on a recording and report its headline score.

    Args:
        file_path (str): Path to the audio file; forwarded unchanged to
            ``compute_voice_clarity_score``.
        whisper_model: Transcription model; forwarded unchanged to
            ``compute_voice_clarity_score``.

    Returns:
        Dict[str, Any]: A new dictionary with a single key, ``"VCS"``, taken
        from the ``"VCS"`` entry of the clarity result.
    """
    clarity_report = compute_voice_clarity_score(file_path, whisper_model)

    # Only the headline score is surfaced; the rest of the report is dropped.
    return {"VCS": clarity_report["VCS"]}

# Public API of this module: the names bound by a star-import of it.
__all__ = [
    "compute_voice_clarity_score",
    "analyze_voice_quality",
]