Spaces:

nahue-passano
/

librispeech-corpus-generator

Runtime error

File size: 2,732 Bytes

from typing import Tuple, List
from pathlib import Path
import numpy as np
import soundfile as sf
import pandas as pd

from utils.text import get_utterance_boundaries


def load_audio(audio_path: Path) -> Tuple[np.ndarray, float]:
    """Reads an audio file from disk

    Parameters
    ----------
    audio_path : Path
        Location of the audio file to read

    Returns
    -------
    Tuple[np.ndarray, float]
        The audio samples and the sample rate, exactly as returned
        by ``soundfile.read``
    """
    # soundfile is handed a plain string path rather than the Path object.
    path_str = str(audio_path)
    samples, rate = sf.read(path_str)
    return samples, rate


def split_audio(
    audio_array: np.ndarray, sample_rate: float, timestamp_list: list
) -> List[np.ndarray]:
    """Slices audio_array with timestamps in timestamp_list

    Each timestamp (in seconds) is converted to a sample index by
    multiplying by sample_rate and rounding to the nearest integer.

    Parameters
    ----------
    audio_array : np.ndarray
        Array of the audio to be split
    sample_rate : float
        Audio sample rate
    timestamp_list : list
        List of tuples containing the start and end (in seconds) of each
        stamp. Only the first two items of every entry are used.

    Returns
    -------
    List[np.ndarray]
        List of numpy arrays with audio splits, one per timestamp and in
        the same order. Timestamps before the beginning of the audio are
        clamped to the first sample; an end past the last sample simply
        yields a shorter segment.
    """

    def _to_sample(seconds) -> int:
        # int(...) guarantees a valid slice index whatever numeric type
        # round() hands back; max(0, ...) stops a negative time from being
        # interpreted as "count from the end of the array" by the slice.
        return max(0, int(round(seconds * sample_rate)))

    return [
        audio_array[_to_sample(stamp[0]) : _to_sample(stamp[1])]
        for stamp in timestamp_list
    ]


def save_audio_segments(
    destination: Path,
    audio_path: Path,
    audio_segments: List[np.ndarray],
    sample_rate: float,
) -> None:
    """Writes every segment of audio_segments as a WAV file in destination.

    Files are named ``<stem of audio_path>-<index>.wav``, where index is
    the position of the segment in audio_segments, starting at 0.

    Parameters
    ----------
    destination : Path
        Directory where the segments will be saved
    audio_path : Path
        Path of the original audio file; only its stem is used to build
        the output file names
    audio_segments : List[np.ndarray]
        List containing numpy arrays with the audio segments
    sample_rate : float
        Sample rate passed to ``soundfile.write`` for every segment
    """
    for index, chunk in enumerate(audio_segments):
        file_name = f"{audio_path.stem}-{index}.wav"
        output_path = destination / file_name
        sf.write(str(output_path), chunk, sample_rate)

def generate_audio_splits(
    audio_path: Path, timestamps_df: pd.DataFrame, destination: Path
) -> None:
    """Loads an audio file, cuts it at the given timestamps and saves the cuts

    Parameters
    ----------
    audio_path : Path
        Path of the audio to split
    timestamps_df : pd.DataFrame
        DataFrame handed to ``get_utterance_boundaries`` to obtain the
        start and end of each utterance
    destination : Path
        Path where the resulting segments will be saved.
    """
    # Step 1: read the samples and the utterance boundaries.
    waveform, rate = load_audio(audio_path)
    boundaries = get_utterance_boundaries(timestamps_df)

    # Step 2: cut the waveform and write one file per utterance.
    pieces = split_audio(
        audio_array=waveform, sample_rate=rate, timestamp_list=boundaries
    )
    save_audio_segments(
        destination=destination,
        audio_path=audio_path,
        audio_segments=pieces,
        sample_rate=rate,
    )