"""Helpers to load an audio file, slice it by timestamps and save the slices."""

from pathlib import Path
from typing import List, Tuple

import numpy as np
import pandas as pd
import soundfile as sf

from utils.text import get_utterance_boundaries


def load_audio(audio_path: Path) -> Tuple[np.ndarray, float]:
    """Loads an audio file given its path

    Parameters
    ----------
    audio_path : Path
        Path of the audio file

    Returns
    -------
    Tuple[np.ndarray, float]
        Audio array and sample rate, exactly as returned by ``soundfile.read``
    """
    # soundfile expects a plain string path, hence the explicit conversion.
    audio_array, sample_rate = sf.read(str(audio_path))
    return audio_array, sample_rate


def _to_sample_index(timestamp: float, sample_rate: float) -> int:
    """Converts a timestamp into a non-negative integer sample index."""
    # int() guarantees a plain integer usable as a slice bound, whatever
    # numeric type the timestamp has. Clamping at 0 matters because a negative
    # index would make the slice count from the END of the array and silently
    # return the wrong audio.
    return max(0, int(round(timestamp * sample_rate)))


def split_audio(
    audio_array: np.ndarray, sample_rate: float, timestamp_list: list
) -> List[np.ndarray]:
    """Slices audio_array with the timestamps in timestamp_list

    Parameters
    ----------
    audio_array : np.ndarray
        Array of the audio to be split. It is sliced along its first axis.
    sample_rate : float
        Audio sample rate
    timestamp_list : list
        List of tuples containing the start and end of each stamp, expressed
        in the time unit of ``sample_rate`` (seconds when the rate is in Hz).

    Returns
    -------
    List[np.ndarray]
        List of numpy arrays with the audio splits, one per timestamp and in
        the same order. Timestamps before the beginning of the audio are
        clamped to its first sample; an end past the last sample is truncated
        by the slice itself.
    """
    return [
        audio_array[
            _to_sample_index(timestamp[0], sample_rate):
            _to_sample_index(timestamp[1], sample_rate)
        ]
        for timestamp in timestamp_list
    ]


def save_audio_segments(
    destination: Path,
    audio_path: Path,
    audio_segments: List[np.ndarray],
    sample_rate: float,
) -> None:
    """Saves the audio segments from audio_segments in the destination path.

    Each segment is written as ``<stem of audio_path>-<index>.wav``, where the
    index is the position of the segment in ``audio_segments``.

    Parameters
    ----------
    destination : Path
        Directory where the segments will be saved. It is created (including
        missing parents) if it does not exist yet.
    audio_path : Path
        Path of the original audio file; only its stem is used to name the
        segment files
    audio_segments : List[np.ndarray]
        List containing numpy arrays with the audio segments
    sample_rate : float
        Sample rate of the original audio file
    """
    # Writing into a missing directory would fail on the first segment.
    destination.mkdir(parents=True, exist_ok=True)
    for index, segment in enumerate(audio_segments):
        segment_path = destination / f"{audio_path.stem}-{index}.wav"
        sf.write(str(segment_path), segment, sample_rate)


def generate_audio_splits(
    audio_path: Path, timestamps_df: pd.DataFrame, destination: Path
) -> None:
    """Splits an audio file given its path and timestamps

    Loads the audio, extracts the utterance boundaries from ``timestamps_df``,
    slices the audio accordingly and writes every slice to ``destination``.

    Parameters
    ----------
    audio_path : Path
        Path of the audio
    timestamps_df : pd.DataFrame
        DataFrame containing the start and end of the utterances
    destination : Path
        Path where the segments will be saved.
    """
    audio_array, sample_rate = load_audio(audio_path)
    # NOTE(review): get_utterance_boundaries is assumed to return a list of
    # (start, end) pairs, which is what split_audio consumes - confirm in
    # utils.text.
    timestamp_list = get_utterance_boundaries(timestamps_df)
    audio_segments = split_audio(audio_array, sample_rate, timestamp_list)
    save_audio_segments(destination, audio_path, audio_segments, sample_rate)