nahue-passano
update: added length fixing in short utterances
80f5b87
from typing import Tuple, List
from pathlib import Path
import numpy as np
import soundfile as sf
import pandas as pd
from utils.text import get_utterance_boundaries
def load_audio(audio_path: Path) -> Tuple[np.ndarray, float]:
    """Read an audio file from disk with ``soundfile``.

    Parameters
    ----------
    audio_path : Path
        Location of the audio file to read.

    Returns
    -------
    Tuple[np.ndarray, float]
        The decoded samples and the sample rate, exactly as returned by
        ``sf.read``.
    """
    # The path is handed to soundfile as a plain string.
    file_name = str(audio_path)
    loaded = sf.read(file_name)
    samples, rate = loaded
    return samples, rate
def split_audio(
    audio_array: np.ndarray, sample_rate: float, timestamp_list: list
) -> List[np.ndarray]:
    """Slice ``audio_array`` at the boundaries given in ``timestamp_list``.

    Parameters
    ----------
    audio_array : np.ndarray
        Array of the audio to be split. Slicing is done along the first axis.
    sample_rate : float
        Audio sample rate. Each timestamp is multiplied by this value to
        obtain a sample index, so timestamps are expected in seconds when the
        rate is in samples per second.
    timestamp_list : list
        List of indexable items whose element 0 is the start and element 1 is
        the end of each stamp.

    Returns
    -------
    List[np.ndarray]
        One slice of ``audio_array`` per entry of ``timestamp_list``, in the
        same order. An empty ``timestamp_list`` yields an empty list.
    """
    total_samples = len(audio_array)

    def _to_sample_index(timestamp) -> int:
        # Convert to an integer sample index and clamp it to the valid range.
        # Without the lower clamp a negative index would be interpreted by
        # NumPy as "counted from the end" and silently select the wrong audio.
        sample_index = int(round(timestamp * sample_rate))
        return min(max(sample_index, 0), total_samples)

    return [
        audio_array[_to_sample_index(stamp[0]):_to_sample_index(stamp[1])]
        for stamp in timestamp_list
    ]
def save_audio_segments(
    destination: Path,
    audio_path: Path,
    audio_segments: List[np.ndarray],
    sample_rate: float,
) -> None:
    """Write every segment in ``audio_segments`` as a WAV file in ``destination``.

    Each file is named ``<stem>-<i>.wav``, where ``<stem>`` is the stem of
    ``audio_path`` and ``<i>`` is the zero-based position of the segment in
    ``audio_segments``.

    Parameters
    ----------
    destination : Path
        Directory where the segments will be saved. It is created (including
        missing parents) if it does not exist yet.
    audio_path : Path
        Path of the original audio file; only its stem is used, to name the
        output files.
    audio_segments : List[np.ndarray]
        List containing numpy arrays with the audio segments.
    sample_rate : float
        Sample rate passed to ``sf.write`` for every segment.
    """
    # Make sure the output directory exists so writing does not fail on a
    # fresh destination path.
    destination.mkdir(parents=True, exist_ok=True)

    for index, segment in enumerate(audio_segments):
        segment_path = destination / f"{audio_path.stem}-{index}.wav"
        sf.write(str(segment_path), segment, sample_rate)
def generate_audio_splits(
    audio_path: Path, timestamps_df: pd.DataFrame, destination: Path
) -> None:
    """Load an audio file, cut it at the utterance boundaries and save the cuts.

    Parameters
    ----------
    audio_path : Path
        Path of the audio file to split.
    timestamps_df : pd.DataFrame
        DataFrame handed to ``get_utterance_boundaries`` to obtain the
        start/end pairs of the utterances (its expected columns are defined
        by that helper, not here).
    destination : Path
        Path where the segments will be saved.
    """
    samples, rate = load_audio(audio_path)
    boundaries = get_utterance_boundaries(timestamps_df)
    # Slice the audio and write the pieces in one step; the original file
    # path is forwarded so the outputs can be named after it.
    save_audio_segments(
        destination,
        audio_path,
        split_audio(samples, rate, boundaries),
        rate,
    )