Spaces:
Runtime error
Runtime error
File size: 2,732 Bytes
7405904 80f5b87 7405904 80f5b87 7405904 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
from typing import Tuple, List
from pathlib import Path
import numpy as np
import soundfile as sf
import pandas as pd
from utils.text import get_utterance_boundaries
def load_audio(audio_path: Path) -> Tuple[np.ndarray, float]:
    """Read an audio file from disk with ``soundfile``.

    Parameters
    ----------
    audio_path : Path
        Location of the audio file to read.

    Returns
    -------
    Tuple[np.ndarray, float]
        The decoded samples and the sample rate, exactly as returned by
        ``soundfile.read``.
    """
    # soundfile is handed a plain string rather than the Path object.
    file_name = str(audio_path)
    samples, rate = sf.read(file_name)
    return samples, rate
def split_audio(
    audio_array: np.ndarray, sample_rate: float, timestamp_list: list
) -> List[np.ndarray]:
    """Slice ``audio_array`` into one segment per timestamp pair.

    Parameters
    ----------
    audio_array : np.ndarray
        Array of the audio to be split. It is sliced along its first axis.
    sample_rate : float
        Audio sample rate, used to convert timestamps into sample indices.
    timestamp_list : list
        Sequence of items whose first two elements are the start and the
        end of a segment, expressed in the time unit that matches
        ``sample_rate`` (presumably seconds - confirm against the caller).

    Returns
    -------
    List[np.ndarray]
        One slice of ``audio_array`` per timestamp, in input order. An empty
        ``timestamp_list`` yields an empty list.
    """

    def to_sample_index(timestamp) -> int:
        # int(...) guarantees a plain integer index whatever numeric type
        # round() hands back (e.g. for NumPy scalars).
        index = int(round(timestamp * sample_rate))
        # A negative index would be read by slicing as "counted from the
        # end" and silently select the wrong samples, so clamp it to the
        # beginning of the audio instead.
        return max(0, index)

    return [
        audio_array[to_sample_index(stamp[0]):to_sample_index(stamp[1])]
        for stamp in timestamp_list
    ]
def save_audio_segments(
    destination: Path,
    audio_path: Path,
    audio_segments: List[np.ndarray],
    sample_rate: float,
) -> None:
    """Write every audio segment to ``destination`` as a WAV file.

    Each segment is stored as ``<stem>-<index>.wav``, where ``<stem>`` is the
    file name of ``audio_path`` without its extension and ``<index>`` is the
    zero-based position of the segment in ``audio_segments``.

    Parameters
    ----------
    destination : Path
        Directory where the segments will be saved. It is created (together
        with any missing parents) if it does not exist yet.
    audio_path : Path
        Path of the original audio file; only its stem is used, to name the
        output files.
    audio_segments : List[np.ndarray]
        List containing numpy arrays with the audio segments.
    sample_rate : float
        Sample rate of the original audio file, passed on to ``soundfile``
        for every segment.
    """
    # Writing into a missing directory would fail on the first segment, so
    # make sure the target directory exists up front.
    destination.mkdir(parents=True, exist_ok=True)
    for i, segment in enumerate(audio_segments):
        segment_path = destination / f"{audio_path.stem}-{i}.wav"
        sf.write(str(segment_path), segment, sample_rate)
def generate_audio_splits(
    audio_path: Path, timestamps_df: pd.DataFrame, destination: Path
) -> None:
    """Cut one audio file into utterance segments and save them to disk.

    Parameters
    ----------
    audio_path : Path
        Path of the audio file to split.
    timestamps_df : pd.DataFrame
        DataFrame handed to ``get_utterance_boundaries`` to obtain the
        boundaries of each utterance.
    destination : Path
        Path where the resulting segments will be saved.
    """
    # Load first, then derive the boundaries, then slice and write.
    samples, rate = load_audio(audio_path)
    boundaries = get_utterance_boundaries(timestamps_df)
    save_audio_segments(
        destination,
        audio_path,
        split_audio(samples, rate, boundaries),
        rate,
    )
|