Spaces:

nahue-passano
/

librispeech-corpus-generator

Runtime error

App Files Files Community

librispeech-corpus-generator / utils /audio.py

nahue-passano

update: added length fixing in short utterances

80f5b87 about 1 year ago

raw

history blame contribute delete

No virus

2.73 kB

	from typing import Tuple, List
	from pathlib import Path
	import numpy as np
	import soundfile as sf
	import pandas as pd

	from utils.text import get_utterance_boundaries


	def load_audio(audio_path: Path) -> Tuple[np.ndarray, float]:
	    """Read an audio file from disk.

	    Parameters
	    ----------
	    audio_path : Path
	        Location of the audio file to read.

	    Returns
	    -------
	    Tuple[np.ndarray, float]
	        The decoded samples and the sample rate, exactly as produced by
	        ``soundfile.read``.
	    """
	    # soundfile is given a plain string instead of the Path object.
	    path_str = str(audio_path)
	    samples, rate = sf.read(path_str)
	    return samples, rate


	def split_audio(
	    audio_array: np.ndarray, sample_rate: float, timestamp_list: list
	) -> List[np.ndarray]:
	    """Slice audio_array at the timestamps given in timestamp_list.

	    Parameters
	    ----------
	    audio_array : np.ndarray
	        Array of the audio to be split.
	    sample_rate : float
	        Audio sample rate; each timestamp is multiplied by it and rounded
	        to obtain a sample index.
	    timestamp_list : list
	        List of tuples containing the start and end of each stamp.

	    Returns
	    -------
	    List[np.ndarray]
	        One slice of audio_array per timestamp pair, in input order.
	        Boundaries before the start of the audio are clamped to sample 0;
	        boundaries past the end are truncated by the slice itself.
	    """
	    audio_segments = []
	    for timestamp_i in timestamp_list:
	        # A negative index would be interpreted as an offset from the END
	        # of the array and silently select the wrong samples, so clamp
	        # both boundaries to the first sample.
	        start_sample = max(0, round(timestamp_i[0] * sample_rate))
	        end_sample = max(0, round(timestamp_i[1] * sample_rate))
	        audio_segments.append(audio_array[start_sample:end_sample])

	    return audio_segments


	def save_audio_segments(
	    destination: Path,
	    audio_path: Path,
	    audio_segments: List[np.ndarray],
	    sample_rate: float,
	) -> None:
	    """Write every segment in audio_segments to its own WAV file.

	    Files are named ``<stem>-<index>.wav``, where ``<stem>`` is the stem of
	    audio_path and ``<index>`` is the position of the segment in the list.

	    Parameters
	    ----------
	    destination : Path
	        Path where segments will be saved
	    audio_path : Path
	        Path of the original audio file; only its stem is used for naming
	    audio_segments : List[np.ndarray]
	        List containing numpy arrays with the audio segments
	    sample_rate : float
	        Sample rate passed to the writer for every segment
	    """
	    for index, chunk in enumerate(audio_segments):
	        file_name = f"{audio_path.stem}-{index}.wav"
	        target = destination / file_name
	        sf.write(str(target), chunk, sample_rate)


	def generate_audio_splits(
	    audio_path: Path, timestamps_df: pd.DataFrame, destination: Path
	) -> None:
	    """Load an audio file, cut it at the utterance boundaries and save the cuts.

	    Parameters
	    ----------
	    audio_path : Path
	        Path of the audio to split
	    timestamps_df : pd.DataFrame
	        DataFrame handed to ``get_utterance_boundaries`` to obtain the
	        start and end of the utterances
	    destination : Path
	        Path where segments will be saved.
	    """
	    # Load first, then derive boundaries: same evaluation order as before.
	    samples, rate = load_audio(audio_path)
	    boundaries = get_utterance_boundaries(timestamps_df)
	    save_audio_segments(
	        destination,
	        audio_path,
	        split_audio(samples, rate, boundaries),
	        rate,
	    )