# FrenchDroneKeyword / dataloading.py
from pathlib import Path
from typing import Callable, Literal

import librosa
import numpy as np
import pandas as pd


def load_dataset(
    paths: list,
    remove_label: list = [""],
    sr: int = 22050,
    method: Literal["fix_length", "time_stretch"] = "fix_length",
    max_time: float = 4.0,
) -> tuple[pd.DataFrame, Callable[[np.ndarray, int], np.ndarray]]:
    """Load a folder dataset fully in memory and return it as a pandas DataFrame.

    The whole dataset is loaded at once so it can be fed directly to sklearn,
    or converted to Tensors on the fly for pytorch.
    Use `to_numpy(df.y_uniform)` to extract a numpy matrix with a (n_row, ...) shape.

    Expects a dataset folder structure such as: paths = [paths1, paths2, ...]
        - paths1
            - sub1
                - blabla_GroundTruth1.wav
                - blabla_GroundTruth2.wav
            - sub2
                - ...
        - ...

    Args:
        paths (list[Path]): list of dataset directories to parse.
        remove_label (list, optional): ground-truth labels to skip. Defaults to [""].
        sr (int, optional): sample rate used to resample the audio files. Defaults to 22050.
        method (Literal["fix_length", "time_stretch"], optional): uniformization method to apply. Defaults to "fix_length".
        max_time (float, optional): common audio duration in seconds. Defaults to 4.0.

    Returns:
        df (pd.DataFrame): a DataFrame with the following columns:
            - absolute_path (str): file-system absolute path of the .wav file.
            - labels (list): labels describing the sound file (i.e. subdirectories and "_"-separated filename parts).
            - ground_truth (str): ground-truth label, i.e. the last "_"-separated part of the filename.
            - y_original_signal (np.ndarray): sound signal normalized to [-1, 1] and resampled at the given sr by `librosa.load`.
            - y_original_duration (float): duration of y_original_signal in seconds.
            - y_uniform (np.ndarray): uniformized sound signal computed from y_original_signal with the chosen method.
        uniform_transform (Callable[[np.ndarray, int], np.ndarray]): the function used to build y_uniform, so the same transform can be reused on new signals.
    """
    data = []
    uniform_transform = lambda y, sr: uniformize(y, sr, method, max_time)
    for path in paths:
        path = Path(path)
        for wav_file in path.rglob("*.wav"):
            wav_file_dict = dict()
            absolute_path = wav_file.absolute()
            # Labels are the subdirectories plus the "_"-separated filename parts;
            # the last part is the ground truth.
            *labels, label = absolute_path.relative_to(path.absolute()).parts
            label = label.replace(".wav", "").split("_")
            labels.extend(label)
            ground_truth = labels[-1]
            if ground_truth not in remove_label:
                # WARNING: librosa.load resamples to the requested sample rate (22.05 kHz by default),
                # normalizes the amplitude between -1 and 1 and converts stereo to mono.
                y_original, sr = librosa.load(path=absolute_path, sr=sr)
                wav_file_dict["absolute_path"] = absolute_path
                wav_file_dict["labels"] = labels
                wav_file_dict["ground_truth"] = ground_truth
                # Save the original sound signal and its duration
                wav_file_dict["y_original_signal"] = y_original
                duration = librosa.get_duration(y=y_original, sr=sr)
                wav_file_dict["y_original_duration"] = duration
                # Save the uniformized sound signal
                wav_file_dict["y_uniform"] = uniform_transform(y_original, sr)
                data.append(wav_file_dict)
    df = pd.DataFrame(data)
    return df, uniform_transform
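

# Illustrative usage sketch (not part of the original module): how the returned
# DataFrame and transform might be used. The "./data" folder and the "noise"
# label below are placeholder assumptions, not paths or labels defined here.
def _demo_load_dataset() -> None:
    df, uniform_transform = load_dataset(
        paths=["./data"],         # hypothetical dataset root
        remove_label=["noise"],   # hypothetical ground-truth label to skip
        sr=22050,
        method="fix_length",
        max_time=4.0,
    )
    print(df[["ground_truth", "y_original_duration"]].head())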


def uniformize(
    audio: np.ndarray,
    sr: int,
    method: Literal["fix_length", "time_stretch"] = "fix_length",
    max_time: float = 4.0,
) -> np.ndarray:
    """Bring an audio signal to a common duration of `max_time` seconds.

    "fix_length" zero-pads or truncates the signal; "time_stretch" slows it down
    or speeds it up so that it lasts exactly `max_time` seconds.
    """
    if method == "fix_length":
        return librosa.util.fix_length(audio, size=int(np.ceil(max_time * sr)))
    elif method == "time_stretch":
        duration = librosa.get_duration(y=audio, sr=sr)
        return librosa.effects.time_stretch(audio, rate=duration / max_time)
    raise ValueError(f"Unknown uniformization method: {method!r}")
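

# Illustrative example (assumption: a synthetic 2-second 440 Hz tone stands in for
# a real recording): both uniformization methods bring the signal to 4 seconds,
# "fix_length" by zero-padding and "time_stretch" by slowing it down.
def _demo_uniformize() -> None:
    sr_demo = 22050
    t = np.linspace(0.0, 2.0, int(2.0 * sr_demo), endpoint=False)
    tone = (0.5 * np.sin(2.0 * np.pi * 440.0 * t)).astype(np.float32)
    padded = uniformize(tone, sr_demo, method="fix_length", max_time=4.0)
    stretched = uniformize(tone, sr_demo, method="time_stretch", max_time=4.0)
    print(padded.shape, stretched.shape)  # both roughly 4 s * 22050 samples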


def to_numpy(ds: pd.Series) -> np.ndarray:
    """Transform a pd.Series (i.e. a column slice) into a numpy array of shape (n_row, flattened cell).

    Args:
        ds (pd.Series): column to transform into a numpy array.

    Returns:
        np.ndarray: resulting np.ndarray built from the ds pd.Series.
    """
    numpy_df = np.stack([*ds.to_numpy()])
    n_row, *cell_shape = numpy_df.shape
    if cell_shape:
        # Flatten each cell so every row becomes a 1-D feature vector
        return numpy_df.reshape(n_row, np.prod(cell_shape))
    return numpy_df.reshape(n_row)
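

# Illustrative example (tiny made-up DataFrame, not real audio): to_numpy flattens a
# column of equally shaped arrays into a single (n_row, n_features) matrix.
def _demo_to_numpy() -> None:
    demo_df = pd.DataFrame({"y_uniform": [np.zeros((4, 3)), np.ones((4, 3))]})
    matrix = to_numpy(demo_df["y_uniform"])
    print(matrix.shape)  # (2, 12): one flattened row per cell


if __name__ == "__main__":
    # Run the file-free demos; _demo_load_dataset needs an actual dataset folder.
    _demo_uniformize()
    _demo_to_numpy()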