import torch
import penn
# Here we'll use a 10 millisecond hopsize
hopsize = .01
# Provide a sensible frequency range given your domain and model
fmin = 30.
fmax = 1000.
# Select a checkpoint to use for inference. Selecting None will
# download and use FCNF0++ pretrained on MDB-stem-synth and PTDB
checkpoint = None
# Centers frames at hopsize / 2, 3 * hopsize / 2, 5 * hopsize / 2, ...
center = 'half-hop'
# (Optional) Linearly interpolate unvoiced regions below periodicity threshold
interp_unvoiced_at = .065
def pitch_apply(batch, rank=None, audio_column_name="audio", output_column_name="utterance_pitch", penn_batch_size=4096):
    """Compute per-utterance pitch statistics (mean and std) with penn.

    Handles both batched inputs (a list of audio samples) and a single sample,
    and writes the results to `{output_column_name}_mean` / `{output_column_name}_std`.
    """
    if isinstance(batch[audio_column_name], list):
        utterance_pitch_mean = []
        utterance_pitch_std = []
        for sample in batch[audio_column_name]:
            # Infer pitch and periodicity
            pitch, periodicity = penn.from_audio(
                torch.tensor(sample["array"][None, :]).float(),
                sample["sampling_rate"],
                hopsize=hopsize,
                fmin=fmin,
                fmax=fmax,
                checkpoint=checkpoint,
                batch_size=penn_batch_size,
                center=center,
                interp_unvoiced_at=interp_unvoiced_at,
                # Map the worker rank to an available GPU; a rank of None or 0 is passed through unchanged
                gpu=(rank or 0) % torch.cuda.device_count() if rank else rank
            )
            utterance_pitch_mean.append(pitch.mean().cpu())
            utterance_pitch_std.append(pitch.std().cpu())

        batch[f"{output_column_name}_mean"] = utterance_pitch_mean
        batch[f"{output_column_name}_std"] = utterance_pitch_std
    else:
        sample = batch[audio_column_name]
        # Infer pitch and periodicity
        pitch, periodicity = penn.from_audio(
            torch.tensor(sample["array"][None, :]).float(),
            sample["sampling_rate"],
            hopsize=hopsize,
            fmin=fmin,
            fmax=fmax,
            checkpoint=checkpoint,
            batch_size=penn_batch_size,
            center=center,
            interp_unvoiced_at=interp_unvoiced_at,
            # Map the worker rank to an available GPU; a rank of None or 0 is passed through unchanged
            gpu=(rank or 0) % torch.cuda.device_count() if rank else rank
        )
        batch[f"{output_column_name}_mean"] = pitch.mean().cpu()
        batch[f"{output_column_name}_std"] = pitch.std().cpu()

    return batch
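

# --- Usage sketch (illustrative, not part of the original script) ---
# Assuming a Hugging Face `datasets` dataset with an "audio" column in the usual
# {"array": ..., "sampling_rate": ...} format, `pitch_apply` can be mapped over it
# in batches; `with_rank=True` passes each worker's rank so it can pick a GPU.
# The dataset path below is a placeholder.
#
# from datasets import load_dataset
#
# dataset = load_dataset("path/to/your-speech-dataset", split="train")
# dataset = dataset.map(
#     pitch_apply,
#     batched=True,
#     batch_size=16,
#     with_rank=True,
#     num_proc=torch.cuda.device_count() or 1,
# )
# print(dataset[0]["utterance_pitch_mean"], dataset[0]["utterance_pitch_std"])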