lewtun
/

superb-s3prl-osanseviero__hubert_base-diarization-7f28b8b5

superb speaker-diarization osanseviero/hubert_base

Model card Files Files and versions Metrics Training metrics Community

superb-s3prl-osanseviero__hubert_base-diarization-7f28b8b5 / model.py

lewtun's picture

lewtun HF staff

commit files to HF hub

51fb653 over 2 years ago

raw history blame contribute delete

No virus

1.27 kB

	from s3prl.downstream.runner import Runner
	from typing import Dict
	import torch
	import os


	class PreTrainedModel(Runner):
	    """Inference wrapper that restores a downstream model from ``model.ckpt``.

	    The checkpoint is expected to hold an ``"Args"`` object and a ``"Config"``
	    object; both are handed to ``Runner.__init__`` after the arguments have
	    been switched to CPU inference.
	    """

	    def __init__(self, path=""):
	        """
	        Load ``model.ckpt`` from ``path`` and initialise the runner with it.

	        Args:
	            path (:obj:`str`):
	                Directory that contains ``model.ckpt``. Defaults to ``""``,
	                i.e. a path relative to the current working directory.
	        """
	        checkpoint_path = os.path.join(path, "model.ckpt")
	        # map_location='cpu' so the checkpoint loads without a GPU.
	        # NOTE(review): torch.load unpickles arbitrary objects ("Args" and
	        # "Config" are not plain tensors) — only load trusted checkpoints.
	        checkpoint = torch.load(checkpoint_path, map_location='cpu')

	        # Override the stored arguments: re-use this checkpoint as the
	        # initial checkpoint and force CPU inference.
	        args = checkpoint["Args"]
	        args.init_ckpt = checkpoint_path
	        args.mode = "inference"
	        args.device = "cpu"

	        config = checkpoint["Config"]
	        Runner.__init__(self, args, config)

	    def __call__(self, inputs) -> Dict[str, str]:
	        """
	        Run the upstream, featurizer and downstream models on one waveform.

	        Args:
	            inputs (:obj:`np.array`):
	                The raw waveform of audio received. By default at 16KHz.
	        Return:
	            A :obj:`dict` of the form ``{"frames": ...}`` whose value is the
	            first element of the downstream model's predictions, i.e. the
	            frames where one, both, or none of the speakers are speaking.
	        """
	        # Put every registered entry's model into evaluation mode.
	        # (`all_entries`, `upstream`, `featurizer` and `downstream` are
	        # presumably set up by Runner.__init__ — confirm against s3prl.)
	        for entry in self.all_entries:
	            entry.model.eval()

	        # The models are called with a list of waveforms, so wrap the single
	        # waveform in a one-element batch.
	        wavs = [torch.FloatTensor(inputs)]

	        with torch.no_grad():
	            upstream_out = self.upstream.model(wavs)
	            features = self.featurizer.model(wavs, upstream_out)
	            predictions = self.downstream.model.inference(features, [])

	        first_prediction = predictions[0]
	        return {"frames": first_prediction}