Spaces:

pustozerov
/

poc_call_transcription

Build error

App Files Files Community

poc_call_transcription / modules /diarization /nemo_diarization.py

pustozerov

Temporary remove samples.

02dca0a over 2 years ago

raw

history blame

3.29 kB

	import os

	from nemo.collections.asr.parts.utils.decoder_timestamps_utils import ASR_TIMESTAMPS
	from nemo.collections.asr.parts.utils.diarization_utils import ASR_DIAR_OFFLINE
	from omegaconf import OmegaConf
	from pyannote.audio import Pipeline

	ROOT = os.getcwd()
	MODEL_CONFIG = "info/configs/offline_diarization_asr.yaml"
	data_dir = os.path.join(ROOT, 'info/configs/')
	os.makedirs(data_dir, exist_ok=True)
	output_dir = os.path.join(ROOT, 'info/transcripts/')
	os.makedirs(output_dir, exist_ok=True)


	def diarization(file_path):
	# Create a manifest for input with below format.
	# {'audio_filepath': /path/to/audio_file, 'offset': 0, 'duration':None, 'label': 'infer', 'text': '-',
	# 'num_speakers': None, 'rttm_filepath': /path/to/rttm/file, 'uem_filepath'='/path/to/uem/filepath'}
	import json
	meta = {
	'audio_filepath': file_path,
	'offset': 0,
	'duration': None,
	'label': 'infer',
	'text': '-',
	'num_speakers': 2,
	'rttm_filepath': None,
	'uem_filepath': None
	}
	with open(os.path.join(data_dir, 'manifests/', 'input_manifest.json'), 'w') as fp:
	json.dump(meta, fp)
	fp.write('\n')

	# Make a manifest with an external VAD
	pipeline = Pipeline.from_pretrained("pyannote/voice-activity-detection")
	output = pipeline(file_path)
	initial_json = output.for_json()
	keys = ("audio_filepath", "offset", "duration", "label")
	output_json = []
	for segment in initial_json["content"]:
	vad_json = dict.fromkeys(keys)
	vad_json["audio_filepath"] = file_path
	vad_json["offset"] = segment["segment"]["start"]
	vad_json["duration"] = segment["segment"]["end"] - segment["segment"]["start"]
	vad_json["label"] = "SPEECH"
	vad_json["uniq_id"] = initial_json["uri"]
	output_json.append(vad_json)
	with open(os.path.join(data_dir, 'manifests/', 'external_vad_manifest.json'), 'w') as f:
	for item in output_json:
	f.write(str(item).replace("'", '"') + '\n')

	config2 = OmegaConf.load(MODEL_CONFIG)
	config2.diarizer.asr.model_path = 'QuartzNet15x5Base-En'
	config2.diarizer.manifest_filepath = \
	os.path.join(data_dir, 'manifests/', 'input_manifest.json')
	config2.diarizer.speaker_embeddings.model_path = 'titanet_large'
	config2.diarizer.vad.external_vad_manifest = \
	os.path.join(data_dir, 'manifests/', 'external_vad_manifest.json')
	config2.diarizer.out_dir = output_dir
	config2.num_workers = 0
	asr_ts_decoder = ASR_TIMESTAMPS(**config2.diarizer)
	asr_model = asr_ts_decoder.set_asr_model()
	word_hyp, word_ts_hyp = asr_ts_decoder.run_ASR(asr_model)
	print(word_hyp)
	print(word_ts_hyp)

	asr_diar_offline = ASR_DIAR_OFFLINE(**config2.diarizer)
	asr_diar_offline.word_ts_anchor_offset = asr_ts_decoder.word_ts_anchor_offset
	diar_hyp, diar_score = asr_diar_offline.run_diarization(config2, word_ts_hyp)
	print("Diarization hypothesis output: \n", diar_hyp)
	result = asr_diar_offline.get_transcript_with_speaker_labels(diar_hyp, word_hyp, word_ts_hyp)
	file_to_show = os.path.join(data_dir, 'transcripts/pred_rttms/', file_path.split('/')[-1].split(".")[0], '.txt')
	print(file_to_show)
	print(diar_hyp)
	return result