Spaces:

ktangri
/

call-sentiment

Runtime error

File size: 1,808 Bytes

a263f35
ea52814
c150302
a263f35
eb6ba59
a263f35
7db1162
ea52814
 
c150302
a263f35
eb6ba59
 
a263f35
02be93f
a263f35
 
 
c150302
 
 
 
 
4f3fae8
c150302
 
 
 
 
 
 
 
 
 
 
4f3fae8
a263f35
 
 
c150302
 
1dc5584
a263f35
 
 
 
 
 
1dc5584
 
a263f35

import gradio as gr
from transformers import pipeline, Wav2Vec2ProcessorWithLM
from pyannote.audio import Pipeline
from librosa import load, resample
from rpunct import RestorePuncts

# Checkpoint id shared by the processor and the speech-recognition pipeline.
asr_model = 'patrickvonplaten/wav2vec2-base-960h-4-gram'
processor = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model)

# The pipeline reuses the processor's tokenizer, feature extractor and decoder
# rather than letting `pipeline` resolve its own.
asr = pipeline(
	'automatic-speech-recognition',
	model=asr_model,
	tokenizer=processor.tokenizer,
	feature_extractor=processor.feature_extractor,
	decoder=processor.decoder,
)

# Called by transcribe() to obtain the speaker turns of a recording.
speaker_segmentation = Pipeline.from_pretrained("pyannote/speaker-segmentation")

# Called by transcribe() to punctuate the text of each speaker turn.
rpunct = RestorePuncts()

def transcribe(filepath):
	"""Transcribe an audio file and attribute the words to speaker turns.

	Runs speaker segmentation and word-timestamped speech recognition on the
	same file, then walks the speaker turns in the order they are yielded and
	assigns each recognised word to the first turn whose end time is at or
	after the word's end timestamp.

	Args:
		filepath: Path of the audio file to process.

	Returns:
		A ``(diarized_output, full_text)`` tuple. ``diarized_output`` holds one
		line per turn that received at least one word, formatted as
		``<speaker>: ''<punctuated text>'' from <start>-<end>``. ``full_text``
		is the lower-cased transcript of the whole recording.
	"""
	speaker_output = speaker_segmentation(filepath)

	# Decode directly at 16 kHz, the rate the ASR pipeline is fed. Asking
	# `load` for the target rate avoids the intermediate resample to librosa's
	# default rate and the positional `resample(y, orig_sr, target_sr)` call,
	# which recent librosa releases reject (those arguments are keyword-only).
	speech, _ = load(filepath, sr=16000)
	text = asr(speech, return_timestamps="word")

	full_text = text['text'].lower()
	chunks = text['chunks']

	lines = []
	i = 0  # index of the next word not yet assigned to a turn
	for turn, _, speaker in speaker_output.itertracks(yield_label=True):
		words = []
		# Consume every remaining word that finishes before this turn ends.
		while i < len(chunks) and chunks[i]['timestamp'][1] <= turn.end:
			words.append(chunks[i]['text'].lower())
			i += 1

		if words:
			# Trailing space kept so the punctuator receives the same string
			# as when the words were concatenated one by one.
			diarized = rpunct.punctuate(' '.join(words) + ' ')
			lines.append("{}: ''{}'' from {:.3f}-{:.3f}\n".format(speaker, diarized, turn.start, turn.end))

	# NOTE: words ending after the last turn are never consumed above, so they
	# appear in full_text but not in the diarized output.
	return "".join(lines), full_text

# Input widget: a microphone recording handed to transcribe() as a file path.
# NOTE(review): `gr.inputs` / `gr.outputs` look like the legacy Gradio
# namespaces - confirm they exist in the Gradio version this app is pinned to.
mic = gr.inputs.Audio(
	source='microphone',
	type='filepath',
	label='Speech input',
	optional=False,
)

# One textbox per value returned by transcribe(), in the same order.
diarized_transcript = gr.outputs.Textbox(type='auto', label='Diarized Output')
full_transcript = gr.outputs.Textbox(type='auto', label='Full Transcript')

# Example rows offered in the UI; each row supplies one value per input.
examples = [["meeting_audio.wav"]]

iface = gr.Interface(
	fn=transcribe,
	inputs=[mic],
	outputs=[diarized_transcript, full_transcript],
	examples=examples,
	description='Testing transcription',
	theme='huggingface',
)
iface.launch()