pyannote-speaker-diarization-mark-audio-section

Sleeping

pyannote-speaker-diarization-mark-audio-section / app.py

Hervé Bredin

feat: initial import

ebc74bd about 3 years ago

2.73 kB

	import base64
	import html
	import io
	import threading

	from huggingface_hub import HfApi
	import matplotlib.pyplot as plt
	import streamlit as st
	from pyannote.audio import Pipeline
	from pyannote.audio import Audio
	from pyannote.core import notebook, Segment
	from matplotlib.backends.backend_agg import RendererAgg

	# Lock held around every pyplot call further down in this script.
	# Matplotlib builds that still expose ``RendererAgg.lock`` provide the lock
	# directly; on builds without that attribute a plain attribute access would
	# raise AttributeError at start-up, so a re-entrant lock is created instead.
	_lock = getattr(RendererAgg, "lock", None)
	if _lock is None:
	    # Store the fallback on the class so that later runs of this script in
	    # the same process find the same lock object instead of creating a new
	    # one on every run.
	    _lock = threading.RLock()
	    RendererAgg.lock = _lock

	# Image used both as the page icon and as the sidebar picture.
	PYANNOTE_LOGO = "https://avatars.githubusercontent.com/u/7559051?s=400&v=4"
	# Upper bound, in seconds, on the portion of an upload that is processed.
	EXCERPT = 30.0

	# Page-level configuration: browser tab title and icon.
	st.set_page_config(page_title="pyannote.audio pretrained pipelines", page_icon=PYANNOTE_LOGO)

	st.sidebar.image(PYANNOTE_LOGO)

	# Heading and short usage hint; ``:g`` prints 30.0 as "30".
	intro_text = f"""
	# 🎹 Pretrained pipelines

	Upload an audio file and the first {EXCERPT:g} seconds will be processed automatically.
	"""
	st.markdown(intro_text)

	# Identifiers of the Hub models returned for the "pyannote-audio-pipeline"
	# filter, restricted to those whose id starts with "pyannote/".
	PIPELINES = [
	    model.modelId
	    for model in HfApi().list_models(filter="pyannote-audio-pipeline")
	    if model.modelId.startswith("pyannote/")
	]

	# With no candidate there is nothing to select or load: report it on the
	# page and end this script run instead of failing later while loading.
	if not PIPELINES:
	    st.error("No pretrained pyannote pipeline could be found on the Hugging Face Hub.")
	    st.stop()

	# Audio reader configured with a 16 kHz sample rate and mono output.
	audio = Audio(sample_rate=16000, mono=True)

	# The first listed pipeline is pre-selected.
	selected_pipeline = st.selectbox("", PIPELINES, index=0)

	with st.spinner('Loading pipeline...'):
	    pipeline = Pipeline.from_pretrained(selected_pipeline)

	# Upload handling: run the selected pipeline on the beginning of the
	# uploaded file, plot the result and offer it as an RTTM download.
	uploaded_file = st.file_uploader("")
	if uploaded_file is not None:

	    try:
	        duration = audio.get_duration(uploaded_file)
	    except RuntimeError as e:
	        # Show the error on the page and end this script run.
	        st.error(e)
	        st.stop()

	    # Processed time range: from 0 to EXCERPT seconds, or to the end of the
	    # file when it is shorter. Computed once and reused for the plot below.
	    excerpt = Segment(0, min(duration, EXCERPT))
	    waveform, sample_rate = audio.crop(uploaded_file, excerpt)
	    file = {"waveform": waveform, "sample_rate": sample_rate, "uri": uploaded_file.name}

	    with st.spinner('Running pipeline...'):
	        output = pipeline(file)

	    # All pyplot calls are made while holding the module-level lock.
	    with _lock:
	        notebook.reset()
	        notebook.crop = excerpt

	        fig, ax = plt.subplots(nrows=1, ncols=1)
	        fig.set_figwidth(12)
	        fig.set_figheight(2.0)
	        notebook.plot_annotation(output, ax=ax, time=True, legend=True)

	        plt.tight_layout()
	        st.pyplot(fig=fig, clear_figure=True)
	        plt.close(fig)

	    # Serialise the pipeline output as RTTM text in memory.
	    with io.StringIO() as fp:
	        output.write_rttm(fp)
	        content = fp.getvalue()

	    b64 = base64.b64encode(content.encode()).decode()
	    # The link is rendered as raw HTML and ``output.uri`` comes from the
	    # uploaded file's name, so it is escaped before being placed inside the
	    # double-quoted ``download`` attribute.
	    download_name = html.escape(f"{output.uri}.rttm", quote=True)
	    href = f'<a download="{download_name}" href="data:file/text;base64,{b64}">Download as RTTM</a>'
	    st.markdown(href, unsafe_allow_html=True)


	# Sidebar footer: pointers to the documentation, the discussion and issue
	# trackers, and a contact address.
	sidebar_footer = """
	-------------------

	To use these pipelines on more and longer files on your own (GPU, hence much faster) servers, check the [documentation](https://github.com/pyannote/pyannote-audio).

	For [technical questions](https://github.com/pyannote/pyannote-audio/discussions) and [bug reports](https://github.com/pyannote/pyannote-audio/issues), please check [pyannote.audio](https://github.com/pyannote/pyannote-audio) Github repository.

	For commercial enquiries and scientific consulting, please contact [me](mailto:herve@niderb.fr).
	"""
	st.sidebar.markdown(sidebar_footer)