Spaces:

pyannote
/

pretrained-pipelines

Sleeping

File size: 2,725 Bytes

ebc74bd

import base64
import html
import io
import threading

import matplotlib.pyplot as plt
import streamlit as st
from huggingface_hub import HfApi
from matplotlib.backends.backend_agg import RendererAgg
from pyannote.audio import Audio
from pyannote.audio import Pipeline
from pyannote.core import notebook, Segment

# Lock held around the pyplot drawing code further down in this script.
# NOTE(review): presumably needed because pyplot state is process-global while
# several Streamlit sessions may render at the same time -- confirm.
# The lock is taken from ``RendererAgg.lock`` when matplotlib provides it. Not
# every matplotlib release appears to expose that attribute (verify against the
# pinned version), so when it is missing a re-entrant lock is created and
# stored on the class: that keeps one lock shared across script reruns instead
# of building a fresh, useless one on every run.
_lock = getattr(RendererAgg, "lock", None)
if _lock is None:
    _lock = RendererAgg.lock = threading.RLock()

# Image used both as the page icon and at the top of the sidebar.
PYANNOTE_LOGO = "https://avatars.githubusercontent.com/u/7559051?s=400&v=4"
# Upper bound, in seconds, on how much of an uploaded file gets processed.
EXCERPT = 30.0

# Browser-tab title and icon for the app.
_page_settings = {
    "page_title": "pyannote.audio pretrained pipelines",
    "page_icon": PYANNOTE_LOGO,
}
st.set_page_config(**_page_settings)

# Logo at the top of the sidebar.
st.sidebar.image(PYANNOTE_LOGO)

# Main-page heading; the ``:g`` format renders EXCERPT without a trailing ".0".
_intro_markdown = f"""
# 🎹 Pretrained pipelines

Upload an audio file and the first {EXCERPT:g} seconds will be processed automatically.
"""
st.markdown(_intro_markdown)

# Identifiers of every Hub model carrying the "pyannote-audio-pipeline" filter
# and published under the "pyannote/" namespace.
# NOTE(review): this listing and the pipeline load below run again on every
# script rerun; consider Streamlit's caching facilities if that proves slow.
PIPELINES = [
    p.modelId
    for p in HfApi().list_models(filter="pyannote-audio-pipeline")
    if p.modelId.startswith("pyannote/")
]

# With nothing to choose from the select box would yield no selection and the
# loader below would be called without a pipeline name, so stop with a
# readable message instead.
if not PIPELINES:
    st.error("No pyannote pretrained pipeline could be found on the Hugging Face Hub.")
    st.stop()

# Audio loader configured for 16 kHz mono.
audio = Audio(sample_rate=16000, mono=True)

selected_pipeline = st.selectbox("", PIPELINES, index=0)

with st.spinner('Loading pipeline...'):
    pipeline = Pipeline.from_pretrained(selected_pipeline)

# NOTE(review): some pyannote.audio releases appear to return None (rather than
# raise) when a pipeline cannot be downloaded, e.g. gated models without a
# token -- confirm against the installed version. Failing here gives the user
# a clear message instead of a "'NoneType' object is not callable" later on.
if pipeline is None:
    st.error(f"Could not load pipeline {selected_pipeline!r}.")
    st.stop()

def _rttm_download_link(uri, rttm_text):
    """Return an HTML anchor that downloads *rttm_text* as ``<uri>.rttm``.

    The RTTM text is embedded in the link as a base64 ``data:`` URI.

    *uri* originates from the name of the uploaded file, i.e. from the user,
    and the anchor is rendered with ``unsafe_allow_html=True``; it is therefore
    HTML-escaped (quotes included) so it cannot break out of the ``download``
    attribute and inject markup.
    """
    payload = base64.b64encode(rttm_text.encode()).decode()
    filename = html.escape(f"{uri}.rttm", quote=True)
    return f'<a download="{filename}" href="data:file/text;base64,{payload}">Download as RTTM</a>'


uploaded_file = st.file_uploader("")
if uploaded_file is not None:

    # A file that cannot be read is reported to the user and ends this run.
    try:
        duration = audio.get_duration(uploaded_file)
    except RuntimeError as e:
        st.error(e)
        st.stop()

    # Process at most the first EXCERPT seconds; the same time window is used
    # for cropping the audio and for the plot's x-range.
    excerpt = Segment(0, min(duration, EXCERPT))
    waveform, sample_rate = audio.crop(uploaded_file, excerpt)
    file = {"waveform": waveform, "sample_rate": sample_rate, "uri": uploaded_file.name}

    with st.spinner('Running pipeline...'):
        output = pipeline(file)

    # Drawing goes through pyplot and the shared ``notebook`` object, so it is
    # done while holding the module-level lock.
    with _lock:

        notebook.reset()
        notebook.crop = excerpt

        fig, ax = plt.subplots(nrows=1, ncols=1)
        try:
            fig.set_figwidth(12)
            fig.set_figheight(2.0)
            notebook.plot_annotation(output, ax=ax, time=True, legend=True)

            plt.tight_layout()
            st.pyplot(fig=fig, clear_figure=True)
        finally:
            # Close the figure even when plotting fails so it is not leaked.
            plt.close(fig)

    # Serialize the pipeline output to RTTM in memory.
    with io.StringIO() as fp:
        output.write_rttm(fp)
        content = fp.getvalue()

    st.markdown(_rttm_download_link(output.uri, content), unsafe_allow_html=True)


# Static footer shown in the sidebar: a horizontal rule followed by pointers to
# the documentation, the support channels and a contact address.
_sidebar_footer = """
-------------------

To use these pipelines on more and longer files on your own (GPU, hence much faster) servers, check the [documentation](https://github.com/pyannote/pyannote-audio).  

For [technical questions](https://github.com/pyannote/pyannote-audio/discussions) and [bug reports](https://github.com/pyannote/pyannote-audio/issues), please check [pyannote.audio](https://github.com/pyannote/pyannote-audio) Github repository.

For commercial enquiries and scientific consulting, please contact [me](mailto:herve@niderb.fr).
"""
st.sidebar.markdown(_sidebar_footer)