Spaces:

pyannote
/

pretrained-pipelines

Running on T4

App Files Files Community

Hervé BREDIN

jpetiot committed on Feb 1, 2022

Commit

c174364

unverified ·

1 Parent(s): ebc74bd

feat: visualize output with wavesurfer.js (#1)

Browse files

Files changed (6) hide show

.gitignore +91 -0
LICENSE +21 -0
app.py +86 -25
assets/style.css +3 -0
assets/template.html +46 -0
requirements.txt +0 -2

.gitignore ADDED Viewed

	@@ -0,0 +1,91 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+env/
+.env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# IPython Notebook
+.ipynb_checkpoints
+notebooks
+experiments
+*~
+*.npy
+*.pt
+*events.out.tfevents*
+*.csv
+# PyCharm
+.idea/
+gh-pages
+gh-pages.pub
+*.zip
+.mypy_cache/
+.vscode/
+**/lightning_logs/**
+# Version Output
+pyannote/audio/version.py
+# vim
+.vim

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2022 CNRS
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

app.py CHANGED Viewed

@@ -1,22 +1,58 @@
 from huggingface_hub import HfApi
-import matplotlib.pyplot as plt
 import streamlit as st
 from pyannote.audio import Pipeline
 from pyannote.audio import Audio
-from pyannote.core import notebook, Segment
-import io
-import base64
-from matplotlib.backends.backend_agg import RendererAgg
-_lock = RendererAgg.lock
 PYANNOTE_LOGO = "https://avatars.githubusercontent.com/u/7559051?s=400&v=4"
 EXCERPT = 30.0
 st.set_page_config(
-    page_title="pyannote.audio pretrained pipelines",
-    page_icon=PYANNOTE_LOGO)
 st.sidebar.image(PYANNOTE_LOGO)
@@ -28,13 +64,17 @@ Upload an audio file and the first {EXCERPT:g} seconds will be processed automat
 """
 )
-PIPELINES = [p.modelId for p in HfApi().list_models(filter="pyannote-audio-pipeline") if p.modelId.startswith("pyannote/")]
 audio = Audio(sample_rate=16000, mono=True)
 selected_pipeline = st.selectbox("", PIPELINES, index=0)
-with st.spinner('Loading pipeline...'):
     pipeline = Pipeline.from_pretrained(selected_pipeline)
 uploaded_file = st.file_uploader("")
@@ -45,25 +85,46 @@ if uploaded_file is not None:
     except RuntimeError as e:
         st.error(e)
         st.stop()
-    waveform, sample_rate = audio.crop(uploaded_file, Segment(0, min(duration, EXCERPT)))
     file = {"waveform": waveform, "sample_rate": sample_rate, "uri": uploaded_file.name}
-    with st.spinner('Running pipeline...'):
         output = pipeline(file)
-    with _lock:
-        notebook.reset()
-        notebook.crop = Segment(0, min(duration, EXCERPT))
-        fig, ax = plt.subplots(nrows=1, ncols=1)
-        fig.set_figwidth(12)
-        fig.set_figheight(2.0)
-        notebook.plot_annotation(output, ax=ax, time=True, legend=True)
-        plt.tight_layout()
-        st.pyplot(fig=fig, clear_figure=True)
-        plt.close(fig)
     with io.StringIO() as fp:
         output.write_rttm(fp)

+# MIT License
+#
+# Copyright (c) 2022- CNRS
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+import io
+import base64
+import numpy as np
+import scipy.io.wavfile
+from typing import Text
 from huggingface_hub import HfApi
 import streamlit as st
 from pyannote.audio import Pipeline
 from pyannote.audio import Audio
+from pyannote.core import Segment
+import streamlit.components.v1 as components
def to_base64(waveform: np.ndarray, sample_rate: int = 16000) -> Text:
    """Encode a waveform as a base64 WAV data URI.

    The samples are peak-normalized (divided by their maximum absolute
    value plus a small epsilon) and serialized as an in-memory WAV file.
    The array passed by the caller is never modified.

    Parameters
    ----------
    waveform : np.ndarray
        Audio samples, in a layout accepted by `scipy.io.wavfile.write`.
        Non floating-point input is converted to float32 before
        normalization.
    sample_rate : int, optional
        Sample rate written to the WAV header. Defaults to 16000.

    Returns
    -------
    data_uri : Text
        String of the form "data:audio/x-wav;base64,<payload>".
    """
    samples = np.asarray(waveform)

    # Integer (or boolean) samples cannot hold the normalized values, so
    # they are promoted to float32 first.
    if not np.issubdtype(samples.dtype, np.floating):
        samples = samples.astype(np.float32)

    # Normalize into a new array: an in-place division would rescale the
    # caller's buffer, which may be shared with other objects.
    # The epsilon avoids a division by zero on all-silent input.
    peak = np.max(np.abs(samples)) + 1e-8
    normalized = (samples / peak).astype(samples.dtype, copy=False)

    with io.BytesIO() as content:
        scipy.io.wavfile.write(content, sample_rate, normalized)
        payload = base64.b64encode(content.getvalue()).decode()

    return f"data:audio/x-wav;base64,{payload}"
 PYANNOTE_LOGO = "https://avatars.githubusercontent.com/u/7559051?s=400&v=4"
 EXCERPT = 30.0
 st.set_page_config(
+    page_title="pyannote.audio pretrained pipelines", page_icon=PYANNOTE_LOGO
+)
 st.sidebar.image(PYANNOTE_LOGO)
 """
 )
+PIPELINES = [
+    p.modelId
+    for p in HfApi().list_models(filter="pyannote-audio-pipeline")
+    if p.modelId.startswith("pyannote/")
+]
 audio = Audio(sample_rate=16000, mono=True)
 selected_pipeline = st.selectbox("", PIPELINES, index=0)
+with st.spinner("Loading pipeline..."):
     pipeline = Pipeline.from_pretrained(selected_pipeline)
 uploaded_file = st.file_uploader("")
     except RuntimeError as e:
         st.error(e)
         st.stop()
+    waveform, sample_rate = audio.crop(
+        uploaded_file, Segment(0, min(duration, EXCERPT))
+    )
     file = {"waveform": waveform, "sample_rate": sample_rate, "uri": uploaded_file.name}
+    with st.spinner("Running pipeline..."):
         output = pipeline(file)
+    with open('assets/template.html') as html, open('assets/style.css') as css:
+        html_template = html.read()
+        st.markdown('<style>{}</style>'.format(css.read()), unsafe_allow_html=True)
+    colors = [
+        "#ffd70033",
+        "#00ffff33",
+        "#ff00ff33",
+        "#00ff0033",
+        "#9932cc33",
+        "#00bfff33",
+        "#ff7f5033",
+        "#66cdaa33",
+    ]
+    num_colors = len(colors)
+    label2color = {label: colors[k % num_colors] for k, label in enumerate(sorted(output.labels()))}
+    BASE64 = to_base64(waveform.numpy().T)
+    REGIONS = ""
+    LEGENDS = ""
+    labels=[]
+    for segment, _, label in output.itertracks(yield_label=True):
+        REGIONS += f"var re = wavesurfer.addRegion({{start: {segment.start:g}, end: {segment.end:g}, color: '{label2color[label]}', resize : false, drag : false}});"
+        if not label in labels:
+            LEGENDS += f"<li><span style='background-color:{label2color[label]}'></span>{label}</li>"
+            labels.append(label)
+    html = html_template.replace("BASE64", BASE64).replace("REGIONS", REGIONS)
+    st.markdown("<div style='overflow : auto'><ul class='legend'>"+LEGENDS+"</ul></div>", unsafe_allow_html=True)
+    components.html(html, height=250, scrolling=True)
     with io.StringIO() as fp:
         output.write_rttm(fp)

assets/style.css ADDED Viewed

	@@ -0,0 +1,3 @@

+.legend { list-style: none; margin: 0; padding: 0}
+.legend li { float: left; margin : auto; margin-right: 10px; font-family : "Lato", "Trebuchet MS", Roboto, Helvetica, Arial, sans-serif; line-height: 1.8; font-size:20px;color : #444}
+.legend span { border: 1px solid #ccc; float: left; width: 30px; height: 30px; margin: 2px; }

assets/template.html ADDED Viewed

	@@ -0,0 +1,46 @@

<!--
	Waveform viewer rendered inside a Streamlit HTML component.
	The host application substitutes two placeholder tokens in this file
	(the audio data URI and the region-creation statements) before
	rendering it.

	wavesurfer.js is pinned to the 5.x line: unversioned unpkg URLs follow
	the newest release, and 7.x no longer ships the dist/plugin/ files nor
	the plugin API used below.
-->
<script src="https://unpkg.com/wavesurfer.js@5"></script>
<script src="https://unpkg.com/wavesurfer.js@5/dist/plugin/wavesurfer.regions.min.js"></script>
<script src="https://unpkg.com/wavesurfer.js@5/dist/plugin/wavesurfer.timeline.min.js"></script>
<br>
<div id="waveform"></div>
<div id="timeline"></div>
<br>
<div><button onclick="play()" id="ppb">Play</button></div>
<script type="text/javascript">
	// Player with the regions and timeline plugins enabled.
	var wavesurfer = WaveSurfer.create({
		container: '#waveform',
		barGap: 2,
		barHeight: 3,
		barWidth: 3,
		barRadius: 2,
		plugins: [
			WaveSurfer.regions.create({}),
			WaveSurfer.timeline.create({
				container: "#timeline",
				notchPercentHeight: 40,
				primaryColor: "#444",
				primaryFontColor: "#444"
			})
		]
	});

	// Audio source: replaced with a data URI by the host application.
	wavesurfer.load('BASE64');

	// Start playback as soon as the audio has been decoded.
	wavesurfer.on('ready', function () {
		wavesurfer.play();
	});

	// Keep the button caption in sync with the playback state.
	wavesurfer.on('play', function () {
		document.getElementById('ppb').innerHTML = "Pause";
	});
	wavesurfer.on('pause', function () {
		document.getElementById('ppb').innerHTML = "Play";
	});

	// Region-creation statements: injected by the host application.
	REGIONS

	// The space bar toggles playback, like the button.
	document.addEventListener('keyup', event => {
		if (event.code === 'Space') {
			play();
		}
	});

	// Toggle between playing and paused.
	function play() {
		if (wavesurfer.isPlaying()) {
			wavesurfer.pause();
		} else {
			wavesurfer.play();
		}
	}
</script>

requirements.txt CHANGED Viewed

@@ -1,4 +1,2 @@
 git+https://github.com/pyannote/pyannote-audio.git@develop#egg=pyannote-audio
 speechbrain == 0.5.10
-matplotlib == 3.3.3


1	git+https://github.com/pyannote/pyannote-audio.git@develop#egg=pyannote-audio
2	speechbrain == 0.5.10