baaastien commited on
Commit
bd81cd1
1 Parent(s): e9103a2

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ import os
4
+ import random2
5
+ from spleeter.separator import Separator
6
+ from transformers import pipeline
7
+
8
# Module-level model setup (runs once at import / app start-up).

# Spleeter separator with the 2-stems model (vocals + accompaniment).
# The 16 kHz variant is used because the downstream Wav2Vec2 ASR model
# expects 16 kHz input audio.
separator = Separator('spleeter:2stems-16kHz')

# Speech-to-text pipeline using a Wav2Vec2 model fine-tuned for English:
# https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-english
pipe = pipeline("automatic-speech-recognition", "jonatasgrosman/wav2vec2-large-xlsr-53-english")
14
+
15
# Gradio handler: split an audio file into stems, transcribe the vocals,
# and return the stem file paths plus the transcript text.
def extract_stems(audio):
    """Separate *audio* into vocals/accompaniment and transcribe the vocals.

    Parameters
    ----------
    audio : str
        Filepath of the uploaded audio file (``gr.Audio(type="filepath")``).

    Returns
    -------
    tuple[str, str, str]
        Paths of the vocals stem and the accompaniment stem, and the
        English transcript of the vocals.
    """
    # Unique sub-folder per request so concurrent calls don't overwrite
    # each other's output files.
    foldername = str(random2.randrange(100000000))

    # Split the input. synchronous=True blocks until separation finishes,
    # so the stem files are guaranteed to exist before we read them below.
    separator.separate_to_file(
        audio,
        "output/",
        filename_format=foldername + "/{instrument}.wav",
        synchronous=True,
    )

    # Spleeter's 2-stems model writes exactly these two files.
    vocals = os.path.join("output", foldername, "vocals.wav")
    accompaniment = os.path.join("output", foldername, "accompaniment.wav")

    # Transcribe the vocals stem; chunk_length_s=10 enables chunked
    # inference so long recordings fit in memory.
    result = pipe(vocals, chunk_length_s=10, decoder=None)

    # The ASR pipeline returns a dict like {"text": "..."}. Return the
    # text itself so the Textbox shows the transcript, not the dict repr
    # (the original returned the raw dict).
    return vocals, accompaniment, result["text"]
32
+
33
# Build and launch the Gradio interface.
# Input: one audio file. Outputs: the two separated stems and a transcript.

title = "Demo: Deezer Spleeter + english Automatic Speech Recognition"
description = "This demo is a basic interface for <a href='https://research.deezer.com/projects/spleeter.html' target='_blank'>Deezer Spleeter</a>. It uses the Spleeter library for separate audio file in two stems : accompaniments and vocals. Once splitted, it performs ASR (Automatic Speech Recognition) based on a Wav2vec2 english model."

# sorted() makes the example ordering deterministic — os.listdir returns
# entries in arbitrary order.
examples = [["examples/" + mp3] for mp3 in sorted(os.listdir("examples/"))]

demo = gr.Interface(
    fn=extract_stems,
    inputs=gr.Audio(source="upload", type="filepath"),
    # NOTE: `source` is an input-only parameter, so it is omitted from the
    # output components (the original passed source="upload" to outputs,
    # where it has no effect).
    outputs=[
        gr.Audio(label="Vocals stem", type="filepath"),
        gr.Audio(label="Accompaniment stem", type="filepath"),
        gr.Textbox(label="Automatic Speech Recognition (English)"),
    ],
    title=title,
    description=description,
    examples=examples,
    allow_flagging="never",
)

demo.launch()