trysem baaastien commited on
Commit
26f8bd6
0 Parent(s):

Duplicate from baaastien/Spleeter_and_ASR

Browse files

Co-authored-by: Bastien <baaastien@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.npy filter=lfs diff=lfs merge=lfs -text
13
+ *.npz filter=lfs diff=lfs merge=lfs -text
14
+ *.onnx filter=lfs diff=lfs merge=lfs -text
15
+ *.ot filter=lfs diff=lfs merge=lfs -text
16
+ *.parquet filter=lfs diff=lfs merge=lfs -text
17
+ *.pickle filter=lfs diff=lfs merge=lfs -text
18
+ *.pkl filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pt filter=lfs diff=lfs merge=lfs -text
21
+ *.pth filter=lfs diff=lfs merge=lfs -text
22
+ *.rar filter=lfs diff=lfs merge=lfs -text
23
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
24
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
25
+ *.tflite filter=lfs diff=lfs merge=lfs -text
26
+ *.tgz filter=lfs diff=lfs merge=lfs -text
27
+ *.wasm filter=lfs diff=lfs merge=lfs -text
28
+ *.xz filter=lfs diff=lfs merge=lfs -text
29
+ *.zip filter=lfs diff=lfs merge=lfs -text
30
+ *.zst filter=lfs diff=lfs merge=lfs -text
31
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Spleeter And ASR
3
+ emoji: 🚀
4
+ colorFrom: gray
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 3.2
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ duplicated_from: baaastien/Spleeter_and_ASR
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import random2
4
+ from spleeter.separator import Separator
5
+ from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
6
+
7
+ # Initialise a separator with 2 stems (accompaniment and vocals) using the 16 kHz Spleeter configuration, chosen here for ASR
8
+ separator = Separator('spleeter:2stems-16kHz')
9
+
10
+ # Initialise the speech-to-text model: an English Wav2Vec2 model
11
+ # https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-english
12
+ pipe = pipeline("automatic-speech-recognition", "jonatasgrosman/wav2vec2-large-xlsr-53-english")
13
+
14
+ # Gradio function to split audio stems, transcribe vocals and return their filepaths
15
def extract_stems(audio):
    """Split an audio file into two stems and transcribe the vocals.

    Args:
        audio: Path of the audio file to process (the Gradio input component
            is configured with ``type="filepath"``).

    Returns:
        A ``(vocals, accompaniment, transcript)`` tuple: the paths of the two
        stem files written under ``./output/`` and the text recognised in the
        vocals stem.
    """
    # Per-request folder name so stems from different requests do not
    # overwrite each other.
    foldername = str(random2.randrange(100000000))

    # synchronous=True waits for the end of the split before going further,
    # so both stem files exist when they are transcribed and returned.
    separator.separate_to_file(
        audio,
        "output/",
        filename_format=foldername + "/{instrument}.wav",
        synchronous=True,
    )

    # Build the file paths of the vocals and accompaniment stems.
    vocals = f"./output/{foldername}/vocals.wav"
    accompaniment = f"./output/{foldername}/accompaniment.wav"

    # The Hugging Face ASR pipeline returns a dict; only its "text" entry
    # should be shown in the output textbox, not the dict's repr.
    result = pipe(vocals, chunk_length_s=10, decoder=None)
    transcript = result["text"] if isinstance(result, dict) else result

    return vocals, accompaniment, transcript
31
+
32
# Launch a Gradio interface
# Input is an audio file,
# Output is two audio files and a transcript

title = "Demo: Deezer Spleeter + english Automatic Speech Recognition"
description = "<p>This demo is a basic interface for <a href='https://research.deezer.com/projects/spleeter.html' target='_blank'>Deezer Spleeter</a>.</p><p>It uses the Spleeter library for separate audio file in two stems : accompaniments and vocals.</p><p>Once splitted, it performs ASR (Automatic Speech Recognition) based on a Wav2vec2 english model.</p>"

# os.listdir() returns entries in arbitrary order; sort them so the examples
# are always presented in the same order.
examples = [["examples/" + mp3] for mp3 in sorted(os.listdir("examples/"))]

demo = gr.Interface(
    fn=extract_stems,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=[
        gr.Audio(label="Vocals stem", source="upload", type="filepath"),
        gr.Audio(label="Accompaniment stem", source="upload", type="filepath"),
        gr.Textbox(label="Wave2vec2 Automatic Speech Recognition (English)"),
    ],
    title=title,
    description=description,
    examples=examples,
    allow_flagging="never",
)

demo.launch()
examples/MyBubbaMi-NothingMuch.mp3 ADDED
Binary file (416 kB). View file
 
examples/audio_example.mp3 ADDED
Binary file (263 kB). View file
 
license.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Music Nothing Much by My Bubba & Mi
2
+ from https://freemusicarchive.org
3
+
4
+ Under Attribution-NonCommercial-ShareAlike 3.0 Unported (CC BY-NC-SA 3.0)
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ libsndfile1
2
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ pyctcdecode
4
+ pypi-kenlm
5
+ spleeter
6
+ random2