Rachid Ammari committed on
Commit
9a3ba32
1 Parent(s): 5014270

initial push

Browse files
Files changed (2) hide show
  1. app.py +53 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import pipeline
import gradio as gr
import whisper

# Wav2Vec2 ASR pipelines, one per supported language.
# NOTE(review): device=0 pins inference to the first GPU -- confirm the
# Space actually runs on GPU hardware, otherwise this raises at startup.
wav2vec_en_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=0)
wav2vec_fr_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-french", device=0)
# OpenAI Whisper "base" checkpoint, shared by both languages (the language
# is passed per-call in transcribe_audio).
whisper_model = whisper.load_model("base")
9
def transcribe_audio(language=None, mic=None, file=None):
    """Transcribe an audio clip with both Wav2Vec2 and Whisper.

    Args:
        language: Language code ("en" or "fr") used to select the Wav2Vec2
            pipeline and forwarded to Whisper; any other value falls back
            to English in load_models.
        mic: Filepath of a microphone recording (takes precedence over file).
        file: Filepath of an uploaded audio file.

    Returns:
        A (wav2vec_text, whisper_text) tuple. When no audio is supplied,
        the same error message is returned for both outputs so each of the
        interface's two textboxes displays it.
    """
    if mic is not None:
        audio = mic
    elif file is not None:
        audio = file
    else:
        # Fix: the interface declares two outputs, so a bare string here
        # would only populate the first textbox; return a 2-tuple instead.
        error = "You must either provide a mic recording or a file"
        return error, error
    wav2vec_model = load_models(language)
    transcription = wav2vec_model(audio)["text"]
    transcription2 = whisper_model.transcribe(audio, language=language)["text"]
    return transcription, transcription2
21
+
22
def load_models(lang):
    """Return the Wav2Vec2 pipeline for *lang* ("en" or "fr").

    Any value other than "fr" -- including None or an unknown code --
    falls back to the English model, matching the UI's default choice.
    """
    if lang == 'fr':
        return wav2vec_fr_model
    # Default: English ('en' and every unrecognized value alike).
    return wav2vec_en_model
30
+
31
# --- Page copy ---------------------------------------------------------
title = "Speech2text comparison (Wav2vec vs Whisper)"
description = """
This Space allows easy comparisons for transcribed texts between Facebook's Wav2vec model and newly released OpenAI's Whisper model.\n
(Even if Whisper includes a language detection, here we have decided to select the language to speed up the computation and to focus only on the quality of the transcriptions. The default language is english)
"""
article = "Check out [the OpenAI Whisper model](https://github.com/openai/whisper) and [the Facebook Wav2vec model](https://ai.facebook.com/blog/wav2vec-20-learning-the-structure-of-speech-from-raw-audio/) that this demo is based off of."
examples = [["english_sentence.flac"], ["2022-a-Droite-un-fauteuil-pour-trois-3034044.mp3000.mp3"]]

# --- Widgets -----------------------------------------------------------
# Language selector plus two optional audio sources (mic or upload);
# these map positionally onto transcribe_audio(language, mic, file).
language_choice = gr.Radio(label="Language", choices=["en", "fr"], value="en")
mic_audio = gr.Audio(source="microphone", type="filepath", optional=True)
uploaded_audio = gr.Audio(source="upload", type="filepath", optional=True)

# One textbox per model so the two transcriptions sit side by side.
result_boxes = [
    gr.Textbox(label="facebook/wav2vec"),
    gr.Textbox(label="openai/whisper"),
]

demo = gr.Interface(
    fn=transcribe_audio,
    inputs=[language_choice, mic_audio, uploaded_audio],
    outputs=result_boxes,
    title=title,
    description=description,
    article=article,
    examples=examples,
)
demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
transformers
git+https://github.com/openai/whisper.git