Ahsen Khaliq committed on
Commit
c1b51ca
1 Parent(s): e1a7f99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -25
app.py CHANGED
@@ -1,26 +1,7 @@
1
- import os
2
- os.system('pip install transformers')
3
- os.system('pip freeze')
4
- import soundfile as sf
5
  import gradio as gr
6
- import torch
7
- from transformers import SpeechEncoderDecoderModel, Speech2Text2Processor
8
-
9
- model = SpeechEncoderDecoder.from_pretrained("facebook/s2t-wav2vec2-large-en-de")
10
- processor = Speech2Text2Processor.from_pretrained("facebook/s2t-wav2vec2-large-en-de")
11
-
12
- def map_to_array(file):
13
- speech, _ = sf.read(file)
14
- return speech
15
-
16
- def inference(audio):
17
- inputs = processor(map_to_array(audio.name), sampling_rate=16_000, return_tensors="pt")
18
- generated_ids = model.generate(input_ids=inputs["input_features"], attention_mask=inputs["attention_mask"])
19
- transcription = processor.batch_decode(generated_ids)
20
- return transcription[0]
21
- inputs = gr.inputs.Audio(label="Input Audio", type="file")
22
- outputs = gr.outputs.Textbox(label="Output Text")
23
- title = "Robust wav2vec 2.0"
24
- description = "Gradio demo for Robust wav2vec 2.0. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below. Currently supports .wav and .flac files"
25
- article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2104.01027' target='_blank'>Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training</a> | <a href='https://github.com/pytorch/fairseq' target='_blank'>Github Repo</a></p>"
26
- gr.Interface(inference, inputs, outputs, title=title, description=description, article=article).launch()
 
 
 
 
1
  import gradio as gr
2
+ description = "HuBERT demo. Add your audio or click one of the examples below to load them."
3
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2106.07447'>HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units</a></p>"
4
+ gr.Interface.load("huggingface/facebook/s2t-wav2vec2-large-en-de",
5
+ description=description,
6
+ article=article
7
+ ).launch()