Spaces:
Runtime error
Runtime error
File size: 1,457 Bytes
85c00e6 e1a7f99 0c8bbcc 7e2b4a1 5c0879c 0c8bbcc e442b97 0c8bbcc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
import os
# Install `transformers` at startup. This must stay above the
# `from transformers import ...` line below, which relies on the package being present.
# The return code of os.system is ignored, so a failed install only surfaces
# later as an ImportError.
os.system('pip install transformers')
# Dump the installed package list to stdout (output is not captured or used here).
os.system('pip freeze')
import soundfile as sf
import gradio as gr
# NOTE(review): `torch` is not referenced directly in the visible script;
# presumably kept so a missing backend fails early — confirm before removing.
import torch
from transformers import SpeechEncoderDecoderModel, Speech2Text2Processor
# Single source of truth for the checkpoint so the model and its processor
# are always loaded from the same repository.
MODEL_NAME = "facebook/s2t-wav2vec2-large-en-de"

# The imported class is `SpeechEncoderDecoderModel`; the previously used
# `SpeechEncoderDecoder` name is not defined anywhere and raised NameError
# as soon as the script started.
model = SpeechEncoderDecoderModel.from_pretrained(MODEL_NAME)
processor = Speech2Text2Processor.from_pretrained(MODEL_NAME)
def map_to_array(file):
    """Read an audio file with soundfile and return only its sample data.

    ``sf.read`` yields a two-item result (samples, sample rate); the sample
    rate is discarded here.
    """
    samples, _sample_rate = sf.read(file)
    return samples
def inference(audio):
    """Run the speech model on an uploaded audio file and return the decoded text.

    Args:
        audio: File object supplied by the Gradio ``Audio`` input
            (``type="file"``); only its ``name`` attribute (a path) is read.

    Returns:
        The first decoded string produced by the model for this recording.

    Raises:
        ValueError: If no audio was supplied (``audio`` is ``None``).
    """
    if audio is None:
        # Fail with an explicit message instead of an AttributeError on `.name`.
        raise ValueError("No audio provided; please upload a .wav or .flac file.")

    # NOTE(review): the sampling rate is declared as 16 kHz without checking the
    # file's real rate (map_to_array drops it) — confirm inputs are 16 kHz mono.
    features = processor(
        map_to_array(audio.name), sampling_rate=16_000, return_tensors="pt"
    )
    # The wav2vec2-based processor returns its audio tensor under "input_values";
    # "input_features" is not a key it produces. The tensor is passed
    # positionally because the first parameter of generate() was renamed
    # (input_ids -> inputs) across transformers releases.
    generated_ids = model.generate(
        features["input_values"], attention_mask=features["attention_mask"]
    )
    decoded = processor.batch_decode(generated_ids)
    return decoded[0]
# Gradio UI wiring: the audio input hands `inference` a file object
# (type="file") and the result is shown in a text box.
inputs = gr.inputs.Audio(label="Input Audio", type="file")
outputs = gr.outputs.Textbox(label="Output Text")

# NOTE(review): the title/description/article below describe "Robust wav2vec 2.0",
# while the checkpoint loaded by this script is "facebook/s2t-wav2vec2-large-en-de"
# — confirm the user-facing text matches the model. The description also mentions
# examples, but no `examples` argument is passed to gr.Interface.
title = "Robust wav2vec 2.0"
description = "Gradio demo for Robust wav2vec 2.0. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below. Currently supports .wav and .flac files"
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2104.01027' target='_blank'>Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training</a> | <a href='https://github.com/pytorch/fairseq' target='_blank'>Github Repo</a></p>"

# Build the interface and start serving. The stray trailing "|" that followed
# this call was not valid Python and has been removed.
gr.Interface(
    inference,
    inputs,
    outputs,
    title=title,
    description=description,
    article=article,
).launch()