patrickvonplaten committed on
Commit
ec5489a
1 Parent(s): 2c93146

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -1,19 +1,21 @@
1
  import gradio as gr
2
  import librosa
3
  from transformers import AutoFeatureExtractor, AutoTokenizer, SpeechEncoderDecoderModel
 
4
 
5
  model_name = "facebook/wav2vec2-xls-r-2b-22-to-16"
 
6
 
7
- feature_extractor = AutoFeatureExtractor.from_pretrained(model_name, use_auth_token="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")
8
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token="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", use_fast=False)
9
- model = SpeechEncoderDecoderModel.from_pretrained(model_name, use_auth_token="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")
10
 
11
  def process_audio_file(file):
12
  data, sr = librosa.load(file)
13
  if sr != 16000:
14
  data = librosa.resample(data, sr, 16000)
15
  print(data.shape)
16
- input_values = feature_extractor(data, return_tensors="pt").input_values
17
  return input_values
18
 
19
  def transcribe(file, target_language):
@@ -75,7 +77,9 @@ iface = gr.Interface(
75
  outputs="text",
76
  layout="horizontal",
77
  theme="huggingface",
78
- title="XLS-R 300M 22-to-16 Speech Translation",
79
  description="A simple interface to translate from 22 input spoken languages to 16 written languages.",
 
 
80
  )
81
  iface.launch()
 
1
  import gradio as gr
2
  import librosa
3
  from transformers import AutoFeatureExtractor, AutoTokenizer, SpeechEncoderDecoderModel
4
+ import torch
5
 
6
  model_name = "facebook/wav2vec2-xls-r-2b-22-to-16"
7
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
8
 
9
+ feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
10
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
11
+ model = SpeechEncoderDecoderModel.from_pretrained(model_name).to(device)
12
 
13
  def process_audio_file(file):
14
  data, sr = librosa.load(file)
15
  if sr != 16000:
16
  data = librosa.resample(data, sr, 16000)
17
  print(data.shape)
18
+ input_values = feature_extractor(data, return_tensors="pt").input_values.to(device)
19
  return input_values
20
 
21
  def transcribe(file, target_language):
 
77
  outputs="text",
78
  layout="horizontal",
79
  theme="huggingface",
80
+ title="XLS-R 2B 22-to-16 Speech Translation",
81
  description="A simple interface to translate from 22 input spoken languages to 16 written languages.",
82
+ article = "<p style='text-align: center'><a href='https://huggingface.co/facebook/wav2vec2-xls-r-2b-22-to-16' target='_blank'>Click to learn more about XLS-R-2B-22-16 </a> | <a href='https://arxiv.org/abs/2111.09296' target='_blank'> With 🎙️ from Facebook XLS-R </a></p>",
83
+
84
  )
85
  iface.launch()