patrickvonplaten committed
Commit 69502c9
1 Parent(s): 5cac893

Update app.py

Files changed (1)
  1. app.py: +5 -4
app.py CHANGED
@@ -1,8 +1,9 @@
 import gradio as gr
 import librosa
-from transformers import Wav2Vec2Processor, SpeechEncoderDecoderModel
+from transformers import AutoFeatureExtractor, AutoTokenizer, SpeechEncoderDecoderModel
 
-processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-xls-r-300m-en-to-15", use_auth_token="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")
+feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-xls-r-300m-en-to-15", use_auth_token="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")
+tokenizer = AutoTokenizer.from_pretrained("facebook/wav2vec2-xls-r-300m-en-to-15", use_auth_token="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")
 model = SpeechEncoderDecoderModel.from_pretrained("facebook/wav2vec2-xls-r-300m-en-to-15", use_auth_token="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")
 
 def process_audio_file(file):
@@ -10,7 +11,7 @@ def process_audio_file(file):
     if sr != 16000:
         data = librosa.resample(data, sr, 16000)
     print(data.shape)
-    input_values = processor(data, return_tensors="pt").input_values
+    input_values = feature_extractor(data, return_tensors="pt").input_values
     return input_values
 
 def transcribe(file):
@@ -18,7 +19,7 @@ def transcribe(file):
 
     sequences = model.generate(input_values, num_beams=1, max_length=30)
 
-    transcription = processor.batch_decode(sequences)
+    transcription = tokenizer.batch_decode(sequences)
     return transcription[0]
 
 iface = gr.Interface(
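
For reference, a minimal sketch of what the whole app.py could look like after this commit: the processor is split into an AutoFeatureExtractor (audio in) and an AutoTokenizer (text out) around the same SpeechEncoderDecoderModel. The lines the diff context cuts off are assumptions here: the librosa.load call, the process_audio_file call inside transcribe, and everything from gr.Interface(...) onward; the hardcoded use_auth_token values are also left out, and MODEL_ID is just a name for the checkpoint id shown in the diff.

import gradio as gr
import librosa
from transformers import AutoFeatureExtractor, AutoTokenizer, SpeechEncoderDecoderModel

MODEL_ID = "facebook/wav2vec2-xls-r-300m-en-to-15"

# The Space passes a hardcoded org token via use_auth_token; omitted here.
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = SpeechEncoderDecoderModel.from_pretrained(MODEL_ID)


def process_audio_file(file):
    # Assumed loading step (not visible in the diff): read the uploaded file at its
    # native sampling rate, then resample to the 16 kHz the XLS-R encoder expects.
    data, sr = librosa.load(file, sr=None)
    if sr != 16000:
        # The diff calls librosa.resample positionally; recent librosa needs keywords.
        data = librosa.resample(data, orig_sr=sr, target_sr=16000)
    print(data.shape)
    input_values = feature_extractor(data, return_tensors="pt").input_values
    return input_values


def transcribe(file):
    # Assumed first line: preprocess the uploaded audio into model inputs.
    input_values = process_audio_file(file)
    # Greedy decoding (num_beams=1), capped at 30 target tokens, as in the diff.
    sequences = model.generate(input_values, num_beams=1, max_length=30)
    transcription = tokenizer.batch_decode(sequences)
    return transcription[0]


# The Interface arguments are assumptions; the diff ends at "iface = gr.Interface(".
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

if __name__ == "__main__":
    iface.launch()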