juulaii committed on
Commit
40817ec
1 Parent(s): 64a4879

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -25
app.py CHANGED
@@ -2,31 +2,9 @@ import gradio as gr
2
 
3
  #Get models
4
  #ASR model for input speech
5
- import torch
6
- from transformers import Wav2Vec2Processor, HubertForCTC
7
- from datasets import load_dataset
8
- import soundfile as sf
9
-
10
- processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft")
11
- model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")
12
-
13
- def map_to_array(batch):
14
- speech, _ = sf.read(batch["file"])
15
- batch["speech"] = speech
16
- return batch
17
-
18
- ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
19
- ds = ds.map(map_to_array)
20
-
21
- input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
22
- logits = model(input_values).logits
23
- predicted_ids = torch.argmax(logits, dim=-1)
24
- transcription = processor.decode(predicted_ids[0])
25
-
26
- #speech2text = gr.Interface.load("huggingface/facebook/hubert-large-ls960-ft",
27
- # inputs=gr.inputs.Audio(label="Record Audio File", type="file", source = "microphone"))
28
- speech2text = gr.Interface.(transcription,
29
- inputs=gr.inputs.Audio(label="Record Audio File", type="file", source = "microphone"))
30
  #translates english to spanish text
31
  translator = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-en-es",
32
  input=transcription
 
2
 
3
  #Get models
4
  #ASR model for input speech
5
+ speech2text = gr.Interface.load("huggingface/facebook/hubert-large-ls960-ft",
6
+ inputs=gr.inputs.Audio(label="Record Audio File", type="file", source = "microphone"))
7
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  #translates english to spanish text
9
  translator = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-en-es",
10
  input=transcription