Sakil committed on
Commit
1a6ae54
•
1 Parent(s): 66e88fa

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -0
app.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import librosa
2
+ import torch
3
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
4
+ import gradio as gr
5
+ from transformers import pipeline
6
+ import IPython.display as display
7
_MODEL_NAME = "facebook/wav2vec2-base-960h"
_SAMPLE_RATE = 16000  # rate librosa resamples the audio to before tokenizing
_loaded = {}  # lazily filled cache: {"tokenizer": ..., "model": ...}


def _get_tokenizer_and_model():
    """Load the tokenizer and CTC model once and reuse them on later calls."""
    if not _loaded:
        # Load both before storing either, so a failed download never leaves
        # the cache half-populated.
        tokenizer = Wav2Vec2Tokenizer.from_pretrained(_MODEL_NAME)
        model = Wav2Vec2ForCTC.from_pretrained(_MODEL_NAME)
        _loaded["tokenizer"] = tokenizer
        _loaded["model"] = model
    return _loaded["tokenizer"], _loaded["model"]


def speech_text(audio_file):
    """Transcribe an audio file to text with the wav2vec2 CTC model.

    Args:
        audio_file: Path to an audio file readable by ``librosa.load``.
            ``None`` or ``""`` (no audio supplied) yields an empty string
            instead of an error.

    Returns:
        The decoded transcription as a string.
    """
    if audio_file is None or audio_file == "":
        return ""

    tokenizer, model = _get_tokenizer_and_model()

    # librosa resamples to the requested rate; the returned rate is therefore
    # always _SAMPLE_RATE and is not needed afterwards.
    speech, _ = librosa.load(audio_file, sr=_SAMPLE_RATE)
    input_values = tokenizer(speech, return_tensors="pt").input_values

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        # Logits are the non-normalized per-frame predictions.
        logits = model(input_values).logits

    # Highest-scoring token id per frame, decoded for the single input item.
    predicted_ids = torch.argmax(logits, dim=-1)
    return tokenizer.decode(predicted_ids[0])
20
# Expose speech_text through a Gradio interface (audio in, text out) and
# start it with inline display turned off.
iface = gr.Interface(
    fn=speech_text,
    inputs="audio",
    outputs="text",
    title='Sakil Transcription',
    description="Transcription",
)
iface.launch(inline=False)
22
+