kozak-vaclav committed on
Commit
5abfa23
1 Parent(s): 207763a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -0
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tensorflow as tf
3
+ from transformers import Wav2Vec2Processor, TFWav2Vec2Model
4
+ import librosa
5
+
6
+ # Load the model and processor
7
+ processor = Wav2Vec2Processor.from_pretrained("openai/whisper-tiny")
8
+ model = TFWav2Vec2Model.from_pretrained("kobrasoft/kobraspeech-rnn-cs")
9
+
10
def transcribe(audio):
    """Transcribe an audio file to text.

    Args:
        audio: Path to the audio file to transcribe. A falsy value
            (``None`` or an empty string) is treated as "no audio given".

    Returns:
        The decoded transcription of the first item in the batch, or an
        empty string when no audio was given.
    """
    # Nothing to load: return an empty transcription instead of letting
    # librosa.load fail on a missing path.
    if not audio:
        return ""

    # Decode the file, resampling to the 16 kHz rate passed to the processor.
    waveform, rate = librosa.load(audio, sr=16000)

    # Turn the raw waveform into padded TensorFlow model inputs.
    inputs = processor(
        waveform,
        sampling_rate=rate,
        return_tensors="tf",
        padding="longest",
    )

    # NOTE(review): this relies on the model output exposing `.logits`;
    # confirm that the model class loaded at module level provides it.
    logits = model(inputs.input_values).logits

    # Take the highest-scoring id along the last axis, then decode the ids
    # back to text; only one clip is processed, so return the first result.
    predicted_ids = tf.argmax(logits, axis=-1)
    return processor.batch_decode(predicted_ids)[0]
22
+
23
+ # Create Gradio interface
24
+ iface = gr.Interface(
25
+ fn=transcribe,
26
+ inputs=gr.inputs.Audio(source="microphone", type="filepath"),
27
+ outputs="text",
28
+ title="ASR Model Demo",
29
+ description="Upload an audio file or record your voice to get the transcription."
30
+ )
31
+
32
+ if __name__ == "__main__":
33
+ iface.launch()