MusIre committed
Commit 987ea19 • 1 Parent(s): eb7f955

Update app.py

Files changed (1)
  1. app.py +7 -10
app.py CHANGED
@@ -12,11 +12,13 @@ processor = WhisperProcessor.from_pretrained("openai/whisper-large")
  model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
  model.config.forced_decoder_ids = None
 
- # Function to perform ASR on audio data
- def transcribe_audio(audio_data):
-     # Process audio data using the Whisper processor
-     input_features = processor(audio_data, return_tensors="pt").input_features
+ # Custom preprocessing function
+ def preprocess_audio(audio_data):
+     # Apply any custom preprocessing to the audio data here if needed
+     return processor(audio_data, return_tensors="pt").input_features
 
+ # Function to perform ASR on audio data
+ def transcribe_audio(input_features):
      # Generate token ids
      predicted_ids = model.generate(input_features)
 
@@ -25,11 +27,6 @@ def transcribe_audio(audio_data):
 
      return transcription[0]
 
- # Custom preprocessing function
- def preprocess_audio(audio_data):
-     # Apply any custom preprocessing to the audio data here if needed
-     return audio_data
-
  # Create Gradio interface
  audio_input = gr.Audio(preprocess=preprocess_audio)
- gr.Interface(fn=transcribe_audio, inputs=audio_input, outputs="text").launch()
+ gr.Interface(fn=transcribe_audio, inputs=audio_input, outputs="text").launch()
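
For context, here is a minimal, self-contained sketch of what the refactored app could look like end to end. It assumes standard transformers and Gradio APIs: audio is loaded with librosa at the 16 kHz rate Whisper expects, the preprocessing is called from the transcription callback (a component-level preprocess= argument, as used in the committed code, does not appear to be a documented gr.Audio constructor parameter), and the decode step uses processor.batch_decode as implied by the context lines between the two hunks. The librosa dependency and the filepath input type are assumptions, not part of the commit.

import gradio as gr
import librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration

processor = WhisperProcessor.from_pretrained("openai/whisper-large")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
model.config.forced_decoder_ids = None

def preprocess_audio(audio_path):
    # Load the recording as mono float32 at the 16 kHz rate Whisper expects,
    # then convert it to log-mel input features (assumed filepath input).
    audio_data, sampling_rate = librosa.load(audio_path, sr=16000)
    return processor(audio_data, sampling_rate=sampling_rate, return_tensors="pt").input_features

def transcribe_audio(audio_path):
    # Preprocess inside the callback instead of via a gr.Audio preprocess hook
    input_features = preprocess_audio(audio_path)
    # Generate token ids and decode them to text
    predicted_ids = model.generate(input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]

# gr.Audio(type="filepath") hands the callback a path to the recorded/uploaded file
gr.Interface(fn=transcribe_audio, inputs=gr.Audio(type="filepath"), outputs="text").launch()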