frogcho123 committed
Commit b3ba25a
1 Parent(s): 2920572

Update app.py

Files changed (1):
  1. app.py +20 -12
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 import os
 import whisper
+from pydub import AudioSegment
 
 # Load the Whisper model
 model = whisper.load_model("base")
@@ -8,34 +9,41 @@ model = whisper.load_model("base")
 # Function to process the uploaded audio file and perform transcription
 def process_audio(upload):
     # Save the uploaded audio file
-    file_path = "uploaded_audio.wav"
-    with open(file_path, "wb") as f:
-        f.write(upload.read())
-
+    file_path = "uploaded_audio"
+    upload_path = f"{file_path}.mp3"
+    upload.save(upload_path)
+
+    # Convert the audio file to WAV format
+    wav_path = f"{file_path}.wav"
+    audio = AudioSegment.from_file(upload_path)
+    audio.export(wav_path, format="wav")
+
     # Load the audio file and perform preprocessing
-    audio = whisper.load_audio(file_path)
+    audio = whisper.load_audio(wav_path)
     audio = whisper.pad_or_trim(audio)
     mel = whisper.log_mel_spectrogram(audio).to(model.device)
-
+
     # Detect the spoken language
     _, probs = model.detect_language(mel)
     detected_language = max(probs, key=probs.get)
-
+
     # Perform transcription using Whisper ASR
     options = whisper.DecodingOptions()
     result = whisper.decode(model, mel, options)
     transcription = result.text
-
-    # Delete the temporary audio file
-    os.remove(file_path)
-
+
+    # Delete the temporary audio files
+    os.remove(upload_path)
+    os.remove(wav_path)
+
     return transcription
 
 # Create a file input component for uploading the audio file
-audio_input = gr.inputs.File(label="Upload Audio")
+audio_input = gr.inputs.File(label="Upload Audio", accept=".wav, .mp3")
 
 # Create a text output component for displaying the transcription
 text_output = gr.outputs.Textbox(label="Transcription")
 
 # Create a Gradio interface
 gr.Interface(fn=process_audio, inputs=audio_input, outputs=text_output, title="Audio Transcription").launch()
+
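
A note for anyone reusing the committed code: two of the Gradio calls look off against the library's API as I know it. Gradio passes an uploaded file to the handler as a temp-file object whose on-disk path is in its .name attribute, not an object with a save() method, and the deprecated gr.inputs.File takes no accept parameter (the gr.File component restricts uploads via file_types instead). The pydub round-trip also appears avoidable, since whisper.load_audio decodes through ffmpeg and accepts MP3 as well as WAV directly. A minimal sketch along those lines, assuming Gradio 3.x and the openai-whisper package:

import gradio as gr
import whisper

# Load the Whisper model once at startup
model = whisper.load_model("base")

def process_audio(upload):
    # Gradio hands the upload over as a temp-file object; its .name
    # attribute is the path on disk. whisper.load_audio() decodes the
    # file with ffmpeg, so MP3 and WAV both work without conversion.
    audio = whisper.load_audio(upload.name)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language (kept for parity with the commit,
    # though the result is not used by decode())
    _, probs = model.detect_language(mel)
    detected_language = max(probs, key=probs.get)

    # Perform transcription using Whisper ASR
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text

# file_types (not accept) restricts uploads in Gradio 3.x
audio_input = gr.File(label="Upload Audio", file_types=[".wav", ".mp3"])
text_output = gr.Textbox(label="Transcription")

gr.Interface(fn=process_audio, inputs=audio_input, outputs=text_output,
             title="Audio Transcription").launch()

Since this variant writes no intermediate files, the os.remove() cleanup goes away too; Gradio manages the lifetime of its own upload temp files.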