Spaces:

Hunzla
/

whisperaudio

Runtime error

Hunzla committed on Aug 9, 2023

Commit

c5fe8de

•

1 Parent(s): 64bceb8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -37,12 +37,7 @@ def transcribe_with_diarization(audio_path):
             # Export the interval audio as a temporary WAV file
         torchaudio.save("interval_audio.wav", interval_audio,sample_rate)
         transcript = asr_pipe("interval_audio.wav")
-        print(transcript)
-        start_time = segment.start
-        end_time = segment.end
-        label = track[0].label()  # Extract the label manually
-        speaker_audio = audio_path + f"[{start_time:.2f},{end_time:.2f}]"
-        transcript = asr_pipe(speaker_audio)[0]["text"]
         transcripts.append(transcript)
     # Combine the transcriptions from all speakers
@@ -53,10 +48,10 @@ iface = gr.Interface(
     fn=transcribe_with_diarization,
     inputs=[
         gr.File(label="Audio File"),
-        gr.Audio(source="microphone", type="filepath")
     ],
     outputs="text",
-    title="Whisper small Hindi with Speaker Diarization",
     description="Real-time demo for Hindi speech recognition using a fine-tuned Whisper large model with speaker diarization.",
 )

             # Export the interval audio as a temporary WAV file
         torchaudio.save("interval_audio.wav", interval_audio,sample_rate)
         transcript = asr_pipe("interval_audio.wav")
+        print(transcript)
         transcripts.append(transcript)
     # Combine the transcriptions from all speakers
     fn=transcribe_with_diarization,
     inputs=[
         gr.File(label="Audio File"),
+        gr.Audio(source="microphone", type="filepath", filetype="mp3")
     ],
     outputs="text",
+    title="Whisper Large Hindi with Speaker Diarization",
     description="Real-time demo for Hindi speech recognition using a fine-tuned Whisper large model with speaker diarization.",
 )