Spaces:

esnagy
/

hungarian_speech_transcriber

Runtime error

App Files Community

Edward Nagy committed on Dec 2, 2023

Commit

8c68f65

•

1 Parent(s): 691f320

Fix audio file codec and add input placeholders

Browse files

Files changed (1) hide show

app.py +11 -6

app.py CHANGED Viewed

@@ -6,12 +6,14 @@ import os
 # pipe = pipeline(model="esnagy/whisper-small-hu")
 def transcribe_audio(audio_file):
     text = "Test text"
     # text = pipe(audio_file)["text"]
     os.remove(audio_file)  # Remove temporary audio file
     return text
 def transcribe(input_data):
     if input_data["audio"]:
         return transcribe_audio(input_data["audio"].name)
@@ -19,7 +21,7 @@ def transcribe(input_data):
         video_url = input_data["video_url"]
         # Download the video from the URL
         video_filename = "temp_video.mp4"
-        with open(video_filename, 'wb') as f:
             response = requests.get(video_url)
             f.write(response.content)
@@ -28,7 +30,7 @@ def transcribe(input_data):
         audio = video.audio
         audio_file = "temp_audio.wav"
-        audio.write_audiofile(audio_file, codec='pcm_s16le')
         text = transcribe_audio(audio_file)
@@ -38,15 +40,18 @@ def transcribe(input_data):
         return text
-video_url_input = gr.inputs.Textbox(label="Enter video URL", placeholder="Or leave empty to use microphone")
-audio_input = gr.inputs.Audio(label="Or record your voice", source="microphone")
 iface = gr.Interface(
     fn=transcribe,
-    inputs=[video_url_input, audio_input],
     outputs=gr.outputs.Textbox(),
     title="Whisper Small Hungarian",
-    description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL or record your voice to transcribe."
 )
 iface.launch()

 # pipe = pipeline(model="esnagy/whisper-small-hu")
 def transcribe_audio(audio_file):
     text = "Test text"
     # text = pipe(audio_file)["text"]
     os.remove(audio_file)  # Remove temporary audio file
     return text
 def transcribe(input_data):
     if input_data["audio"]:
         return transcribe_audio(input_data["audio"].name)
         video_url = input_data["video_url"]
         # Download the video from the URL
         video_filename = "temp_video.mp4"
+        with open(video_filename, "wb") as f:
             response = requests.get(video_url)
             f.write(response.content)
         audio = video.audio
         audio_file = "temp_audio.wav"
+        audio.write_audiofile(audio_file, codec="pcm_s16le")
         text = transcribe_audio(audio_file)
         return text
 iface = gr.Interface(
     fn=transcribe,
+    inputs=[
+        gr.Textbox(
+            label="Enter video URL", placeholder="Or leave empty to use microphone"
+        ),
+        gr.Audio(sources=["microphone"], type="filepath"),
+    ],
     outputs=gr.outputs.Textbox(),
     title="Whisper Small Hungarian",
+    description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL or record your voice to transcribe.",
 )
 iface.launch()