Edward Nagy committed on
Commit
8c68f65
1 Parent(s): 691f320

Fix audio file codec and add input placeholders

Browse files
Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -6,12 +6,14 @@ import os
6
 
7
  # pipe = pipeline(model="esnagy/whisper-small-hu")
8
 
 
9
  def transcribe_audio(audio_file):
10
  text = "Test text"
11
  # text = pipe(audio_file)["text"]
12
  os.remove(audio_file) # Remove temporary audio file
13
  return text
14
 
 
15
  def transcribe(input_data):
16
  if input_data["audio"]:
17
  return transcribe_audio(input_data["audio"].name)
@@ -19,7 +21,7 @@ def transcribe(input_data):
19
  video_url = input_data["video_url"]
20
  # Download the video from the URL
21
  video_filename = "temp_video.mp4"
22
- with open(video_filename, 'wb') as f:
23
  response = requests.get(video_url)
24
  f.write(response.content)
25
 
@@ -28,7 +30,7 @@ def transcribe(input_data):
28
  audio = video.audio
29
 
30
  audio_file = "temp_audio.wav"
31
- audio.write_audiofile(audio_file, codec='pcm_s16le')
32
 
33
  text = transcribe_audio(audio_file)
34
 
@@ -38,15 +40,18 @@ def transcribe(input_data):
38
 
39
  return text
40
 
41
- video_url_input = gr.inputs.Textbox(label="Enter video URL", placeholder="Or leave empty to use microphone")
42
- audio_input = gr.inputs.Audio(label="Or record your voice", source="microphone")
43
 
44
  iface = gr.Interface(
45
  fn=transcribe,
46
- inputs=[video_url_input, audio_input],
 
 
 
 
 
47
  outputs=gr.outputs.Textbox(),
48
  title="Whisper Small Hungarian",
49
- description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL or record your voice to transcribe."
50
  )
51
 
52
  iface.launch()
 
6
 
7
  # pipe = pipeline(model="esnagy/whisper-small-hu")
8
 
9
+
10
  def transcribe_audio(audio_file):
11
  text = "Test text"
12
  # text = pipe(audio_file)["text"]
13
  os.remove(audio_file) # Remove temporary audio file
14
  return text
15
 
16
+
17
  def transcribe(input_data):
18
  if input_data["audio"]:
19
  return transcribe_audio(input_data["audio"].name)
 
21
  video_url = input_data["video_url"]
22
  # Download the video from the URL
23
  video_filename = "temp_video.mp4"
24
+ with open(video_filename, "wb") as f:
25
  response = requests.get(video_url)
26
  f.write(response.content)
27
 
 
30
  audio = video.audio
31
 
32
  audio_file = "temp_audio.wav"
33
+ audio.write_audiofile(audio_file, codec="pcm_s16le")
34
 
35
  text = transcribe_audio(audio_file)
36
 
 
40
 
41
  return text
42
 
 
 
43
 
44
  iface = gr.Interface(
45
  fn=transcribe,
46
+ inputs=[
47
+ gr.Textbox(
48
+ label="Enter video URL", placeholder="Or leave empty to use microphone"
49
+ ),
50
+ gr.Audio(sources=["microphone"], type="filepath"),
51
+ ],
52
  outputs=gr.outputs.Textbox(),
53
  title="Whisper Small Hungarian",
54
+ description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL or record your voice to transcribe.",
55
  )
56
 
57
  iface.launch()