Edward Nagy committed on
Commit
623ac6b
1 Parent(s): dd3ed35

Update video transcription functionality

Browse files
Files changed (1) hide show
  1. app.py +19 -5
app.py CHANGED
@@ -1,24 +1,38 @@
1
  from transformers import pipeline
2
  import gradio as gr
3
- import moviepy.editor as mp
 
4
  import os
5
 
6
  # pipe = pipeline(model="esnagy/whisper-small-hu")
7
 
8
  def transcribe(video_url):
9
- video = mp.VideoFileClip(video_url)
 
 
 
 
 
 
 
10
  audio = video.audio
 
11
  audio_file = "temp_audio.wav"
12
  audio.write_audiofile(audio_file, codec='pcm_s16le')
 
 
13
  text = "Test text"
14
  # text = pipe(audio_file)["text"]
15
- # Remove temporary audio file after transcription
 
 
16
  os.remove(audio_file)
 
17
  return text
18
 
19
  iface = gr.Interface(
20
- fn=transcribe,
21
- inputs=gr.Textbox(label="Enter video URL"),
22
  outputs="text",
23
  title="Whisper Small Hungarian",
24
  description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL to transcribe its audio.",
 
1
  from transformers import pipeline
2
  import gradio as gr
3
+ import requests
4
+ from moviepy.editor import *
5
  import os
6
 
7
  # pipe = pipeline(model="esnagy/whisper-small-hu")
8
 
9
  def transcribe(video_url):
10
+ # Download the video from the URL
11
+ video_filename = "temp_video.mp4"
12
+ with open(video_filename, 'wb') as f:
13
+ response = requests.get(video_url)
14
+ f.write(response.content)
15
+
16
+ # Load the video using moviepy
17
+ video = VideoFileClip(video_filename)
18
  audio = video.audio
19
+
20
  audio_file = "temp_audio.wav"
21
  audio.write_audiofile(audio_file, codec='pcm_s16le')
22
+
23
+ # Transcribe the audio
24
  text = "Test text"
25
  # text = pipe(audio_file)["text"]
26
+
27
+ # Remove temporary files
28
+ os.remove(video_filename)
29
  os.remove(audio_file)
30
+
31
  return text
32
 
33
  iface = gr.Interface(
34
+ fn=transcribe,
35
+ inputs=gr.Textbox(label="Enter video URL"),
36
  outputs="text",
37
  title="Whisper Small Hungarian",
38
  description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL to transcribe its audio.",