To handle videos longer than one hour and to transcribe them in segments, we need to make several modifications to the yt_transcribe function.

#15
by Illia56 - opened
Files changed (1) hide show
  1. app.py +30 -9
app.py CHANGED
@@ -71,21 +71,42 @@ def download_yt_audio(yt_url, filename):
71
  raise gr.Error(str(err))
72
 
73
 
74
def yt_transcribe(yt_url, task, max_filesize=75.0):
    """Download a YouTube video's audio and transcribe it in a single pass.

    Args:
        yt_url: URL of the YouTube video.
        task: Value forwarded to the pipeline as ``generate_kwargs["task"]``.
        max_filesize: Accepted for interface compatibility; this function
            does not read it.

    Returns:
        A ``(html_embed, text)`` tuple: the embed markup produced by
        ``_return_yt_html_embed`` and the ``"text"`` field of the pipeline
        output.
    """
    embed_html = _return_yt_html_embed(yt_url)

    # The download only needs to exist long enough to be read into memory.
    with tempfile.TemporaryDirectory() as workdir:
        media_path = os.path.join(workdir, "video.mp4")
        download_yt_audio(yt_url, media_path)
        with open(media_path, "rb") as media_file:
            raw_bytes = media_file.read()

    # Decode the raw container bytes at the rate the feature extractor expects.
    waveform = ffmpeg_read(raw_bytes, pipe.feature_extractor.sampling_rate)
    model_input = {
        "array": waveform,
        "sampling_rate": pipe.feature_extractor.sampling_rate,
    }

    result = pipe(
        model_input,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return embed_html, result["text"]
89
 
90
 
91
  demo = gr.Blocks()
 
71
  raise gr.Error(str(err))
72
 
73
 
74
def ffmpeg_read(file_path, sampling_rate):
    """Decode an audio/video file to mono float32 samples using the ffmpeg CLI.

    Args:
        file_path: Path of the media file to decode.
        sampling_rate: Target sampling rate in Hz; passed to ffmpeg's ``-ar``.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``float32`` holding the decoded
        mono waveform.

    Raises:
        FileNotFoundError: If ``file_path`` does not point to a file.
        ValueError: If the ``ffmpeg`` executable is unavailable, or if ffmpeg
            produced no audio samples for the file.
    """
    # Imported locally so the function is self-contained in this module.
    import subprocess

    import numpy as np

    # Fail early with a clear message instead of an opaque ffmpeg error.
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"Audio file not found: {file_path}")

    command = [
        "ffmpeg",
        "-nostdin",  # never let ffmpeg wait on (or consume) our stdin
        "-hide_banner",
        "-loglevel", "quiet",
        "-i", str(file_path),
        "-ac", "1",  # downmix to a single channel
        "-ar", str(sampling_rate),
        "-f", "f32le",  # raw little-endian float32 PCM on stdout
        "pipe:1",
    ]

    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except FileNotFoundError as err:
        # Raised by subprocess when the ffmpeg binary itself is missing.
        raise ValueError(
            "ffmpeg was not found but is required to load audio files"
        ) from err

    samples = np.frombuffer(completed.stdout, dtype=np.float32)
    if samples.shape[0] == 0:
        raise ValueError(f"ffmpeg could not decode any audio from: {file_path}")
    return samples
79
+
80
def yt_transcribe(yt_url, task, max_filesize=75.0, segment_length=30*1000):
    """Download a YouTube video's audio and transcribe it segment by segment.

    The audio is decoded once, cut into consecutive fixed-length windows, and
    each window is sent through the pipeline separately. The per-window texts
    are then joined with single spaces.

    Args:
        yt_url: URL of the YouTube video.
        task: Value forwarded to the pipeline as ``generate_kwargs["task"]``.
        max_filesize: Accepted for interface compatibility; this function
            does not read it.
        segment_length: Length of each window in milliseconds.

    Returns:
        A ``(html_embed, text)`` tuple: the embed markup produced by
        ``_return_yt_html_embed`` and the combined transcription.

    Raises:
        ValueError: If ``segment_length`` is not positive.
    """
    # A zero step would crash range() and a negative one would silently
    # produce an empty transcript, so reject both up front.
    if segment_length <= 0:
        raise ValueError("segment_length must be a positive number of milliseconds.")

    html_embed_str = _return_yt_html_embed(yt_url)

    sampling_rate = pipe.feature_extractor.sampling_rate
    # Convert the window length from milliseconds to a whole number of samples.
    samples_per_segment = max(1, int(sampling_rate * segment_length // 1000))

    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "video.mp4")
        download_yt_audio(yt_url, filepath)
        # Decode once with this module's ffmpeg_read(file_path, sampling_rate);
        # assumes it returns a sliceable 1-D sample sequence - TODO confirm.
        audio = ffmpeg_read(filepath, sampling_rate)

    # NOTE(review): fixed-size windows can cut a word in half at a boundary;
    # overlapping windows would be needed to avoid that.
    transcriptions = []
    for start in range(0, len(audio), samples_per_segment):
        inputs = {
            "array": audio[start:start + samples_per_segment],
            "sampling_rate": sampling_rate,
        }
        text = pipe(
            inputs,
            batch_size=BATCH_SIZE,
            generate_kwargs={"task": task},
            return_timestamps=True,
        )["text"]
        # Strip so the join below does not produce doubled spaces, and skip
        # windows that yielded no text.
        text = text.strip()
        if text:
            transcriptions.append(text)

    full_transcription = " ".join(transcriptions)

    return html_embed_str, full_transcription
109
 
 
110
 
111
 
112
  demo = gr.Blocks()