Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -228,32 +228,17 @@ import time
|
|
228 |
from pytube import YouTube
|
229 |
import yt_dlp
|
230 |
|
231 |
-
def
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
'outtmpl': 'downloads/%(title)s.%(ext)s',
|
236 |
-
}
|
237 |
-
try:
|
238 |
-
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
239 |
-
info_dict = ydl.extract_info(url, download=True)
|
240 |
-
print(f"Downloaded: {info_dict['title']}")
|
241 |
-
return info_dict['title'] # Or return other relevant info
|
242 |
-
except Exception as e:
|
243 |
-
print(f"Error: {e}")
|
244 |
-
raise
|
245 |
-
|
246 |
|
|
|
247 |
|
248 |
-
def
|
249 |
-
|
250 |
-
audio_filename = filename[:-4] + ".mp3"
|
251 |
-
clip.audio.write_audiofile(audio_filename)
|
252 |
-
clip.close()
|
253 |
-
time.sleep(5)
|
254 |
-
|
255 |
model = whisper.load_model("base")
|
256 |
-
result = model.transcribe(
|
257 |
|
258 |
transcription = result["text"]
|
259 |
|
@@ -720,8 +705,8 @@ def process_video(url):
|
|
720 |
# transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
721 |
# transcript_text = ' '.join([t['text'] for t in transcript])
|
722 |
|
723 |
-
|
724 |
-
transcript_text =
|
725 |
|
726 |
# Clean the transcript text
|
727 |
cleaned_text = clean_text(transcript_text)
|
|
|
228 |
from pytube import YouTube
|
229 |
import yt_dlp
|
230 |
|
231 |
+
def download_video_mp3(URL):
    """Download the audio-only stream of a YouTube video.

    Args:
        URL: Address of the video; passed unchanged to ``YouTube``.

    Returns:
        The value returned by the stream's ``download`` call -- presumably
        the path of the saved audio file; confirm against the library in use.
    """
    # Use the ``URL`` parameter itself. The body previously read ``url``,
    # a different (lower-case) name, which raises NameError unless a global
    # called ``url`` happens to exist.
    # NOTE(review): ``on_progress`` is not defined in the visible part of
    # this file -- confirm it is imported elsewhere.
    yt = YouTube(URL, on_progress_callback=on_progress)
    ys = yt.streams.get_audio_only()
    # NOTE(review): the only visible import is ``from pytube import YouTube``;
    # the ``mp3=True`` keyword looks like the pytubefix API -- verify that the
    # installed package accepts it.
    file = ys.download(mp3=True)

    return file
|
237 |
|
238 |
+
def audio_to_text(filename):
|
239 |
+
|
|
|
|
|
|
|
|
|
|
|
240 |
model = whisper.load_model("base")
|
241 |
+
result = model.transcribe(filename)
|
242 |
|
243 |
transcription = result["text"]
|
244 |
|
|
|
705 |
# transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
706 |
# transcript_text = ' '.join([t['text'] for t in transcript])
|
707 |
|
708 |
+
audio_file = download_video_mp3(url)
|
709 |
+
transcript_text = audio_to_text(audio_file)
|
710 |
|
711 |
# Clean the transcript text
|
712 |
cleaned_text = clean_text(transcript_text)
|