Spaces:

sofzcc
/

multimodal-ai-chatbot-youtube-qa

Sleeping

sofzcc committed on Jul 31, 2024

Commit

a7c2258

verified ·

1 Parent(s): 3e5eaad

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -228,32 +228,17 @@ import time
 from pytube import YouTube
 import yt_dlp
-def download_video(url):
-    print(f"Attempting to download video from URL: {url}")
-    ydl_opts = {
-        'format': 'bestvideo+bestaudio/best',
-        'outtmpl': 'downloads/%(title)s.%(ext)s',
-    }
-    try:
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info_dict = ydl.extract_info(url, download=True)
-            print(f"Downloaded: {info_dict['title']}")
-            return info_dict['title']  # Or return other relevant info
-    except Exception as e:
-        print(f"Error: {e}")
-        raise
-def video_to_text(filename):
-    clip = VideoFileClip(filename)
-    audio_filename = filename[:-4] + ".mp3"
-    clip.audio.write_audiofile(audio_filename)
-    clip.close()
-    time.sleep(5)
     model = whisper.load_model("base")
-    result = model.transcribe(audio_filename)
     transcription = result["text"]
@@ -720,8 +705,8 @@ def process_video(url):
 #    transcript = YouTubeTranscriptApi.get_transcript(video_id)
 #    transcript_text = ' '.join([t['text'] for t in transcript])
-    video = download_video(url)
-    transcript_text = video_to_text(video)
     # Clean the transcript text
     cleaned_text = clean_text(transcript_text)

 from pytube import YouTube
 import yt_dlp
+def download_video_mp3(URL):
+    """Download the audio-only stream of a YouTube video.
+
+    Args:
+        URL: Address of the YouTube video to fetch.
+
+    Returns:
+        The value returned by the stream's ``download(mp3=True)`` call; the
+        caller hands it to ``audio_to_text`` as the filename to transcribe.
+    """
+    # Use the parameter itself: the previous body read a lowercase ``url``
+    # that is not defined inside this function, so the call either raised
+    # NameError or silently picked up an unrelated global.
+    yt = YouTube(URL, on_progress_callback=on_progress)
+    ys = yt.streams.get_audio_only()
+    # NOTE(review): ``mp3=True`` looks like the pytubefix ``Stream.download``
+    # keyword, while the visible import is ``from pytube import YouTube`` --
+    # confirm which library (and which ``on_progress``) is actually in scope.
+    file = ys.download(mp3=True)
+    return file
+def audio_to_text(filename):
     model = whisper.load_model("base")
+    result = model.transcribe(filename)
     transcription = result["text"]
 #    transcript = YouTubeTranscriptApi.get_transcript(video_id)
 #    transcript_text = ' '.join([t['text'] for t in transcript])
+    audio_file = download_video_mp3(url)
+    transcript_text = audio_to_text(audio_file)
     # Clean the transcript text
     cleaned_text = clean_text(transcript_text)