Final_Assignment_Template / youtube_utils.py
magdap116's picture
Upload 2 files
6530149 verified
raw
history blame contribute delete
828 Bytes
from youtube_transcript_api import YouTubeTranscriptApi
import re
# Compiled once at import time. Recognised URL shapes:
#   youtu.be/<id>
#   youtube.com/watch?v=<id>            (also with other query params before v=)
#   youtube.com/embed/<id>, /v/<id>, /shorts/<id>, /live/<id>
#   youtube-nocookie.com/embed/<id>     (privacy-enhanced embed host)
# The captured id is exactly 11 characters from [A-Za-z0-9_-].
_YOUTUBE_ID_PATTERN = re.compile(
    r'(?:youtu\.be/'
    r'|youtube(?:-nocookie)?\.com/'
    r'(?:watch\?(?:[^\s#]*?&)?v=|embed/|v/|shorts/|live/))'
    r'([\w-]{11})'
)


def get_youtube_video_id(query):
    """Extract the first YouTube video id found in *query*.

    Args:
        query: Free text that may contain a YouTube URL. Anything that is
            not a ``str`` is treated as "no id found".

    Returns:
        The 11-character video id of the first recognised URL, or ``None``
        when *query* is not a string or contains no recognised URL.
    """
    # Explicit type guard instead of a bare ``except:`` so that
    # KeyboardInterrupt / SystemExit are never swallowed.
    if not isinstance(query, str):
        print("Did not find youtube video id from query ", query)
        return None

    match = _YOUTUBE_ID_PATTERN.search(query)
    if match is None:
        # The original only reported a failure when the regex call raised;
        # a plain "no match" returned None silently. Report both cases.
        print("Did not find youtube video id from query ", query)
        return None

    video_id = match.group(1)
    # Kept from the original: echo the extracted id to stdout.
    print(video_id)
    return video_id
def fetch_transcript_english(video_id):
    """Fetch the English transcript of a YouTube video (best effort).

    Args:
        video_id: YouTube video id to look up. A falsy value (``None`` or
            an empty string) is rejected without contacting the API.

    Returns:
        Whatever ``YouTubeTranscriptApi().fetch(video_id, languages=['en'])``
        returns on success, or ``None`` when no id was given or the fetch
        failed. Failures are reported on stdout rather than raised.
    """
    if not video_id:
        # Nothing to look up; skip the API call entirely.
        print("Error fetching English transcript: no video id given")
        return None

    try:
        ytt_api = YouTubeTranscriptApi()
        return ytt_api.fetch(video_id, languages=['en'])
    except Exception as exc:
        # Deliberate best-effort boundary: callers get None on failure.
        # Catch Exception (not a bare except) so KeyboardInterrupt and
        # SystemExit still propagate, and include the cause in the message.
        print(f"Error fetching English transcript for {video_id!r}: {exc}")
        return None
def post_process_transcript(transcript_snippets):
    """Join the text of every snippet into one space-separated string.

    Args:
        transcript_snippets: Iterable of objects exposing a ``text``
            attribute.

    Returns:
        The ``text`` values in iteration order, separated by single spaces.
        An empty iterable yields an empty string.
    """
    pieces = []
    for snippet in transcript_snippets:
        pieces.append(snippet.text)
    return " ".join(pieces)