|
from youtube_transcript_api import YouTubeTranscriptApi
|
|
import re
|
|
|
|
def get_youtube_video_id(query):
|
|
try:
|
|
match = re.search(r'(?:youtu\.be/|youtube\.com/(?:watch\?v=|embed/|v/|shorts/))([\w-]{11})', query)
|
|
if match:
|
|
video_id = match.group(1)
|
|
print(video_id)
|
|
return video_id
|
|
except:
|
|
print("Did not find youtube video id from query ", query)
|
|
|
|
def fetch_transcript_english(video_id):
|
|
try:
|
|
ytt_api = YouTubeTranscriptApi()
|
|
transcript = ytt_api.fetch(video_id,languages=['en'])
|
|
return transcript
|
|
except:
|
|
print("Error ")
|
|
|
|
def post_process_transcript(transcript_snippets):
|
|
full_transcript = " ".join([transcript_snippet.text for transcript_snippet in transcript_snippets])
|
|
return full_transcript
|
|
|