youtube_video_similarity / utils /helper_funcs.py
aapot
Add gradio error handling for incorrect video urls
25bd6d3
import html
import itertools
import random

import gradio as gr
import pandas as pd
import requests
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
def is_youtube_video_available(url):
    """Return whether the title of the YouTube video at ``url`` can be read.

    Args:
        url: URL of the YouTube video to probe.

    Returns:
        bool: True when ``YouTube(url).title`` is obtained without an error,
        False on any failure.
    """
    try:
        # Build the object inside the ``try`` too: constructing ``YouTube``
        # can itself fail (e.g. for a string that is not a video URL), and
        # that must be reported as "not available" instead of propagating.
        _ = YouTube(url).title
    except Exception:
        # Best-effort probe: every ordinary failure means "not available".
        # ``Exception`` rather than a bare ``except`` keeps
        # KeyboardInterrupt / SystemExit propagating.
        return False
    return True
def get_example_videos(rr_examples_url, num_rr_examples):
    """Collect the example video pairs: a built-in list plus remote ones.

    Args:
        rr_examples_url: URL expected to return a JSON list of objects that
            carry ``rejected_video_id`` and ``recommendation_id`` keys.
        num_rr_examples: Maximum number of remote pairs to return. When more
            pairs are available, a random sample of this size is drawn.

    Returns:
        tuple: ``(example_videos, example_videos_rr)``. Both are lists of
        ``[video_url_1, video_url_2]`` pairs, restricted to pairs for which
        ``is_youtube_video_available`` is true for both URLs. The remote list
        is empty when the download or JSON decoding fails.
    """
    availability = {}

    def _both_available(pair):
        # The same video shows up in several pairs; remember each probe
        # result so every URL is checked at most once per call.
        for url in pair:
            if url not in availability:
                availability[url] = is_youtube_video_available(url)
            if not availability[url]:
                return False
        return True

    example_videos = [
        ['https://www.youtube.com/watch?v=WfVF-Ec4naQ',
         'https://www.youtube.com/watch?v=4hrNt28t7Cw'],
        ['https://www.youtube.com/watch?v=GbpjLP-UvIU',
         'https://www.youtube.com/watch?v=BlQ2mP2EE4A'],
        ['https://www.youtube.com/watch?v=fdzY1f2P91k',
         'https://www.youtube.com/watch?v=BlQ2mP2EE4A'],
        ['https://www.youtube.com/watch?v=fdzY1f2P91k',
         'https://www.youtube.com/watch?v=9gIVGJQ3xWE'],
    ]
    example_videos = [pair for pair in example_videos if _both_available(pair)]

    try:
        # A timeout keeps a stalled server from blocking the caller forever.
        rr_records = requests.get(rr_examples_url, timeout=10).json()
    except (requests.RequestException, ValueError):
        # Network/URL problems or a body that is not valid JSON: fall back
        # to "no remote examples", as the remote list is optional.
        rr_records = []
    if not isinstance(rr_records, list):
        # Anything but a JSON array (e.g. an error object) has no usable rows.
        rr_records = []

    # Remove duplicate video pairs (there seems to be one duplicate) by
    # collecting them in a set; malformed entries are skipped.
    unique_pairs = {
        (f'https://www.youtube.com/watch?v={rec["rejected_video_id"]}',
         f'https://www.youtube.com/watch?v={rec["recommendation_id"]}')
        for rec in rr_records
        if isinstance(rec, dict)
        and 'rejected_video_id' in rec
        and 'recommendation_id' in rec
    }
    # Sorted order matches the original sort-then-deduplicate result.
    example_videos_rr = [list(pair) for pair in sorted(unique_pairs)]
    example_videos_rr = [
        pair for pair in example_videos_rr if _both_available(pair)]

    if len(example_videos_rr) > num_rr_examples:
        example_videos_rr = random.sample(example_videos_rr, num_rr_examples)
    return example_videos, example_videos_rr
def get_youtube_embedded_html(embed_url, video_position):
    """Render the HTML snippet that embeds one YouTube video.

    Args:
        embed_url: Value placed in the ``src`` attribute of the iframe.
        video_position: Label shown after the word "Video" in the caption.

    Returns:
        str: A caption paragraph followed by the iframe, each on its own
        line, with a newline before and after the snippet.
    """
    caption = f'<p>Video {video_position}</p>'
    player = (
        f'<iframe width="100%" height="360px" src="{embed_url}" '
        'title="YouTube video player" frameborder="0" '
        'allow="accelerometer; autoplay; clipboard-write; encrypted-media; '
        'gyroscope; picture-in-picture; fullscreen" '
        'allowfullscreen></iframe>'
    )
    return '\n' + caption + '\n' + player + '\n'
def update_youtube_embedded_html(video_url, video_position):
    """Return embed HTML for ``video_url``, or an error paragraph on failure.

    Args:
        video_url: URL of the YouTube video to embed.
        video_position: Label forwarded to ``get_youtube_embedded_html``.

    Returns:
        str: The embed snippet from ``get_youtube_embedded_html`` when an
        embed URL can be derived from ``video_url``; otherwise a ``<p>``
        element reporting the failure, with the URL HTML-escaped.
    """
    try:
        embed_url = YouTube(video_url).embed_url
    except Exception:
        # The URL is echoed back inside HTML markup. It presumably comes from
        # a user-editable input (confirm against the caller), so escape it to
        # keep arbitrary markup from being injected into the page.
        safe_url = html.escape(str(video_url))
        return (
            '\n'
            '<p>There was error in fetching details for video with the URL: '
            f'{safe_url}</p>'
            '\n'
        )
    return get_youtube_embedded_html(embed_url, video_position)
def get_youtube_video_data(url):
    """Fetch channel id, title, description and transcript text of a video.

    Args:
        url: URL of the YouTube video.

    Returns:
        tuple: ``(channel_id, video_title, video_description,
        video_transcript)``. ``video_transcript`` is the transcript flattened
        to a single line, or ``None`` when the list of transcripts cannot be
        obtained.

    Raises:
        gr.Error: If the video object cannot be created or its channel id,
            title or description cannot be read.
    """
    # Languages tried first, in this order, before any other available one.
    preferred_langs = ['en', 'en-US', 'es', 'de']

    try:
        video = YouTube(url)
        # Read the metadata inside the ``try`` as well: if resolving these
        # attributes fails (e.g. the video does not exist), the user should
        # get the same friendly error as for an unparseable URL.
        channel_id = video.channel_id
        video_title = video.title
        video_description = video.description
    except Exception as err:
        raise gr.Error(
            f'Could not find YouTube video with the URL {url}') from err

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video.video_id)
    except Exception:
        # No transcript listing available: the transcript is optional, so
        # return the metadata alone.
        return channel_id, video_title, video_description, None

    # Fall back to whatever other languages the video offers.
    other_langs = [
        tr.language_code for tr in transcript_list
        if tr.language_code not in preferred_langs
    ]
    raw_transcript = YouTubeTranscriptApi.get_transcript(
        video.video_id, languages=preferred_langs + other_langs)
    # Flatten to one line of text.
    video_transcript = TextFormatter().format_transcript(
        raw_transcript).replace('\n', ' ')
    return channel_id, video_title, video_description, video_transcript
def get_input_data_df(video1_url, video2_url):
    """Build the single-row input DataFrame for a pair of videos.

    Args:
        video1_url: URL of the video stored in the ``regret_*`` columns.
        video2_url: URL of the video stored in the ``recommendation_*``
            columns.

    Returns:
        pandas.DataFrame: One row with title, description and transcript of
        each video plus ``channel_sim``, which is 1 when both videos report
        the same channel id and 0 otherwise.
    """
    regret_channel, *regret_fields = get_youtube_video_data(video1_url)
    recommendation_channel, *recommendation_fields = get_youtube_video_data(
        video2_url)

    if regret_channel == recommendation_channel:
        channel_sim = 1
    else:
        channel_sim = 0

    columns = [
        'regret_title',
        'regret_description',
        'regret_transcript',
        'recommendation_title',
        'recommendation_description',
        'recommendation_transcript',
        'channel_sim',
    ]
    row = [*regret_fields, *recommendation_fields, channel_sim]
    return pd.DataFrame([row], columns=columns)