# Hugging Face Space: semantic search over Dr. Joe Dispenza testimonial videos.
# (The original first lines, "Spaces: / Runtime error", were residue from a
# page scrape, not program text — preserved here as a comment so the file parses.)
import os

import faiss
import gradio as gr
import huggingface_hub
import numpy as np
import requests
from sentence_transformers import SentenceTransformer
from youtube_transcript_api import YouTubeTranscriptApi
# --- One-time startup: embed every playlist transcript and build a FAISS index ---

# Sentence embedding model (MiniLM-L6, 384-dimensional vectors).
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

playlist_id = 'PLD4EAA8F8C9148A1B'
# SECURITY: the API key was hard-coded in source. Prefer a Space secret /
# environment variable; fall back to the original value for compatibility.
api_key = os.environ.get('YOUTUBE_API_KEY', 'AIzaSyBGuTvXcnliEh6yhTxugrAVM5YzcG9qr9U')

# Page through the playlist via the YouTube Data API, collecting video IDs.
playlist_url = 'https://www.googleapis.com/youtube/v3/playlistItems'
params = {
    'part': 'snippet',
    'maxResults': 50,
    'playlistId': playlist_id,
    'key': api_key,
}
video_ids = []
while True:
    data = requests.get(playlist_url, params=params).json()
    # .get guards against error responses that carry no 'items' key.
    for item in data.get('items', []):
        video_ids.append(item['snippet']['resourceId']['videoId'])
    next_page_token = data.get('nextPageToken')
    if not next_page_token:
        break
    params['pageToken'] = next_page_token

# Fetch each video's transcript; videos without captions are skipped.
transcripts = []  # one flattened transcript string per indexed video
ids = []          # video IDs aligned 1:1 with `transcripts` / index rows
for video_id in video_ids:
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
    except Exception as e:  # captions disabled / not found — best-effort skip
        print(f"Error retrieving transcript for video {video_id}: {e}")
        continue
    transcripts.append(' '.join(segment['text'] for segment in transcript))
    ids.append(video_id)

# Embed all transcripts and add them to an exact L2-distance FAISS index.
sentence_embeddings = np.array(model.encode(transcripts))
# Derive the vector dimension from the model instead of hard-coding 384.
index = faiss.IndexFlatL2(model.get_sentence_embedding_dimension())
index.add(sentence_embeddings)
#--------------------------------------------- | |
def get_video_links(input_text):
    """Return an HTML string of links (thumbnail + title) for the videos whose
    transcripts are most similar to *input_text*.

    Uses the module-level ``model`` to embed the query, the FAISS ``index``
    for nearest-neighbour search over transcript embeddings, and ``ids`` to
    map index rows back to YouTube video IDs.
    """
    # Embed the query; index.search expects a 2-D float array.
    query_embedding = np.array(model.encode([input_text]))
    k = 15  # number of nearest neighbors to retrieve
    _, neighbors = index.search(query_embedding, k)

    # De-duplicate while preserving relevance order. FAISS pads the result
    # with -1 when fewer than k vectors are indexed; the original code let
    # those fall through to ids[-1], wrongly surfacing the last video.
    ordered_ids = []
    seen = set()
    for i in neighbors[0]:
        if i < 0:
            continue  # padding entry, not a real match
        video_id = ids[i]
        if video_id not in seen:
            seen.add(video_id)
            ordered_ids.append(video_id)

    if not ordered_ids:
        return ''

    # One batched videos.list call for every result (the API accepts a
    # comma-separated id list) instead of one HTTP request per video.
    video_info_url = 'https://www.googleapis.com/youtube/v3/videos'
    params = {'part': 'snippet', 'id': ','.join(ordered_ids), 'key': api_key}
    data = requests.get(video_info_url, params=params).json()
    snippets = {item['id']: item['snippet'] for item in data.get('items', [])}

    video_links = []
    for video_id in ordered_ids:
        snippet = snippets.get(video_id)
        if snippet is None:
            continue  # video deleted or private — nothing to show
        video_link = f"https://www.youtube.com/watch?v={video_id}"
        video_thumbnail = snippet['thumbnails']['default']['url']
        video_title = snippet['title']
        video_links.append(
            f'<a href="{video_link}" target="_blank">'
            f'<img src="{video_thumbnail}"><br>{video_title}</a><br>'
        )
    return ''.join(video_links)
# Gradio UI: free-text query in, HTML list of matching videos out.
# NOTE: `gr.inputs.Textbox` was removed in Gradio 3 (components moved to the
# top level, e.g. `gr.Textbox`) — the old path raises at import time and is
# the likely cause of the Space's "Runtime error" banner.
iface = gr.Interface(
    fn=get_video_links,
    inputs=gr.Textbox(label="Add what you are looking to find in Dr. Joe's testimonials!"),
    outputs="html",
    title="Dr. Joe Dispenza testimonials Search",
)

# Launch the Gradio interface on Hugging Face Spaces.
if __name__ == '__main__':
    iface.launch()