Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import requests | |
| from sentence_transformers import SentenceTransformer | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| import numpy as np | |
| import huggingface_hub | |
| import os | |
| import faiss | |
# Set up SentenceTransformer: the model that turns transcripts and search
# queries into embedding vectors.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# YouTube playlist whose videos are indexed for search.
playlist_id = 'PLD4EAA8F8C9148A1B'

# The YouTube Data API key is a secret and must not be committed to source
# control; it is read from the YOUTUBE_API_KEY environment variable instead
# (configure it as a secret / environment variable of the deployment).
api_key = os.environ.get('YOUTUBE_API_KEY')
if not api_key:
    raise RuntimeError(
        'The YOUTUBE_API_KEY environment variable is not set; it is required '
        'to query the YouTube Data API.'
    )
# Page through the YouTube Data API playlistItems endpoint and collect the
# video ID of every item in the playlist.
url = 'https://www.googleapis.com/youtube/v3/playlistItems'
playlist_query = {
    'part': 'snippet',
    'maxResults': 50,
    'playlistId': playlist_id,
    'key': api_key,
}
video_ids = []
while True:
    # A timeout keeps start-up from hanging forever on a stalled connection.
    response = requests.get(url, params=playlist_query, timeout=30)
    # Surface quota/authentication failures as an HTTP error here rather than
    # as a KeyError on the missing 'items' key further down.
    response.raise_for_status()
    data = response.json()
    # Extract the video IDs from this page of results
    video_ids.extend(
        item['snippet']['resourceId']['videoId']
        for item in data.get('items', [])
    )
    # Check if there are more pages of results
    next_page_token = data.get('nextPageToken')
    if not next_page_token:
        break
    playlist_query['pageToken'] = next_page_token
# Parallel lists: transcripts[n] is the full transcript text of video ids[n].
# Videos whose transcript cannot be fetched are reported and left out of both.
transcripts = []
ids = []
for video_id in video_ids:
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        transcripts.append(' '.join(segment['text'] for segment in segments))
        ids.append(video_id)
    except Exception as e:
        # Best effort: one failing video must not abort the whole indexing run.
        print(f"Error retrieving transcript for video {video_id}: {e}")
# Create one embedding per transcript and index them with FAISS.
if not transcripts:
    # Without this guard the failure would surface later as an opaque shape
    # error when the empty array is added to the index.
    raise RuntimeError(
        'No transcripts could be retrieved; there is nothing to index.'
    )
# FAISS operates on float32 matrices of shape (n_vectors, dimension).
sentence_embeddings = np.asarray(model.encode(transcripts), dtype=np.float32)
# Set up FAISS. The dimension is taken from the embeddings themselves so the
# index stays correct if the embedding model is swapped for another one.
index = faiss.IndexFlatL2(sentence_embeddings.shape[1])
# Add sentence embeddings to FAISS index; row n corresponds to ids[n].
index.add(sentence_embeddings)
| #--------------------------------------------- | |
def get_video_links(input_text):
    """Return an HTML snippet linking to the videos that best match a query.

    The query is embedded with the module-level ``model``, its nearest
    neighbours are looked up in the module-level FAISS ``index``, and the
    title and thumbnail of every hit are fetched from the YouTube Data API in
    a single batched request.

    Args:
        input_text: Free-text search query.

    Returns:
        A string of concatenated ``<a>`` elements (thumbnail followed by the
        title), one per distinct matching video, in ranking order. The string
        is empty when the index is empty or no video details are available.

    Raises:
        requests.HTTPError: If the YouTube Data API answers with an error
            status.
    """
    import html  # local import: only needed for escaping in this function

    # Encode input text using SentenceTransformer
    input_embedding = model.encode([input_text])[0]

    # Never request more neighbours than the index holds: FAISS pads missing
    # results with -1, and ids[-1] would silently return the last video.
    k = min(15, index.ntotal)
    if k == 0:
        return ''
    _, T = index.search(np.array([input_embedding]), k)

    # Collect the distinct video IDs in ranking order.
    ranked_ids = []
    visited_ids = set()
    for i in T[0]:
        if i < 0:
            continue  # defensive: padding label, not a real hit
        video_id = ids[i]
        if video_id in visited_ids:
            continue  # Skip if the video_id has already been visited
        visited_ids.add(video_id)
        ranked_ids.append(video_id)
    if not ranked_ids:
        return ''

    # One request for all hits instead of one request per video; the `id`
    # parameter accepts a comma-separated list of video IDs.
    response = requests.get(
        'https://www.googleapis.com/youtube/v3/videos',
        params={'part': 'snippet', 'id': ','.join(ranked_ids), 'key': api_key},
        timeout=30,
    )
    response.raise_for_status()
    snippets = {
        item['id']: item['snippet']
        for item in response.json().get('items', [])
    }

    video_links = []
    for video_id in ranked_ids:
        snippet = snippets.get(video_id)
        if snippet is None:
            # No details returned for this ID (e.g. the video was removed or
            # made private), so there is nothing to display.
            continue
        # Titles and URLs come from an external service: escape them before
        # embedding them in HTML.
        video_title = html.escape(snippet.get('title', ''))
        video_thumbnail = html.escape(
            snippet.get('thumbnails', {}).get('default', {}).get('url', ''),
            quote=True,
        )
        video_link = html.escape(
            f"https://www.youtube.com/watch?v={video_id}", quote=True
        )
        thumbnail_html = f'<img src="{video_thumbnail}"><br>' if video_thumbnail else ''
        video_links.append(
            f'<a href="{video_link}" target="_blank">{thumbnail_html}{video_title}</a><br>'
        )
    return ''.join(video_links)
# Create the Gradio interface: a single text box whose content is passed to
# get_video_links, with the returned string rendered as HTML.
# gr.Textbox is used instead of the legacy gr.inputs.Textbox namespace, which
# is deprecated and no longer available in current Gradio releases.
iface = gr.Interface(
    fn=get_video_links,
    inputs=[gr.Textbox(label="Add what you are looking to find in Dr. Joe's testimonials!")],
    outputs="html",
    title="Dr. Joe Dispenza testimonials Search",
)

# Launch the Gradio interface on Hugging Face Spaces
if __name__ == '__main__':
    iface.launch()