import gradio as gr
import requests
from sentence_transformers import SentenceTransformer
from youtube_transcript_api import YouTubeTranscriptApi
import numpy as np
import huggingface_hub
import os
import faiss

# Sentence embedding model used both to index transcripts and to encode queries.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

playlist_id = 'PLD4EAA8F8C9148A1B'
# SECURITY/FIXME: an API key was committed to source control here. It should be
# considered compromised — revoke it in the Google Cloud console and supply a
# fresh one via the YOUTUBE_API_KEY environment variable. The old literal is
# kept only as a fallback so existing deployments keep working.
api_key = os.environ.get('YOUTUBE_API_KEY', 'AIzaSyBGuTvXcnliEh6yhTxugrAVM5YzcG9qr9U')

# Network timeout (seconds) for all YouTube Data API calls, so a stalled
# connection cannot hang the app indefinitely.
_REQUEST_TIMEOUT = 30


def _fetch_playlist_video_ids(playlist_id, api_key):
    """Return every video ID in the playlist, following API pagination.

    Raises requests.HTTPError if the YouTube Data API rejects a request
    (e.g. bad key or quota exhausted) instead of failing later with a
    confusing KeyError on the response JSON.
    """
    base_url = (
        'https://www.googleapis.com/youtube/v3/playlistItems'
        f'?part=snippet&maxResults=50&playlistId={playlist_id}&key={api_key}'
    )
    video_ids = []
    page_token = None
    while True:
        url = base_url if page_token is None else f'{base_url}&pageToken={page_token}'
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
        data = response.json()
        for item in data.get('items', []):
            video_ids.append(item['snippet']['resourceId']['videoId'])
        page_token = data.get('nextPageToken')
        if page_token is None:
            break
    return video_ids


def _fetch_transcripts(video_ids):
    """Fetch transcripts for the given videos.

    Returns (transcripts, ids): parallel lists holding the joined transcript
    text and the ID of each video for which a transcript was available.
    Videos without transcripts are skipped with a diagnostic message, matching
    the original best-effort behavior.
    """
    transcripts = []
    ids = []
    for video_id in video_ids:
        try:
            segments = YouTubeTranscriptApi.get_transcript(video_id)
        except Exception as e:
            # Best-effort: many videos simply have no transcript; log and move on.
            print(f"Error retrieving transcript for video {video_id}: {e}")
            continue
        transcripts.append(' '.join(seg['text'] for seg in segments))
        ids.append(video_id)
    return transcripts, ids


# --- Build the search index at startup (module-level, as before) ------------
video_ids = _fetch_playlist_video_ids(playlist_id, api_key)
transcripts, ids = _fetch_transcripts(video_ids)

# One embedding per transcript, stored in a flat L2-distance FAISS index.
# The dimension is taken from the model instead of the hard-coded 384 so a
# model swap cannot silently break the index.
sentence_embeddings = np.array(model.encode(transcripts))
index = faiss.IndexFlatL2(model.get_sentence_embedding_dimension())
index.add(sentence_embeddings)

# ---------------------------------------------------------------------------


def get_video_links(input_text):
    """Return an HTML string of video links (thumbnail + title) whose
    transcripts are nearest to *input_text* in embedding space.

    Duplicate video IDs among the neighbors are shown only once.
    """
    query_embedding = model.encode([input_text])
    k = 15  # number of nearest neighbors to retrieve
    _, neighbor_rows = index.search(np.array(query_embedding), k)

    video_links = []
    visited_ids = set()
    for i in neighbor_rows[0]:
        # FAISS pads with -1 when k exceeds the number of indexed vectors;
        # ids[-1] would otherwise silently pick the last video.
        if i < 0:
            continue
        video_id = ids[i]
        if video_id in visited_ids:
            continue  # Skip if the video_id has already been visited
        visited_ids.add(video_id)

        # Retrieve video details (title, thumbnail) from the YouTube Data API.
        video_info_url = (
            'https://www.googleapis.com/youtube/v3/videos'
            f'?part=snippet&id={video_id}&key={api_key}'
        )
        response = requests.get(video_info_url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
        data = response.json()
        items = data.get('items')
        if not items:
            continue  # video deleted/private since indexing
        snippet = items[0]['snippet']
        video_title = snippet['title']
        video_thumbnail = snippet['thumbnails']['default']['url']

        # NOTE(review): the original HTML template was garbled in this file
        # (tags stripped); reconstructed here as a clickable thumbnail + title.
        video_link = f"https://www.youtube.com/watch?v={video_id}"
        video_html = (
            f'<a href="{video_link}" target="_blank">'
            f'<img src="{video_thumbnail}" alt="{video_title}"><br>'
            f'{video_title}</a><br><br>'
        )
        video_links.append(video_html)
    return ''.join(video_links)


# Create Gradio interface with "html" output type.
# gr.inputs.Textbox was removed in modern Gradio; gr.Textbox is the supported
# equivalent (available since Gradio 3.0).
iface = gr.Interface(
    fn=get_video_links,
    inputs=gr.Textbox(label="Add what you are looking to find in Dr. Joe's testimonials!"),
    outputs="html",
    title="Dr. Joe Dispenza testimonials Search",
)

# Launch the Gradio interface on Hugging Face Spaces
if __name__ == '__main__':
    iface.launch()