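"""Semantic search over the videos of a YouTube playlist (Dr. Joe Dispenza testimonials).

Fetches every video in the playlist, downloads the available transcripts, embeds them
with SentenceTransformers, indexes the embeddings in FAISS, and serves a Gradio UI that
maps a free-text query to links for the most relevant videos.
"""
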
import gradio as gr
import requests
from sentence_transformers import SentenceTransformer
from youtube_transcript_api import YouTubeTranscriptApi
import numpy as np
import faiss

# Load the sentence-embedding model (produces 384-dimensional embeddings)
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# YouTube playlist to index and the YouTube Data API key used for all requests
playlist_id = 'PLD4EAA8F8C9148A1B'
api_key = 'AIzaSyBGuTvXcnliEh6yhTxugrAVM5YzcG9qr9U'

# Make a request to the YouTube Data API to retrieve the playlist items
url = f'https://www.googleapis.com/youtube/v3/playlistItems?part=snippet&maxResults=50&playlistId={playlist_id}&key={api_key}'
video_ids = []

while True:
    response = requests.get(url)
    data = response.json()

    # Extract the video IDs from the response
    for item in data['items']:
        video_ids.append(item['snippet']['resourceId']['videoId'])

    # Check if there are more pages of results
    if 'nextPageToken' in data:
        next_page_token = data['nextPageToken']
        url = f'https://www.googleapis.com/youtube/v3/playlistItems?part=snippet&maxResults=50&playlistId={playlist_id}&key={api_key}&pageToken={next_page_token}'
    else:
        break

# Transcripts and, in parallel, the IDs of the videos a transcript was found for
transcripts = []
ids = []

for video_id in video_ids:
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = ' '.join([t['text'] for t in transcript])
        transcripts.append(transcript_text)
        ids.append(video_id)

    except Exception as e:
        print(f"Error retrieving transcript for video {video_id}: {e}")
        continue

# Create one embedding per transcript
sentence_embeddings = np.array(model.encode(transcripts))

# Set up a flat L2 FAISS index (384 = embedding dimension of paraphrase-MiniLM-L6-v2)
# and add the transcript embeddings to it
index = faiss.IndexFlatL2(384)
index.add(sentence_embeddings)
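
# Optional sketch (not part of the original app): persist the index and video IDs so
# embeddings are not rebuilt on every restart. The file names below are illustrative.
#
#   faiss.write_index(index, 'testimonials.index')
#   np.save('video_ids.npy', np.array(ids))
#
#   # ...on a later start, instead of re-crawling the playlist:
#   index = faiss.read_index('testimonials.index')
#   ids = np.load('video_ids.npy').tolist()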


#---------------------------------------------

def get_video_links(input_text):
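    """Return an HTML string of thumbnail links for the videos most relevant to input_text."""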
    # Encode input text using SentenceTransformer
    input_embedding = model.encode([input_text])[0]

    # Perform nearest-neighbor search in the FAISS index
    k = 15  # number of nearest neighbors to retrieve
    _, indices = index.search(np.array([input_embedding]), k)

    # Build an HTML link (thumbnail + title) for each unique matching video
    video_links = []
    visited_ids = set()
    for i in indices[0]:
        video_id = ids[i]
        if video_id in visited_ids:
            continue  # Skip if the video_id has already been visited
        visited_ids.add(video_id)

        # Retrieve video details using YouTube Data API
        video_info_url = f"https://www.googleapis.com/youtube/v3/videos?part=snippet&id={video_id}&key={api_key}"
        response = requests.get(video_info_url)
        data = response.json()
        video_title = data['items'][0]['snippet']['title']
        video_thumbnail = data['items'][0]['snippet']['thumbnails']['default']['url']

        # Generate HTML code for the video link with thumbnail and title
        video_link = f"https://www.youtube.com/watch?v={video_id}"
        video_html = f'<a href="{video_link}" target="_blank"><img src="{video_thumbnail}"><br>{video_title}</a><br>'
        video_links.append(video_html)

    return ''.join(video_links)
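
# Quick sanity check outside the UI (the query string below is just an illustrative example):
#   print(get_video_links("healing meditation")[:300])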

# Create the Gradio interface with an HTML output component
iface = gr.Interface(
    fn=get_video_links,
    inputs=gr.Textbox(label="Add what you are looking to find in Dr. Joe's testimonials!"),
    outputs="html",
    title="Dr. Joe Dispenza Testimonials Search",
)



# Launch the Gradio interface on Hugging Face Spaces
if __name__ == '__main__':
    iface.launch()
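
# Dependencies implied by the imports above (a minimal requirements.txt sketch for the
# Space; versions are not pinned by the original):
#   gradio
#   requests
#   sentence-transformers
#   youtube-transcript-api
#   numpy
#   faiss-cpu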