Spaces (Runtime error)
Bilalst committed
Commit 90d5de6
Parent(s):
Duplicate from Bilalst/Gradio_Youtube_Transcript
Browse files:
- .gitattributes +35 -0
- README.md +13 -0
- app.py +118 -0
- apt.txt +4 -0
- requirements.txt +9 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+---
+title: Gradio Youtube Transcript
+emoji: 🦀
+colorFrom: green
+colorTo: yellow
+sdk: gradio
+sdk_version: 3.35.2
+app_file: app.py
+pinned: false
+duplicated_from: Bilalst/Gradio_Youtube_Transcript
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,118 @@
+import gradio as gr
+import requests
+from sentence_transformers import SentenceTransformer
+from youtube_transcript_api import YouTubeTranscriptApi
+import numpy as np
+import huggingface_hub
+import os
+import faiss
+
+# Set up SentenceTransformer
+model = SentenceTransformer('all-mpnet-base-v2')
+
+
+
+playlist_id = 'PLD4EAA8F8C9148A1B'
+api_key = 'AIzaSyBGuTvXcnliEh6yhTxugrAVM5YzcG9qr9U'
+
+# Make a request to the YouTube Data API to retrieve the playlist items
+url = f'https://www.googleapis.com/youtube/v3/playlistItems?part=snippet&maxResults=50&playlistId={playlist_id}&key={api_key}'
+video_ids = []
+
+while True:
+    response = requests.get(url)
+    data = response.json()
+
+    # Extract the video IDs from the response
+    for item in data['items']:
+        video_ids.append(item['snippet']['resourceId']['videoId'])
+
+    # Check if there are more pages of results
+    if 'nextPageToken' in data:
+        next_page_token = data['nextPageToken']
+        url = f'https://www.googleapis.com/youtube/v3/playlistItems?part=snippet&maxResults=50&playlistId={playlist_id}&key={api_key}&pageToken={next_page_token}'
+    else:
+        break
+
+# Empty lists to store transcripts and video IDs
+transcripts = []
+ids = []
+
+for video_id in video_ids:
+    try:
+        transcript = YouTubeTranscriptApi.get_transcript(video_id)
+        transcript_text = ' '.join([t['text'] for t in transcript])
+        transcripts.append(transcript_text)
+        ids.append(video_id)
+
+    except Exception as e:
+        print(f"Error retrieving transcript for video {video_id}: {e}")
+        continue
+
+# create sentence embeddings
+sentence_embeddings = model.encode(transcripts)
+
+# Set up FAISS
+index = faiss.IndexFlatL2(768)  # Create an index with L2 distance
+
+# Convert list of embeddings to NumPy array
+sentence_embeddings = np.array(sentence_embeddings)
+
+# Add sentence embeddings to FAISS index
+index.add(sentence_embeddings)
+
+
+#---------------------------------------------
+
+def get_video_links(input_text):
+    # Encode input text using SentenceTransformer
+    input_embedding = model.encode([input_text])[0]
+
+    # Perform nearest neighbor search in FAISS index
+    k = 15  # Number of nearest neighbors to retrieve
+    _, T = index.search(np.array([input_embedding]), k)  # search
+
+    # Return the list of video links with thumbnails and titles as an HTML string
+    video_links = []
+    visited_ids = set()
+    for i in T[0]:
+        video_id = ids[i]
+        if video_id in visited_ids:
+            continue  # Skip if the video_id has already been visited
+        visited_ids.add(video_id)
+
+        # Retrieve video details using YouTube Data API
+        video_info_url = f"https://www.googleapis.com/youtube/v3/videos?part=snippet&id={video_id}&key={api_key}"
+        response = requests.get(video_info_url)
+        data = response.json()
+        video_title = data['items'][0]['snippet']['title']
+        video_thumbnail = data['items'][0]['snippet']['thumbnails']['default']['url']
+
+        # Generate HTML code for the video link with thumbnail and title
+        video_link = f"https://www.youtube.com/watch?v={video_id}"
+        video_html = f'<a href="{video_link}" target="_blank"><img src="{video_thumbnail}"><br>{video_title}</a><br>'
+        video_links.append(video_html)
+
+    return ''.join(video_links)
+
+# Create Gradio interface with "html" output type
+iface = gr.Interface(fn=get_video_links, inputs=[gr.inputs.Textbox(label="Add what you are looking to find in Dr. Joe's testimonials!")], outputs="html", title="Dr. Joe Dispenza testimonials Search")
+
+
+
+# Launch the Gradio interface on Hugging Face Spaces
+if __name__ == '__main__':
+    iface.launch()
+
+
+
+
+
+
+
+
+
+
+
+
+
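Note: app.py commits the YouTube Data API key directly in source. A minimal sketch of one alternative, assuming the key were instead stored as a Space secret / environment variable named YOUTUBE_API_KEY (the variable name is illustrative, not part of this commit), would be:

import os

# Read the key from the environment so it never appears in the repository;
# fail early with a clear message if the secret has not been configured.
api_key = os.environ.get("YOUTUBE_API_KEY")
if not api_key:
    raise RuntimeError("YOUTUBE_API_KEY is not set; add it as a Space secret.")

Since app.py already imports os, this would drop in where the hardcoded assignment currently sits.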
apt.txt ADDED
@@ -0,0 +1,4 @@
+build-essential
+python3-dev
+libomp-dev
+swig
requirements.txt ADDED
@@ -0,0 +1,9 @@
+Gradio
+sentence-transformers
+faiss-cpu
+youtube-transcript-api
+huggingface-hub
+requests
+numpy
+
+