Bilalst committed on
Commit
90d5de6
0 Parent(s):

Duplicate from Bilalst/Gradio_Youtube_Transcript

Browse files
Files changed (5) hide show
  1. .gitattributes +35 -0
  2. README.md +13 -0
  3. app.py +118 -0
  4. apt.txt +4 -0
  5. requirements.txt +9 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Gradio Youtube Transcript
3
+ emoji: 🦀
4
+ colorFrom: green
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 3.35.2
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: Bilalst/Gradio_Youtube_Transcript
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ from sentence_transformers import SentenceTransformer
4
+ from youtube_transcript_api import YouTubeTranscriptApi
5
+ import numpy as np
6
+ import huggingface_hub
7
+ import os
8
+ import faiss
9
+
10
# Sentence-embedding model shared by index construction and query encoding.
model = SentenceTransformer('all-mpnet-base-v2')

# Playlist whose videos are indexed. The original ID stays the default;
# set YOUTUBE_PLAYLIST_ID to index a different playlist without a code change.
playlist_id = os.environ.get('YOUTUBE_PLAYLIST_ID', 'PLD4EAA8F8C9148A1B')

# The YouTube Data API key is read from the environment instead of being
# committed to the repository: a key stored in source control is visible to
# anyone who can read the repo. Any key that was previously committed should
# be treated as compromised and revoked.
api_key = os.environ.get('YOUTUBE_API_KEY')
if not api_key:
    raise RuntimeError(
        'YOUTUBE_API_KEY is not set; provide the YouTube Data API key as an '
        'environment variable (for a hosted Space, configure it as a secret).'
    )
17
+
18
# Collect the ID of every video in the playlist via the YouTube Data API.
# Each call asks for up to 50 items (maxResults); keep following
# nextPageToken until the API stops returning one.
url = 'https://www.googleapis.com/youtube/v3/playlistItems'
playlist_query = {
    'part': 'snippet',
    'maxResults': 50,
    'playlistId': playlist_id,
    'key': api_key,
}
video_ids = []

while True:
    # A timeout keeps start-up from hanging forever on a stalled connection.
    response = requests.get(url, params=playlist_query, timeout=10)
    # Fail with the real HTTP error (bad key, quota, unknown playlist)
    # instead of an opaque KeyError on the missing 'items' field.
    response.raise_for_status()
    data = response.json()

    # Extract the video IDs from this page of results.
    video_ids.extend(
        item['snippet']['resourceId']['videoId']
        for item in data.get('items', [])
    )

    # Stop when there are no more pages; otherwise request the next one.
    next_page_token = data.get('nextPageToken')
    if not next_page_token:
        break
    playlist_query['pageToken'] = next_page_token
36
+
37
# Download the transcript of each playlist video.
# `transcripts` and `ids` grow in lockstep: entry i of both lists belongs to
# the same video, and videos whose transcript cannot be fetched appear in
# neither list.
transcripts = []
ids = []

for video_id in video_ids:
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        # Flatten the caption segments into one space-separated string.
        transcript_text = ' '.join(segment['text'] for segment in transcript)
    except Exception as e:
        # Best effort: report the failure and move on to the next video so
        # that one bad video does not abort the whole run.
        print(f"Error retrieving transcript for video {video_id}: {e}")
    else:
        transcripts.append(transcript_text)
        ids.append(video_id)
51
+
52
# Ask the model for its embedding width instead of hard-coding 768, so the
# index stays correct if the model above is ever swapped.
embedding_dim = model.get_sentence_embedding_dimension()

# Create one embedding row per transcript. FAISS expects a 2-D float32
# array; the explicit reshape also gives a well-formed (0, dim) array when no
# transcript could be retrieved.
sentence_embeddings = np.asarray(
    model.encode(transcripts), dtype=np.float32
).reshape(len(transcripts), embedding_dim)

# Set up FAISS: exact (brute-force) search using L2 distance.
index = faiss.IndexFlatL2(embedding_dim)

# Add the embeddings; row i of the index corresponds to ids[i].
if sentence_embeddings.shape[0]:
    index.add(sentence_embeddings)
63
+
64
+
65
+ #---------------------------------------------
66
+
67
def get_video_links(input_text):
    """Return HTML links to the videos whose transcripts best match a query.

    The query is embedded with the module-level ``model`` and searched in the
    module-level FAISS ``index``. The matching videos are rendered, best match
    first, as a thumbnail plus title linking to YouTube.

    Args:
        input_text: Free-text search query.

    Returns:
        An HTML string with one ``<a>`` element per matching video, or an
        empty string when the query is blank, the index is empty, or no video
        details could be retrieved.

    Raises:
        requests.HTTPError: If the YouTube Data API responds with an error
            status.
    """
    import html  # function-scope import: only needed here, for escaping

    if not input_text or not input_text.strip() or index.ntotal == 0:
        return ''

    # Encode the query as a (1, dim) float32 matrix, the layout FAISS expects.
    query_embedding = np.asarray(model.encode([input_text]), dtype=np.float32)

    # Never request more neighbours than the index holds: FAISS pads missing
    # results with -1, and ids[-1] would silently return the last video.
    k = min(15, index.ntotal)
    _, neighbours = index.search(query_embedding, k)

    # Keep the ranking order while dropping padding and repeated video IDs.
    ranked_ids = []
    visited_ids = set()
    for position in neighbours[0]:
        if position < 0:
            continue
        video_id = ids[position]
        if video_id in visited_ids:
            continue
        visited_ids.add(video_id)
        ranked_ids.append(video_id)

    if not ranked_ids:
        return ''

    # Fetch the details of all hits in one request (the `id` parameter takes
    # a comma-separated list) instead of one request per video.
    response = requests.get(
        'https://www.googleapis.com/youtube/v3/videos',
        params={'part': 'snippet', 'id': ','.join(ranked_ids), 'key': api_key},
        timeout=10,
    )
    response.raise_for_status()
    snippets = {
        item['id']: item['snippet']
        for item in response.json().get('items', [])
    }

    video_links = []
    for video_id in ranked_ids:
        snippet = snippets.get(video_id)
        if snippet is None:
            # The API returned no entry for this ID (e.g. the video is no
            # longer available); skip it rather than fail the whole search.
            continue

        # Titles and URLs come from an external service: escape them before
        # interpolating into HTML so they cannot inject markup.
        video_title = html.escape(snippet.get('title', ''))
        video_thumbnail = html.escape(
            snippet.get('thumbnails', {}).get('default', {}).get('url', ''),
            quote=True,
        )
        video_link = html.escape(
            f"https://www.youtube.com/watch?v={video_id}", quote=True
        )
        video_links.append(
            f'<a href="{video_link}" target="_blank">'
            f'<img src="{video_thumbnail}"><br>{video_title}</a><br>'
        )

    return ''.join(video_links)
97
+
98
# Create the Gradio interface: one text box in, rendered HTML out.
# `gr.Textbox` replaces the deprecated `gr.inputs.Textbox` namespace, which
# only emits a deprecation warning on Gradio 3.x and is gone in later majors.
iface = gr.Interface(
    fn=get_video_links,
    inputs=[gr.Textbox(label="Add what you are looking to find in Dr. Joe's testimonials!")],
    outputs="html",
    title="Dr. Joe Dispenza testimonials Search",
)

# Launch the Gradio interface only when this file is executed as a script
# (this is how the app is started on Hugging Face Spaces).
if __name__ == '__main__':
    iface.launch()
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
apt.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ build-essential
2
+ python3-dev
3
+ libomp-dev
4
+ swig
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Gradio
2
+ sentence-transformers
3
+ faiss-cpu
4
+ youtube-transcript-api
5
+ huggingface-hub
6
+ requests
7
+ numpy
8
+
9
+