Spaces:
Runtime error
Runtime error
File size: 2,303 Bytes
bda77e6 4bcf60e bda77e6 49e4213 bda77e6 dd748bd bda77e6 90323cf bda77e6 90323cf bda77e6 90323cf bda77e6 90323cf bda77e6 92597d1 6dac9eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
#importing the necessary modules
import os
import urllib.request
import re
import time
import gradio as gr
# Callback run by the Gradio interface: scrapes a channel page and saves transcripts
def _read_page(url):
    """Fetch *url* and return its body decoded as UTF-8, closing the response."""
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8")


def transcript_extract(channel_url=""):
    """Download the transcripts linked from a YouTube channel page.

    The channel page is scanned for ``/watch?v=...`` links. For each distinct
    video, the video page is scanned for a ``/timedtext_editor?...`` link, and
    that page for an ``/api/timedtext?...`` link, which is saved as
    ``Transcripts/<video id>.xml``.

    Args:
        channel_url: URL of the channel page, as typed into the Gradio
            textbox. Must use the http or https scheme.

    Returns:
        A newline-separated progress report (one line per video), or a
        single error line when the channel page cannot be used. The same
        lines are also printed as they occur.
    """
    channel_url = (channel_url or "").strip()
    if not channel_url:
        return "Please enter a channel URL."
    # The URL comes straight from a public text box; refuse file:// and other
    # schemes that urlopen would otherwise happily open.
    if not channel_url.lower().startswith(("http://", "https://")):
        return "Channel URL must start with http:// or https://"

    messages = []

    def report(message):
        # Echo to the server log and keep a copy for the Gradio output box.
        print(message)
        messages.append(message)

    try:
        data = _read_page(channel_url)
    except (OSError, ValueError) as err:
        # URLError/HTTPError are OSError subclasses; decode errors are ValueError.
        return f"Could not open channel URL {channel_url!r}: {err}"

    os.makedirs('Transcripts', exist_ok=True)

    base_url = 'http://www.youtube.com'
    watch_prefix = '/watch?v='
    # NOTE(review): the character classes below exclude '=' and '&', so the
    # editor/API matches stop at the first query parameter value - confirm
    # the patterns against current page markup.
    editor_pattern = re.compile(r'(\/timedtext_editor\?[A-Za-z0-9_.-]*)')
    download_pattern = re.compile(r'(\/api\/timedtext\?[A-Za-z0-9_.-]*)')

    # dict.fromkeys drops repeated links while keeping page order, so each
    # video is fetched only once.
    video_links = dict.fromkeys(
        re.findall(r'(\/watch\?v=[A-Za-z0-9_.-]*)', data)
    )
    if not video_links:
        report("No video links found on the channel page.")

    for link in video_links:
        video_id = link[len(watch_prefix):]
        if not video_id:
            # The pattern also matches a bare '/watch?v=' with no ID.
            continue
        try:
            editor_links = editor_pattern.findall(_read_page(base_url + link))
            if not editor_links:
                report("Transcript not available for video " + video_id)
                continue

            transcript_data = _read_page(base_url + editor_links[0])
            download_links = download_pattern.findall(transcript_data)
            if not download_links:
                report("Transcript not available for video " + video_id)
                continue

            file_name = "Transcripts/" + video_id + ".xml"
            report("Downloading transcript for video " + video_id + "...")
            urllib.request.urlretrieve(base_url + download_links[0], file_name)
            # Pause between downloads to avoid hammering the server.
            time.sleep(3)
        except (OSError, ValueError) as err:
            # One bad video should not abort the remaining downloads.
            report(f"Failed to process video {video_id}: {err}")

    return "\n".join(messages)
# Build the Gradio UI around transcript_extract and start the server.
# `share` is an option of launch(), not of the Interface constructor.
demo = gr.Interface(fn=transcript_extract, inputs="textbox", outputs="textbox")
demo.launch(share=True)