Spaces:
Sleeping
Sleeping
File size: 1,706 Bytes
a0792c3 09cc673 a0792c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import streamlit as st
from youtube_transcript_api import YouTubeTranscriptApi as yta
import re
def extract_video_id(youtube_url):
    """
    Extracts the video ID from a YouTube watch URL.

    The ID is taken from the ``v`` query parameter. The parameter must be
    introduced by ``?`` or ``&``, so other parameters whose names merely end
    in "v" (for example ``rev=1``) are not mistaken for it, and the value
    stops at the next ``&``, at a ``#fragment`` or at whitespace.

    Args:
        youtube_url: URL text; must contain "youtube.com/watch".

    Returns:
        The video ID string, or None when the URL is not a watch URL or has
        no non-empty ``v`` parameter. In both failure cases a message is
        shown with ``st.error``.
    """
    if "youtube.com/watch" not in youtube_url:
        st.error("Invalid YouTube URL")
        return None

    # "+" requires at least one character, so "watch?v=" counts as missing
    # instead of yielding an empty ID.
    match = re.search(r"[?&]v=([^&#\s]+)", youtube_url)
    if match is None:
        st.error("Video ID not found in the URL")
        return None
    return match.group(1)
def transcribe_video(youtube_url):
    """
    Fetches and cleans the transcription of a given YouTube video.

    Args:
        youtube_url: URL of the video; handed to ``extract_video_id``.

    Returns:
        The transcript as one string, one caption segment per line, with
        every character removed except ASCII letters and digits, hyphen,
        space and the accented letters listed in the pattern below.
        Returns "Invalid YouTube URL" when no video ID could be extracted.
        If fetching or cleaning raises, the exception's message is returned
        in place of the transcript.
    """
    video_id = extract_video_id(youtube_url)
    if video_id is None:
        return "Invalid YouTube URL"

    # Characters NOT in this class are deleted from each caption line.
    # The previous class contained mis-encoded characters (including U+FFFD),
    # so the accented letters it was meant to keep were being stripped.
    # They are written as \u escapes (interpreted by `re`) so the pattern
    # no longer depends on the file's encoding.
    disallowed = re.compile(
        r"[^a-zA-Z0-9\- "
        # Lower case: dotless i, s-cedilla, g-breve, o/u-diaeresis, c-cedilla
        r"\u0131\u015f\u011f\u00f6\u00fc\u00e7"
        # Upper case: dotted I, S-cedilla, G-breve, O/U-diaeresis, C-cedilla
        r"\u0130\u015e\u011e\u00d6\u00dc\u00c7"
        # i-diaeresis: kept because the previous class also allowed it
        r"\u00ef]"
    )

    try:
        # NOTE(review): 'us' is presumably meant as 'en-US' - confirm against
        # the youtube_transcript_api language codes; left unchanged here.
        transcript = yta.get_transcript(video_id, languages=('us', 'en'))
        cleaned = [disallowed.sub("", segment['text']) for segment in transcript]
        return "\n".join(cleaned)
    except Exception as e:
        # UI boundary: the caller displays whatever string comes back, so the
        # failure reason is returned as text instead of being raised.
        return str(e)
# Streamlit interface
# Shows a title and a URL input; once a URL is entered, embeds the video and
# offers a button that fetches the transcript into a text area.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# that had lost its indentation - confirm that the button is meant to appear
# only when a video ID was extracted.
st.title("YouTube Video Transcription Extractor")
youtube_url = st.text_input("Enter the YouTube Video URL", placeholder="Example: https://www.youtube.com/watch?v=MnDudvCyWpc")
if youtube_url:
    # extract_video_id shows its own error message and returns None on failure.
    video_id = extract_video_id(youtube_url)
    if video_id:
        video_embed_url = f"https://www.youtube.com/embed/{video_id}"
        st.video(video_embed_url)
        if st.button("Transcribe Video"):
            # transcribe_video returns either the transcript or an error
            # message; both are displayed in the same text area.
            result = transcribe_video(youtube_url)
            st.text_area("Transcription:", value=result, height=300)
|