VideoScriptMaster / pages /2_MultipleTranscript.py
eaglelandsonce's picture
Update pages/2_MultipleTranscript.py
df78bc9 verified
raw
history blame
1.84 kB
import streamlit as st
from youtube_transcript_api import YouTubeTranscriptApi as yta
import re
def extract_video_id(youtube_url):
"""
Extracts the video ID from a YouTube URL.
"""
if "youtube.com/watch" not in youtube_url:
st.error("Invalid YouTube URL")
return None
index_v = youtube_url.find('v=')
if index_v == -1:
st.error("Video ID not found in the URL")
return None
video_id_start = index_v + 2
video_id_end = youtube_url.find('&', video_id_start)
video_id = youtube_url[video_id_start:video_id_end] if video_id_end != -1 else youtube_url[video_id_start:]
return video_id
def transcribe_videos(youtube_urls):
"""
Fetches and cleans the transcriptions of given YouTube videos.
"""
all_transcripts = []
for url in youtube_urls.split(','):
url = url.strip()
video_id = extract_video_id(url)
if video_id is None:
continue
try:
transcript = yta.get_transcript(video_id, languages=('us', 'en'))
cleaned_text = [re.sub(r"[^a-zA-Z0-9-1sgçiISGÖÜçï ]", "", t['text']) for t in transcript]
all_transcripts.append("\n".join(cleaned_text))
except Exception as e:
all_transcripts.append(f"Failed to transcribe {url}: {str(e)}")
return "\n\n---\n\n".join(all_transcripts)
# Streamlit interface
st.title("YouTube Multiple-Video Transcription Extractor")
youtube_urls = st.text_input("Enter YouTube Video URLs separated by commas", placeholder="Example: https://www.youtube.com/watch?v=MnDudvCyWpc")
if st.button("Transcribe Videos"):
result = transcribe_videos(youtube_urls)
st.text_area("Transcriptions:", value=result, height=300)
st.download_button("Download Transcripts", data=result, file_name="transcripts.txt", mime="text/plain")