Spaces:
Sleeping
Sleeping
import gradio as gr | |
from youtube_transcript_api import YouTubeTranscriptApi | |
def get_youtube_transcript(video_id: str) -> str:
    """Fetch the transcript of a YouTube video and format it for display.

    Args:
        video_id: The YouTube video ID (e.g., 'dQw4w9WgXcQ'). Leading and
            trailing whitespace is ignored.

    Returns:
        The full transcript text followed by one line per segment (start
        time, duration, text). If the ID is empty or blank, a prompt asking
        for an ID is returned. If fetching or formatting raises an
        ``Exception``, an error message describing the failure is returned
        instead of propagating it.
    """
    # Pasted IDs often carry stray spaces or a trailing newline. Without
    # stripping, a blank value would be sent to the API and come back as a
    # confusing fetch error instead of the prompt below.
    if isinstance(video_id, str):
        video_id = video_id.strip()
    if not video_id:
        return "Please enter a YouTube video ID."

    try:
        # NOTE(review): `get_transcript` is the legacy static entry point of
        # youtube-transcript-api; newer releases appear to favour
        # `YouTubeTranscriptApi().fetch(...)` -- confirm against the pinned
        # package version before upgrading.
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)

        # Plain text of the whole video, segments separated by single spaces.
        full_transcript_text = " ".join(item['text'] for item in transcript_list)

        # One line per segment with its timing information.
        detailed_segments_str = "\n".join(
            f"Start: {segment['start']:.2f}s, Duration: {segment['duration']:.2f}s, Text: {segment['text']}"
            for segment in transcript_list
        )

        return (
            "Full Transcript:\n"
            f"{full_transcript_text}\n\n"
            "Detailed Transcript Segments:\n"
            f"{detailed_segments_str}"
        )
    except Exception as e:
        # This function feeds a UI text box, so any failure is reported as
        # text rather than raised.
        return (
            f"An error occurred: {e}\n"
            "Possible reasons: No transcript available for this video, "
            "invalid video ID, or network issues. "
            "Please ensure the video ID is correct and the video has captions enabled."
        )
# Build the input and output widgets first, then wire them into the interface.
_video_id_box = gr.Textbox(
    label="YouTube Video ID",
    placeholder="e.g., dQw4w9WgXcQ (from youtube.com/watch?v=dQw4w9WgXcQ)",
)
_transcript_box = gr.Textbox(
    label="Transcript Output",
    lines=20,  # visible rows; raise this for longer transcripts
    interactive=False,  # output is read-only
)

iface = gr.Interface(
    fn=get_youtube_transcript,
    inputs=_video_id_box,
    outputs=_transcript_box,
    title="YouTube Transcript Fetcher",
    description="Enter a YouTube video ID to get its full transcript and detailed segments.",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()