"""Small FastAPI service that returns the plain-text transcript of a YouTube video."""

from typing import Optional
from urllib.parse import parse_qs, urlparse

from fastapi import FastAPI, HTTPException, Query
from youtube_transcript_api import (
    NoTranscriptFound,
    TranscriptsDisabled,
    YouTubeTranscriptApi,
)

app = FastAPI()

# Hosts whose first path segment is the video id (https://youtu.be/<id>).
_SHORT_LINK_HOSTS = frozenset({"youtu.be", "www.youtu.be"})

# Hosts that carry the id in the path after one of the prefixes below
# (https://www.youtube.com/embed/<id>, /shorts/<id>, ...).
_YOUTUBE_HOSTS = frozenset(
    {
        "youtube.com",
        "www.youtube.com",
        "m.youtube.com",
        "music.youtube.com",
        "youtube-nocookie.com",
        "www.youtube-nocookie.com",
    }
)
_PATH_ID_PREFIXES = frozenset({"embed", "shorts", "live", "v"})


def _video_id_from_path(hostname: Optional[str], path: str) -> Optional[str]:
    """Return the video id embedded in a YouTube URL path, or ``None`` if absent."""
    host = hostname or ""
    segments = [segment for segment in path.split("/") if segment]
    if host in _SHORT_LINK_HOSTS:
        return segments[0] if segments else None
    if (
        host in _YOUTUBE_HOSTS
        and len(segments) >= 2
        and segments[0] in _PATH_ID_PREFIXES
    ):
        return segments[1]
    return None


def extract_video_id(youtube_url: str) -> str:
    """Extract the video id from a YouTube URL.

    The ``v`` query parameter is checked first, on any host, exactly as
    before. When it is missing or empty, the path forms ``youtu.be/<id>``
    and ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>`` on
    known YouTube hosts are accepted as well. Leading and trailing
    whitespace in the URL is ignored.

    Args:
        youtube_url: The URL supplied by the client.

    Returns:
        The video id as a non-empty string.

    Raises:
        HTTPException: With status 400 when no video id can be found or the
            URL cannot be parsed.
    """
    try:
        parsed_url = urlparse(youtube_url.strip())
        video_id = parse_qs(parsed_url.query).get("v", [None])[0]
        if not video_id:
            video_id = _video_id_from_path(parsed_url.hostname, parsed_url.path)
    except (AttributeError, TypeError, ValueError):
        # urlparse raises ValueError on malformed netlocs (e.g. "http://[");
        # non-str input surfaces as AttributeError/TypeError. All of these
        # mean "not a usable URL" and map to the same 400 below.
        video_id = None

    if not video_id:
        raise HTTPException(status_code=400, detail="Invalid YouTube URL format.")
    return video_id


@app.get("/transcript")
def get_transcript(
    youtube_url: str = Query(..., description="Full YouTube video URL"),
):
    """Return the transcript of the video at ``youtube_url`` as one string.

    Args:
        youtube_url: Required query parameter holding the video URL.

    Returns:
        A dict with ``video_id`` and ``transcript``, where the transcript is
        the ``text`` field of every entry joined with single spaces.

    Raises:
        HTTPException: 400 if the URL holds no video id, 404 if transcripts
            are disabled or none is found, 500 for any other failure (the
            detail is the string form of the underlying exception).
    """
    video_id = extract_video_id(youtube_url)
    try:
        # NOTE(review): recent youtube-transcript-api releases appear to have
        # replaced this static call with an instance API -- confirm against
        # the version pinned for this service.
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        text_only = " ".join(entry["text"] for entry in transcript)
        return {
            "video_id": video_id,
            "transcript": text_only,
        }
    except TranscriptsDisabled as exc:
        raise HTTPException(
            status_code=404,
            detail="Transcripts are disabled for this video.",
        ) from exc
    except NoTranscriptFound as exc:
        raise HTTPException(
            status_code=404,
            detail="No transcript found for this video.",
        ) from exc
    except Exception as e:
        # Top-level boundary: turn any unexpected failure into a 500 response.
        raise HTTPException(status_code=500, detail=str(e)) from e