"""FastAPI service that returns a text summary of a YouTube video's transcript."""

import logging
import re
from typing import Union

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
from transformers import pipeline
from fastapi import FastAPI, Query
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse

_logger = logging.getLogger(__name__)

app = FastAPI()

# Built once at import time and shared by every request.
text_summary = pipeline("summarization", model="Falconsai/text_summarization", max_length=12000)

# Captures the 11-character video id from a YouTube URL; compiled once so
# request handlers do not repeat the pattern lookup.
_VIDEO_ID_RE = re.compile(
    r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
)


def extract_video_id(url: str) -> Union[str, None]:
    """Return the 11-character video id found in *url*, or ``None`` if absent."""
    match = _VIDEO_ID_RE.search(url)
    if match:
        return match.group(1)
    return None


def get_youtube_transcript(video_url: str) -> Union[str, None]:
    """Fetch the transcript of the video at *video_url* as plain text.

    Returns:
        The transcript formatted by ``TextFormatter``, or ``None`` when the
        video id cannot be extracted or the transcript cannot be fetched.

    Failures are reported as ``None`` (and logged) rather than as an error
    string: a non-empty string is truthy, so callers that test the result
    would otherwise treat the error message as a transcript.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        _logger.warning("Video ID could not be extracted from %r", video_url)
        return None

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return TextFormatter().format_transcript(transcript)
    except Exception:
        # Request boundary: any fetch/format failure becomes "no transcript",
        # with the traceback kept in the log for diagnosis.
        _logger.exception("Failed to fetch transcript for video %s", video_id)
        return None


def summarize_text(text: str) -> str:
    """Run *text* through the summarization pipeline and return the summary."""
    summarized_text = text_summary(text)
    return summarized_text[0]['summary_text']


@app.get("/getdata")
def get_data(input: str = Query(..., title="YouTube Video URL")) -> dict:
    """Summarize the transcript of the YouTube URL sent by the frontend.

    Returns ``{"summary": ...}`` on success, or ``{"error": ...}`` when no
    transcript could be obtained. The parameter is named ``input`` because
    that is the query-string key the endpoint accepts.
    """
    transcript = get_youtube_transcript(input)
    if not transcript:
        return {"error": "Failed to get transcript from the YouTube video."}
    return {"summary": summarize_text(transcript)}


app.mount('/', StaticFiles(directory="static", html=True), name="static")


@app.get('/')
def index() -> FileResponse:
    """Serve the frontend's index page.

    NOTE(review): the static mount above is registered first on the same
    path, so this route is presumably shadowed by it - confirm.
    """
    return FileResponse('/app/static/index.html', media_type="text/html")