"""FastAPI service that summarizes the transcript of a YouTube video.

The module exposes one JSON endpoint (``/getdata``) that takes a YouTube URL,
downloads the video's transcript and runs it through a Hugging Face
summarization pipeline. It also serves the static frontend.
"""
from typing import Union
import re

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
from transformers import pipeline
from fastapi import FastAPI, Query
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse

app = FastAPI()

# The summarization pipeline is created once at import time and reused by
# every request.
text_summary = pipeline("summarization", model="Falconsai/text_summarization")

# Captures the 11-character video id from the URL shapes the pattern lists:
# youtube.com paths (``/v/``, ``/e/``, ``/embed/``, ``?v=`` / ``&v=``) and
# ``youtu.be/`` short links.
_VIDEO_ID_RE = re.compile(
    r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
)


def extract_video_id(url):
    """Return the 11-character YouTube video id found in *url*.

    Args:
        url: A YouTube URL (or any text containing one).

    Returns:
        The video id, or ``None`` when *url* is not a string or no id can be
        found in it.
    """
    # Non-string input (e.g. None) cannot contain an id; report "not found"
    # instead of letting the regex engine raise TypeError.
    if not isinstance(url, str):
        return None
    match = _VIDEO_ID_RE.search(url)
    return match.group(1) if match else None


def _fetch_transcript_text(video_id):
    """Download the transcript for *video_id* and return it as plain text.

    Any exception raised by the transcript API or the formatter propagates to
    the caller, so callers can tell a failure apart from real transcript text.
    """
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    return TextFormatter().format_transcript(transcript)


def get_youtube_transcript(video_url):
    """Return the plain-text transcript of the video at *video_url*.

    Failures are reported in-band as human-readable strings rather than
    raised:

    * ``"Video ID could not be extracted."`` when no id is found in the URL;
    * ``"An error occurred: <details>"`` when fetching/formatting fails.

    Because these messages are non-empty strings, callers cannot use the
    truthiness of the result to detect failure.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted."
    try:
        return _fetch_transcript_text(video_id)
    except Exception as e:
        return f"An error occurred: {e}"


# Get the input from the frontend
@app.get("/getdata")
def get_data(input: str):  # ``input`` is the public query-parameter name; keep it.
    """Summarize the transcript of the YouTube video given by ``input``.

    Args:
        input: The YouTube URL sent by the frontend as a query parameter.

    Returns:
        ``{"summary": <text>}`` on success, or ``{"error": <message>}`` when
        the video id cannot be extracted, the transcript cannot be fetched,
        or the transcript is empty.
    """
    print(input)
    failure = {"error": "Failed to get transcript from the YouTube video."}

    video_id = extract_video_id(input)
    if not video_id:
        return failure

    # Fetch through the raising helper rather than get_youtube_transcript():
    # the latter returns error messages as ordinary (truthy) strings, which
    # would otherwise be summarized as if they were the transcript.
    try:
        transcript = _fetch_transcript_text(video_id)
    except Exception as exc:
        print(f"Transcript fetch failed for {video_id}: {exc}")
        return failure

    if not transcript or not transcript.strip():
        return failure

    # NOTE(review): the whole transcript is passed to the model as-is;
    # confirm how the pipeline behaves for transcripts longer than the
    # model's input limit.
    summary = text_summary(
        transcript, min_length=10, max_length=1000, do_sample=False
    )
    return {"summary": summary[0]['summary_text']}


app.mount('/', StaticFiles(directory="static", html=True), name="static")


# NOTE(review): this route is registered after the '/' mount above, so the
# mount presumably handles '/' first and this handler is never reached --
# confirm before relying on it. The absolute path also differs from the
# relative "static" directory used by the mount.
@app.get('/')
def index() -> FileResponse:
    """Return the frontend's ``index.html`` page."""
    return FileResponse('/app/static/index.html', media_type="text/html")