# Uploaded by ammaan ("Upload 3 files"), commit f109e0f (verified).
# Hugging Face file view: raw / history / blame -- no virus, 3.72 kB.
# from typing import Union
# import re
# from youtube_transcript_api import YouTubeTranscriptApi
# from youtube_transcript_api.formatters import TextFormatter
# import torch
# from transformers import pipeline
# from fastapi import FastAPI
# from fastapi.staticfiles import StaticFiles
# from fastapi.responses import FileResponse
# app = FastAPI()
# text_summary = pipeline("summarization", model="Falconsai/text_summarization")
# @app.get("/getdata")
# def getInput(input: str) -> dict:
# output = text_summary(input)
# return output[0]['summary_text']
# def extract_video_id(url):
# regex = r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
# match = re.search(regex, url)
# if match:
# return match.group(1)
# return None
# def get_youtube_transcript(video_url):
# video_id = extract_video_id(video_url)
# if not video_id:
# return "Video ID could not be extracted."
# try:
# transcript = YouTubeTranscriptApi.get_transcript(video_id)
# formatter = TextFormatter()
# text_transcript = formatter.format_transcript(transcript)
# summary_text = summary(text_transcript)
# print(summary_text)
# return summary_text
# except Exception as e:
# return f"An error occurred: {e}"
# app.mount('/',StaticFiles(directory="static",html=True),name="static")
# @app.get('/')
# def index() -> FileResponse:
# return FileResponse('/app/static/index.html',media_type="text/html")
# if __name__ == "__main__":
# import uvicorn
# uvicorn.run(app, host="127.0.0.1", port=5050)
import logging
import re
from typing import Union

from fastapi import FastAPI, Query
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
# ASGI application object; the routes and the static mount defined further
# down in this module are attached to it.
app = FastAPI()

# Summarization pipeline, built once when the module is imported and then
# called by summarize_text for every request.
text_summary = pipeline(
    "summarization",
    model="Falconsai/text_summarization",
    max_length=512,
)
# Compiled once at import time instead of on every call. Group 1 captures the
# 11-character video ID that follows one of the recognised URL prefixes:
#   youtube.com/<segment>/<...>/<id>   (two or more path segments)
#   youtube.com/v/, /e/, /embed/, /shorts/, /live/ followed by <id>
#   youtube.com/...?v=<id> or ...&v=<id>
#   youtu.be/<id>
_VIDEO_ID_PATTERN = re.compile(
    r"(?:youtube\.com\/"
    r"(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts|live)\/|\S*?[?&]v=)"
    r"|youtu\.be\/)"
    r"([a-zA-Z0-9_-]{11})"
)


def extract_video_id(url: str) -> Union[str, None]:
    """Return the 11-character YouTube video ID contained in *url*.

    Args:
        url: Any string that may contain a YouTube link (watch, embed,
            shorts, live or youtu.be form).

    Returns:
        The video ID, or None when no supported URL form is found.
    """
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None
_logger = logging.getLogger(__name__)


def get_youtube_transcript(video_url: str) -> Union[str, None]:
    """Fetch the transcript of a YouTube video as plain text.

    Failures are reported by returning None rather than an error string:
    an error string is truthy, so a caller that checks the result for
    truthiness would treat the message as transcript text.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The transcript formatted by TextFormatter, or None when the video
        ID cannot be extracted or fetching/formatting the transcript fails.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        _logger.warning("Video ID could not be extracted from %r", video_url)
        return None

    try:
        # NOTE(review): newer youtube-transcript-api releases appear to
        # replace this static call with an instance-level fetch(); confirm
        # against the pinned version before upgrading.
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return TextFormatter().format_transcript(transcript)
    except Exception:
        # Best-effort boundary: any failure becomes "no transcript", but the
        # traceback is logged so the cause is not lost.
        _logger.exception(
            "An error occurred while fetching the transcript for %s", video_id
        )
        return None
def summarize_text(text: str) -> str:
    """Run *text* through the module-level summarization pipeline.

    Args:
        text: The text to summarize.

    Returns:
        The 'summary_text' field of the first result the pipeline returns.
    """
    results = text_summary(text)
    first_result = results[0]
    return first_result['summary_text']
# Endpoint the frontend calls with the video URL it wants summarized.
@app.get("/getdata")
def get_data(input: str = Query(..., title="YouTube Video URL")) -> dict:
    """Summarize the transcript of the YouTube video given by *input*.

    Args:
        input: Required query parameter holding the YouTube video URL.

    Returns:
        {"summary": <text>} when get_youtube_transcript yields a truthy
        value, otherwise {"error": <message>}.
    """
    print(input)
    transcript = get_youtube_transcript(input)

    # Guard clause: nothing to summarize without a transcript.
    if not transcript:
        return {"error": "Failed to get transcript from the YouTube video."}

    return {"summary": summarize_text(transcript)}
# Mount the "static" directory at the site root, with StaticFiles' html
# option switched on.
app.mount(
    '/',
    StaticFiles(directory="static", html=True),
    name="static",
)


# NOTE(review): this route is registered after the "/" mount above. Routes are
# presumably matched in registration order, so the mount may answer "/" before
# this handler is consulted -- confirm whether it is ever reached.
@app.get('/')
def index() -> FileResponse:
    """Return the frontend's index.html as an HTML file response."""
    return FileResponse(
        '/app/static/index.html',
        media_type="text/html",
    )
# When the file is executed directly, serve the app with uvicorn on
# 127.0.0.1:5050.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when running as a script.
    import uvicorn

    uvicorn.run(
        app,
        host="127.0.0.1",
        port=5050,
    )