IshanGenAI's picture
Update app.py
2b5548d verified
raw
history blame contribute delete
No virus
2.65 kB
# Setup (run these in a shell before starting the app; they are not Python code):
#   pip install --upgrade pip
#   pip install youtube_transcript_api
from urllib.parse import urlparse, parse_qs
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
import torch
import gradio as gr
from transformers import pipeline
# Summarization pipeline shared by the functions below; torch_dtype is set to
# bfloat16 when the model is loaded.
SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"
text_summary = pipeline(
    "summarization",
    model=SUMMARIZATION_MODEL,
    torch_dtype=torch.bfloat16,
)
# model_path = "../models/models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
# text_summary = pipeline("summarization", model=model_path, torch_dtype=torch.bfloat16)
# def summary(input):
# output = text_summary(input)
# return output[0]['summary_text']
def summary(input_text):
    """Summarize ``input_text`` with the module-level ``text_summary`` pipeline.

    Args:
        input_text: The text to summarize.

    Returns:
        The ``summary_text`` of the first pipeline result, or an empty string
        when ``input_text`` is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the model call entirely.
    if not input_text or not input_text.strip():
        return ""
    # The model's input limit is counted in tokens, not characters, so slicing
    # the string by character count throws away most of the usable input.
    # Ask the tokenizer to truncate to its configured maximum length instead.
    output = text_summary(input_text, truncation=True)
    return output[0]['summary_text']
def get_youtube_video_id(video_url):
    """Extract the video id from a YouTube URL.

    Recognized forms:
        * ``youtu.be/<id>``
        * ``youtube.com/watch?v=<id>``
        * ``youtube.com/embed/<id>``, ``/v/<id>``, ``/shorts/<id>``, ``/live/<id>``

    A leading ``www.`` is ignored, the ``m.`` and ``music.`` sub-domains are
    accepted, host matching is case-insensitive, and a missing scheme
    (``youtu.be/<id>``) is tolerated.

    Args:
        video_url: The URL string to inspect.

    Returns:
        The video id as a string, or ``None`` when the input is not a
        recognizable YouTube video URL.
    """
    if not isinstance(video_url, str):
        return None
    url = video_url.strip()
    if not url:
        return None

    try:
        parsed_url = urlparse(url)
        if not parsed_url.netloc:
            # No scheme given ("youtu.be/abc"): urlparse puts everything in
            # the path, so retry with an explicit scheme.
            parsed_url = urlparse('https://' + url)
        # hostname is lower-cased and has any port stripped, unlike netloc.
        host = parsed_url.hostname or ''
    except ValueError:
        # urlparse rejects some malformed hosts (e.g. broken IPv6 brackets).
        return None

    if host.startswith('www.'):
        host = host[len('www.'):]

    segments = [part for part in parsed_url.path.split('/') if part]

    if host == 'youtu.be':
        return segments[0] if segments else None

    if host in ('youtube.com', 'm.youtube.com', 'music.youtube.com'):
        if segments == ['watch']:
            # .get avoids a KeyError when the "v" parameter is missing.
            ids = parse_qs(parsed_url.query).get('v')
            return ids[0] if ids else None
        if len(segments) > 1 and segments[0] in ('embed', 'v', 'shorts', 'live'):
            return segments[1]

    return None
def get_youtube_transcript(video_url):
    """Fetch a YouTube video's transcript and return a summary of it.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summary text on success. On failure, a human-readable string
        starting with ``"Error:"`` so the UI always has something to show.
    """
    try:
        video_id = get_youtube_video_id(video_url)
    except (KeyError, IndexError, ValueError):
        # A malformed URL must not crash the request handler.
        video_id = None
    if not video_id:
        return "Error: Invalid YouTube URL."

    try:
        if hasattr(YouTubeTranscriptApi, 'get_transcript'):
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # NOTE(review): newer youtube_transcript_api releases replace the
            # static helper with an instance-level fetch(); confirm against
            # the installed version.
            transcript = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        # Concatenate text from each segment of the transcript
        transcript_text = ' '.join(segment['text'] for segment in transcript)
        if not transcript_text.strip():
            return "Error: The transcript for this video is empty."
        return summary(transcript_text)
    except Exception as e:
        # Top-level UI boundary: log the full error and report a short
        # message instead of returning None (which renders as a blank box).
        print("Error:", e)
        return f"Error: Could not fetch or summarize the transcript ({type(e).__name__})."
# video_url = "https://youtu.be/l00VBUXl1Q4?t=421"
# print(get_youtube_transcript(video_url))
# Close any previously launched Gradio interfaces before building this one.
gr.close_all()

# demo = gr.Interface(fn=summary, inputs="text", outputs="text")
# One-line textbox for the URL in, four-line textbox for the result out.
url_box = gr.Textbox(label="Input youtube url to summarize", lines=1)
summary_box = gr.Textbox(label="Summarized text", lines=4)

demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[url_box],
    outputs=[summary_box],
    title="GenAI Project 2: Youtube Transcript",
    description="THIS APPLICATION WILL BE USED TO GENERATE YOUTUBE VIDEO TRANSCRIPT",
)
demo.launch()