File size: 2,653 Bytes
2b5548d
 
9fb1db7
b0f8f76
e54c121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Setup (run these in a shell, not inside Python):
#   pip install --upgrade pip
#   pip install youtube_transcript_api

from urllib.parse import urlparse, parse_qs
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
import torch
import gradio as gr
from transformers import pipeline

# Shared summarization pipeline, built once at import time and reused by
# every request. Uses the distilbart-cnn-12-6 checkpoint with bfloat16 weights.
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)

# model_path = "../models/models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
# text_summary = pipeline("summarization", model=model_path, torch_dtype=torch.bfloat16)

# def summary(input):
#     output = text_summary(input)
#     return output[0]['summary_text']

def summary(input_text):
    """Summarize a piece of text with the module-level ``text_summary`` pipeline.

    Args:
        input_text: The text to summarize. Only its first 1024 characters
            are passed to the model.

    Returns:
        The ``summary_text`` field of the first result returned by the
        pipeline.
    """
    # Cheap pre-cut measured in characters. This is NOT the model's token
    # limit; it only bounds how much text is handed to the tokenizer.
    max_length = 1024
    input_text = input_text[:max_length]
    # A character cap does not bound the token count (text can tokenize to
    # several tokens per character), so also ask the pipeline to truncate
    # the tokenized input instead of failing on an over-long sequence.
    output = text_summary(input_text, truncation=True)
    return output[0]['summary_text']


def get_youtube_video_id(video_url):
    """Extract the video ID from a YouTube URL.

    Recognized forms (host match is case-insensitive and ignores any port):
      * ``youtu.be/<id>``
      * ``youtube.com/watch?v=<id>``
      * ``youtube.com/embed/<id>``, ``/v/<id>``, ``/shorts/<id>``, ``/live/<id>``
    on the ``youtube.com`` hosts with or without a ``www.``, ``m.`` or
    ``music.`` prefix. Surrounding whitespace is ignored and a missing
    scheme (``youtube.com/watch?v=...``) is tolerated.

    Args:
        video_url: The URL string to inspect.

    Returns:
        The video ID as a string, or ``None`` when the input is not a
        string, is not a recognized YouTube URL, or carries no ID.
    """
    if not isinstance(video_url, str):
        return None
    video_url = video_url.strip()
    if not video_url:
        return None

    try:
        parsed_url = urlparse(video_url)
        if not parsed_url.netloc:
            # Without a scheme urlparse puts the host into ``path``;
            # re-parse with a default scheme so the host is recognized.
            parsed_url = urlparse('https://' + video_url)
        # ``hostname`` is lower-cased and has any ":port" stripped.
        host = (parsed_url.hostname or '').lower()
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 host).
        return None

    # Non-empty path components, so stray or trailing slashes do not matter.
    segments = [part for part in parsed_url.path.split('/') if part]

    video_id = None
    if host in ('youtu.be', 'www.youtu.be'):
        if segments:
            video_id = segments[0]
    elif host in ('youtube.com', 'www.youtube.com',
                  'm.youtube.com', 'music.youtube.com'):
        if segments == ['watch']:
            # .get() avoids a KeyError when the "v" parameter is absent.
            video_id = parse_qs(parsed_url.query).get('v', [''])[0]
        elif len(segments) >= 2 and segments[0] in ('embed', 'v', 'shorts', 'live'):
            video_id = segments[1]

    # Normalize "no ID found" (including an empty string) to None.
    return video_id or None

def get_youtube_transcript(video_url):
    """Fetch the transcript of a YouTube video and return a summary of it.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summary text on success. On failure, a human-readable string
        starting with ``"Error:"`` so the UI always has something to show.
    """
    video_id = get_youtube_video_id(video_url)
    if not video_id:
        return "Error: Invalid YouTube URL."

    try:
        if hasattr(YouTubeTranscriptApi, 'get_transcript'):
            # Older youtube_transcript_api releases: static call returning
            # a list of dicts that each carry a 'text' entry.
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            texts = [segment['text'] for segment in transcript]
        else:
            # Newer releases dropped the static helper in favour of an
            # instance whose fetch() yields snippet objects with a .text
            # attribute (per the library's README).
            transcript = YouTubeTranscriptApi().fetch(video_id)
            texts = [segment.text for segment in transcript]

        # Concatenate text from each segment of the transcript
        transcript_text = ' '.join(texts)
        if not transcript_text.strip():
            return "Error: Transcript is empty."

        return summary(transcript_text)
    except Exception as e:
        # UI boundary: log the cause and report it instead of returning
        # None, which would leave the output box silently blank.
        print("Error:", e)
        return f"Error: Could not summarize this video ({e})."


# video_url = "https://youtu.be/l00VBUXl1Q4?t=421"
# print(get_youtube_transcript(video_url))

# Shut down any Gradio servers left over from a previous run.
gr.close_all()

# Single-line URL input and a four-line box for the resulting summary.
_url_input = gr.Textbox(label="Input youtube url to summarize", lines=1)
_summary_output = gr.Textbox(label="Summarized text", lines=4)

# Alternative wiring that summarizes raw text instead of a video URL:
# demo = gr.Interface(fn=summary, inputs="text", outputs="text")
demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[_url_input],
    outputs=[_summary_output],
    title="GenAI Project 2: Youtube Transcript",
    description="THIS APPLICATION WILL BE USED TO GENERATE YOUTUBE VIDEO TRANSCRIPT",
)

# Start the web server for the interface.
demo.launch()