File size: 2,428 Bytes
c689838
 
1bcd797
 
6f37fff
eb2166e
1bcd797
c689838
 
 
 
 
 
 
4a1e5e6
f2e799c
4a1e5e6
f2e799c
 
4a1e5e6
f2e799c
c689838
974a2b2
1bcd797
 
 
 
eb2166e
 
 
 
 
 
 
 
 
1bcd797
 
c689838
 
 
 
 
 
 
 
eb2166e
 
 
 
 
 
 
 
 
 
 
 
 
974a2b2
9d8f197
df1a248
9d8f197
4a1e5e6
 
 
 
f2e799c
4a1e5e6
f2e799c
c689838
 
 
eb2166e
 
cb79876
 
eb2166e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import subprocess
import importlib.util
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
import streamlit as st 
import re

def check_installation(package):
    """Return whether *package* can be located by the import system.

    Args:
        package: Importable module name (e.g. ``"tf_keras"``). Dotted names
            are accepted.

    Returns:
        True if a module spec is found for *package*, False otherwise.
    """
    try:
        return importlib.util.find_spec(package) is not None
    except ImportError:
        # For a dotted name, find_spec imports the parent package first and
        # raises ModuleNotFoundError when that parent is missing; treat that
        # as "not installed" instead of crashing the caller.
        return False

def install_tf_keras():
    """Install the ``tf-keras`` package with pip.

    Returns:
        True if the pip command exited successfully, False otherwise. On
        failure an error message is shown through ``st.error``.
    """
    import sys  # local import: only needed to locate the running interpreter

    # Invoke pip through the current interpreter so the package is installed
    # into the environment this app runs in, not whichever `pip` happens to
    # be first on PATH.
    command = [sys.executable, '-m', 'pip', 'install', 'tf-keras']
    try:
        subprocess.check_call(command)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the case where the executable cannot be launched.
        st.error(f"Failed to install tf-keras package: {e}")
        return False

    # Make the freshly installed package visible to imports in this process.
    importlib.invalidate_caches()
    return True

def summarize(result, chunk_size=1000):
    """Summarize *result* chunk by chunk and display the summaries.

    The text is cut into consecutive slices of at most ``chunk_size``
    characters. Each slice is passed to a ``transformers`` summarization
    pipeline, and the list of ``summary_text`` outputs is rendered with
    ``st.write``.

    Args:
        result: The full text to summarize.
        chunk_size: Maximum number of characters per slice (default 1000).

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    if not result:
        # Nothing to summarize: skip building the pipeline entirely.
        st.write([])
        return

    summarizer = pipeline('summarization')
    summarized_text = []
    # Stepping by chunk_size never yields an empty trailing slice, even when
    # len(result) is an exact multiple of chunk_size.
    for start in range(0, len(result), chunk_size):
        chunk = result[start:start + chunk_size]
        print("input text \n" + chunk)
        out = summarizer(chunk)[0]['summary_text']
        print("Summarized text\n" + out)
        summarized_text.append(out)
    st.write(summarized_text)

def extract_video_id(video_link):
    """Pull the 11-character video id out of a YouTube URL.

    Args:
        video_link: Text that may contain a YouTube link.

    Returns:
        The id captured from the first match in *video_link*, or None when
        the pattern does not match anywhere.
    """
    pattern = r"(?:https:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
    first_hit = re.search(pattern, video_link)
    return first_hit.group(1) if first_hit is not None else None

def get_transcript(video_link):
    """Fetch, display and summarize the transcript of a YouTube video.

    The video id is extracted from *video_link*, the transcript is fetched
    through ``YouTubeTranscriptApi``, the joined text is shown with
    ``st.write`` and then passed to ``summarize``. Problems are reported in
    the Streamlit UI instead of being raised.

    Args:
        video_link: URL of the YouTube video as entered by the user.
    """
    video_id = extract_video_id(video_link)
    if not video_id:
        st.write("Invalid YouTube video link")
        return

    # NOTE(review): newer youtube-transcript-api releases appear to replace
    # this static call with an instance-based fetch API -- confirm against
    # the version pinned for this app.
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
    except Exception as e:
        # UI boundary: a failed retrieval should surface as a readable
        # message, not as a traceback that aborts the script run.
        st.error(f"Could not retrieve the transcript: {e}")
        return

    # Single join instead of repeated string concatenation.
    result = " ".join(segment['text'] for segment in transcript)
    if not result.strip():
        st.warning("The transcript for this video is empty.")
        return

    print(len(result))
    st.write(result)
    summarize(result)

def main():
    """Render the Streamlit page and run the transcript flow on demand.

    Shows the title and tagline, makes sure ``tf_keras`` is available
    (attempting an install when it is not, and stopping if that fails),
    imports TensorFlow, then asks for a video link and calls
    ``get_transcript`` once the "Generate" button is pressed.
    """
    st.title('YouTube Transcript Generator')
    st.write("Get the full text of any YouTube video in seconds.")

    tf_keras_available = check_installation("tf_keras")
    if not tf_keras_available:
        st.write("tf-keras is not installed. Installing...")
        installed = install_tf_keras()
        if not installed:
            st.error("Failed to install tf-keras. Please install it manually and retry.")
            return
        st.success("tf-keras has been successfully installed.")

    # The imported name is unused here; the import itself is kept as-is.
    import tensorflow as tf

    video_link = st.text_input("Enter YouTube video link:")
    # The button is only rendered once a link has been entered.
    if video_link and st.button("Generate"):
        get_transcript(video_link)

# Run the app only when executed as a script, so importing this module
# (e.g. from tests) does not start the UI flow as a side effect.
if __name__ == "__main__":
    main()