JOSHUA / app.py
xxfdsf's picture
Update app.py
cb79876 verified
import subprocess
import importlib.util
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
import streamlit as st
import re
def check_installation(package: str) -> bool:
    """Report whether ``package`` can be located by the import system.

    Args:
        package: Importable module name, e.g. ``"tf_keras"``. Dotted names
            are accepted.

    Returns:
        ``True`` if ``importlib.util.find_spec`` finds a spec for the name,
        ``False`` otherwise.
    """
    try:
        spec = importlib.util.find_spec(package)
    except ImportError:
        # For a dotted name, find_spec imports the parent package first and
        # raises ModuleNotFoundError when that parent is missing. Treat that
        # as "not installed" instead of propagating the error.
        return False
    return spec is not None
def install_tf_keras():
    """Install the ``tf-keras`` package with pip for the running interpreter.

    Returns:
        ``True`` when pip exits successfully. ``False`` when pip exits with a
        non-zero status or cannot be launched; the failure is also reported
        through ``st.error``.
    """
    import sys  # local import: only needed for this one-off install step

    try:
        # Run pip through the current interpreter so the package is installed
        # into the environment this app is running in, rather than whichever
        # ``pip`` executable happens to be first on PATH.
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'tf-keras'])
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the case where the executable cannot be started.
        st.error(f"Failed to install tf-keras package: {e}")
        return False

    # Let the import system notice the package installed while running.
    importlib.invalidate_caches()
    return True
def summarize(result):
    """Summarize ``result`` in 1000-character chunks and display the summaries.

    The text is cut into consecutive slices of at most 1000 characters. Each
    slice is passed to a ``transformers`` summarization pipeline, and the
    collected ``summary_text`` values are shown as a list with ``st.write``.
    Each input slice and its summary are also printed to stdout.

    Args:
        result: Full text to summarize. An empty string produces an empty
            list of summaries without invoking the model.
    """
    chunk_size = 1000

    # Stepping by chunk_size never yields an empty trailing slice, so the
    # model is not called on "" when len(result) is an exact multiple of
    # chunk_size or when the text is empty.
    chunks = [
        result[start:start + chunk_size]
        for start in range(0, len(result), chunk_size)
    ]

    summarized_text = []
    if chunks:
        # Only construct the pipeline when there is something to summarize.
        summarizer = pipeline('summarization')
        for chunk in chunks:
            print("input text \n" + chunk)
            out = summarizer(chunk)[0]['summary_text']
            print("Summarized text\n" + out)
            summarized_text.append(out)

    st.write(summarized_text)
def extract_video_id(video_link):
    """Pull the 11-character video ID out of a YouTube link.

    Args:
        video_link: Text that may contain a YouTube URL.

    Returns:
        The ID captured from the first match of the URL pattern, or ``None``
        when the pattern does not match anywhere in ``video_link``.
    """
    pattern = r"(?:https:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
    found = re.search(pattern, video_link)
    # Group 1 is the only capturing group: the 11-character ID itself.
    return found.group(1) if found else None
def get_transcript(video_link):
    """Fetch a video's transcript, display it, and pass it on for summarizing.

    The video ID is extracted from ``video_link``, and the transcript entries
    are joined into one string. That string is written to the page and then
    handed to ``summarize``. Problems are reported on the page instead of
    raising.

    Args:
        video_link: YouTube URL entered by the user.
    """
    video_id = extract_video_id(video_link)
    if not video_id:
        st.write("Invalid YouTube video link")
        return

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
    except Exception as e:
        # UI boundary: a retrieval failure should produce a readable message
        # rather than an unhandled traceback.
        st.error(f"Could not retrieve transcript: {e}")
        return

    # Each entry's text is prefixed with a space, matching the original
    # concatenation, but built in a single pass.
    result = "".join(' ' + entry['text'] for entry in transcript)
    print(len(result))

    if not result.strip():
        # Nothing to show or summarize.
        st.write("No transcript text available for this video")
        return

    st.write(result)
    summarize(result)
def main():
    """Render the page: ensure tf-keras is present, then handle the link form.

    If ``tf_keras`` cannot be found, an install is attempted; when that fails
    an error is shown and the function returns before drawing the input form.
    """
    st.title('YouTube Transcript Generator')
    st.write("Get the full text of any YouTube video in seconds.")

    tf_keras_present = check_installation("tf_keras")
    if not tf_keras_present:
        st.write("tf-keras is not installed. Installing...")
        installed = install_tf_keras()
        if not installed:
            st.error("Failed to install tf-keras. Please install it manually and retry.")
            return
        st.success("tf-keras has been successfully installed.")

    # NOTE(review): `tf` is not referenced afterwards; presumably this import
    # is kept for its side effects / as an availability check — confirm.
    import tensorflow as tf

    video_link = st.text_input("Enter YouTube video link:")
    # The button is only drawn once a link has been entered.
    if video_link and st.button("Generate"):
        get_transcript(video_link)
# Start the app only when this file is executed as a script, so importing the
# module (e.g. for testing) does not launch the UI.
if __name__ == "__main__":
    main()