import whisper
from pytube import YouTube
import requests, io
from urllib.request import urlopen
from PIL import Image
import time
import streamlit as st
from streamlit_lottie import st_lottie
import numpy as np
import os
from typing import Iterator
from io import StringIO
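# write_vtt and write_srt are helpers from the local utils module that format Whisper segments as subtitle files.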
from utils import write_vtt, write_srt
st.set_page_config(page_title="YouTube Transcriber", page_icon="🗣", layout="wide")
# Load a Lottie animation from a URL; returns None if the request fails.
@st.cache(allow_output_mutation=True)
def load_lottieurl(url: str):
    r = requests.get(url)
    if r.status_code != 200:
        return None
    return r.json()
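# Page header: Lottie animation on the left, app title on the right.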
col1, col2 = st.columns([1, 3])
with col1:
    lottie = load_lottieurl("https://assets9.lottiefiles.com/private_files/lf30_bntlaz7t.json")
    st_lottie(lottie, speed=1, height=200, width=200)
with col2:
    st.write("""
    ## YouTube Transcriber
    ##### This is an app that transcribes YouTube videos into text.""")
#def load_model(size):
#    default_size = size
#    if size == default_size:
#        return None
#    else:
#        loaded_model = whisper.load_model(size)
#        return loaded_model
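# Pull basic video metadata with pytube; cached so reruns don't refetch it.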
@st.cache(allow_output_mutation=True)
def populate_metadata(link):
    yt = YouTube(link)
    author = yt.author
    title = yt.title
    description = yt.description
    thumbnail = yt.thumbnail_url
    length = yt.length
    views = yt.views
    return author, title, description, thumbnail, length, views
# Uncomment if you want to fetch the thumbnails as well.
#def fetch_thumbnail(thumbnail):
#    tnail = urlopen(thumbnail)
#    raw_data = tnail.read()
#    image = Image.open(io.BytesIO(raw_data))
#    st.image(image, use_column_width=True)
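# Format a duration given in seconds as HH:MM:SS for display.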
def convert(seconds):
    return time.strftime("%H:%M:%S", time.gmtime(seconds))
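# Load the default Whisper model once; the selectbox below lets the user pick a different size.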
loaded_model = whisper.load_model("base")
current_size = "None"
size = st.selectbox("Model Size", ["tiny", "base", "small", "medium", "large"], index=1)

def change_model(current_size, size):
    # Reload the Whisper model only when the selected size differs from the one in memory.
    global loaded_model
    if current_size != size:
        loaded_model = whisper.load_model(size)
        st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
                 f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
        return loaded_model
    else:
        return None
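# Download the audio-only stream of the video and transcribe it with Whisper; results are cached per link.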
@st.cache(allow_output_mutation=True)
def inference(link):
    yt = YouTube(link)
    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
    results = loaded_model.transcribe(path)
    vtt = getSubs(results["segments"], "vtt", 80)
    srt = getSubs(results["segments"], "srt", 80)
    return results["text"], vtt, srt
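# Serialize Whisper segments into WebVTT or SRT text using the utils helpers.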
def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
    segmentStream = StringIO()
    if format == 'vtt':
        write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    elif format == 'srt':
        write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    else:
        raise Exception("Unknown format " + format)
    segmentStream.seek(0)
    return segmentStream.read()
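# Main Streamlit flow: take a YouTube link, show the video metadata, and offer the transcript as .txt, .vtt, and .srt downloads.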
def main():
    change_model(current_size, size)
    link = st.text_input("YouTube Link")
    if st.button("Transcribe"):
        author, title, description, thumbnail, length, views = populate_metadata(link)
        results = inference(link)
        col3, col4 = st.columns(2)

        with col3:
            #fetch_thumbnail(thumbnail)
            st.video(link)
            st.markdown(f"**Channel**: {author}")
            st.markdown(f"**Title**: {title}")
            st.markdown(f"**Length**: {convert(length)}")
            st.markdown(f"**Views**: {views:,}")

        with col4:
            with st.expander("Video Description"):
                st.write(description)
            #st.markdown(f"**Video Description**: {description}")
            with st.expander("Video Transcript"):
                st.write(results[0])

            # Write the transcript to disk in each format, then read it back as bytes for the download buttons.
            with open("transcript.txt", "w+") as f:
                f.write(results[0])
            with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                datatxt = f.read()

            with open("transcript.vtt", "w+") as f:
                f.write(results[1])
            with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
                datavtt = f.read()

            with open("transcript.srt", "w+") as f:
                f.write(results[2])
            with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                datasrt = f.read()

            if st.download_button(label="Download Transcript (.txt)",
                                  data=datatxt,
                                  file_name=f"{title}.txt"):
                st.success("Downloaded Successfully!")
            elif st.download_button(label="Download Transcript (.vtt)",
                                    data=datavtt,
                                    file_name=f"{title}.vtt"):
                st.success("Downloaded Successfully!")
            elif st.download_button(label="Download Transcript (.srt)",
                                    data=datasrt,
                                    file_name=f"{title}.srt"):
                st.success("Downloaded Successfully!")
            else:
                st.success("You can download the transcript in .srt format and upload it to YouTube to create subtitles for your video.")
                st.info("Streamlit reruns the script after a download button is clicked. The transcription is cached, so you can download the transcript again without re-transcribing the video.")


if __name__ == "__main__":
    main()