Video-Audio-Subbed

Sleeping

App Files Files Community

Video-Audio-Subbed / 01_🎥_Input_YouTube_Link.py

kamau1

Update 01_🎥_Input_YouTube_Link.py

5149aa0 verified 7 months ago

raw

history blame

11.7 kB

	import whisper
	from pytube import YouTube
	import requests
	import time
	import streamlit as st
	from streamlit_lottie import st_lottie
	import numpy as np
	import os
	from typing import Iterator
	from io import StringIO
	from utils import write_vtt, write_srt
	import ffmpeg
	from languages import LANGUAGES
	from flores200_codes import flores_codes

	# Configure the browser tab title and icon, and use the wide page layout.
	st.set_page_config(page_title="Sematube", page_icon="🎦", layout="wide")

	# Sema Translator
	# Base URL of the translation service; translate() appends the endpoint path to it.
	Public_Url = 'https://lewiskimaru-helloworld.hf.space' #endpoint

	# Define a function that we can use to load lottie files from a link.
	@st.cache()
	def load_lottieurl(url: str):
	    """Fetch a Lottie animation definition from ``url``.

	    Args:
	        url: Address of a Lottie JSON document.

	    Returns:
	        The decoded JSON payload, or ``None`` when the request fails, times
	        out, answers with a non-200 status, or the body is not valid JSON.
	    """
	    try:
	        # Bound the wait so an unresponsive host cannot hang the page render.
	        r = requests.get(url, timeout=10)
	    except requests.RequestException:
	        return None
	    if r.status_code != 200:
	        return None
	    try:
	        return r.json()
	    except ValueError:
	        # requests signals an undecodable body with a ValueError subclass.
	        return None


	# Page header: animation in the narrow left column, title and tagline on the right.
	col1, col2 = st.columns([1, 3])
	with col1:
	    lottie = load_lottieurl("https://assets8.lottiefiles.com/packages/lf20_jh9gfdye.json")
	    # load_lottieurl returns None when the download fails; skip the animation
	    # in that case instead of handing None to st_lottie.
	    if lottie is not None:
	        st_lottie(lottie)

	with col2:
	    st.write("""
	## Sematube
	##### Input a YouTube video link and get a video with subtitles.""")


	@st.cache(allow_output_mutation=True)
	def populate_metadata(link):
	    """Look up descriptive metadata for the YouTube video at ``link``.

	    Args:
	        link: URL handed to pytube's ``YouTube`` constructor.

	    Returns:
	        A tuple ``(author, title, description, thumbnail_url, length, views)``
	        read from the ``YouTube`` object's attributes.
	    """
	    video = YouTube(link)
	    return (
	        video.author,
	        video.title,
	        video.description,
	        video.thumbnail_url,
	        video.length,
	        video.views,
	    )


	@st.cache(allow_output_mutation=True)
	def download_video(link):
	    """Download the highest-resolution progressive MP4 stream of a video.

	    Args:
	        link: URL handed to pytube's ``YouTube`` constructor.

	    Returns:
	        Whatever the selected stream's ``download()`` returns (pytube
	        documents this as the path of the saved file).

	    Raises:
	        ValueError: If the video offers no progressive MP4 stream.
	    """
	    yt = YouTube(link)
	    stream = (
	        yt.streams
	        .filter(progressive=True, file_extension='mp4')
	        .order_by('resolution')
	        .desc()
	        .first()
	    )
	    # first() yields None for an empty query; fail with a clear message
	    # instead of an AttributeError on None.download().
	    if stream is None:
	        raise ValueError("No progressive mp4 stream is available for this video.")
	    return stream.download()


	def convert(seconds):
	    """Format a number of seconds as an ``HH:MM:SS`` string.

	    The value is interpreted through ``time.gmtime``, so only the
	    time-of-day part is shown: 86400 seconds renders as ``00:00:00``.
	    """
	    broken_down = time.gmtime(seconds)
	    return time.strftime("%H:%M:%S", broken_down)


	# Load the "base" Whisper model when the script is executed.
	# NOTE(review): main() binds its own local loaded_model from change_model(),
	# so this module-level model looks unused there — confirm before removing.
	loaded_model = whisper.load_model("base")
	# Value passed to change_model() as the "current" size. It is the string
	# "None", not the None object, so it never equals a real model size name.
	current_size = "None"


	@st.cache(allow_output_mutation=True)
	def change_model(current_size, size):
	    """Load the Whisper model named ``size``.

	    Args:
	        current_size: Name of the size considered to be loaded already.
	        size: Name of the size to load.

	    Returns:
	        The model object returned by ``whisper.load_model(size)``.

	    Raises:
	        Exception: If ``size`` equals ``current_size``.
	    """
	    if current_size == size:
	        raise Exception("Model size is the same as the current size.")
	    return whisper.load_model(size)


	@st.cache(allow_output_mutation=True)
	def inference(link, loaded_model, task):
	    """Download a video's audio track and run Whisper on it.

	    Args:
	        link: URL handed to pytube's ``YouTube`` constructor.
	        loaded_model: Object exposing ``transcribe(path, **options)``.
	        task: ``"Transcribe"``, ``"Translate"`` or ``"Translate with Whisper"``.

	    Returns:
	        A tuple ``(text, vtt, srt, language)`` built from the ``"text"``,
	        ``"segments"`` and ``"language"`` entries of the transcription result.

	    Raises:
	        ValueError: If ``task`` is not one of the supported names.
	    """
	    # main() offers the task under the UI label "Translate with Whisper";
	    # accept that label as well as the short "Translate" so the call from
	    # the UI no longer falls through to "Task not supported".
	    whisper_tasks = {
	        "Transcribe": "transcribe",
	        "Translate": "translate",
	        "Translate with Whisper": "translate",
	    }
	    if task not in whisper_tasks:
	        # Reject unknown tasks before spending time on the download.
	        raise ValueError("Task not supported")

	    yt = YouTube(link)
	    # The fixed file name matters: main() later passes "audio.mp3" to
	    # generate_subtitled_video().
	    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp3")

	    results = loaded_model.transcribe(path, task=whisper_tasks[task], best_of=5)
	    vtt = getSubs(results["segments"], "vtt", 80)
	    srt = getSubs(results["segments"], "srt", 80)
	    return results["text"], vtt, srt, results["language"]


	@st.cache(allow_output_mutation=True)
	def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
	    """Render transcription segments as subtitle text.

	    Args:
	        segments: Segment dicts forwarded unchanged to the writer.
	        format: ``'vtt'`` or ``'srt'``.
	        maxLineWidth: Forwarded to the writer as ``maxLineWidth``.

	    Returns:
	        Everything the chosen writer wrote, as one string.

	    Raises:
	        Exception: If ``format`` is neither ``'vtt'`` nor ``'srt'``.
	    """
	    buffer = StringIO()

	    if format == 'vtt':
	        writer = write_vtt
	    elif format == 'srt':
	        writer = write_srt
	    else:
	        raise Exception("Unknown format " + format)

	    writer(segments, file=buffer, maxLineWidth=maxLineWidth)
	    # getvalue() returns the whole buffer regardless of the stream position.
	    return buffer.getvalue()


	def get_language_code(language):
	    """Return the ``LANGUAGES`` entry stored under ``language``.

	    Raises:
	        ValueError: If ``language`` is not a key of ``LANGUAGES``.
	    """
	    if language not in LANGUAGES:
	        raise ValueError("Language not supported")
	    return LANGUAGES[language]

	def translate(userinput, target_lang, source_lang=None):
	    """Translate ``userinput`` through the Sema translation service.

	    Args:
	        userinput: Text to translate.
	        target_lang: Target language identifier sent to the service.
	        source_lang: Source language identifier. When falsy, the
	            ``translate_detect`` endpoint is used and the source language
	            is taken from the response instead.

	    Returns:
	        A tuple ``(source_language, translated_text)``.

	    Raises:
	        requests.RequestException: On network failure, timeout, or an HTTP
	            error status (``requests.HTTPError``).
	        KeyError: If the response JSON lacks an expected field.
	    """
	    payload = {"userinput": userinput}
	    if source_lang:
	        endpoint = "translate_enter"
	        payload["source_lang"] = source_lang
	    else:
	        endpoint = "translate_detect"
	    payload["target_lang"] = target_lang

	    # (connect, read) timeouts so a stalled service cannot block the app forever.
	    # NOTE(review): the read budget is a guess; tune it to real translation latency.
	    response = requests.post(
	        f"{Public_Url}/{endpoint}/", json=payload, timeout=(10, 120)
	    )
	    # Surface HTTP errors directly instead of failing later on a missing key.
	    response.raise_for_status()
	    result = response.json()

	    source_language = source_lang if source_lang else result['source_language']
	    return source_language, result['translated_text']

	def generate_subtitled_video(video, audio, transcript):
	    """Burn subtitles into a video and return the rendered result.

	    ffmpeg applies the ``subtitles`` filter to ``video`` using the
	    ``transcript`` file, joins that with ``audio``, and writes
	    ``final.mp4`` in the working directory (overwriting any previous file).

	    Args:
	        video: Input passed to ``ffmpeg.input`` for the picture.
	        audio: Input passed to ``ffmpeg.input`` for the sound.
	        transcript: Subtitle file argument for the ``subtitles`` filter.

	    Returns:
	        A binary file-like object, positioned at the start, holding the
	        contents of ``final.mp4``.
	    """
	    # Local import: the file only imports StringIO from io at module level.
	    import io

	    video_file = ffmpeg.input(video)
	    audio_file = ffmpeg.input(audio)
	    (
	        ffmpeg
	        .concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1)
	        .output("final.mp4")
	        .run(quiet=True, overwrite_output=True)
	    )
	    # Read the file inside a context manager and hand back an in-memory
	    # buffer, so no OS file handle is left open after every run.
	    with open("final.mp4", "rb") as rendered:
	        return io.BytesIO(rendered.read())


	def _save_transcript(filename, text):
	    """Write ``text`` to ``filename`` as UTF-8 and return the file's raw bytes."""
	    with open(filename, "w+", encoding='utf8') as f:
	        f.write(text)
	    with open(os.path.join(os.getcwd(), filename), "rb") as f:
	        return f.read()


	def _run_whisper_task(link, loaded_model, task, video_button_label):
	    """Run one Whisper task on ``link`` and render the results.

	    Shows the source video, offers the transcript as .txt/.vtt/.srt
	    downloads, then renders and offers a subtitled copy of the video.

	    Args:
	        link: YouTube URL typed by the user.
	        loaded_model: Whisper model passed on to ``inference``.
	        task: Task name understood by ``inference``.
	        video_button_label: Label of the subtitled-video download button.
	    """
	    if not link.strip():
	        # Nothing to process; avoid handing an empty URL to pytube.
	        st.warning("Please enter a YouTube link.")
	        return

	    _, title, *_ = populate_metadata(link)
	    text, vtt, srt, lang = inference(link, loaded_model, task)
	    video = download_video(link)
	    # Kept for its check: raises ValueError when the detected language is
	    # not a key of LANGUAGES.
	    get_language_code(lang)

	    col3, col4 = st.columns(2)
	    col5, col6, col7, col8 = st.columns(4)
	    col9, col10 = st.columns(2)
	    with col3:
	        st.video(video)

	    # Write the results to files and read them back for the download buttons.
	    # transcript.srt must exist on disk: ffmpeg reads it further down.
	    datatxt = _save_transcript("transcript.txt", text)
	    datavtt = _save_transcript("transcript.vtt", vtt)
	    datasrt = _save_transcript("transcript.srt", srt)

	    with col5:
	        st.download_button(label="Download Transcript (.txt)",
	                           data=datatxt,
	                           file_name="transcript.txt")
	    with col6:
	        st.download_button(label="Download Transcript (.vtt)",
	                           data=datavtt,
	                           file_name="transcript.vtt")
	    with col7:
	        st.download_button(label="Download Transcript (.srt)",
	                           data=datasrt,
	                           file_name="transcript.srt")
	    with col9:
	        st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
	    with col10:
	        st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")

	    with col4:
	        with st.spinner("Generating Subtitled Video "):
	            video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
	        st.video(video_with_subs)
	        st.balloons()
	    with col8:
	        st.download_button(label=video_button_label,
	                           data=video_with_subs,
	                           file_name=f"{title} with subtitles.mp4")


	def main():
	    """Render the page: model picker, link input, task picker and results."""
	    size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
	    loaded_model = change_model(current_size, size)
	    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
	             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
	    link = st.text_input("YouTube Link (The longer the video, the longer the processing time)")
	    task = st.selectbox("Select Task", ["Transcribe", "Translate with Whisper", "Translate with Sema"], index=0)
	    if task == "Transcribe":
	        if st.button("Transcribe"):
	            _run_whisper_task(link, loaded_model, "Transcribe", "Download Subtitled Video")
	    elif task == "Translate with Whisper":
	        if st.button("Translate to English"):
	            # Pass "Translate", the task name inference() checks for, rather
	            # than the UI label.
	            _run_whisper_task(link, loaded_model, "Translate", "Download Subtitled Video ")
	    elif task == "Translate with Sema":
	        default_language = "French"
	        language_names = list(flores_codes.keys())
	        target = st.selectbox("Select Language", language_names, index=language_names.index(default_language))
	        # TODO: this branch only resolves the target code; no translation is
	        # performed with it yet.
	        target_code = flores_codes[target]

	    else:
	        st.error("Please select a task.")


	# Script entry point: build the page, then append the footer.
	# NOTE(review): the source's indentation was lost, so whether the footer lines
	# sit inside this guard or at module level is an assumption — confirm.
	if __name__ == "__main__":
	    main()
	    # Heading with no visible text, emitted before the footer line.
	    st.markdown("###### ")
	    st.markdown("###### Powered by [sema © 2024](https://www.sema.wiki)")