Spaces:

FriendlyUser
/

YoutubeDownloaderSubber

Runtime error

David Li

fix: try again

d903faf almost 2 years ago

4.66 kB

	import whisper
	import gradio as gr
	import ffmpeg
	import youtube_dl
	import os

	# Numeric format ids this app classifies as livestream formats.
	# NOTE(review): presumably YouTube itags of live (HLS) streams -- confirm.
	youtube_livestream_codes = [*range(91, 97), 300, 301]

	# Numeric format ids this app accepts as regular (non-livestream) mp4
	# downloads; parse_metadata intersects them with the ids of the formats
	# whose "ext" is "mp4".
	youtube_mp4_codes = [298, 18, 22, 140, 133, 134]

	import sys

	def get_video_metadata(video_url: str = "https://www.youtube.com/watch?v=21X5lGlDOfg&ab_channel=NASA")-> dict:
	    """
	    Fetch the youtube-dl info dict for a video without downloading it.

	    Prints the video's title and uploader id, then returns whatever
	    ``YoutubeDL.extract_info(video_url, download=False)`` produced.
	    """
	    ydl_options = {'outtmpl': '%(id)s.%(ext)s'}
	    with youtube_dl.YoutubeDL(ydl_options) as downloader:
	        metadata = downloader.extract_info(video_url, download=False)
	        title = metadata.get('title', None)
	        uploader = metadata.get('uploader_id', None)
	        print(f"[youtube] {title}: {uploader}")
	        return metadata


	def parse_metadata(metadata) -> dict:
	    """
	    Choose which format id to download from a youtube-dl style info dict.

	    After a video is done recording, it will have both the livestream
	    format and the mp4 format; a regular mp4 format is preferred whenever
	    one of the ids in ``youtube_mp4_codes`` is available.

	    Args:
	        metadata: mapping with a "formats" list. Each format is a mapping
	            that may carry "ext" and "format_id" entries.

	    Returns:
	        dict with two keys:
	        "selected_id" -- the chosen numeric format id, or None when none
	        of the known mp4 / livestream ids is present;
	        "is_livestream" -- False when an mp4 id was chosen, True otherwise.
	    """
	    formats = metadata.get("formats", [])

	    def numeric_ids(entries):
	        # Collect format ids as ints; ids that are not plain numbers cannot
	        # match any known code, so they are skipped instead of raising.
	        ids = set()
	        for entry in entries:
	            try:
	                ids.add(int(entry.get("format_id", 0)))
	            except (TypeError, ValueError):
	                continue
	        return ids

	    # Use a video format id over a livestream id if available.
	    mp4_ids = numeric_ids(f for f in formats if f.get("ext", "") == "mp4")
	    video_entries = sorted(mp4_ids.intersection(youtube_mp4_codes))
	    if video_entries:
	        return {
	            "selected_id": video_entries[0],
	            "is_livestream": False,
	        }

	    # No regular mp4 format: fall back to a known livestream id so that
	    # "selected_id" is always defined (it used to be unbound on this path).
	    live_entries = sorted(numeric_ids(formats).intersection(youtube_livestream_codes))
	    return {
	        "selected_id": live_entries[0] if live_entries else None,
	        "is_livestream": True,
	    }

	def get_video(url: str, config: dict):
	    """
	    Save the beginning of the media at ``url`` to a local file using ffmpeg.

	    Args:
	        url: media location handed to ffmpeg as its input.
	        config: options mapping. "filename" is the output path (default
	            "livestream01.mp4"); "end" is passed to ffmpeg as the input
	            option ``t`` to limit the duration (default "00:15:00").
	    """
	    # could delay start time by a few seconds to just sync up and capture the full video length
	    # but would need to time how long it takes to fetch the video using youtube-dl and other adjustments and start a bit before
	    filename = config.get("filename", "livestream01.mp4")
	    end = config.get("end", "00:15:00")
	    (
	        ffmpeg
	        .input(url, t=end)
	        .output(filename)
	        # Callers reuse the same output name, so replace an existing file
	        # instead of letting ffmpeg stop at its overwrite prompt.
	        .run(overwrite_output=True)
	    )

	def get_all_files(url: str, end: str = "00:15:00"):
	    """
	    Download the selected format of the video at ``url`` to "temp.mp4".

	    Looks up the video's metadata, lets parse_metadata pick a format id,
	    and passes that format's direct "url" to get_video.

	    Args:
	        url: page url of the video.
	        end: duration limit forwarded to get_video as config["end"].

	    Returns:
	        The name of the written file ("temp.mp4").

	    Raises:
	        IndexError: when the metadata lists no format with the selected id.
	    """
	    metadata = get_video_metadata(url)
	    selected_id = parse_metadata(metadata).get("selected_id", 0)
	    # format_id values are compared as strings, so stringify the chosen id.
	    wanted_id = str(selected_id)
	    selected_format = next(
	        (f for f in metadata.get("formats", []) if f.get("format_id", "") == wanted_id),
	        None,
	    )
	    if selected_format is None:
	        # Same exception type the bare [0] lookup raised, but with context.
	        raise IndexError(f"no format with id {wanted_id!r} available for {url}")
	    filename = "temp.mp4"
	    get_video(selected_format.get("url", ""), {"filename": filename, "end": end})
	    return filename

	def _second_to_timecode(seconds) -> str:
	    """Format a number of seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
	    total_ms = max(0, round(float(seconds or 0) * 1000))
	    hours, remainder = divmod(total_ms, 3_600_000)
	    minutes, remainder = divmod(remainder, 60_000)
	    secs, millis = divmod(remainder, 1000)
	    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


	def get_text_from_mp3_whisper(inputType:str, mp3_file: str, url_path: str, taskName: str, srcLanguage: str) -> tuple:
	    """
	    Transcribe (or translate) media with Whisper and burn in the subtitles.

	    Args:
	        inputType: "url" to download the media from ``url_path`` first;
	            any other value uses ``mp3_file`` directly.
	        mp3_file: path of a local media file (used when inputType != "url").
	        url_path: video page url (used when inputType == "url").
	        taskName: Whisper task, passed as ``task`` to ``model.transcribe``.
	        srcLanguage: source language, passed as ``language`` to
	            ``model.transcribe``.

	    Returns:
	        A 3-tuple: the list of Whisper segments, the subtitles as SRT text,
	        and the path of the subtitled video ("subtitled.mp4").

	    Side effects:
	        Writes "transcript.srt" and "subtitled.mp4" in the working
	        directory (plus "temp.mp4" in url mode, via get_all_files).
	    """
	    model = whisper.load_model("medium")
	    transcribe_options = dict(task=taskName, language=srcLanguage)

	    # Resolve the media path once so both branches define it; the file
	    # branch used to leave the path variable unbound.
	    input_file = get_all_files(url_path) if inputType == "url" else mp3_file
	    result = model.transcribe(input_file, **transcribe_options)
	    segments = result.get("segments") or []

	    # Build the SRT document. Cue numbers start at 1 as the SubRip format
	    # expects. Timestamps use the local helper because the previously
	    # called second_to_timecode is not defined or imported in this file.
	    lines = []
	    for count, segment in enumerate(segments, start=1):
	        start = segment.get("start")
	        end = segment.get("end")
	        lines.append(f"{count}")
	        lines.append(f"{_second_to_timecode(start)} --> {_second_to_timecode(end)}")
	        lines.append(segment.get("text", "").strip())
	        lines.append('')
	    words = '\n'.join(lines)

	    # The subtitles filter reads the cues from disk, so persist them first.
	    with open("transcript.srt", "w", encoding="utf-8") as srt_file:
	        srt_file.write(words)

	    # Equivalent of:
	    #   ffmpeg -y -i <input_file> -vf subtitles=transcript.srt -vcodec libx264 subtitled.mp4
	    # NOTE(review): burning subtitles needs a video stream; behaviour for
	    # audio-only uploads has not been verified.
	    (
	        ffmpeg
	        .input(input_file)
	        .output('subtitled.mp4', vf='subtitles=transcript.srt', vcodec='libx264')
	        .run(overwrite_output=True)
	    )

	    return segments, words, "subtitled.mp4"

	# Build and start the web UI. The five inputs map positionally onto the
	# parameters of get_text_from_mp3_whisper: inputType, mp3_file, url_path,
	# taskName and srcLanguage; the three outputs receive its returned
	# segments, SRT text and subtitled video path.
	gr.Interface(
	    title = 'Download Video From url and extract text from audio',
	    fn=get_text_from_mp3_whisper,
	    inputs=[
	        gr.Dropdown(["url", "file"]),
	        # Top-level components, consistent with gr.Dropdown; the
	        # gr.inputs namespace is deprecated.
	        gr.Audio(type="filepath"),
	        gr.Textbox(),
	        gr.Dropdown(["translate", "transcribe"]),
	        gr.Dropdown(["Japanese", "English"])
	    ],
	    outputs=[
	        # "video" is the component shortcut for a video file path output.
	        "json", "text", "video"
	    ],
	    # NOTE(review): live=True re-runs the function on every input change,
	    # which repeats the download and transcription -- confirm intended.
	    live=True).launch()