Spaces:

bpiyush
/

SoundOfWater

Running

App Files Files Community

SoundOfWater / app.py

bpiyush

Improves the yt download reliability

cdac373 3 months ago

raw

history blame contribute delete

11 kB

	import os
	import sys
	sys.path.append("../")

	import gradio as gr
	import torch
	import numpy as np
	import matplotlib.pyplot as plt
	plt.rcParams["font.family"] = "serif"
	import decord
	import PIL, PIL.Image
	import librosa
	from IPython.display import Markdown, display
	import pandas as pd

	from util import *


	# Inline stylesheet that is prepended to the header and footer HTML snippets.
	# Fix: `font-size: 1.em` is not a valid CSS number (a dot must be followed by
	# digits), so browsers dropped the declaration; it is now `1em`.
	css = """
	<style>
	body {
	font-family: 'Arial', serif;
	margin: 0;
	padding: 0;
	color: black;
	}
	.header {
	display: flex;
	align-items: center;
	justify-content: center;
	margin-top: 5px;
	color: black;
	}
	.footer {
	display: flex;
	align-items: center;
	justify-content: center;
	margin-top: 5px;
	}
	.image {
	margin-right: 20px;
	}
	.content {
	text-align: center;
	color: black;
	}
	.title {
	font-size: 2.5em;
	font-weight: bold;
	margin-bottom: 10px;
	}
	.authors {
	color: #4a90e2;
	font-size: 1.05em;
	margin: 10px 0;
	}
	.affiliations {
	font-size: 1em;
	margin-bottom: 20px;
	}
	.buttons {
	display: flex;
	justify-content: center;
	gap: 10px;
	}
	.button {
	background-color: #545758;
	text-decoration: none;
	padding: 8px 16px;
	border-radius: 5px;
	font-size: 1.05em;
	}
	.button:hover {
	background-color: #333;
	}
	</style>
	"""


	# Page banner: logo, title, authors, affiliations and the row of link buttons.
	# Fix: each author anchor had an unterminated `style="..."` attribute, which
	# swallowed the `href` and left the author links dead. The closing quote is
	# now in place so `href` is parsed as its own attribute.
	header = css + """
	<div class="header">
	<!-- <div class="image">
	<img src="./media_assets/pouring-water-logo5.png" alt="logo" width="100">
	</div> -->
	<div class="content">
	<img src="https://bpiyush.github.io/pouring-water-website/assets/pouring-water-logo5.png" alt="logo" width="80" style="margin-bottom: -50px; margin-right: 30px;">
	<div class="title" style="font-size: 44px; margin-left: -30px;">The Sound of Water</div>
	<div style="font-size: 30px; margin-left: -30px;"><b>Inferring Physical Properties from Pouring Liquids</b></div>
	<div class="authors">
	<a style="color: #92eaff;" href="https://bpiyush.github.io/">Piyush Bagad</a><sup>1</sup>,
	<a style="color: #92eaff;" href="https://makarandtapaswi.github.io/">Makarand Tapaswi</a><sup>2</sup>,
	<a style="color: #92eaff;" href="https://www.ceessnoek.info/">Cees G. M. Snoek</a><sup>3</sup>,
	<a style="color: #92eaff;" href="https://www.robots.ox.ac.uk/~az/">Andrew Zisserman</a><sup>1</sup>,
	</div>
	<div class="affiliations">
	<sup>1</sup>University of Oxford, <sup>2</sup>IIIT Hyderabad, <sup>3</sup>University of Amsterdam
	</div>

	<div class="buttons">
	<a href="#" style="color: #92eaff;" class="button">arXiv</a>
	<a href="https://bpiyush.github.io/pouring-water-website/" style="color: #92eaff;" class="button">🌐 Project</a>
	<a href="https://github.com/bpiyush/SoundOfWater" style="color: #92eaff;" class="button"> <img src="https://bpiyush.github.io/pouring-water-website/assets/github-logo.png" alt="logo" style="height:16px; float: left;">  Code</a>
	<a href="https://huggingface.co/datasets/bpiyush/sound-of-water" style="color: #92eaff;" class="button">🤗 Data</a>
	<a href="https://huggingface.co/bpiyush/sound-of-water-models" style="color: #92eaff;" class="button">🤗 Models</a>
	<a href="#" style="color: #92eaff;" class="button">🎯 Demo</a>
	</div>
	</div>
	</div>
	"""

	# Footer HTML shown below the interface: a request to star the repository
	# followed by usage tips. It reuses the "header" CSS class with a left-aligned
	# override and is prefixed with the shared stylesheet in `css`.
	footer = css + """
	<div class="header" style="justify-content: left;">
	<div class="content" style="font-size: 16px;">
	Please give us a 🌟 on <a href='https://github.com/bpiyush/SoundOfWater'>Github</a> if you like our work!
	Tips to get better results:
	<br><br>
	<ol style="text-align: left; font-size: 14px; margin-left: 30px">
	<li>The first example may take up to 30-60s for processing since the model is also loaded.</li>
	<li>
	If you are providing a link, it may take a few seconds to download video from YouTube.
	Note that the entire video shall be used.
	If the sound of pouring is not clear, the results will be random.
	</li>
	<li>Although the model is somewhat robust to noise, make sure there is not too much noise such that the pouring is audible.</li>
	<li>Note that the video is not used during the inference. The displayed frame is only for reference.</li>
	</ol>
	</div>
	</div>
	"""
	from download_youtube import download_youtube_video_ytdlp

	def download_from_youtube(
	    video_id,
	    save_dir="/tmp/",
	    convert_to_mp4=False,
	):
	    """
	    Download a whole YouTube video with the ``yt-dlp`` command-line tool.

	    Args:
	        video_id (str): YouTube video ID.
	        save_dir (str): Directory to save the video.
	        convert_to_mp4 (bool): Whether to convert the video to mp4 format
	            (via ``ffmpeg``) when the downloaded container is not mp4.

	    Returns:
	        str: Path of the saved file, ``{save_dir}/{video_id}.{ext}``; the
	        extension is ``mp4`` when ``convert_to_mp4`` is set.

	    Raises:
	        IOError: If ``yt-dlp``/``ffmpeg`` cannot be started, exits with a
	            non-zero status, or no downloaded file can be found.
	    """
	    import glob
	    import subprocess

	    print("Downloading video from YouTube...")
	    print("Video ID:", video_id)

	    # Pass the arguments as a list (no shell) so that neither the video ID
	    # nor the directory can be interpreted as shell syntax.
	    command = [
	        "yt-dlp",
	        # os.path.join keeps the template valid even when save_dir has no
	        # trailing slash.
	        "-o", os.path.join(save_dir, "%(id)s.%(ext)s"),
	        "--cookies", "./youtube_cookies.txt",
	        "--verbose",
	        "--force-overwrites",
	        f"https://www.youtube.com/watch?v={video_id}",
	    ]
	    try:
	        # check=True turns a failed download into an exception; a bare
	        # call() only returns the exit status and never raises for it.
	        subprocess.run(command, check=True)
	    except (OSError, subprocess.CalledProcessError) as e:
	        print(e)
	        raise IOError("Failed to download YouTube video.") from e

	    # The extension is chosen by yt-dlp, so look the file up by its stem.
	    pattern = glob.escape(os.path.join(save_dir, video_id)) + ".*"
	    candidates = sorted(glob.glob(pattern))
	    if not candidates:
	        raise IOError(f"No downloaded file found for video {video_id}.")
	    saved_filepath = candidates[0]
	    print("Saved file:", saved_filepath)

	    if not convert_to_mp4:
	        return saved_filepath

	    # Swap only the final extension; a plain str.replace would also rewrite
	    # any earlier occurrence of the extension text inside the path.
	    stem, ext = os.path.splitext(saved_filepath)
	    if ext == ".mp4":
	        return saved_filepath

	    to_save = stem + ".mp4"
	    try:
	        subprocess.run(["ffmpeg", "-y", "-i", saved_filepath, to_save], check=True)
	    except (OSError, subprocess.CalledProcessError) as e:
	        print(e)
	        raise IOError("Failed to convert the downloaded video to mp4.") from e
	    return to_save


	def configure_input():
	    """
	    Create the input widgets of the demo.

	    Adds an instruction line, a video upload component, a YouTube link
	    textbox and a note about YouTube download failures.

	    Returns:
	        list: ``[video_input, youtube_link]`` in the order expected by
	        ``process_input``.
	    """
	    gr.Markdown(
	        "#### Either upload a video file or provide a YouTube link to a video. Note that the entire video shall be used.",
	    )
	    video_input = gr.Video(label="Upload Video", height=520)
	    youtube_link = gr.Textbox(label="YouTube Link", value=None)
	    # The fragments are joined by implicit concatenation, so each one must
	    # carry its own separating space (the first sentence break had none).
	    gr.Markdown(
	        "Note: Often, YouTube download can fail because the video may not be public or YouTube asks for Sign in. "
	        "We recommend downloading the video in other ways on your machine and uploading it here."
	        " Alternatively, you can clone the repository and run the demo locally which can allow for Sign-in.",
	    )
	    return [video_input, youtube_link]


	# Backend handed to load_frame; "decord" is the alternative that was tried.
	# video_backend = "decord"
	video_backend = "torchvision"


	def get_predictions(video_path):
	    """
	    Run the full pipeline on one video file.

	    Loads the model, a reference frame, the spectrogram and the audio
	    tensor for ``video_path``, runs the model on the audio and hands
	    everything to ``show_output``.

	    Returns:
	        tuple: ``(image, df_show, tsne_image)`` as produced by
	        ``show_output``.
	    """
	    net = load_model()
	    reference_frame = load_frame(video_path, video_backend=video_backend)
	    spectrogram = load_spectrogram(video_path)
	    waveform = load_audio_tensor(video_path)

	    z_audio, y_audio = get_model_output(waveform, net)

	    pitch_image, properties_df, tsne_plot = show_output(
	        reference_frame, spectrogram, y_audio, z_audio,
	    )
	    return pitch_image, properties_df, tsne_plot


	def get_video_id_from_url(url):
	    """
	    Extract the video ID from a YouTube URL.

	    Supported forms are ``...watch?v=<id>`` (the ``v`` parameter may be
	    anywhere in the query string), ``youtu.be/<id>`` and ``shorts/<id>``.

	    Args:
	        url (str): YouTube URL.

	    Returns:
	        str: The video ID.

	    Raises:
	        ValueError: If ``url`` is not a string or no video ID is found.
	    """
	    import re

	    print("Video URL:", url)
	    if not isinstance(url, str):
	        raise ValueError("Invalid YouTube URL")

	    # Tried in order. `v=` is anchored to `?`/`&` so that other query keys
	    # ending in "v" (e.g. `?rev=1`) are not mistaken for the video parameter.
	    id_patterns = (
	        r"[?&]v=([a-zA-Z0-9_-]+)",
	        r"youtu\.be/([a-zA-Z0-9_-]+)",
	        r"shorts/([a-zA-Z0-9_-]+)",
	    )
	    for pattern in id_patterns:
	        found = re.search(pattern, url)
	        if found:
	            video_id = found.group(1)
	            print("Video ID:", video_id)
	            return video_id

	    raise ValueError("Invalid YouTube URL")


	# Markdown/LaTeX note displayed next to the predictions.
	# Every backslash is written as `\\` so the literal contains no invalid escape
	# sequences (`\l`, `\ `), which newer Python versions warn about. The
	# resulting runtime string is unchanged.
	note = """
	Note: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
	Thus, it may not be accurate if the wavelength is not estimated correctly at the end.

	$$
	H = l(0) = \\frac{\\lambda(0) - \\lambda(T)}{4} \\ \\ \\text{and} \\ \\ R = \\frac{\\lambda(T)}{4\\beta}
	$$
	"""


	# Example usage in a Gradio interface
	def process_input(video, youtube_link):
	    """
	    Gradio callback: run the model on an uploaded video or a YouTube link.

	    Exactly one of the two inputs must be provided.

	    Args:
	        video: Path of the uploaded video file, or ``None``.
	        youtube_link (str | None): YouTube URL, or ``None``/empty.

	    Returns:
	        tuple: ``(image, df_show, markdown, tsne_image)`` matching the
	        components returned by ``configure_outputs``.

	    Raises:
	        ValueError: If the link is not a string, if both inputs are given,
	            or if neither is given.
	    """
	    if youtube_link is None:
	        link = ""
	    elif isinstance(youtube_link, str):
	        # A textbox holding only whitespace counts as "no link".
	        link = youtube_link.strip()
	    else:
	        raise ValueError(f"Invalid type of link {youtube_link}.")

	    provided_video = video is not None
	    provided_link = len(link) > 0

	    if provided_video and provided_link:
	        raise ValueError("Please provide either a video file or a YouTube link, not both.")
	    # Raised explicitly rather than asserted: assertions are stripped under
	    # `python -O`, and the other validation errors here are ValueErrors too.
	    if not provided_video and not provided_link:
	        raise ValueError("YouTube Link cannot be empty if no video is provided.")

	    if provided_video:
	        print(video)
	        # The input is a video file path
	        video_path = video
	        note_text = note
	    else:
	        video_id = get_video_id_from_url(link)
	        print("Video ID:", video_id)
	        video_path = download_youtube_video_ytdlp(
	            video_id, save_dir="/tmp/",
	        )
	        # Add youtube link to the note
	        note_text = f"{note}\n\nYou can watch the original video here: "\
	            f"[YouTube Link](https://www.youtube.com/watch?v={video_id})"

	    # Get predictions
	    image, df_show, tsne_image = get_predictions(video_path)

	    return image, df_show, gr.Markdown(note_text), tsne_image


	def configure_outputs():
	    """
	    Create the output widgets of the demo.

	    Returns:
	        list: ``[pitch image, properties table, note markdown, t-SNE image]``,
	        the order in which ``process_input`` returns its values.
	    """
	    # The components are created in the same order as before; only the order
	    # of the returned list differs from the creation order.
	    pitch_plot = gr.Image(label="Estimated pitch")
	    properties_table = gr.DataFrame(label="Estimated physical properties")
	    tsne_plot = gr.Image(label="TSNE of features", width=300)
	    note_box = gr.Markdown(label="Note")

	    outputs = [pitch_plot, properties_table, note_box, tsne_plot]
	    return outputs


	# Configure pre-defined examples
	# Bundled example clips. Each example row is [video path, YouTube link];
	# the link slot stays empty for local files.
	_EXAMPLE_VIDEOS = (
	    "./media_assets/example_video.mp4",
	    "./media_assets/ayNzH0uygFw_9.0_21.0.mp4",
	    "./media_assets/biDn0Gi6V8U_7.0_15.0.mp4",
	    "./media_assets/goWgiQQMugA_2.5_9.0.mp4",
	    "./media_assets/K87g4RvO-9k_254.0_259.0.mp4",
	    # Shows that it works with background noise
	    "./media_assets/l74zJHCZ9uA.webm",
	    # Shows that it works with a slightly differently shaped container
	    "./media_assets/LpRPV0hIymU.webm",
	    "./media_assets/k-HnMsS36J8.webm",
	)
	# Link-only examples that are currently disabled:
	# [None, "https://www.youtube.com/shorts/6eUQTdkTooo"],
	# [None, "https://www.youtube.com/shorts/VxZT15cG6tw"],
	# [None, "https://www.youtube.com/shorts/GSXQnNhliDY"],
	examples = [[video_file, None] for video_file in _EXAMPLE_VIDEOS]


	# Define Gradio interface
	# NOTE(review): `custom_css` is not defined in this file; presumably it comes
	# from `from util import *` — confirm.
	with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:

	    # Banner on top
	    gr.HTML(header)

	    # Main interface: inputs -> process_input -> outputs, with bundled examples
	    gr.Interface(
	        fn=process_input,
	        inputs=configure_input(),
	        outputs=configure_outputs(),
	        examples=examples,
	    )

	    # Tips at the bottom
	    gr.HTML(footer)


	# Start the app; files under the current directory may be served.
	demo.launch(share=True, allowed_paths=["."])