"""Gradio demo for "The Sound of Water": estimate physical properties of a
container from the audio of liquid being poured into it.

The app takes an uploaded video (YouTube-clip input is stubbed out), runs the
audio track through a pretrained model, and displays the estimated pitch, the
inferred physical properties, and a TSNE of the learned features.
"""
import os
import sys

# Make the project root importable (this script lives one level below it).
sys.path.append("../")

import gradio as gr
import torch
import numpy as np
import matplotlib.pyplot as plt
import decord
import PIL, PIL.Image
import librosa
import pandas as pd
from IPython.display import Markdown, display

from util import *

plt.rcParams["font.family"] = "serif"

# Shared CSS, prepended to both HTML snippets and passed to gr.Blocks below.
# NOTE: this was previously bound to `css` while gr.Blocks referenced the
# undefined name `custom_css` — a NameError at import time; now consistent.
custom_css = """ """

header = custom_css + """
logo
The Sound of Water
Inferring Physical Properties from Pouring Liquids
1University of Oxford, 2IIIT Hyderabad, 3University of Amsterdam
"""

footer = custom_css + """
Please give us a 🌟 on Github if you like our work! Tips to get better results:

  1. Make sure there is not too much noise such that the pouring is audible.
  2. Note that the video is not used during the inference. Only the audio must be clear enough.
"""


def configure_input():
    """Build and return the input widgets (video upload + YouTube start/end links)."""
    gr.Markdown(
        "#### Either upload a video file or provide a YouTube link with start and end times."
    )
    video_input = gr.Video(label="Upload Video", height=480)
    youtube_link_start = gr.Textbox(label="YouTube Link (Start time)")
    youtube_link_end = gr.Textbox(label="YouTube Link (End time)")
    return [video_input, youtube_link_start, youtube_link_end]


def process_input(video, youtube_link_start, youtube_link_end):
    """Run inference on an uploaded video (YouTube input is not implemented).

    Returns a 4-tuple matching the order of ``configure_outputs()``:
    (pitch image, properties dataframe, note markdown, TSNE image).
    """
    if video is not None:
        print(video)

        # Load the model on demand; only the audio stream is used for inference.
        model = load_model()

        # gr.Video hands us a file path on disk.
        video_path = video

        frame = load_frame(video_path)
        S = load_spectrogram(video_path)
        audio = load_audio_tensor(video_path)

        z_audio, y_audio = get_model_output(audio, model)
        image, df_show, tsne_image = show_output(frame, S, y_audio, z_audio)
        return image, df_show, gr.Markdown(note), tsne_image

    # Truthiness (not `is not None`): empty Textboxes yield "", which previously
    # entered this branch and crashed on float("").
    if youtube_link_start and youtube_link_end:
        # Expected link format: https://youtu.be/6-HVn8Jzzuk?t=10
        video_id = youtube_link_start.split("/")[-1].split("?")[0]
        if video_id != youtube_link_end.split("/")[-1].split("?")[0]:
            # Raise instead of assert: asserts are stripped under `python -O`.
            raise ValueError("Video IDs do not match")
        start_time = float(youtube_link_start.split("t=")[-1])
        end_time = float(youtube_link_end.split("t=")[-1])
        raise NotImplementedError("YouTube link processing is not implemented yet")

    # No input: return one value per configured output. (The original returned a
    # single bare string, which Gradio cannot map onto four output components.)
    return None, None, gr.Markdown("No input provided."), None


def greet(name, is_morning, temperature):
    """Toy example (unused by the demo): greeting string + Fahrenheit→Celsius."""
    salutation = "Good morning" if is_morning else "Good evening"
    greeting = f"{salutation} {name}. It is {temperature} degrees today"
    celsius = (temperature - 32) * 5 / 9
    return greeting, round(celsius, 2)


# Raw string: avoids invalid escape sequences (e.g. "\l", "\ ") in the LaTeX
# below while rendering exactly the same text as before.
note = r"""
**Note**: Radius (as well as height) estimation depends on accurate wavelength
estimation towards the end. Thus, it may not be accurate if the wavelength is
not estimated correctly at the end.

$$
H = l(0) = \frac{\lambda(0) - \lambda(T)}{4} \ \ \text{and} \ \ R = \frac{\lambda(T)}{4\beta}
$$
"""


def configure_outputs():
    """Build and return output widgets in the order ``process_input`` returns them."""
    image_wide = gr.Image(label="Estimated pitch")
    dataframe = gr.DataFrame(label="Estimated physical properties")
    image_tsne = gr.Image(label="TSNE of features", width=300)
    markdown = gr.Markdown(label="Note")
    # Order: ["image", "dataframe", "markdown", "image"]
    return [image_wide, dataframe, markdown, image_tsne]


# Pre-defined examples shipped with the repository.
examples = [
    ["./media_assets/example_video.mp4", None, None],
    ["./media_assets/ayNzH0uygFw_9.0_21.0.mp4", None, None],
    ["./media_assets/biDn0Gi6V8U_7.0_15.0.mp4", None, None],
    ["./media_assets/goWgiQQMugA_2.5_9.0.mp4", None, None],
    ["./media_assets/K87g4RvO-9k_254.0_259.0.mp4", None, None],
]

# Define the Gradio interface.
with gr.Blocks(
    css=custom_css,
    theme=gr.themes.Default(),
) as demo:
    # Header banner.
    gr.HTML(header)
    gr.Interface(
        fn=process_input,
        inputs=configure_input(),
        outputs=configure_outputs(),
        examples=examples,
    )
    # Footer with usage tips.
    gr.HTML(footer)

if __name__ == "__main__":
    # Guarded so importing this module does not start a web server.
    demo.launch(allowed_paths=["."], share=True)