"""Gradio demo for "The Sound of Water": estimate physical properties of a
container from the audio of liquid being poured into it.

The app takes an uploaded video (YouTube-clip input is stubbed out), runs the
audio track through a pretrained model, and displays the estimated pitch, the
inferred physical properties, and a TSNE of the learned features.
"""
import os
import sys

# Make the project root importable (this script lives one level below it).
sys.path.append("../")

import gradio as gr
import torch
import numpy as np
import matplotlib.pyplot as plt
import decord
import PIL, PIL.Image
import librosa
import pandas as pd
from IPython.display import Markdown, display

from util import *

plt.rcParams["font.family"] = "serif"

# Shared CSS, prepended to both HTML snippets and passed to gr.Blocks below.
# NOTE: this was previously bound to `css` while gr.Blocks referenced the
# undefined name `custom_css` — a NameError at import time; now consistent.
custom_css = """ """

header = custom_css + """
logo
The Sound of Water
Inferring Physical Properties from Pouring Liquids
1University of Oxford, 2IIIT Hyderabad, 3University of Amsterdam
"""

footer = custom_css + """
Please give us a 🌟 on Github if you like our work! Tips to get better results:

  1. Make sure there is not too much noise such that the pouring is audible.
  2. Note that the video is not used during the inference. Only the audio must be clear enough.
"""


def configure_input():
    """Build and return the input widgets (video upload + YouTube start/end links)."""
    gr.Markdown(
        "#### Either upload a video file or provide a YouTube link with start and end times."
    )
    video_input = gr.Video(label="Upload Video", height=480)
    youtube_link_start = gr.Textbox(label="YouTube Link (Start time)")
    youtube_link_end = gr.Textbox(label="YouTube Link (End time)")
    return [video_input, youtube_link_start, youtube_link_end]


def process_input(video, youtube_link_start, youtube_link_end):
    """Run inference on an uploaded video (YouTube input is not implemented).

    Returns a 4-tuple matching the order of ``configure_outputs()``:
    (pitch image, properties dataframe, note markdown, TSNE image).
    """
    if video is not None:
        print(video)

        # Load the model on demand; only the audio stream is used for inference.
        model = load_model()

        # gr.Video hands us a file path on disk.
        video_path = video

        frame = load_frame(video_path)
        S = load_spectrogram(video_path)
        audio = load_audio_tensor(video_path)

        z_audio, y_audio = get_model_output(audio, model)
        image, df_show, tsne_image = show_output(frame, S, y_audio, z_audio)
        return image, df_show, gr.Markdown(note), tsne_image

    # Truthiness (not `is not None`): empty Textboxes yield "", which previously
    # entered this branch and crashed on float("").
    if youtube_link_start and youtube_link_end:
        # Expected link format: https://youtu.be/6-HVn8Jzzuk?t=10
        video_id = youtube_link_start.split("/")[-1].split("?")[0]
        if video_id != youtube_link_end.split("/")[-1].split("?")[0]:
            # Raise instead of assert: asserts are stripped under `python -O`.
            raise ValueError("Video IDs do not match")
        start_time = float(youtube_link_start.split("t=")[-1])
        end_time = float(youtube_link_end.split("t=")[-1])
        raise NotImplementedError("YouTube link processing is not implemented yet")

    # No input: return one value per configured output. (The original returned a
    # single bare string, which Gradio cannot map onto four output components.)
    return None, None, gr.Markdown("No input provided."), None


def greet(name, is_morning, temperature):
    """Toy example (unused by the demo): greeting string + Fahrenheit→Celsius."""
    salutation = "Good morning" if is_morning else "Good evening"
    greeting = f"{salutation} {name}. It is {temperature} degrees today"
    celsius = (temperature - 32) * 5 / 9
    return greeting, round(celsius, 2)


# Raw string: avoids invalid escape sequences (e.g. "\l", "\ ") in the LaTeX
# below while rendering exactly the same text as before.
note = r"""
**Note**: Radius (as well as height) estimation depends on accurate wavelength
estimation towards the end. Thus, it may not be accurate if the wavelength is
not estimated correctly at the end.

$$
H = l(0) = \frac{\lambda(0) - \lambda(T)}{4} \ \ \text{and} \ \ R = \frac{\lambda(T)}{4\beta}
$$
"""


def configure_outputs():
    """Build and return output widgets in the order ``process_input`` returns them."""
    image_wide = gr.Image(label="Estimated pitch")
    dataframe = gr.DataFrame(label="Estimated physical properties")
    image_tsne = gr.Image(label="TSNE of features", width=300)
    markdown = gr.Markdown(label="Note")
    # Order: ["image", "dataframe", "markdown", "image"]
    return [image_wide, dataframe, markdown, image_tsne]


# Pre-defined examples shipped with the repository.
examples = [
    ["./media_assets/example_video.mp4", None, None],
    ["./media_assets/ayNzH0uygFw_9.0_21.0.mp4", None, None],
    ["./media_assets/biDn0Gi6V8U_7.0_15.0.mp4", None, None],
    ["./media_assets/goWgiQQMugA_2.5_9.0.mp4", None, None],
    ["./media_assets/K87g4RvO-9k_254.0_259.0.mp4", None, None],
]

# Define the Gradio interface.
with gr.Blocks(
    css=custom_css,
    theme=gr.themes.Default(),
) as demo:
    # Header banner.
    gr.HTML(header)
    gr.Interface(
        fn=process_input,
        inputs=configure_input(),
        outputs=configure_outputs(),
        examples=examples,
    )
    # Footer with usage tips.
    gr.HTML(footer)

if __name__ == "__main__":
    # Guarded so importing this module does not start a web server.
    demo.launch(allowed_paths=["."], share=True)