import os
import sys
sys.path.append("../")
import gradio as gr
import torch
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["font.family"] = "serif"
import decord
import PIL, PIL.Image
import librosa
from IPython.display import Markdown, display
import pandas as pd
from util import *
# Inline CSS shared by the header and footer HTML fragments below.
# NOTE(review): all three are empty placeholders; the Blocks() call at the
# bottom styles the page with `custom_css` instead (presumably imported via
# `from util import *`) — confirm which of the two is the intended source.
css = """
"""
header = css + """
"""
footer = css + """
"""
# def process_input(video=None, youtube_link=None, start_time=None, end_time=None):
# if video:
# return f"Video file uploaded: {video.name}"
# elif youtube_link and start_time and end_time:
# return f"YouTube link: {youtube_link} (Start: {start_time}, End: {end_time})"
# else:
# return "Please upload a video or provide a YouTube link with start and end times."
def configure_input():
    """Build the demo's input widgets.

    Renders an instruction line, then returns the three Gradio components
    (video upload, YouTube start link, YouTube end link) in the order
    expected by ``process_input``.
    """
    gr.Markdown(
        "#### Either upload a video file or provide a YouTube link with start and end times."
    )
    widgets = [
        gr.Video(label="Upload Video", height=480),
        gr.Textbox(label="YouTube Link (Start time)"),
        gr.Textbox(label="YouTube Link (End time)"),
    ]
    return widgets
# Example usage in a Gradio interface
def process_input(video, youtube_link_start, youtube_link_end):
    """Run the model on an uploaded video, or (eventually) a YouTube clip.

    Parameters
    ----------
    video : str | None
        Filesystem path of the uploaded video (Gradio passes the file path).
    youtube_link_start, youtube_link_end : str | None
        YouTube links carrying a ``t=`` timestamp marking the start/end of
        the clip, e.g. ``https://youtu.be/6-HVn8Jzzuk?t=10``.

    Returns
    -------
    tuple
        ``(pitch_image, properties_df, note_markdown, tsne_image)`` — one
        value per component declared in ``configure_outputs``.

    Raises
    ------
    ValueError
        If the two YouTube links point at different video IDs.
    NotImplementedError
        For the (unfinished) YouTube-link path.
    """
    # Truthiness, not `is not None`: Gradio delivers "" for an untouched
    # Textbox and None for a missing video, so identity checks misclassify
    # empty inputs.
    if video:
        print(video)
        # NOTE: the model is (re)loaded on every request; hoist to module
        # scope if load time becomes a problem.
        model = load_model()
        video_path = video  # Gradio supplies the uploaded file's path
        frame = load_frame(video_path)         # first frame, for display
        S = load_spectrogram(video_path)       # audio spectrogram
        audio = load_audio_tensor(video_path)  # raw audio tensor
        z_audio, y_audio = get_model_output(audio, model)
        image, df_show, tsne_image = show_output(frame, S, y_audio, z_audio)
        return image, df_show, gr.Markdown(note), tsne_image
    elif youtube_link_start and youtube_link_end:
        # Both links must reference the same video; the path segment before
        # the query string is the video ID.
        video_id = youtube_link_start.split("/")[-1].split("?")[0]
        if video_id != youtube_link_end.split("/")[-1].split("?")[0]:
            # A real exception, not `assert`, so the check survives `-O`.
            raise ValueError("Video IDs do not match")
        start_time = float(youtube_link_start.split("t=")[-1])
        end_time = float(youtube_link_end.split("t=")[-1])
        raise NotImplementedError("YouTube link processing is not implemented yet")
    else:
        # Must return one value per configured output component — a bare
        # string here would crash Gradio's output mapping.
        return None, None, gr.Markdown("No input provided"), None
def greet(name, is_morning, temperature):
    """Return a time-of-day greeting for *name* reporting *temperature*
    (Fahrenheit), together with that temperature converted to Celsius
    rounded to two decimals."""
    time_of_day = "morning" if is_morning else "evening"
    message = f"Good {time_of_day} {name}. It is {temperature} degrees today"
    as_celsius = (temperature - 32) * 5 / 9
    return message, round(as_celsius, 2)
# Caveat rendered under the results (Markdown + LaTeX). A raw string keeps
# every backslash literal, instead of relying on invalid escape sequences
# like "\l" and "\ " — those emit a SyntaxWarning on Python 3.12+ and are
# slated to become errors. The resulting text is byte-identical.
note = r"""
**Note**: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
Thus, it may not be accurate if the wavelength is not estimated correctly at the end.
$$
H = l(0) = \frac{\lambda(0) - \lambda(T)}{4} \ \ \text{and} \ \ R = \frac{\lambda(T)}{4\beta}
$$
"""
def configure_outputs():
    """Build the demo's output widgets, returned in the order that
    ``process_input`` produces them: pitch image, physical-properties
    table, note markdown, t-SNE image."""
    pitch_plot = gr.Image(label="Estimated pitch")
    properties_table = gr.DataFrame(label="Estimated physical properties")
    tsne_plot = gr.Image(label="TSNE of features", width=300)
    note_markdown = gr.Markdown(label="Note")
    return [pitch_plot, properties_table, note_markdown, tsne_plot]
# Configure pre-defined examples
# Each row mirrors the three inputs of `process_input`:
# (local video path, YouTube start link, YouTube end link). The YouTube
# fields are None because these examples use bundled local files.
examples = [
    ["./media_assets/example_video.mp4", None, None],
    ["./media_assets/ayNzH0uygFw_9.0_21.0.mp4", None, None],
    ["./media_assets/biDn0Gi6V8U_7.0_15.0.mp4", None, None],
    ["./media_assets/goWgiQQMugA_2.5_9.0.mp4", None, None],
    ["./media_assets/K87g4RvO-9k_254.0_259.0.mp4", None, None],
]
# Define Gradio interface
# NOTE(review): `custom_css` is not defined in this file — presumably it is
# pulled in by `from util import *`; the local `css` constant above is only
# used inside `header`/`footer`. Confirm which one is intended.
with gr.Blocks(
    css=custom_css,
    theme=gr.themes.Default(),
) as demo:
    # Add the header
    gr.HTML(header)
    # Classic Interface wired to the builders above; `examples` rows feed
    # the same three inputs declared by configure_input().
    gr.Interface(
        fn=process_input,
        inputs=configure_input(),
        outputs=configure_outputs(),
        examples=examples,
    )
    # Add the footer
    gr.HTML(footer)
# Launch the interface
# `share=True` opens a public tunnel; `allowed_paths=["."]` lets Gradio
# serve the local ./media_assets example files.
demo.launch(allowed_paths=["."], share=True)