import gradio as gr
import logging
import json
import os
from typing import Dict, Any, List
from itertools import groupby
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
video_folder = 'video/'
metadata_folder = 'metadata/'
def load_video_list() -> List[Dict[str, str]]:
video_list = []
for filename in os.listdir(video_folder):
if filename.endswith('.mp4'):
video_id = os.path.splitext(filename)[0]
metadata_path = os.path.join(metadata_folder, f"{video_id}.json")
if os.path.exists(metadata_path):
with open(metadata_path, 'r') as f:
metadata = json.load(f)
metadata = metadata['content_metadata']
title = metadata.get('title', 'Untitled')
video_list.append({"video_id": video_id, "title": title})
# Define the custom order for the first five videos
custom_order = ['7BhJmDPB7RU', 'PrAwsi3Ldzo', '3rhsSPxQ39c', 'P7WnJZ55sgc', 'g9GtUQs7XUM']
# Custom sorting function
def custom_sort(item):
try:
return custom_order.index(item['video_id'])
except ValueError:
return len(custom_order) + 1 # Place non-specified videos after the custom ordered ones
# Sort the video list
video_list.sort(key=lambda x: (custom_sort(x), x['title']))
return video_list
def score_to_emoji(score):
if score < 0.2:
return "😴"
elif score < 0.4:
return "🙂"
elif score < 0.6:
return "😊"
elif score < 0.8:
return "😃"
else:
return "🤩"
def load_metadata(video_id: str) -> Dict[str, Any]:
metadata_path = os.path.join(metadata_folder, f"{video_id}.json")
try:
with open(metadata_path, 'r') as f:
asd =json.load(f)
return asd['content_metadata']
except FileNotFoundError:
logger.error(f"Metadata file not found for video ID: {video_id}")
raise
except json.JSONDecodeError:
logger.error(f"Invalid JSON in metadata file for video ID: {video_id}")
raise
def timestamp_to_seconds(timestamp: str) -> float:
try:
h, m, s = timestamp.split(':')
return int(h) * 3600 + int(m) * 60 + float(s)
except ValueError:
logger.error(f"Invalid timestamp format: {timestamp}")
return 0.0
def format_timestamp(timestamp: str) -> str:
try:
h, m, s = timestamp.split(':')
return f"{int(m):02d}:{int(float(s)):02d}"
except Exception as e:
logger.error(f"Invalid timestamp format: {timestamp}")
return ""
def create_scene_table(scene: Dict[str, Any]) -> str:
dynamism_score = scene.get('dynamismScore', 0)
av_correlation = scene.get('audioVisualCorrelation', 0)
cast = ", ".join([cast_member for cast_member in scene.get('cast', [])])
output = f"""
Scene {scene.get('sceneId', 'Unknown')}: {scene.get('title', '')}
Dynamism: {score_to_emoji(dynamism_score)} Audio-visual correlation: {score_to_emoji(av_correlation)} Cast: {cast}
"""
return output
def create_storylines_table(storylines: Dict[str, Any]) -> str:
output = """
"""
return output
def create_qa_section(qa_list: List[Dict[str, str]]) -> str:
output = """
Q&A
"""
for qa in qa_list:
output += f"""
{qa.get('question', '')}
{qa.get('answer', '')}
"""
output += """
"""
return output
def create_trimming_suggestions(suggestions: List[Dict[str, Any]]) -> str:
output = """
"""
return output
def create_filmstrip(scenes: List[Dict[str, Any]], video_duration: float) -> str:
filmstrip_html = f"""
"""
for scene in scenes:
start_time = timestamp_to_seconds(scene['timestamps'].get('start_timestamp', '0:00:00'))
end_time = timestamp_to_seconds(scene['timestamps'].get('end_timestamp', str(video_duration)))
left_pos = (start_time / video_duration) * 100
width = ((end_time - start_time) / video_duration) * 100
title = scene.get('title', '')
filmstrip_html += f'''
'''
filmstrip_html += """
"""
return filmstrip_html
def process_video(video_id: str):
try:
logger.info(f"Processing video with ID: {video_id}")
metadata = load_metadata(video_id)
# Always use the test URL instead of the actual video file
video_url = f"https://huggingface.co/spaces/HuggingFaceFV/FineVideo-Explorer/resolve/main/video/{video_id}.mp4"
# Create HTML for video player
video_html = f"""
Your browser does not support the video tag.
"""
# Character List Table
character_table = """
Characters
"
additional_data = f"""
{metadata.get('title', 'Untitled')}
Description: {metadata.get('description', 'No description available')}
{character_table}
"""
scenes_output = ""
for scene in metadata.get('scenes', []):
scenes_output += create_scene_table(scene)
storylines_output = create_storylines_table(metadata.get('storylines', {}))
qa_output = create_qa_section(metadata.get('qAndA', []))
trimming_suggestions_output = create_trimming_suggestions(metadata.get('trimmingSuggestions', []))
# Generate filmstrip HTML
last_scene = metadata['scenes'][-1]
video_duration = timestamp_to_seconds(last_scene['timestamps'].get('end_timestamp', '0:00:00'))
filmstrip_html = create_filmstrip(metadata['scenes'], video_duration)
logger.info("Video processing completed successfully")
return video_html, filmstrip_html, additional_data + scenes_output + storylines_output + qa_output + trimming_suggestions_output
except Exception as e:
logger.exception(f"Error processing video: {str(e)}")
return None, "", f"Error processing video: {str(e)}"
css = """
body {
margin: 0;
padding: 0;
font-family: Arial, sans-serif;
overflow: hidden;
}
.container {
display: flex;
flex-direction: column;
height: 100vh;
}
#header {
display: flex;
align-items: center;
padding: 10px;
background-color: white;
}
#logo {
width: auto;
height: 150px;
box-shadow: none !important;
border: none !important;
background: none !important;
object-fit: contain;
}
#header-content {
flex-grow: 1;
display: flex;
justify-content: space-between;
align-items: center;
}
#header-content h1 {
margin: 0;
font-size: 36px;
font-weight: bold;
}
#header-content a {
font-size: 18px;
color: #0066cc;
text-decoration: none;
}
#header-content a:hover {
text-decoration: underline;
}
#top-panel {
height: 33vh;
display: flex;
padding: 10px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
overflow: hidden;
}
#video-list-column {
max-height: 80vh; /* Adjust as needed */
overflow-y: auto;
}
#video-column {
width: 70%;
display: flex;
flex-direction: column;
}
#video-wrapper {
flex-grow: 1;
display: flex;
justify-content: center;
align-items: center;
overflow: hidden;
}
#video-player {
width: 100%;
max-height: calc(33vh - 120px) !important;
}
#filmstrip-container {
width: 100%;
height: 80px !important;
background-color: #f0f0f0;
position: relative;
overflow: hidden;
cursor: pointer;
}
#filmstrip-container > div,
#filmstrip-container > div > div,
#filmstrip-container > div > div > div {
height: 100% !important;
}
#scrollable-content {
height: 67vh;
overflow-y: auto;
padding: 20px;
}
#metadata-container {
margin-top: 20px;
}
.content-samples {
display: flex;
flex-direction: column;
overflow-y: auto;
max-height: 100%;
}
.content-samples > .wrap {
display: flex;
flex-direction: column;
}
.content-samples .hidden {
display: none !important;
}
.content-samples > .wrap > .wrap {
display: flex !important;
flex-direction: column !important;
}
.content-samples label {
display: block;
padding: 10px;
cursor: pointer;
border-bottom: 1px solid #ddd;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.content-samples label:hover {
background-color: #f0f0f0;
}
.video-info {
margin-bottom: 20px;
}
.scene-container {
margin-bottom: 30px;
}
.metadata-table {
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
}
.metadata-table th, .metadata-table td {
border: 1px solid #ddd;
padding: 8px;
text-align: left;
}
.metadata-table th {
background-color: #f2f2f2;
}
.metadata-table tr:nth-child(even) {
background-color: #f9f9f9;
}
.timestamp-link {
color: #0066cc;
text-decoration: none;
cursor: pointer;
}
.timestamp-link:hover {
text-decoration: underline;
}
.chat-discussion {
background-color: #f0f0f0;
border-radius: 10px;
padding: 15px;
margin-bottom: 20px;
}
.question {
font-weight: bold;
margin-bottom: 5px;
}
.answer {
margin-bottom: 15px;
padding-left: 15px;
}
.correlation-scores {
font-size: 18px;
margin-bottom: 20px;
}
#reinitialization-overlay {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.5);
display: flex;
justify-content: center;
align-items: center;
z-index: 9999;
color: white;
font-size: 24px;
font-weight: bold;
}
@media (max-width: 768px) {
#header {
flex-direction: column;
align-items: flex-start;
}
#header-content h1 {
font-size: 24px;
}
#header-content p {
font-size: 14px;
}
#logo {
align-self: flex-end;
margin-top: 10px;
}
#top-panel {
flex-direction: column;
}
#video-list-column, #video-column {
width: 100%;
}
}
.icon-buttons button {
display: none !important;
}
/* Ensure one element per row in Gradio list */
#video-list-column .wrap {
display: flex;
flex-direction: column;
}
#video-list-column .wrap > .wrap {
display: flex !important;
flex-direction: column !important;
}
#video-list-column label {
display: block;
width: 100%;
}
"""
js = """
"""
with gr.Blocks(css=css, head=js) as iface:
with gr.Row(elem_id="header"):
with gr.Column(scale=1):
gr.Image("logo.png", elem_id="logo", show_label=False, interactive=False)
gr.Markdown("### Click a title to dive into the data:")
with gr.Column(elem_id="header-content", scale=10):
gr.Markdown("""
# Exploration page
## [🔗 Dataset](https://huggingface.co/mfarre/Video-LLaVA-7B-hf-CinePile)
""")
with gr.Row(elem_id="top-panel"):
with gr.Column(scale=3, elem_id="video-list-column"):
video_list_data = load_video_list()
video_list = gr.Radio(
label="Content Samples",
choices=[video["title"] for video in video_list_data],
elem_id="video-list",
value=None,
container=False
)
with gr.Column(scale=7, elem_id="video-column"):
video_output = gr.HTML(elem_id="video-container")
filmstrip_output = gr.HTML(elem_id="filmstrip-container")
with gr.Row(elem_id="scrollable-content"):
metadata_output = gr.HTML(elem_id="metadata-container")
def wrapped_process_video(title: str) -> tuple:
if not title:
return "", "", ""
video_id = next(video["video_id"] for video in video_list_data if video["title"] == title)
logging.info(f"Processing video with ID: {video_id}")
video_html, filmstrip_html, metadata_html = process_video(video_id)
return video_html, filmstrip_html, metadata_html
video_list.change(
fn=wrapped_process_video,
inputs=[video_list],
outputs=[video_output, filmstrip_output, metadata_output]
)
if __name__ == "__main__":
iface.launch()