"""Streamlit demo app for natural-language movie scene retrieval."""

import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
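
# To try the demo locally, launch it through Streamlit's CLI (the file name
# app.py is an assumption; use whatever this script is saved as):
#   streamlit run app.py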


class VideoRetrieval:
    """Retrieve movie clips whose precomputed features best match a text query."""

    def __init__(self, use_dummy_data=True):
        # Sentence-transformer model used to embed text queries (384-dim output).
        self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
        if use_dummy_data:
            self.create_dummy_data()
        else:
            self.load_data()

    def create_dummy_data(self):
        """Create dummy features and metadata for demonstration."""
        n_clips = 20
        # 384 matches the embedding size of all-MiniLM-L6-v2, so the dummy
        # clip features live in the same space as the encoded queries.
        feature_dim = 384

        # Random vectors stand in for precomputed per-clip feature embeddings.
        self.features = {
            'visual_features': np.random.randn(n_clips, feature_dim),
            'scene_features': np.random.randn(n_clips, feature_dim),
            'object_features': np.random.randn(n_clips, feature_dim)
        }

        movie_titles = [
            "The Matrix", "Inception", "The Dark Knight", "Pulp Fiction",
            "The Shawshank Redemption", "Forrest Gump", "The Godfather",
            "Fight Club", "Interstellar", "The Silence of the Lambs"
        ]

        descriptions = [
            "A dramatic confrontation in a dark room where the truth is revealed",
            "A high-stakes chase through a crowded city street",
            "An emotional reunion between long-lost friends",
            "A tense negotiation that determines the fate of many",
            "A quiet moment of reflection before a life-changing decision"
        ]

        youtube_clips = [
            "https://www.youtube.com/watch?v=kcsNbQRU5TI",
            "https://www.youtube.com/watch?v=YoHD9XEInc0",
            "https://www.youtube.com/watch?v=ZWCAf-xLV2k",
            "https://www.youtube.com/watch?v=Jomr9SAjcyw",
            "https://www.youtube.com/watch?v=SQ7_5MMbPYs",
        ]

        # Cycle through the sample titles, descriptions and URLs so every
        # dummy clip gets plausible metadata.
        data = []
        for i in range(n_clips):
            data.append({
                'clip_id': f'clip_{i}',
                'movie_title': movie_titles[i % len(movie_titles)],
                'description': descriptions[i % len(descriptions)],
                'timestamp': f'{(i*5):02d}:00 - {(i*5+3):02d}:00',
                'duration': '3:00',
                'youtube_url': youtube_clips[i % len(youtube_clips)]
            })

        self.clips_df = pd.DataFrame(data)

    def load_data(self):
        """Load pre-computed features and metadata.

        Each feature file is expected to hold one row per clip, in the same
        row order as clips_metadata.csv and with the same width as the query
        embeddings (384 for all-MiniLM-L6-v2).
        """
        try:
            self.features = {
                'visual_features': np.load('path_to_visual_features.npy'),
                'scene_features': np.load('path_to_scene_features.npy'),
                'object_features': np.load('path_to_object_features.npy')
            }
            self.clips_df = pd.read_csv('clips_metadata.csv')
        except FileNotFoundError as e:
            st.error(f"Error loading data: {e}. Falling back to dummy data.")
            self.create_dummy_data()
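
    # An offline preprocessing step (not part of this app; variable names
    # below are hypothetical) would produce the files loaded above, e.g.:
    #   np.save('path_to_visual_features.npy', visual_matrix)  # shape (n_clips, 384)
    #   clips_df.to_csv('clips_metadata.csv', index=False)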

    def encode_query(self, query_text):
        """Encode the text query into an embedding vector."""
        return self.text_model.encode(query_text)

    def compute_similarity(self, query_embedding, feature_type='visual_features'):
        """Compute cosine similarity between the query and every clip's features."""
        similarities = cosine_similarity(
            query_embedding.reshape(1, -1),
            self.features[feature_type]
        )
        return similarities[0]

    def retrieve_clips(self, query_text, top_k=3):
        """Retrieve the top-k most relevant clips for a text query."""
        query_embedding = self.encode_query(query_text)

        # Relative importance of each feature space in the final ranking.
        similarities = {}
        weights = {
            'visual_features': 0.4,
            'scene_features': 0.3,
            'object_features': 0.3
        }
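
        # Late fusion: score each feature space against the query separately,
        # then sum the weighted per-clip scores. For example, per-space
        # similarities of 0.50, 0.20 and 0.10 combine to
        # 0.4*0.50 + 0.3*0.20 + 0.3*0.10 = 0.29.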
        for feat_type, weight in weights.items():
            similarities[feat_type] = self.compute_similarity(query_embedding, feat_type) * weight

        combined_similarities = sum(similarities.values())

        # np.argsort is ascending, so take the last top_k indices and reverse
        # them to list the best-scoring clips first.
        top_indices = np.argsort(combined_similarities)[-top_k:][::-1]

        results = []
        for idx in top_indices:
            results.append({
                'clip_id': self.clips_df.iloc[idx]['clip_id'],
                'movie_title': self.clips_df.iloc[idx]['movie_title'],
                'description': self.clips_df.iloc[idx]['description'],
                'timestamp': self.clips_df.iloc[idx]['timestamp'],
                'youtube_url': self.clips_df.iloc[idx]['youtube_url'],
                'similarity_score': float(combined_similarities[idx])
            })

        return results

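
# A minimal usage sketch outside Streamlit (illustrative query, dummy data):
#   retriever = VideoRetrieval(use_dummy_data=True)
#   for hit in retriever.retrieve_clips("a tense rooftop chase at night", top_k=3):
#       print(hit['movie_title'], round(hit['similarity_score'], 3))
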
def main():
    st.set_page_config(
        page_title="Movie Scene Retrieval System",
        page_icon="🎬",
        layout="wide"
    )

    st.title("🎬 Movie Scene Retrieval System")
    st.write("""
    Search for movie scenes using natural language descriptions.
    The system will retrieve the most relevant 2-3 minute clips based on your query.

    *Note: This is a demo version using simulated data.*
    """)

    # Build the retrieval system once per session and cache it, so the
    # embedding model is not reloaded on every Streamlit rerun.
    if 'retrieval_system' not in st.session_state:
        st.session_state.retrieval_system = VideoRetrieval(use_dummy_data=True)
    retrieval_system = st.session_state.retrieval_system

    col1, col2 = st.columns([3, 1])

    with col1:
        query = st.text_input(
            "Enter your scene description:",
            placeholder="e.g., A dramatic confrontation between two characters in a dark room"
        )

    with col2:
        num_results = st.slider("Number of results:", min_value=1, max_value=5, value=3)

    if st.button("🔍 Search", type="primary"):
        if not query:
            st.warning("Please enter a scene description.")
        else:
            with st.spinner("Searching for relevant clips..."):
                results = retrieval_system.retrieve_clips(query, top_k=num_results)

            for i, result in enumerate(results, 1):
                with st.container():
                    st.subheader(f"{i}. {result['movie_title']}")
                    cols = st.columns([2, 1])

                    with cols[0]:
                        st.markdown("**Scene Description:**")
                        st.write(result['description'])
                        st.text(f"⏱️ Timestamp: {result['timestamp']}")

                        if result['youtube_url']:
                            st.video(result['youtube_url'])

                    with cols[1]:
                        st.markdown("**Relevance Score:**")
                        # Clamp the fused score into [0, 1] for the progress bar.
                        score = min(1.0, max(0.0, result['similarity_score']))
                        st.progress(score)
                        st.text(f"{score:.2%} match")

                        st.markdown(f"[🔗 Watch on YouTube]({result['youtube_url']})")
                        st.text("Click to open in a new tab")

                st.divider()

    with st.sidebar:
        st.header("ℹ️ About")
        st.write("""
        This demo system simulates a video retrieval engine that uses:

        - 🎥 Visual scene understanding
        - 👥 Character interaction analysis
        - 🎯 Object detection
        - 🏃 Action recognition

        In a production system, these features would be pre-computed
        from actual movie clips using state-of-the-art AI models.
        """)

        st.header("⚖️ Feature Weights")
        st.write("Current weights used for similarity computation:")
        st.write("- 🎬 Visual Features: 40%")
        st.write("- 🏞️ Scene Features: 30%")
        st.write("- 📦 Object Features: 30%")


if __name__ == "__main__":
    main()