awacke1 committed on
Commit
1d91d2b
•
1 Parent(s): 8433575

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +218 -0
app.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import torch
7
+ import json
8
+ import os
9
+ from pathlib import Path
10
+
11
class VideoRetrieval:
    """Retrieve movie clips whose pre-computed features best match a text query.

    The query is embedded with the ``all-MiniLM-L6-v2`` sentence-transformer,
    compared by cosine similarity against every matrix in ``self.features``,
    and the per-feature scores are blended using ``FEATURE_WEIGHTS``. Rows of
    ``self.clips_df`` supply the metadata for the best-scoring clips.
    """

    # Weight applied to each feature matrix's similarity scores before the
    # scores are summed into a single ranking score.
    FEATURE_WEIGHTS = {
        'visual_features': 0.4,
        'scene_features': 0.3,
        'object_features': 0.3,
    }

    def __init__(self, use_dummy_data=True):
        """Load the text encoder and populate features and clip metadata.

        Args:
            use_dummy_data: When true, generate random demo data; otherwise
                try to load features and metadata from disk.
        """
        self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
        if use_dummy_data:
            self.create_dummy_data()
        else:
            self.load_data()

    def create_dummy_data(self):
        """Create dummy features and metadata for demonstration.

        Sets ``self.features`` to three random ``(20, 384)`` matrices and
        ``self.clips_df`` to a 20-row DataFrame of placeholder clip metadata.
        """
        n_clips = 20
        feature_dim = 384  # matching the dimension of all-MiniLM-L6-v2

        self.features = {
            'visual_features': np.random.randn(n_clips, feature_dim),
            'scene_features': np.random.randn(n_clips, feature_dim),
            'object_features': np.random.randn(n_clips, feature_dim),
        }

        movie_titles = [
            "The Matrix", "Inception", "The Dark Knight", "Pulp Fiction",
            "The Shawshank Redemption", "Forrest Gump", "The Godfather",
            "Fight Club", "Interstellar", "The Silence of the Lambs",
        ]

        descriptions = [
            "A dramatic confrontation in a dark room where the truth is revealed",
            "A high-stakes chase through a crowded city street",
            "An emotional reunion between long-lost friends",
            "A tense negotiation that determines the fate of many",
            "A quiet moment of reflection before a life-changing decision",
        ]

        # Sample YouTube clips (famous movie scenes)
        youtube_clips = [
            "https://www.youtube.com/watch?v=kcsNbQRU5TI",  # Matrix - Red Pill Blue Pill
            "https://www.youtube.com/watch?v=YoHD9XEInc0",  # Inception - Hallway Fight
            "https://www.youtube.com/watch?v=ZWCAf-xLV2k",  # Dark Knight - Interrogation
            "https://www.youtube.com/watch?v=Jomr9SAjcyw",  # Pulp Fiction - Restaurant
            "https://www.youtube.com/watch?v=SQ7_5MMbPYs",  # Shawshank - Hope Speech
        ]

        # The three lists are shorter than n_clips, so each is cycled with a
        # modulo index to fill all rows.
        data = [
            {
                'clip_id': f'clip_{i}',
                'movie_title': movie_titles[i % len(movie_titles)],
                'description': descriptions[i % len(descriptions)],
                'timestamp': f'{(i*5):02d}:00 - {(i*5+3):02d}:00',
                'duration': '3:00',
                'youtube_url': youtube_clips[i % len(youtube_clips)],
            }
            for i in range(n_clips)
        ]

        self.clips_df = pd.DataFrame(data)

    def load_data(self):
        """Load actual pre-computed features and metadata.

        If any of the expected files is missing, an error is shown in the
        Streamlit UI and the dummy data is generated instead.
        """
        try:
            self.features = {
                'visual_features': np.load('path_to_visual_features.npy'),
                'scene_features': np.load('path_to_scene_features.npy'),
                'object_features': np.load('path_to_object_features.npy'),
            }
            self.clips_df = pd.read_csv('clips_metadata.csv')
        except FileNotFoundError as e:
            st.error(f"Error loading data: {e}. Falling back to dummy data.")
            self.create_dummy_data()

    def encode_query(self, query_text):
        """Encode the text query into an embedding with the text model."""
        return self.text_model.encode(query_text)

    def compute_similarity(self, query_embedding, feature_type='visual_features'):
        """Compute cosine similarity between the query and one feature matrix.

        Args:
            query_embedding: Embedding of the query; reshaped to a single row.
            feature_type: Key into ``self.features`` selecting the matrix.

        Returns:
            One similarity score per row of the selected feature matrix.
        """
        similarities = cosine_similarity(
            query_embedding.reshape(1, -1),
            self.features[feature_type],
        )
        return similarities[0]

    def retrieve_clips(self, query_text, top_k=3):
        """Retrieve the top-k most relevant clips for a text query.

        Args:
            query_text: Natural-language description of the wanted scene.
            top_k: Maximum number of clips to return. Values <= 0 yield an
                empty list.

        Returns:
            A list of dicts ordered from best to worst match, each with
            ``clip_id``, ``movie_title``, ``description``, ``timestamp``,
            ``youtube_url`` and ``similarity_score`` (a plain ``float``).
        """
        # Guard before slicing: ``[-0:]`` would select *every* clip and a
        # negative top_k would select an arbitrary tail of the ranking.
        if top_k <= 0:
            return []

        query_embedding = self.encode_query(query_text)

        # Weighted sum of the per-feature similarity vectors.
        combined_similarities = sum(
            self.compute_similarity(query_embedding, feat_type) * weight
            for feat_type, weight in self.FEATURE_WEIGHTS.items()
        )

        # argsort is ascending: take the last top_k entries, then reverse so
        # the best match comes first.
        top_indices = np.argsort(combined_similarities)[-top_k:][::-1]

        results = []
        for idx in top_indices:
            row = self.clips_df.iloc[idx]  # look the row up once per result
            results.append({
                'clip_id': row['clip_id'],
                'movie_title': row['movie_title'],
                'description': row['description'],
                'timestamp': row['timestamp'],
                'youtube_url': row['youtube_url'],
                # Convert to float for JSON serialization
                'similarity_score': float(combined_similarities[idx]),
            })

        return results
128
+
129
def _get_retrieval_system():
    """Return the session's ``VideoRetrieval``, creating it on first use."""
    # Keeping the instance in session state means the sentence-transformer
    # model is constructed once per session rather than on every call.
    if "retrieval_system" not in st.session_state:
        st.session_state.retrieval_system = VideoRetrieval(use_dummy_data=True)
    return st.session_state.retrieval_system


def _render_result(result):
    """Render one retrieved clip: description, player, score and link."""
    url = result['youtube_url']

    with st.container():
        st.subheader(f"{result['movie_title']}")
        cols = st.columns([2, 1])

        with cols[0]:
            st.markdown("**Scene Description:**")
            st.write(result['description'])
            st.text(f"⏱️ Timestamp: {result['timestamp']}")

            # Add video player
            if url:
                st.video(url)

        with cols[1]:
            st.markdown("**Relevance Score:**")
            # The combined score is a weighted sum of cosine similarities and
            # can be negative; clamp it to the 0..1 range passed to
            # st.progress.
            score = min(1.0, max(0.0, result['similarity_score']))
            st.progress(score)
            st.text(f"{score:.2%} match")

            # Add direct YouTube link (only when there is a URL to link to,
            # consistent with the video player above)
            if url:
                st.markdown(f"[🔗 Watch on YouTube]({url})")
                st.text("Click to open in a new tab")

        st.divider()


def _render_sidebar():
    """Render the static "About" and "Feature Weights" sidebar sections."""
    with st.sidebar:
        st.header("ℹ️ About")
        st.write("""
        This demo system simulates a video retrieval engine that uses:

        - 🎥 Visual scene understanding
        - 👥 Character interaction analysis
        - 🎯 Object detection
        - 🎭 Action recognition

        In a production system, these features would be pre-computed
        from actual movie clips using state-of-the-art AI models.
        """)

        st.header("⚙️ Feature Weights")
        st.write("Current weights used for similarity computation:")
        st.write("- 🎬 Visual Features: 40%")
        st.write("- 🏞️ Scene Features: 30%")
        st.write("- 📦 Object Features: 30%")


def main():
    """Build the Streamlit page: search form, ranked results and sidebar."""
    st.set_page_config(
        page_title="Movie Scene Retrieval System",
        page_icon="🎬",
        layout="wide"
    )

    st.title("🎬 Movie Scene Retrieval System")
    st.write("""
    Search for movie scenes using natural language descriptions.
    The system will retrieve the most relevant 2-3 minute clips based on your query.

    *Note: This is a demo version using simulated data.*
    """)

    # Initialize retrieval system
    retrieval_system = _get_retrieval_system()

    # Search interface
    col1, col2 = st.columns([3, 1])

    with col1:
        query = st.text_input(
            "Enter your scene description:",
            placeholder="e.g., A dramatic confrontation between two characters in a dark room"
        )

    with col2:
        num_results = st.slider("Number of results:", min_value=1, max_value=5, value=3)

    if st.button("🔍 Search", type="primary"):
        if not query:
            st.warning("Please enter a scene description.")
        else:
            with st.spinner("Searching for relevant clips..."):
                results = retrieval_system.retrieve_clips(query, top_k=num_results)

            for result in results:
                _render_result(result)

    # Sidebar with additional information
    _render_sidebar()
216
+
217
# Script entry point: build the Streamlit page only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()