avinashHuggingface108 committed on
Commit
bb657cb
·
1 Parent(s): 1e083bf

Fix deployment issues: permissions, short videos, and AI responses

Browse files

- Fix permission error by using /tmp/temp_segments with proper permissions
- Add minimum video length check (2x segment_length) for meaningful highlights
- Improve AI prompting for 256M model: max_new_tokens=8, greedy decoding
- Simplify prompt to force ONE WORD answers: YES or NO only
- Handle very short videos gracefully with informative error message

Files changed (1)
  1. huggingface_exact_approach.py +15 -6
huggingface_exact_approach.py CHANGED
@@ -154,7 +154,7 @@ class VideoHighlightDetector:
154
  "role": "user",
155
  "content": [
156
  {"type": "video", "path": video_path},
157
- {"type": "text", "text": f"""Given these specific highlight examples:\n{highlight_types}\n\nDoes this video segment contain a CLEAR, OBVIOUS match for one of these highlights?\n\nBe very strict. Answer ONLY:\n- 'YES' if there is an obvious, clear match\n- 'NO' if the match is weak, unclear, or if this is just ordinary content\n\nMost segments should be NO. Only exceptional moments should be YES."""}]
158
  }
159
  ]
160
 
@@ -169,9 +169,9 @@ class VideoHighlightDetector:
169
 
170
  outputs = self.model.generate(
171
  **inputs,
172
- max_new_tokens=128,
173
- do_sample=True,
174
- temperature=0.3 # Lower temperature for more consistent decisions
175
  )
176
  response = self.processor.decode(outputs[0], skip_special_tokens=True)
177
 
@@ -332,6 +332,15 @@ class VideoHighlightDetector:
332
 
333
  print(f"📹 Video duration: {duration:.1f}s ({duration/60:.1f} minutes)")
334
 
 
 
 
 
 
 
 
 
 
335
  # Step 1: Analyze overall video content
336
  print("🎬 Step 1: Analyzing overall video content...")
337
  video_desc = self.analyze_video_content(video_path)
@@ -349,8 +358,8 @@ class VideoHighlightDetector:
349
  print()
350
 
351
  # Step 3: Split video into segments
352
- temp_dir = "temp_segments"
353
- os.makedirs(temp_dir, exist_ok=True)
354
 
355
  kept_segments1 = []
356
  kept_segments2 = []
 
154
  "role": "user",
155
  "content": [
156
  {"type": "video", "path": video_path},
157
+ {"type": "text", "text": f"""Looking for these highlights:\n{highlight_types}\n\nDoes this video segment match ANY of these highlights?\n\nAnswer with ONE WORD ONLY:\nYES or NO\n\nNothing else. Just YES or NO."""}]
158
  }
159
  ]
160
 
 
169
 
170
  outputs = self.model.generate(
171
  **inputs,
172
+ max_new_tokens=8, # Force very short responses
173
+ do_sample=False, # Use greedy decoding for consistency
174
+ temperature=0.1 # Very low temperature for strict adherence
175
  )
176
  response = self.processor.decode(outputs[0], skip_special_tokens=True)
177
 
 
332
 
333
  print(f"📹 Video duration: {duration:.1f}s ({duration/60:.1f} minutes)")
334
 
335
+ # Check if video is too short for meaningful highlights
336
+ if duration < segment_length * 2:
337
+ return {
338
+ "error": f"Video too short ({duration:.1f}s). Need at least {segment_length * 2:.1f}s for meaningful highlights.",
339
+ "video_description": "Video too short for analysis",
340
+ "total_segments": 0,
341
+ "selected_segments": 0
342
+ }
343
+
344
  # Step 1: Analyze overall video content
345
  print("🎬 Step 1: Analyzing overall video content...")
346
  video_desc = self.analyze_video_content(video_path)
 
358
  print()
359
 
360
  # Step 3: Split video into segments
361
+ temp_dir = os.path.join("/tmp", "temp_segments")
362
+ os.makedirs(temp_dir, mode=0o755, exist_ok=True)
363
 
364
  kept_segments1 = []
365
  kept_segments2 = []