""" Simplified local processors for Hugging Face Spaces This is a streamlined version of the full local_processors.py file """ import os import tempfile import json import logging import subprocess import random import re import uuid from pathlib import Path from utils.transcription import transcribe_audio from utils.debug import debug_print, examine_file from utils.video import validate_video_file # Set up logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def transcribe_video_locally(video_path: str) -> dict: """Transcribe a video using available methods""" try: # Try using Whisper transcript = transcribe_audio(video_path, method='whisper-local') return transcript except Exception as e: logger.error(f"Transcription failed: {str(e)}") return { "full_text": "Transcription failed.", "segments": [{"start": 0, "end": 5, "text": "Transcription failed."}], "method": "error" } def select_highlights_locally(transcript_data: dict, video_title: str, num_highlights: int = 3, segment_length: int = 60) -> list: """Select the most engaging highlights from a transcript""" # Simple algorithmic highlight selection return _select_highlights_algorithmically(transcript_data, num_highlights, segment_length) def _select_highlights_algorithmically(transcript_data: dict, num_highlights: int, segment_length: int) -> list: """Simple algorithm to select highlights at equal intervals""" segments = transcript_data.get("segments", []) if not segments: # If no segments, create dummy highlights return [ { "start_time": i * segment_length, "end_time": (i + 1) * segment_length, "title": f"Highlight {i+1}", "description": f"Auto-selected highlight {i+1}" } for i in range(num_highlights) ] # Get the total duration of the video last_segment_end = segments[-1]["end"] if segments else 180 # Calculate equal intervals interval = max(last_segment_end / (num_highlights + 1), segment_length) # Generate highlights at these intervals highlights = [] for i in range(num_highlights): start_time = min((i + 1) * interval, last_segment_end - segment_length) if start_time < 0: start_time = 0 # Find the closest segment to this time closest_segment = min(segments, key=lambda s: abs(s["start"] - start_time)) highlights.append({ "start_time": closest_segment["start"], "end_time": min(closest_segment["start"] + segment_length, last_segment_end), "title": f"Highlight {i+1}", "description": f"Interesting moment at {int(closest_segment['start']//60)}:{int(closest_segment['start']%60):02d}" }) return highlights def clip_video_locally(video_path: str, highlights: list, content_type="interesting") -> list: """Clip segments from a video based on highlight timestamps""" if not os.path.isfile(video_path) or not validate_video_file(video_path): return _generate_dummy_clips(num_clips=len(highlights)) clip_infos = [] clips_dir = os.path.join(tempfile.gettempdir(), "shorts_clips") os.makedirs(clips_dir, exist_ok=True) try: for i, highlight in enumerate(highlights): start_time = highlight["start_time"] end_time = highlight["end_time"] title = highlight["title"].replace(" ", "_")[:30] # Create output filename output_filename = f"clip_{i}_{title}.mp4" output_path = os.path.join(clips_dir, output_filename) # Use ffmpeg to create clip cmd = [ 'ffmpeg', '-y', # Overwrite output files '-i', video_path, '-ss', str(start_time), # Start time '-t', str(end_time - start_time), # Duration '-c:v', 'libx264', # Video codec '-c:a', 'aac', # Audio codec '-strict', 'experimental', '-b:a', '128k', output_path ] result = subprocess.run( cmd, 

def clip_video_locally(video_path: str, highlights: list, content_type="interesting") -> list:
    """Clip segments from a video based on highlight timestamps"""
    if not os.path.isfile(video_path) or not validate_video_file(video_path):
        return _generate_dummy_clips(num_clips=len(highlights))

    clip_infos = []
    clips_dir = os.path.join(tempfile.gettempdir(), "shorts_clips")
    os.makedirs(clips_dir, exist_ok=True)

    try:
        for i, highlight in enumerate(highlights):
            start_time = highlight["start_time"]
            end_time = highlight["end_time"]
            title = highlight["title"].replace(" ", "_")[:30]

            # Create output filename
            output_filename = f"clip_{i}_{title}.mp4"
            output_path = os.path.join(clips_dir, output_filename)

            # Use ffmpeg to create clip
            cmd = [
                'ffmpeg',
                '-y',                              # Overwrite output files
                '-i', video_path,
                '-ss', str(start_time),            # Start time
                '-t', str(end_time - start_time),  # Duration
                '-c:v', 'libx264',                 # Video codec
                '-c:a', 'aac',                     # Audio codec
                '-strict', 'experimental',
                '-b:a', '128k',
                output_path
            ]

            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )

            if result.returncode == 0 and os.path.exists(output_path) and os.path.getsize(output_path) > 10000:
                clip_infos.append({
                    "path": output_path,
                    "start_time": start_time,
                    "end_time": end_time,
                    "title": highlight["title"],
                    "description": highlight.get("description", "")
                })
    except Exception as e:
        logger.error(f"Video clipping failed: {str(e)}")

    # If no clips were successfully created, create dummy clips
    if not clip_infos:
        return _generate_dummy_clips(num_clips=len(highlights))

    return clip_infos


def _generate_dummy_clips(num_clips=3):
    """Generate dummy clips when clipping fails"""
    clips_dir = os.path.join(tempfile.gettempdir(), "shorts_clips")
    os.makedirs(clips_dir, exist_ok=True)

    dummy_clips = []
    for i in range(num_clips):
        dummy_path = os.path.join(clips_dir, f"dummy_clip_{i}.mp4")
        with open(dummy_path, 'wb') as f:
            f.write(b'DUMMY VIDEO')

        dummy_clips.append({
            "path": dummy_path,
            "start_time": i * 60,
            "end_time": (i + 1) * 60,
            "title": f"Example Highlight {i+1}",
            "description": "This is a placeholder clip since video clipping failed."
        })

    return dummy_clips


def generate_caption_locally(clip_info: dict, transcript_data: dict, video_title: str) -> dict:
    """Generate captions and titles for a video clip"""
    try:
        # Extract transcript for this clip
        start_time = clip_info["start_time"]
        end_time = clip_info["end_time"]

        clip_transcript = ""
        for segment in transcript_data.get("segments", []):
            if segment["end"] >= start_time and segment["start"] <= end_time:
                clip_transcript += segment["text"] + " "

        clip_transcript = clip_transcript.strip()

        # Create simple captions
        title = clip_info["title"]
        if title.startswith("Highlight "):
            words = clip_transcript.split()
            if len(words) > 5:
                title = f"✨ {' '.join(words[:5])}..."

        # Generate caption from transcript
        if len(clip_transcript) > 10:
            caption = clip_transcript[:100] + "..." if len(clip_transcript) > 100 else clip_transcript
        else:
            caption = clip_info.get("description", "Check out this interesting moment!")

        hashtags = "#shorts #youtube #viral"

        return {
            "title": title,
            "caption": caption,
            "hashtags": hashtags
        }
    except Exception as e:
        logger.error(f"Caption generation failed: {str(e)}")
        return {
            "title": clip_info.get("title", "Interesting Moment"),
            "caption": "Check out this interesting moment!",
            "hashtags": "#shorts #youtube #viral"
        }
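

# --- Usage sketch (illustrative only, not part of the module API) ---
# A minimal end-to-end run of the pipeline above. "sample.mp4" and the
# title are hypothetical placeholders; substitute a real local video.
# Every step degrades gracefully, so this also runs against a missing file.
if __name__ == "__main__":
    demo_video = "sample.mp4"    # hypothetical input path
    demo_title = "Sample Video"  # hypothetical title

    transcript = transcribe_video_locally(demo_video)
    highlights = select_highlights_locally(transcript, demo_title, num_highlights=3)
    clips = clip_video_locally(demo_video, highlights)

    for clip in clips:
        meta = generate_caption_locally(clip, transcript, demo_title)
        print(f"{clip['path']}: {meta['title']} | {meta['caption']}")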