""" | |
Simplified local processors for Hugging Face Spaces | |
This is a streamlined version of the full local_processors.py file | |
""" | |
import os
import tempfile
import json
import logging
import subprocess
import random
import re
import uuid
from pathlib import Path

from utils.transcription import transcribe_audio
from utils.debug import debug_print, examine_file
from utils.video import validate_video_file

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def transcribe_video_locally(video_path: str) -> dict:
    """Transcribe a video using available methods."""
    try:
        # Try using Whisper
        transcript = transcribe_audio(video_path, method='whisper-local')
        return transcript
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        return {
            "full_text": "Transcription failed.",
            "segments": [{"start": 0, "end": 5, "text": "Transcription failed."}],
            "method": "error"
        }
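

# The highlight and caption helpers below assume transcript_data is shaped like
# the fallback dict above: a "segments" list of {"start", "end", "text"} entries
# plus "full_text" and a "method" label. The real output of transcribe_audio is
# assumed to be compatible with this shape and may carry additional fields.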


def select_highlights_locally(transcript_data: dict, video_title: str, num_highlights: int = 3, segment_length: int = 60) -> list:
    """Select the most engaging highlights from a transcript."""
    # Simple algorithmic highlight selection
    return _select_highlights_algorithmically(transcript_data, num_highlights, segment_length)


def _select_highlights_algorithmically(transcript_data: dict, num_highlights: int, segment_length: int) -> list:
    """Simple algorithm to select highlights at equal intervals."""
    segments = transcript_data.get("segments", [])
    if not segments:
        # If no segments, create dummy highlights
        return [
            {
                "start_time": i * segment_length,
                "end_time": (i + 1) * segment_length,
                "title": f"Highlight {i+1}",
                "description": f"Auto-selected highlight {i+1}"
            }
            for i in range(num_highlights)
        ]
    # Get the total duration of the video
    last_segment_end = segments[-1]["end"] if segments else 180
    # Calculate equal intervals
    interval = max(last_segment_end / (num_highlights + 1), segment_length)
    # Generate highlights at these intervals
    highlights = []
    for i in range(num_highlights):
        start_time = min((i + 1) * interval, last_segment_end - segment_length)
        if start_time < 0:
            start_time = 0
        # Find the closest segment to this time
        closest_segment = min(segments, key=lambda s: abs(s["start"] - start_time))
        highlights.append({
            "start_time": closest_segment["start"],
            "end_time": min(closest_segment["start"] + segment_length, last_segment_end),
            "title": f"Highlight {i+1}",
            "description": f"Interesting moment at {int(closest_segment['start']//60)}:{int(closest_segment['start']%60):02d}"
        })
    return highlights


def clip_video_locally(video_path: str, highlights: list, content_type="interesting") -> list:
    """Clip segments from a video based on highlight timestamps."""
    if not os.path.isfile(video_path) or not validate_video_file(video_path):
        return _generate_dummy_clips(num_clips=len(highlights))
    clip_infos = []
    clips_dir = os.path.join(tempfile.gettempdir(), "shorts_clips")
    os.makedirs(clips_dir, exist_ok=True)
    try:
        for i, highlight in enumerate(highlights):
            start_time = highlight["start_time"]
            end_time = highlight["end_time"]
            title = highlight["title"].replace(" ", "_")[:30]
            # Create output filename
            output_filename = f"clip_{i}_{title}.mp4"
            output_path = os.path.join(clips_dir, output_filename)
            # Use ffmpeg to create clip
            cmd = [
                'ffmpeg',
                '-y',  # Overwrite output files
                '-i', video_path,
                '-ss', str(start_time),  # Start time
                '-t', str(end_time - start_time),  # Duration
                '-c:v', 'libx264',  # Video codec
                '-c:a', 'aac',  # Audio codec
                '-strict', 'experimental',
                '-b:a', '128k',
                output_path
            ]
            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            if result.returncode == 0 and os.path.exists(output_path) and os.path.getsize(output_path) > 10000:
                clip_infos.append({
                    "path": output_path,
                    "start_time": start_time,
                    "end_time": end_time,
                    "title": highlight["title"],
                    "description": highlight.get("description", "")
                })
    except Exception as e:
        logger.error(f"Video clipping failed: {str(e)}")
    # If no clips were successfully created, create dummy clips
    if not clip_infos:
        return _generate_dummy_clips(num_clips=len(highlights))
    return clip_infos


def _generate_dummy_clips(num_clips=3):
    """Generate dummy clips when clipping fails."""
    clips_dir = os.path.join(tempfile.gettempdir(), "shorts_clips")
    os.makedirs(clips_dir, exist_ok=True)
    dummy_clips = []
    for i in range(num_clips):
        dummy_path = os.path.join(clips_dir, f"dummy_clip_{i}.mp4")
        with open(dummy_path, 'wb') as f:
            f.write(b'DUMMY VIDEO')
        dummy_clips.append({
            "path": dummy_path,
            "start_time": i * 60,
            "end_time": (i + 1) * 60,
            "title": f"Example Highlight {i+1}",
            "description": "This is a placeholder clip since video clipping failed."
        })
    return dummy_clips


def generate_caption_locally(clip_info: dict, transcript_data: dict, video_title: str) -> dict:
    """Generate captions and titles for a video clip."""
    try:
        # Extract transcript for this clip
        start_time = clip_info["start_time"]
        end_time = clip_info["end_time"]
        clip_transcript = ""
        for segment in transcript_data.get("segments", []):
            if segment["end"] >= start_time and segment["start"] <= end_time:
                clip_transcript += segment["text"] + " "
        clip_transcript = clip_transcript.strip()
        # Create simple captions
        title = clip_info["title"]
        if title.startswith("Highlight "):
            words = clip_transcript.split()
            if len(words) > 5:
                title = f"✨ {' '.join(words[:5])}..."
        # Generate caption from transcript
        if len(clip_transcript) > 10:
            caption = clip_transcript[:100] + "..." if len(clip_transcript) > 100 else clip_transcript
        else:
            caption = clip_info.get("description", "Check out this interesting moment!")
        hashtags = "#shorts #youtube #viral"
        return {
            "title": title,
            "caption": caption,
            "hashtags": hashtags
        }
    except Exception as e:
        logger.error(f"Caption generation failed: {str(e)}")
        return {
            "title": clip_info.get("title", "Interesting Moment"),
            "caption": "Check out this interesting moment!",
            "hashtags": "#shorts #youtube #viral"
        }
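

# Illustrative end-to-end usage (a minimal sketch; the video path below is a
# hypothetical placeholder, and the utils.* modules must be importable for
# this to run):
if __name__ == "__main__":
    sample_video = "sample_video.mp4"  # hypothetical input file
    sample_title = "Sample Video"

    transcript = transcribe_video_locally(sample_video)
    highlights = select_highlights_locally(transcript, sample_title, num_highlights=3, segment_length=60)
    clips = clip_video_locally(sample_video, highlights)

    for clip in clips:
        meta = generate_caption_locally(clip, transcript, sample_title)
        print(f"{clip['path']}: {meta['title']} - {meta['caption']} {meta['hashtags']}")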