# shorts-generator / local_processors.py
# Uploaded by vanshcodeworks (commit 5168d07, verified)
"""
Simplified local processors for Hugging Face Spaces
This is a streamlined version of the full local_processors.py file
"""
import os
import tempfile
import json
import logging
import subprocess
import random
import re
import uuid
from pathlib import Path
from utils.transcription import transcribe_audio
from utils.debug import debug_print, examine_file
from utils.video import validate_video_file
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def transcribe_video_locally(video_path: str) -> dict:
    """Transcribe a video file, returning a safe placeholder on failure.

    Args:
        video_path: Path to the video file to transcribe.

    Returns:
        The transcript dict produced by ``transcribe_audio`` (presumably
        with "full_text", "segments" and "method" keys — the fallback
        below follows that shape). Never raises: on any error a stub
        transcript with ``method == "error"`` is returned instead.
    """
    try:
        # Local Whisper is the only transcription backend used here.
        return transcribe_audio(video_path, method='whisper-local')
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        failure_text = "Transcription failed."
        return {
            "full_text": failure_text,
            "segments": [{"start": 0, "end": 5, "text": failure_text}],
            "method": "error"
        }
def select_highlights_locally(transcript_data: dict, video_title: str, num_highlights: int = 3, segment_length: int = 60) -> list:
    """Pick highlight windows from a transcript.

    Delegates to the interval-based selector. ``video_title`` is accepted
    for interface compatibility but is not used by that strategy.

    Args:
        transcript_data: Transcript dict containing a "segments" list.
        video_title: Title of the source video (unused here).
        num_highlights: How many highlights to select.
        segment_length: Target highlight length in seconds.

    Returns:
        A list of highlight dicts with start/end times and metadata.
    """
    return _select_highlights_algorithmically(
        transcript_data, num_highlights, segment_length
    )
def _select_highlights_algorithmically(transcript_data: dict, num_highlights: int, segment_length: int) -> list:
"""Simple algorithm to select highlights at equal intervals"""
segments = transcript_data.get("segments", [])
if not segments:
# If no segments, create dummy highlights
return [
{
"start_time": i * segment_length,
"end_time": (i + 1) * segment_length,
"title": f"Highlight {i+1}",
"description": f"Auto-selected highlight {i+1}"
}
for i in range(num_highlights)
]
# Get the total duration of the video
last_segment_end = segments[-1]["end"] if segments else 180
# Calculate equal intervals
interval = max(last_segment_end / (num_highlights + 1), segment_length)
# Generate highlights at these intervals
highlights = []
for i in range(num_highlights):
start_time = min((i + 1) * interval, last_segment_end - segment_length)
if start_time < 0:
start_time = 0
# Find the closest segment to this time
closest_segment = min(segments, key=lambda s: abs(s["start"] - start_time))
highlights.append({
"start_time": closest_segment["start"],
"end_time": min(closest_segment["start"] + segment_length, last_segment_end),
"title": f"Highlight {i+1}",
"description": f"Interesting moment at {int(closest_segment['start']//60)}:{int(closest_segment['start']%60):02d}"
})
return highlights
def clip_video_locally(video_path: str, highlights: list, content_type="interesting") -> list:
    """Cut highlight clips out of a video with ffmpeg.

    Args:
        video_path: Path to the source video file.
        highlights: List of dicts with "start_time", "end_time", "title"
            and optional "description" keys.
        content_type: Unused here; kept for interface compatibility.

    Returns:
        A list of clip-info dicts ("path", "start_time", "end_time",
        "title", "description"). Falls back to placeholder clips when the
        source is invalid or no clip could be produced.
    """
    if not os.path.isfile(video_path) or not validate_video_file(video_path):
        return _generate_dummy_clips(num_clips=len(highlights))
    clip_infos = []
    clips_dir = os.path.join(tempfile.gettempdir(), "shorts_clips")
    os.makedirs(clips_dir, exist_ok=True)
    try:
        for i, highlight in enumerate(highlights):
            start_time = highlight["start_time"]
            end_time = highlight["end_time"]
            # Sanitize the title for use in a filename. The previous
            # replace(" ", "_") left path separators and other unsafe
            # characters in place, which could break the output path.
            title = re.sub(r'[^A-Za-z0-9_-]+', '_', highlight["title"])[:30]
            output_path = os.path.join(clips_dir, f"clip_{i}_{title}.mp4")
            cmd = [
                'ffmpeg',
                '-y',  # Overwrite output files
                '-i', video_path,
                '-ss', str(start_time),  # Clip start (seconds)
                '-t', str(end_time - start_time),  # Clip duration
                '-c:v', 'libx264',  # Video codec
                '-c:a', 'aac',  # Audio codec
                '-strict', 'experimental',
                '-b:a', '128k',
                output_path
            ]
            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            if result.returncode != 0:
                # Surface ffmpeg's diagnostics instead of failing silently.
                logger.error(f"ffmpeg failed for clip {i}: {result.stderr[-500:]}")
                continue
            # Guard against zero-byte / truncated outputs (>10 KB heuristic).
            if os.path.exists(output_path) and os.path.getsize(output_path) > 10000:
                clip_infos.append({
                    "path": output_path,
                    "start_time": start_time,
                    "end_time": end_time,
                    "title": highlight["title"],
                    "description": highlight.get("description", "")
                })
    except Exception as e:
        logger.error(f"Video clipping failed: {str(e)}")
    # If no clips were successfully created, create dummy clips.
    if not clip_infos:
        return _generate_dummy_clips(num_clips=len(highlights))
    return clip_infos
def _generate_dummy_clips(num_clips=3):
"""Generate dummy clips when clipping fails"""
clips_dir = os.path.join(tempfile.gettempdir(), "shorts_clips")
os.makedirs(clips_dir, exist_ok=True)
dummy_clips = []
for i in range(num_clips):
dummy_path = os.path.join(clips_dir, f"dummy_clip_{i}.mp4")
with open(dummy_path, 'wb') as f:
f.write(b'DUMMY VIDEO')
dummy_clips.append({
"path": dummy_path,
"start_time": i * 60,
"end_time": (i + 1) * 60,
"title": f"Example Highlight {i+1}",
"description": "This is a placeholder clip since video clipping failed."
})
return dummy_clips
def generate_caption_locally(clip_info: dict, transcript_data: dict, video_title: str) -> dict:
    """Build a title, caption and hashtags for one clip.

    Transcript text is collected from every segment overlapping the
    clip's [start_time, end_time] window. Generic "Highlight N" titles
    are replaced with the clip's opening words; the caption is a
    teaser-length excerpt of the transcript.

    Args:
        clip_info: Clip dict with "start_time", "end_time", "title" and
            optional "description".
        transcript_data: Transcript dict with a "segments" list.
        video_title: Title of the source video (unused here).

    Returns:
        Dict with "title", "caption" and "hashtags" keys; a safe default
        is returned if anything goes wrong.
    """
    try:
        window_start = clip_info["start_time"]
        window_end = clip_info["end_time"]
        # Gather text from all segments overlapping the clip window.
        overlapping = [
            seg["text"]
            for seg in transcript_data.get("segments", [])
            if seg["end"] >= window_start and seg["start"] <= window_end
        ]
        clip_transcript = "".join(text + " " for text in overlapping).strip()
        title = clip_info["title"]
        if title.startswith("Highlight "):
            # Replace the generic auto-title with the opening words.
            words = clip_transcript.split()
            if len(words) > 5:
                title = f"✨ {' '.join(words[:5])}..."
        if len(clip_transcript) > 10:
            # Truncate long transcripts down to a teaser-sized caption.
            caption = clip_transcript if len(clip_transcript) <= 100 else clip_transcript[:100] + "..."
        else:
            caption = clip_info.get("description", "Check out this interesting moment!")
        return {
            "title": title,
            "caption": caption,
            "hashtags": "#shorts #youtube #viral"
        }
    except Exception as e:
        logger.error(f"Caption generation failed: {str(e)}")
        return {
            "title": clip_info.get("title", "Interesting Moment"),
            "caption": "Check out this interesting moment!",
            "hashtags": "#shorts #youtube #viral"
        }