# shorts-generator / local_processors.py
# Uploaded by vanshcodeworks (commit 5168d07, verified)
"""
Simplified local processors for Hugging Face Spaces
This is a streamlined version of the full local_processors.py file
"""
import os
import tempfile
import json
import logging
import subprocess
import random
import re
import uuid
from pathlib import Path
from utils.transcription import transcribe_audio
from utils.debug import debug_print, examine_file
from utils.video import validate_video_file
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def transcribe_video_locally(video_path: str) -> dict:
    """Transcribe a video file, returning a safe placeholder on failure.

    Args:
        video_path: Path to the video file to transcribe.

    Returns:
        The transcript dict produced by ``transcribe_audio`` (presumably
        with "full_text", "segments" and "method" keys — the fallback
        below follows that shape). Never raises: on any error a stub
        transcript with ``method == "error"`` is returned instead.
    """
    try:
        # Local Whisper is the only transcription backend used here.
        return transcribe_audio(video_path, method='whisper-local')
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        failure_text = "Transcription failed."
        return {
            "full_text": failure_text,
            "segments": [{"start": 0, "end": 5, "text": failure_text}],
            "method": "error"
        }
def select_highlights_locally(transcript_data: dict, video_title: str, num_highlights: int = 3, segment_length: int = 60) -> list:
    """Pick highlight windows from a transcript.

    Delegates to the interval-based selector. ``video_title`` is accepted
    for interface compatibility but is not used by that strategy.

    Args:
        transcript_data: Transcript dict containing a "segments" list.
        video_title: Title of the source video (unused here).
        num_highlights: How many highlights to select.
        segment_length: Target highlight length in seconds.

    Returns:
        A list of highlight dicts with start/end times and metadata.
    """
    return _select_highlights_algorithmically(
        transcript_data, num_highlights, segment_length
    )
def _select_highlights_algorithmically(transcript_data: dict, num_highlights: int, segment_length: int) -> list:
"""Simple algorithm to select highlights at equal intervals"""
segments = transcript_data.get("segments", [])
if not segments:
# If no segments, create dummy highlights
return [
{
"start_time": i * segment_length,
"end_time": (i + 1) * segment_length,
"title": f"Highlight {i+1}",
"description": f"Auto-selected highlight {i+1}"
}
for i in range(num_highlights)
]
# Get the total duration of the video
last_segment_end = segments[-1]["end"] if segments else 180
# Calculate equal intervals
interval = max(last_segment_end / (num_highlights + 1), segment_length)
# Generate highlights at these intervals
highlights = []
for i in range(num_highlights):
start_time = min((i + 1) * interval, last_segment_end - segment_length)
if start_time < 0:
start_time = 0
# Find the closest segment to this time
closest_segment = min(segments, key=lambda s: abs(s["start"] - start_time))
highlights.append({
"start_time": closest_segment["start"],
"end_time": min(closest_segment["start"] + segment_length, last_segment_end),
"title": f"Highlight {i+1}",
"description": f"Interesting moment at {int(closest_segment['start']//60)}:{int(closest_segment['start']%60):02d}"
})
return highlights
def clip_video_locally(video_path: str, highlights: list, content_type="interesting") -> list:
    """Cut highlight clips out of a video with ffmpeg.

    Args:
        video_path: Path to the source video file.
        highlights: List of dicts with "start_time", "end_time", "title"
            and optional "description" keys.
        content_type: Unused here; kept for interface compatibility.

    Returns:
        A list of clip-info dicts ("path", "start_time", "end_time",
        "title", "description"). Falls back to placeholder clips when the
        source is invalid or no clip could be produced.
    """
    if not os.path.isfile(video_path) or not validate_video_file(video_path):
        return _generate_dummy_clips(num_clips=len(highlights))
    clip_infos = []
    clips_dir = os.path.join(tempfile.gettempdir(), "shorts_clips")
    os.makedirs(clips_dir, exist_ok=True)
    try:
        for i, highlight in enumerate(highlights):
            start_time = highlight["start_time"]
            end_time = highlight["end_time"]
            # Sanitize the title for use in a filename. The previous
            # replace(" ", "_") left path separators and other unsafe
            # characters in place, which could break the output path.
            title = re.sub(r'[^A-Za-z0-9_-]+', '_', highlight["title"])[:30]
            output_path = os.path.join(clips_dir, f"clip_{i}_{title}.mp4")
            cmd = [
                'ffmpeg',
                '-y',  # Overwrite output files
                '-i', video_path,
                '-ss', str(start_time),  # Clip start (seconds)
                '-t', str(end_time - start_time),  # Clip duration
                '-c:v', 'libx264',  # Video codec
                '-c:a', 'aac',  # Audio codec
                '-strict', 'experimental',
                '-b:a', '128k',
                output_path
            ]
            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            if result.returncode != 0:
                # Surface ffmpeg's diagnostics instead of failing silently.
                logger.error(f"ffmpeg failed for clip {i}: {result.stderr[-500:]}")
                continue
            # Guard against zero-byte / truncated outputs (>10 KB heuristic).
            if os.path.exists(output_path) and os.path.getsize(output_path) > 10000:
                clip_infos.append({
                    "path": output_path,
                    "start_time": start_time,
                    "end_time": end_time,
                    "title": highlight["title"],
                    "description": highlight.get("description", "")
                })
    except Exception as e:
        logger.error(f"Video clipping failed: {str(e)}")
    # If no clips were successfully created, create dummy clips.
    if not clip_infos:
        return _generate_dummy_clips(num_clips=len(highlights))
    return clip_infos
def _generate_dummy_clips(num_clips=3):
"""Generate dummy clips when clipping fails"""
clips_dir = os.path.join(tempfile.gettempdir(), "shorts_clips")
os.makedirs(clips_dir, exist_ok=True)
dummy_clips = []
for i in range(num_clips):
dummy_path = os.path.join(clips_dir, f"dummy_clip_{i}.mp4")
with open(dummy_path, 'wb') as f:
f.write(b'DUMMY VIDEO')
dummy_clips.append({
"path": dummy_path,
"start_time": i * 60,
"end_time": (i + 1) * 60,
"title": f"Example Highlight {i+1}",
"description": "This is a placeholder clip since video clipping failed."
})
return dummy_clips
def generate_caption_locally(clip_info: dict, transcript_data: dict, video_title: str) -> dict:
    """Build a title, caption and hashtags for one clip.

    Transcript text is collected from every segment overlapping the
    clip's [start_time, end_time] window. Generic "Highlight N" titles
    are replaced with the clip's opening words; the caption is a
    teaser-length excerpt of the transcript.

    Args:
        clip_info: Clip dict with "start_time", "end_time", "title" and
            optional "description".
        transcript_data: Transcript dict with a "segments" list.
        video_title: Title of the source video (unused here).

    Returns:
        Dict with "title", "caption" and "hashtags" keys; a safe default
        is returned if anything goes wrong.
    """
    try:
        window_start = clip_info["start_time"]
        window_end = clip_info["end_time"]
        # Gather text from all segments overlapping the clip window.
        overlapping = [
            seg["text"]
            for seg in transcript_data.get("segments", [])
            if seg["end"] >= window_start and seg["start"] <= window_end
        ]
        clip_transcript = "".join(text + " " for text in overlapping).strip()
        title = clip_info["title"]
        if title.startswith("Highlight "):
            # Replace the generic auto-title with the opening words.
            words = clip_transcript.split()
            if len(words) > 5:
                title = f"✨ {' '.join(words[:5])}..."
        if len(clip_transcript) > 10:
            # Truncate long transcripts down to a teaser-sized caption.
            caption = clip_transcript if len(clip_transcript) <= 100 else clip_transcript[:100] + "..."
        else:
            caption = clip_info.get("description", "Check out this interesting moment!")
        return {
            "title": title,
            "caption": caption,
            "hashtags": "#shorts #youtube #viral"
        }
    except Exception as e:
        logger.error(f"Caption generation failed: {str(e)}")
        return {
            "title": clip_info.get("title", "Interesting Moment"),
            "caption": "Check out this interesting moment!",
            "hashtags": "#shorts #youtube #viral"
        }