# Notes / download.py
# noumanjavaid's picture
# Update download.py
# 1e32418 verified
from __future__ import unicode_literals
import yt_dlp
import os
import time
import shutil
import logging
import re
import tempfile
from pathlib import Path
from typing import Optional, Callable, Dict, Any, Union
# Configuration
# Upper bound (in bytes) on the estimated audio size accepted for download.
MAX_FILE_SIZE = 40 * 1024 ** 2  # 40 MB
# User-facing message used when the estimate exceeds MAX_FILE_SIZE.
FILE_TOO_LARGE_MESSAGE = (
    "The audio file exceeds the 40MB size limit. "
    "Please try a shorter video clip or select a lower quality option."
)
# Retry policy: number of attempts and the base pause (seconds) between them.
MAX_RETRIES = 3
RETRY_DELAY = 2  # seconds
# Defaults handed to the audio-extraction post-processor.
DEFAULT_AUDIO_FORMAT = "mp3"
DEFAULT_AUDIO_QUALITY = "192"  # kbps
# Codecs accepted as ``audio_format``; anything else falls back to the default.
SUPPORTED_FORMATS = [
    "mp3",
    "m4a",
    "wav",
    "aac",
    "flac",
    "opus",
]
# Setup logging
# Configure the root logger at INFO level with a timestamped format, then
# create the module-wide logger used by every function in this file.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("youtube_downloader")
class DownloadLogger:
    """Logger object handed to yt-dlp that can also relay progress lines.

    Every message is forwarded to the module ``logger``. Debug messages that
    start with ``[download]`` and contain a ``%`` are additionally passed to
    the progress callback.
    """

    def __init__(self, progress_callback: Optional[Callable[[str], None]] = None):
        """Store the callback, substituting a no-op when none (or a falsy one) is given."""
        self.progress_callback = progress_callback if progress_callback else (lambda _msg: None)

    def debug(self, msg: str) -> None:
        """Relay download-progress lines to the callback, then log at DEBUG level."""
        is_progress_line = msg.startswith('[download]') and '%' in msg
        if is_progress_line:
            self.progress_callback(msg)
        logger.debug(msg)

    def warning(self, msg: str) -> None:
        """Log ``msg`` at WARNING level."""
        logger.warning(msg)

    def error(self, msg: str) -> None:
        """Log ``msg`` at ERROR level."""
        logger.error(msg)
class DownloadError(Exception):
    """Error raised by this module when a URL is rejected or a download/info lookup fails."""
def validate_url(url: str) -> bool:
    """Return True when ``url`` points at one of the supported media platforms.

    The check is made against the URL's *hostname* (the platform domain itself
    or any subdomain of it), not against the whole string, so a supported
    domain that merely appears in a path, query string or as part of another
    domain name (``notyoutube.com``) is rejected.

    Args:
        url: URL to validate. A scheme is optional (``youtube.com/watch?v=...``
            is accepted).
    Returns:
        True if the hostname belongs to a supported platform, False otherwise
        (including for empty, non-string or unparsable input).
    """
    # Local import so the block does not depend on a new file-level import.
    from urllib.parse import urlsplit

    supported_domains = (
        'youtube.com',
        'youtu.be',
        'vimeo.com',
        'dailymotion.com',
        'twitch.tv',
        'soundcloud.com',
        'instagram.com',
    )

    if not isinstance(url, str):
        return False
    candidate = url.strip()
    if not candidate:
        return False

    # urlsplit only recognises a host after "//"; add it for scheme-less input.
    has_scheme = re.match(r'[a-z][a-z0-9+.\-]*://', candidate, re.IGNORECASE) is not None
    if not has_scheme and not candidate.startswith('//'):
        candidate = '//' + candidate

    try:
        host = urlsplit(candidate).hostname
    except ValueError:
        # Raised for malformed netlocs such as an unterminated IPv6 literal.
        return False
    if not host:
        return False

    host = host.lower().rstrip('.')
    return any(host == domain or host.endswith('.' + domain) for domain in supported_domains)
def ensure_download_directory(directory: str) -> str:
    """Create ``directory`` (and any missing parents) and return its absolute path as a string."""
    target = Path(directory)
    # exist_ok makes repeated calls on an existing directory a no-op.
    target.mkdir(exist_ok=True, parents=True)
    absolute_target = target.absolute()
    return str(absolute_target)
def get_download_options(
    output_dir: str = "./downloads/audio",
    audio_format: str = DEFAULT_AUDIO_FORMAT,
    audio_quality: str = DEFAULT_AUDIO_QUALITY,
    progress_callback: Optional[Callable[[str], None]] = None
) -> Dict[str, Any]:
    """
    Get yt-dlp download options with specified parameters

    Side effect: the output directory is created if it does not exist.

    Args:
        output_dir: Directory to save downloaded files
        audio_format: Audio format (mp3, m4a, wav, etc.); values outside
            SUPPORTED_FORMATS fall back to DEFAULT_AUDIO_FORMAT with a warning
        audio_quality: Audio quality in kbps
        progress_callback: Function to call with progress updates
    Returns:
        Dictionary of yt-dlp options
    """
    if audio_format not in SUPPORTED_FORMATS:
        logger.warning(f"Unsupported format '{audio_format}', falling back to {DEFAULT_AUDIO_FORMAT}")
        audio_format = DEFAULT_AUDIO_FORMAT

    # Ensure download directory exists
    output_dir = ensure_download_directory(output_dir)

    # '%' introduces a field in a yt-dlp output template, so a literal percent
    # sign in the directory path must be doubled to be taken verbatim.
    template_dir = output_dir.replace("%", "%%")
    output_template = os.path.join(template_dir, "%(title)s.%(ext)s")

    return {
        "format": "bestaudio/best",
        # Convert the downloaded stream to the requested codec/bitrate.
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": audio_format,
            "preferredquality": audio_quality,
        }],
        "logger": DownloadLogger(progress_callback),
        "outtmpl": output_template,
        "noplaylist": True,
        "quiet": False,
        "no_warnings": False,
        # Bind the caller's callback into the hook yt-dlp invokes with its status dict.
        "progress_hooks": [lambda d: download_progress_hook(d, progress_callback)],
        "overwrites": True,
    }
def download_progress_hook(d: Dict[str, Any], callback: Optional[Callable[[str], None]] = None) -> None:
    """
    Translate a download status dictionary into a human-readable progress message

    Args:
        d: Download information dictionary; must contain 'status', and
            'filename' when the status is 'finished'
        callback: Function to call with progress updates (optional)
    """
    notify = callback if callback is not None else (lambda _msg: None)
    status = d['status']

    if status == 'downloading':
        # Missing fields are replaced by placeholder text rather than failing.
        percent = d.get('_percent_str', 'unknown progress')
        rate = d.get('_speed_str', 'unknown speed')
        remaining = d.get('_eta_str', 'unknown ETA')
        notify(f"Downloading: {percent} at {rate}, ETA: {remaining}")
        return

    if status == 'finished':
        # The callback gets the bare file name; the log keeps the full path.
        short_name = os.path.basename(d['filename'])
        notify(f"Download complete: {short_name}")
        logger.info(f"Download finished: {d['filename']}")
def estimate_file_size(info: Dict[str, Any]) -> int:
    """
    Estimate the download size from video info

    Sources are tried in order: the exact 'filesize', the approximate
    'filesize_approx', then duration multiplied by the audio ('abr') or
    total ('tbr') bitrate. The result is always an int, even when the
    metadata carries a float.

    Args:
        info: Video information dictionary
    Returns:
        Estimated file size in bytes; MAX_FILE_SIZE when nothing in ``info``
        allows an estimate
    """
    # Try different fields that might contain size information
    for size_key in ("filesize", "filesize_approx"):
        reported_size = info.get(size_key)
        if reported_size is not None:
            return int(reported_size)

    # If we have duration and a bitrate, we can estimate
    duration = info.get("duration")
    bitrate = info.get("abr") or info.get("tbr")
    if duration and bitrate:
        # Bitrate is in kilobits (1000 bits) per second:
        # kbit/s * 1000 / 8 = 125 bytes per second for each kbit/s.
        return int(bitrate * duration * 125)

    # Default to a reasonable upper limit if we can't determine
    return MAX_FILE_SIZE
def _find_downloaded_audio(base_filename: str, audio_format: str) -> Optional[str]:
    """Return the first existing ``<base_filename>.<ext>``, trying ``audio_format`` before the other supported formats."""
    for ext in (audio_format, *SUPPORTED_FORMATS):
        candidate = f"{base_filename}.{ext}"
        if os.path.exists(candidate):
            return candidate
    return None


def download_video_audio(
    url: str,
    output_dir: str = "./downloads/audio",
    audio_format: str = DEFAULT_AUDIO_FORMAT,
    audio_quality: str = DEFAULT_AUDIO_QUALITY,
    progress_callback: Optional[Callable[[str], None]] = None
) -> Optional[str]:
    """
    Download audio from a video URL

    The URL is validated, the size is estimated from the metadata and checked
    against MAX_FILE_SIZE, then the audio is downloaded and converted. Failed
    attempts are retried up to MAX_RETRIES times, pausing RETRY_DELAY seconds
    between attempts (five times longer after an HTTP 429 response).

    Args:
        url: URL of the video
        output_dir: Directory to save downloaded files
        audio_format: Audio format (mp3, m4a, wav, etc.)
        audio_quality: Audio quality in kbps
        progress_callback: Function to call with progress updates
    Returns:
        Path to the downloaded audio file. None is only returned when
        MAX_RETRIES is less than 1, i.e. no attempt is made.
    Raises:
        DownloadError: If the URL is unsupported, the estimated size exceeds
            the limit (raised immediately, without retrying), or the download
            still fails after the last attempt
    """
    if not validate_url(url):
        error_msg = f"Invalid or unsupported URL: {url}"
        logger.error(error_msg)
        raise DownloadError(error_msg)

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            if progress_callback:
                progress_callback(f"Starting download (attempt {attempt}/{MAX_RETRIES})...")
            ydl_opts = get_download_options(output_dir, audio_format, audio_quality, progress_callback)
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                logger.info(f"Downloading audio from: {url}")
                # Extract info first without downloading
                info = ydl.extract_info(url, download=False)
                # Reject oversized downloads before any data is transferred
                estimated_size = estimate_file_size(info)
                if estimated_size > MAX_FILE_SIZE:
                    error_msg = f"{FILE_TOO_LARGE_MESSAGE} (Estimated: {estimated_size / 1024 / 1024:.1f}MB)"
                    logger.error(error_msg)
                    raise DownloadError(error_msg)
                # Now download
                ydl.download([url])
                # prepare_filename() yields the pre-conversion name, so the
                # extension is dropped and the converted file is searched for.
                base_filename = os.path.splitext(ydl.prepare_filename(info))[0]
                downloaded_path = _find_downloaded_audio(base_filename, audio_format)
                if downloaded_path is None:
                    # If we get here, something went wrong
                    raise FileNotFoundError(f"Could not locate downloaded file for {url}")
                return downloaded_path
        except DownloadError:
            # Our own errors (e.g. file too large) are deterministic: retrying
            # cannot help, so propagate them unchanged.
            raise
        except yt_dlp.utils.DownloadError as e:
            failure, label = e, "Download error"
            # Rate limiting - wait longer
            delay = RETRY_DELAY * 5 if "HTTP Error 429" in str(e) else RETRY_DELAY
        except Exception as e:
            failure, label, delay = e, "Unexpected error", RETRY_DELAY

        # Only reached when the attempt failed with a retryable error.
        error_msg = f"{label} (Attempt {attempt}/{MAX_RETRIES}): {failure}"
        logger.error(error_msg)
        if progress_callback:
            progress_callback(error_msg)
        if attempt >= MAX_RETRIES:
            # Give up before sleeping so the last failure is reported promptly,
            # including when it was a rate-limit response.
            raise DownloadError(f"Failed to download after {MAX_RETRIES} attempts: {failure}") from failure
        time.sleep(delay)
    return None
def delete_download(path: str) -> bool:
    """
    Remove a downloaded file, or a directory with all of its contents

    Failures are logged rather than raised.

    Args:
        path: Path to file or directory to delete
    Returns:
        True if deletion was successful, False otherwise (missing path,
        unsupported path type, or any error during removal)
    """
    deleted = False
    try:
        if not (path and os.path.exists(path)):
            logger.warning(f"Path does not exist: {path}")
        elif os.path.isfile(path):
            os.remove(path)
            logger.info(f"File deleted: {path}")
            deleted = True
        elif os.path.isdir(path):
            # Directories are removed recursively.
            shutil.rmtree(path)
            logger.info(f"Directory deleted: {path}")
            deleted = True
        else:
            logger.warning(f"Path is neither a file nor a directory: {path}")
    except PermissionError:
        logger.error(f"Permission denied: Unable to delete {path}")
    except FileNotFoundError:
        logger.error(f"File or directory not found: {path}")
    except Exception as e:
        logger.error(f"Error deleting {path}: {str(e)}")
    return deleted
def trim_audio_file(input_file: str, max_duration_seconds: int = 600) -> str:
    """
    Write a copy of an audio file cut to a maximum duration

    The copy is placed next to the input as ``<name>_trimmed<ext>``. Any
    failure (including the ``ffmpeg`` module being unavailable) is logged and
    the original path is returned instead.

    Args:
        input_file: Path to input audio file
        max_duration_seconds: Maximum duration in seconds
    Returns:
        Path to trimmed file, or ``input_file`` if trimming failed
    """
    try:
        # Imported lazily so the module still loads when ffmpeg bindings are absent.
        import ffmpeg

        # Build the sibling output path: same folder, "_trimmed" before the extension.
        parent_dir, base_name = os.path.split(input_file)
        stem, suffix = os.path.splitext(base_name)
        trimmed_path = os.path.join(parent_dir, f"{stem}_trimmed{suffix}")

        # Trim using ffmpeg; an existing output file is overwritten.
        source = ffmpeg.input(input_file)
        job = source.output(trimmed_path, t=str(max_duration_seconds), acodec='copy')
        job.run(quiet=True, overwrite_output=True)

        logger.info(f"Trimmed {input_file} to {max_duration_seconds} seconds")
        return trimmed_path
    except Exception as e:
        logger.error(f"Error trimming audio: {str(e)}")
        return input_file  # Return original if trimming fails
def get_video_info(url: str) -> Dict[str, Any]:
    """
    Fetch a video's metadata without downloading any media

    Args:
        url: URL of the video
    Returns:
        Dictionary of video information
    Raises:
        DownloadError: If the information could not be retrieved
    """
    quiet_options = {"quiet": True}
    try:
        with yt_dlp.YoutubeDL(quiet_options) as ydl:
            # download=False requests metadata only.
            return ydl.extract_info(url, download=False)
    except Exception as e:
        reason = str(e)
        logger.error(f"Error getting video info: {reason}")
        raise DownloadError(f"Could not retrieve video information: {reason}")