noumanjavaid committed on
Commit
e01ecae
·
verified ·
1 Parent(s): 422e5df

Update download.py

Browse files
Files changed (1) hide show
  1. download.py +341 -61
download.py CHANGED
@@ -1,96 +1,376 @@
1
  from __future__ import unicode_literals
2
- import yt_dlp as youtube_dl
3
  import os
4
  import time
5
- import os
6
  import shutil
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- MAX_FILE_SIZE = 100 * 1024 * 1024 # 100 MB
9
- FILE_TOO_LARGE_MESSAGE = "The audio file is too large for the current size and rate limits using Whisper. If you used a YouTube link, please try a shorter video clip. If you uploaded an audio file, try trimming or compressing the audio to under 100 MB."
10
- max_retries = 3
11
- delay = 2
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- class MyLogger(object):
15
- def __init__(self, external_logger=lambda x: None):
16
- self.external_logger = external_logger
17
 
18
- def debug(self, msg):
19
- print("[debug]: ", msg)
20
- self.external_logger(msg)
21
 
22
- def warning(self, msg):
23
- print("[warning]: ", msg)
24
 
25
- def error(self, msg):
26
- print("[error]: ", msg)
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
- def my_hook(d):
30
- print("hook", d["status"])
31
- if d["status"] == "finished":
32
- print("Done downloading, now converting ...")
 
33
 
34
 
35
- def get_ydl_opts(external_logger=lambda x: None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  return {
37
- "format": "bestaudio/best",
38
- "postprocessors": [
39
- {
40
  "key": "FFmpegExtractAudio",
41
- "preferredcodec": "mp3",
42
- "preferredquality": "192", # set the preferred bitrate to 192kbps
43
- }
44
- ],
45
- "logger": MyLogger(external_logger),
46
- "outtmpl": "./downloads/audio/%(title)s.%(ext)s", # Set the output filename directly
47
- "progress_hooks": [my_hook],
48
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
 
51
- def download_video_audio(url, external_logger=lambda x: None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  retries = 0
53
- while retries < max_retries:
54
  try:
55
- ydl_opts = get_ydl_opts(external_logger)
56
- with youtube_dl.YoutubeDL(ydl_opts) as ydl:
57
- print("Going to download ", url)
 
 
 
 
 
58
  info = ydl.extract_info(url, download=False)
59
- filesize = info.get("filesize", 0)
60
- if filesize > MAX_FILE_SIZE:
61
- raise Exception(FILE_TOO_LARGE_MESSAGE)
 
 
 
 
 
 
 
 
 
62
  filename = ydl.prepare_filename(info)
63
- res = ydl.download([url])
64
- print("youtube-dl result :", res)
65
- mp3_filename = os.path.splitext(filename)[0] + '.mp3'
66
- print('mp3 file name - ', mp3_filename)
67
- return mp3_filename
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  except Exception as e:
69
  retries += 1
70
- print(
71
- f"An error occurred during downloading (Attempt {retries}/{max_retries}):",
72
- str(e),
73
- )
74
- if retries >= max_retries:
75
- raise e
76
- time.sleep(delay)
 
 
 
77
 
78
 
79
-
80
- def delete_download(path):
 
 
 
 
 
 
 
 
81
  try:
 
 
 
 
82
  if os.path.isfile(path):
83
  os.remove(path)
84
- print(f"File {path} has been deleted.")
85
  elif os.path.isdir(path):
86
  shutil.rmtree(path)
87
- print(f"Directory {path} and its contents have been deleted.")
88
  else:
89
- print(f"The path {path} is neither a file nor a directory.")
 
 
90
  except PermissionError:
91
- print(f"Permission denied: Unable to delete {path}.")
92
  except FileNotFoundError:
93
- print(f"File or directory not found: {path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  except Exception as e:
95
- print(f"An error occurred while trying to delete {path}: {str(e)}")
96
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import unicode_literals
2
+ import yt_dlp
3
  import os
4
  import time
 
5
  import shutil
6
+ import logging
7
+ import re
8
+ import tempfile
9
+ from pathlib import Path
10
+ from typing import Optional, Callable, Dict, Any, Union
11
+
12
# Configuration
# Upper bound on the (estimated) size of a download. This limit was lowered
# from the previous 100 MB to 40 MB.
MAX_FILE_SIZE = 40 * 1024 * 1024  # 40 MB
FILE_TOO_LARGE_MESSAGE = "The audio file exceeds the 40MB size limit. Please try a shorter video clip or select a lower quality option."
MAX_RETRIES = 3  # download attempts before giving up
RETRY_DELAY = 2  # seconds
DEFAULT_AUDIO_FORMAT = "mp3"
DEFAULT_AUDIO_QUALITY = "192"  # kbps
SUPPORTED_FORMATS = ["mp3", "m4a", "wav", "aac", "flac", "opus"]

# Setup logging
# NOTE: basicConfig() runs at import time and configures the root logger.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger("youtube_downloader")
27
 
 
 
 
 
28
 
29
class DownloadLogger:
    """Logger object handed to yt-dlp that can also forward progress lines.

    Every message goes to the module-level ``logger``. Debug messages that
    start with ``[download]`` and contain a ``%`` are additionally passed to
    the progress callback.
    """

    def __init__(self, progress_callback: Optional[Callable[[str], None]] = None):
        # Substitute a no-op so the methods never need a None check.
        if progress_callback:
            self.progress_callback = progress_callback
        else:
            self.progress_callback = lambda x: None

    def debug(self, msg: str) -> None:
        """Forward download progress lines to the callback, then log at DEBUG."""
        is_progress_line = msg.startswith('[download]') and '%' in msg
        if is_progress_line:
            self.progress_callback(msg)
        logger.debug(msg)

    def warning(self, msg: str) -> None:
        """Log ``msg`` at WARNING level."""
        logger.warning(msg)

    def error(self, msg: str) -> None:
        """Log ``msg`` at ERROR level."""
        logger.error(msg)
47
 
 
 
 
48
 
49
class DownloadError(Exception):
    """Error type this module raises when a download-related operation fails."""
52
 
 
 
53
 
54
# Host names accepted by validate_url(); sub-domains (www., m., ...) match too.
_SUPPORTED_HOSTS = (
    r'youtube\.com',
    r'youtu\.be',
    r'vimeo\.com',
    r'dailymotion\.com',
    r'twitch\.tv',
    r'soundcloud\.com',
    r'instagram\.com',
)

# Anchored on the host part so a platform name appearing in the path, query
# string or inside a longer domain does not count as a match.
_SUPPORTED_URL_RE = re.compile(
    r'^(?:https?://)?'          # optional scheme (scheme-less URLs stay accepted)
    r'(?:[a-z0-9-]+\.)*'        # optional sub-domain labels
    r'(?:' + '|'.join(_SUPPORTED_HOSTS) + r')'
    r'(?::\d+)?'                # optional port
    r'(?:[/?#]|$)',             # the host must end here
    re.IGNORECASE,
)


def validate_url(url: str) -> bool:
    """
    Validate if the URL is a supported video platform URL

    The check is made against the host of the URL, not the whole string, so
    ``https://evil.example/?next=youtube.com`` or ``https://notyoutube.com``
    are rejected.

    Args:
        url: URL to check; an ``http``/``https`` scheme is optional

    Returns:
        True if the host is one of the supported platforms (or a sub-domain
        of one), False otherwise, including for non-string input
    """
    if not isinstance(url, str):
        return False
    return _SUPPORTED_URL_RE.match(url.strip()) is not None
68
 
69
 
70
def ensure_download_directory(directory: str) -> str:
    """Create ``directory`` (and any missing parents) and return its absolute path."""
    os.makedirs(directory, exist_ok=True)
    absolute_path = Path(directory).absolute()
    return str(absolute_path)
75
 
76
 
77
def get_download_options(
    output_dir: str = "./downloads/audio",
    audio_format: str = DEFAULT_AUDIO_FORMAT,
    audio_quality: str = DEFAULT_AUDIO_QUALITY,
    progress_callback: Optional[Callable[[str], None]] = None
) -> Dict[str, Any]:
    """
    Get yt-dlp download options with specified parameters

    Creates the output directory as a side effect.

    Args:
        output_dir: Directory to save downloaded files
        audio_format: Audio format (mp3, m4a, wav, etc.); anything not in
            SUPPORTED_FORMATS falls back to DEFAULT_AUDIO_FORMAT
        audio_quality: Audio quality in kbps
        progress_callback: Function to call with progress updates

    Returns:
        Dictionary of yt-dlp options
    """
    if audio_format not in SUPPORTED_FORMATS:
        logger.warning(f"Unsupported format '{audio_format}', falling back to {DEFAULT_AUDIO_FORMAT}")
        audio_format = DEFAULT_AUDIO_FORMAT

    # Ensure download directory exists
    output_dir = ensure_download_directory(output_dir)

    # The output template uses %-style fields, so a literal '%' in the
    # directory path must be doubled or it is parsed as a template field.
    template_dir = output_dir.replace("%", "%%")

    return {
        "format": "bestaudio/best",
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": audio_format,
            "preferredquality": audio_quality,
        }],
        "logger": DownloadLogger(progress_callback),
        "outtmpl": f"{template_dir}/%(title)s.%(ext)s",
        "noplaylist": True,
        "quiet": False,
        "no_warnings": False,
        "progress_hooks": [lambda d: download_progress_hook(d, progress_callback)],
        "overwrites": True,
    }
117
+
118
+
119
def download_progress_hook(d: Dict[str, Any], callback: Optional[Callable[[str], None]] = None) -> None:
    """
    Report the state of a download to a callback

    Args:
        d: Status dictionary; ``d['status']`` selects the branch, and the
            ``_percent_str`` / ``_speed_str`` / ``_eta_str`` / ``filename``
            entries supply the message contents
        callback: Receives one human-readable message per handled event;
            omitted or None means messages are discarded
    """
    notify = callback if callback is not None else (lambda x: None)
    status = d['status']

    if status == 'downloading':
        percent = d.get('_percent_str', 'unknown progress')
        rate = d.get('_speed_str', 'unknown speed')
        remaining = d.get('_eta_str', 'unknown ETA')
        notify(f"Downloading: {percent} at {rate}, ETA: {remaining}")
        return

    if status == 'finished':
        full_path = d['filename']
        # The callback gets only the file name; the log keeps the full path.
        notify(f"Download complete: {os.path.basename(full_path)}")
        logger.info(f"Download finished: {full_path}")
140
 
141
 
142
def estimate_file_size(info: Dict[str, Any]) -> int:
    """
    Better estimate file size from video info

    Sources are tried in order: the exact ``filesize``, then
    ``filesize_approx``, then an estimate from bitrate and duration.

    Args:
        info: Video information dictionary

    Returns:
        Estimated file size in bytes. When nothing usable is available
        (including an empty or None ``info``) MAX_FILE_SIZE is returned.
    """
    if not info:
        return MAX_FILE_SIZE

    # Try different fields that might contain size information
    for key in ("filesize", "filesize_approx"):
        size = info.get(key)
        if size is not None:
            # The approximate size may be a float; the contract is an int.
            return int(size)

    # If we have duration and a bitrate, we can estimate
    duration = info.get("duration")
    bitrate = info.get("abr") or info.get("tbr")

    if duration and bitrate:
        # bitrate is in kbps (1000 bits/s): 1000 / 8 = 125 bytes per
        # second for each kbps.
        return int(bitrate * duration * 125)

    # Default to the limit itself if we can't determine a size
    return MAX_FILE_SIZE
171
+
172
+
173
def _locate_audio_file(base_filename: str, audio_format: str) -> Optional[str]:
    """Return the first existing ``<base_filename>.<ext>``, trying ``audio_format`` first."""
    for ext in [audio_format, *SUPPORTED_FORMATS]:
        candidate = f"{base_filename}.{ext}"
        if os.path.exists(candidate):
            return candidate
    return None


def download_video_audio(
    url: str,
    output_dir: str = "./downloads/audio",
    audio_format: str = DEFAULT_AUDIO_FORMAT,
    audio_quality: str = DEFAULT_AUDIO_QUALITY,
    progress_callback: Optional[Callable[[str], None]] = None
) -> Optional[str]:
    """
    Download audio from a video URL

    Makes up to MAX_RETRIES attempts, sleeping RETRY_DELAY seconds between
    attempts (five times longer after an HTTP 429 response).

    Args:
        url: URL of the video
        output_dir: Directory to save downloaded files
        audio_format: Audio format (mp3, m4a, wav, etc.)
        audio_quality: Audio quality in kbps
        progress_callback: Function to call with progress updates

    Returns:
        Path to the downloaded audio file. None is only returned when
        MAX_RETRIES is not positive, so no attempt is made.

    Raises:
        DownloadError: If the URL is not supported, if the estimated size
            exceeds MAX_FILE_SIZE (raised immediately, not retried), or if
            the download still fails after the last attempt
    """
    if not validate_url(url):
        error_msg = f"Invalid or unsupported URL: {url}"
        logger.error(error_msg)
        raise DownloadError(error_msg)

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            if progress_callback:
                progress_callback(f"Starting download (attempt {attempt}/{MAX_RETRIES})...")

            ydl_opts = get_download_options(output_dir, audio_format, audio_quality, progress_callback)
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                logger.info(f"Downloading audio from: {url}")

                # Extract info first without downloading
                info = ydl.extract_info(url, download=False)

                # Refuse oversized downloads before fetching anything
                estimated_size = estimate_file_size(info)
                if estimated_size > MAX_FILE_SIZE:
                    error_msg = f"{FILE_TOO_LARGE_MESSAGE} (Estimated: {estimated_size / 1024 / 1024:.1f}MB)"
                    logger.error(error_msg)
                    raise DownloadError(error_msg)

                # Now download
                ydl.download([url])

                # prepare_filename() gives the pre-conversion name; the
                # extracted audio has the same base name with a new extension.
                filename = ydl.prepare_filename(info)
                base_filename = os.path.splitext(filename)[0]
                located = _locate_audio_file(base_filename, audio_format)
                if located is not None:
                    return located

                # If we get here, something went wrong
                raise FileNotFoundError(f"Could not locate downloaded file for {url}")

        except DownloadError:
            # Raised by this function itself (size limit): the outcome is
            # deterministic, so retrying would only waste time.
            raise
        except yt_dlp.utils.DownloadError as e:
            error_msg = f"Download error (Attempt {attempt}/{MAX_RETRIES}): {str(e)}"
            logger.error(error_msg)
            if progress_callback:
                progress_callback(error_msg)

            # Check exhaustion first so a rate-limited final attempt raises
            # instead of sleeping and falling through to ``return None``.
            if attempt >= MAX_RETRIES:
                raise DownloadError(f"Failed to download after {MAX_RETRIES} attempts: {str(e)}") from e
            if "HTTP Error 429" in str(e):
                # Rate limiting - wait longer
                time.sleep(RETRY_DELAY * 5)
            else:
                time.sleep(RETRY_DELAY)
        except Exception as e:
            error_msg = f"Unexpected error (Attempt {attempt}/{MAX_RETRIES}): {str(e)}"
            logger.error(error_msg)
            if progress_callback:
                progress_callback(error_msg)

            if attempt >= MAX_RETRIES:
                raise DownloadError(f"Failed to download after {MAX_RETRIES} attempts: {str(e)}") from e
            time.sleep(RETRY_DELAY)

    return None
268
 
269
 
270
def delete_download(path: str) -> bool:
    """
    Remove a file, or a directory together with its contents

    Failures are logged rather than raised.

    Args:
        path: File or directory to remove

    Returns:
        True when something was deleted; False when the path is empty,
        missing, neither a file nor a directory, or deletion raised
    """
    try:
        if not (path and os.path.exists(path)):
            logger.warning(f"Path does not exist: {path}")
            return False

        if os.path.isfile(path):
            os.remove(path)
            logger.info(f"File deleted: {path}")
            return True

        if os.path.isdir(path):
            shutil.rmtree(path)
            logger.info(f"Directory deleted: {path}")
            return True

        logger.warning(f"Path is neither a file nor a directory: {path}")
        return False
    except PermissionError:
        logger.error(f"Permission denied: Unable to delete {path}")
    except FileNotFoundError:
        # The path can vanish between the existence check and the removal.
        logger.error(f"File or directory not found: {path}")
    except Exception as e:
        logger.error(f"Error deleting {path}: {str(e)}")
    return False
302
+
303
+
304
def trim_audio_file(input_file: str, max_duration_seconds: int = 600) -> str:
    """
    Cut an audio file down to at most ``max_duration_seconds``

    The result is written next to the input as ``<name>_trimmed<ext>``,
    overwriting any existing file of that name. The audio stream is copied,
    not re-encoded.

    Args:
        input_file: Path to input audio file
        max_duration_seconds: Maximum duration in seconds

    Returns:
        Path to the trimmed file, or ``input_file`` unchanged if anything
        fails (including the ``ffmpeg`` module being unavailable)
    """
    try:
        # Imported lazily so the rest of the module works without it.
        # NOTE(review): presumably the ffmpeg-python package - confirm.
        import ffmpeg

        stem, suffix = os.path.splitext(os.path.basename(input_file))
        trimmed_path = os.path.join(os.path.dirname(input_file), f"{stem}_trimmed{suffix}")

        stream = ffmpeg.input(input_file)
        stream = stream.output(trimmed_path, t=str(max_duration_seconds), acodec='copy')
        stream.run(quiet=True, overwrite_output=True)

        logger.info(f"Trimmed {input_file} to {max_duration_seconds} seconds")
        return trimmed_path
    except Exception as e:
        logger.error(f"Error trimming audio: {str(e)}")
        return input_file  # Return original if trimming fails
333
+
334
+
335
def get_video_info(url: str) -> Dict[str, Any]:
    """
    Get information about a video without downloading

    Args:
        url: URL of the video

    Returns:
        Dictionary of video information as returned by yt-dlp's
        ``extract_info(url, download=False)``

    Raises:
        DownloadError: If the information cannot be retrieved; the original
            exception is attached as ``__cause__``
    """
    try:
        with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
            return ydl.extract_info(url, download=False)
    except Exception as e:
        logger.error(f"Error getting video info: {str(e)}")
        # Chain the cause so the underlying traceback is kept.
        raise DownloadError(f"Could not retrieve video information: {str(e)}") from e
352
+
353
+
354
# Example usage: download one track, report where it landed, then remove it.
if __name__ == "__main__":
    def print_progress(msg):
        """Echo each progress message to stdout."""
        print(msg)

    # Example: Download audio from a YouTube URL
    url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
    try:
        audio_path = download_video_audio(
            url,
            output_dir="./downloads",
            audio_format="mp3",
            progress_callback=print_progress,
        )
    except DownloadError as e:
        print(f"Download failed: {e}")
    else:
        print(f"Downloaded to: {audio_path}")

        # Clean up
        if audio_path:
            delete_download(audio_path)