| import os |
| import traceback |
|
|
| import requests |
| import yt_dlp |
| from bs4 import BeautifulSoup |
| from download_video import downlaod_video_from_url |
| from pytube import YouTube |
|
|
|
|
def download_youtube_video(url, download_path="../data/"):
    """
    Download a YouTube video as a progressive MP4 using pytube.

    The stream chosen is the first one returned after filtering for
    progressive MP4 streams and ordering them by resolution, descending.
    Any failure is reported on stdout rather than raised.

    Args:
        url: YouTube video URL
        download_path: Directory passed to pytube as the output path
    """
    try:
        progressive_mp4 = YouTube(url).streams.filter(
            progressive=True, file_extension="mp4"
        )
        best_stream = progressive_mp4.order_by("resolution").desc().first()

        # Guard clause: nothing matched the filter, so there is nothing to save.
        if not best_stream:
            print("No suitable video stream found")
            return

        best_stream.download(output_path=download_path)
        print(f"Video downloaded successfully to {download_path}")
    except Exception as err:
        print(f"Error in downloading YouTube video: {err}")
|
|
|
|
def download_audio(url, download_path="../data/"):
    """
    Download audio from YouTube and convert to MP3 format.

    The best available audio stream is fetched with yt-dlp and handed to its
    ``FFmpegExtractAudio`` post-processor, which re-encodes it as a 192 kbps
    MP3 named after the video title. Any failure is reported on stdout
    rather than raised.

    Args:
        url: YouTube video URL
        download_path: Path where the MP3 file will be saved
    """
    # A literal "%" in the directory would otherwise be parsed as part of the
    # output template; yt-dlp expects it to be written as "%%".
    template_dir = download_path.replace("%", "%%")

    ydl_opts = {
        # os.path.join keeps the template valid whether or not download_path
        # ends with a path separator.
        "outtmpl": os.path.join(template_dir, "%(title)s.%(ext)s"),
        "format": "bestaudio/best",
        # The YoutubeDL Python API uses its own option names, not the CLI
        # flag spellings; unknown keys are silently ignored.
        "geo_bypass": True,
        "noplaylist": True,
        # API equivalent of the --force-ipv4 CLI flag.
        "source_address": "0.0.0.0",
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192",
            }
        ],
        # Custom request headers go under "http_headers" in the API.
        "http_headers": {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        },
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        print(f"Audio downloaded and converted to MP3 successfully at {download_path}")
    except Exception as e:
        # Deliberately best-effort: report the failure and keep the caller running.
        print(f"An error occurred: {e}")
|
|
|
|
| |
def download_file(url, download_path="../data/", timeout=30):
    """
    Stream the resource at ``url`` into a file inside ``download_path``.

    The file name is the last segment of the URL path (query string and
    fragment excluded). If the path has no usable segment, the host name is
    used, and "download" as a last resort. The target directory is created
    when missing. Any failure is reported on stdout rather than raised.

    Args:
        url: URL of the file to download
        download_path: Directory where the file will be saved
        timeout: Seconds to wait for the server before giving up
    """
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(url)
        # rstrip("/") lets "https://host/dir/" resolve to "dir" instead of "".
        name = os.path.basename(parts.path.rstrip("/")) or parts.netloc or "download"
        filename = os.path.join(download_path, name)

        # The context manager releases the connection even if writing fails;
        # the timeout stops a stalled server from hanging the call forever.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            # Create the directory only once the request has succeeded.
            if download_path:
                os.makedirs(download_path, exist_ok=True)

            with open(filename, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)
        print(f"File downloaded successfully to {filename}")
    except Exception as e:
        # Deliberately best-effort: report the failure and keep the caller running.
        print(f"An error occurred: {e}")
|
|
|
|
| |
def download_text_or_webpage(url, download_path="../data/", is_text=False, timeout=30):
    """
    Fetch ``url`` and save its body as a ``.txt`` or ``.html`` file.

    With ``is_text`` true the response text is written unchanged to a
    ``.txt`` file. Otherwise it is parsed with BeautifulSoup and the
    prettified markup is written to a ``.html`` file. Both files are written
    as UTF-8. The file name is the last segment of the URL path (query
    string and fragment excluded), falling back to the host name, then
    "index". The target directory is created when missing. Any failure is
    reported on stdout rather than raised.

    Args:
        url: URL of the text resource or web page
        download_path: Directory where the file will be saved
        is_text: Save the raw text as ``.txt`` instead of prettified HTML
        timeout: Seconds to wait for the server before giving up
    """
    from urllib.parse import urlsplit

    try:
        # The timeout stops a stalled server from hanging the call forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        parts = urlsplit(url)
        # rstrip("/") lets "https://host/docs/" resolve to "docs" instead of "".
        stem = os.path.basename(parts.path.rstrip("/")) or parts.netloc or "index"

        if download_path:
            os.makedirs(download_path, exist_ok=True)

        if is_text:
            filename = os.path.join(download_path, stem + ".txt")
            # Explicit UTF-8 so the result does not depend on the platform
            # default encoding, matching the HTML branch.
            with open(filename, "w", encoding="utf-8") as file:
                file.write(response.text)
            print(f"Text file downloaded successfully to {filename}")
        else:
            soup = BeautifulSoup(response.text, "html.parser")
            filename = os.path.join(download_path, stem + ".html")
            with open(filename, "w", encoding="utf-8") as file:
                file.write(soup.prettify())
            print(f"Webpage downloaded successfully to {filename}")
    except Exception as e:
        # Deliberately best-effort: report the failure and keep the caller running.
        print(f"An error occurred: {e}")
|
|
|
|
def main():
    """Demo entry point: download the audio of one sample YouTube video."""
    download_audio("https://www.youtube.com/watch?v=8OHYynw7Yh4")


# Run the demo only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
|
|