import json
import os

import pytube
from pytube.exceptions import AgeRestrictedError
from tqdm import tqdm

from src.config.configs import ProjectPaths


def read_youtube_data():
    """Load the track list from ``<DATA_DIR>/json/youtube_data.json``.

    Returns:
        The parsed JSON content of the file.
    """
    input_data = ProjectPaths.DATA_DIR.joinpath("json", "youtube_data.json")
    with open(input_data, "r", encoding="utf-8") as rd:
        return json.load(rd)


def download_mp3(link, download_path, track_full_name):
    """Download the first audio-only stream of a YouTube video.

    The stream is downloaded into ``download_path`` and then moved to
    ``<DATA_DIR>/audio/<track_full_name>.wav``.

    NOTE(review): the file is only renamed to ``.wav``; no transcoding
    happens here, so the container is whatever pytube delivered. Confirm
    that downstream consumers can cope with that.

    Args:
        link: URL of the YouTube video.
        download_path: Directory (path object providing ``joinpath``) that
            pytube downloads the raw stream into.
        track_full_name: File name, without extension, for the stored track.

    Returns:
        The path of the stored file on success, or ``""`` when the video is
        age restricted or offers no audio-only stream.
    """
    data_dir = ProjectPaths.DATA_DIR.joinpath("audio")
    new_file = track_full_name + '.wav'
    try:
        mp3 = (
            pytube.YouTube(link, use_oauth=True, allow_oauth_cache=True)
            .streams.filter(only_audio=True)
            .first()
        )
        if mp3 is None:
            # No audio-only stream available: report failure the same way
            # as the age-restricted case instead of raising AttributeError.
            return ""
        # Download where the rename below expects to find the file; the
        # original always downloaded into ``data_dir`` and ignored
        # ``download_path``.
        mp3.download(download_path)
    except AgeRestrictedError:
        return ""

    target = data_dir.joinpath(new_file)
    os.rename(download_path.joinpath(mp3.default_filename), target)
    return target


def start_download_process():
    """Download every track from the input list and write the final metadata.

    Each input entry is copied and extended with a ``"file_path"`` key, and
    the resulting list is written to ``<DATA_DIR>/json/final_track_data.json``.
    Tracks whose ``.wav`` file already exists in ``<DATA_DIR>/audio`` are not
    downloaded again but are still listed with their existing path, so a
    resumed run does not drop them from the output. Failed downloads are
    recorded with an empty ``"file_path"``.
    """
    input_data = read_youtube_data()
    audio_dir = ProjectPaths.DATA_DIR.joinpath("audio")
    # The directory may not exist on a first run; os.listdir would raise.
    os.makedirs(audio_dir, exist_ok=True)
    done_pieces = set(os.listdir(audio_dir))

    reformatted_input_data = []
    for i in tqdm(input_data):
        link = i["link"]
        # "/" would be read as a path separator in the file name.
        full_name = f'{i["artist_name"]} - {i["track_name"]}'.replace("/", "_")
        file_name = full_name + ".wav"
        if file_name in done_pieces:
            path = audio_dir.joinpath(file_name)
        else:
            path = download_mp3(link, audio_dir, full_name)
        new_data = i.copy()
        new_data.update({"file_path": str(path)})
        reformatted_input_data.append(new_data)

    with open(ProjectPaths.DATA_DIR.joinpath("json", "final_track_data.json"), "w") as wr:
        json.dump(reformatted_input_data, wr, indent=4)