File size: 1,145 Bytes
c4683a3
 
 
 
 
 
3988329
 
c4683a3
3988329
 
 
 
 
 
 
 
 
 
c4683a3
3988329
c4683a3
 
 
 
 
 
3988329
c4683a3
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import multiprocessing
import os
import tarfile
import time
from functools import partial

import wget

save_dir = "/workspace/seungheon/dataset"
os.makedirs(save_dir, exist_ok=True)

urls = []
db_config = {"fma": 34, "mtg_jamendo": 134, "medleydb": 100, "moisesdb": 8, "musicnet": 21}
for db_name, num_files in db_config.items():
    for i in range(num_files):
        urls.append(f"https://huggingface.co/datasets/seungheondoh/cmd-audio-dump/resolve/main/{db_name}{i}.tar.gz")
def download_and_unzip(url):
    # Download file
    filename = wget.download(url)
    # Unzip file
    with tarfile.open(filename, 'r:gz') as tar:
        tar.extractall(path=save_dir)
    
if __name__ == "__main__":    
    os.makedirs(save_dir, exist_ok=True)
    # Start timing
    start_time = time.time()
    num_processes = min(multiprocessing.cpu_count(), len(urls))
    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.map(download_and_unzip, urls)
    # Calculate and display total time
    end_time = time.time()
    elapsed = end_time - start_time
    print(f"\nTotal download time: {int(elapsed // 60)} minutes and {int(elapsed % 60)} seconds")