File size: 916 Bytes
12f2e48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import argparse
import glob
import os
import shutil

import tqdm
from huggingface_hub import snapshot_download


def main(output_dir):
    """Download the ``biglab/webui-all`` dataset and merge its split zip parts.

    The dataset snapshot is fetched with ``snapshot_download``. Every file in
    the snapshot root matching ``*.zip.*`` is concatenated, in sorted filename
    order, into ``<output_dir>/webui-merged.zip``. If that file already exists
    the merge is skipped. The merged archive's path is printed at the end.

    Args:
        output_dir: Directory that receives the merged archive; it is created
            if it does not exist.

    Raises:
        FileNotFoundError: If the snapshot contains no ``*.zip.*`` parts, so
            there is nothing to merge.
    """
    os.makedirs(output_dir, exist_ok=True)
    dl_path = snapshot_download(repo_id="biglab/webui-all", repo_type="dataset")

    combined_zip_path = os.path.join(output_dir, "webui-merged.zip")
    if not os.path.exists(combined_zip_path):
        part_paths = sorted(glob.glob(os.path.join(dl_path, "*.zip.*")))
        if not part_paths:
            # Without this guard an empty "merged" zip would be written and
            # then treated as a valid cached result on every later run.
            raise FileNotFoundError(
                f"No '*.zip.*' parts found in downloaded snapshot: {dl_path}"
            )
        print("Merging...", len(part_paths), "parts")

        # Write to a temporary name and rename only after every part has been
        # appended. An interrupted run therefore never leaves a truncated file
        # at the final path, which the existence check above would wrongly
        # accept as complete. A leftover partial file is simply overwritten.
        partial_path = combined_zip_path + ".partial"
        with open(partial_path, "wb") as merged_fp:
            for fn in tqdm.tqdm(part_paths):
                with open(fn, "rb") as part_fp:
                    # Stream in fixed-size chunks instead of reading a whole
                    # (potentially multi-GB) part into memory at once.
                    shutil.copyfileobj(part_fp, merged_fp, length=16 * 1024 * 1024)
        os.replace(partial_path, combined_zip_path)
    print(combined_zip_path)


if __name__ == "__main__":
    # Script entry point: the single CLI option selects the directory that
    # main() is called with.
    cli = argparse.ArgumentParser()
    cli.add_argument("--output_dir", default="/data/webui", type=str)
    options = cli.parse_args()
    main(options.output_dir)