# -*- coding: utf-8 -*-
# file: compress_datasets.py
# time: 19:13 2023/2/5
# author: yangheng <hy345@exeter.ac.uk>
# github: https://github.com/yangheng95
# huggingface: https://huggingface.co/yangheng
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
# Copyright (C) 2021. All Rights Reserved.
import os
import shutil
import zipfile
from pathlib import Path
import findfile
from pyabsa.utils.pyabsa_utils import fprint


def cascade_zip_datasets():
    """Zip every dataset directory under ``integrated_datasets`` into its own archive."""
    # iterate over all task folders inside integrated_datasets and zip each dataset
    datasets = findfile.find_dirs("integrated_datasets", "datasets", recursive=1)
    for dataset in datasets:
        if dataset in [
            "integrated_datasets",
            "integrated_datasets.zip",
        ]:
            continue
        task_name = Path(dataset).name
        for d in findfile.find_dirs(dataset, ""):
            fprint(f"compressing dataset: {d}")
            dataset_name = Path(d).name
            # archive name is lowercased: integrated_datasets/<task>.<dataset>.zip
            with zipfile.ZipFile(
                f"integrated_datasets/{task_name}.{dataset_name}.zip".lower(),
                "w",
                zipfile.ZIP_DEFLATED,
            ) as zip_file:
                for root, _, files in os.walk(d):
                    for file in files:
                        path = os.path.join(root, file)
                        # read each file at its real path and lowercase only the
                        # name stored inside the archive, so zipping does not
                        # fail on case-sensitive filesystems
                        zip_file.write(path, arcname=path.lower())
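

# A minimal verification sketch (not part of the original script): each archive
# produced above can be integrity-checked with zipfile's built-in CRC test.
# The helper name verify_archives() is hypothetical, added for illustration only.
def verify_archives():
    import glob

    for archive in glob.glob("integrated_datasets/*.zip"):
        with zipfile.ZipFile(archive) as zf:
            # testzip() returns the name of the first corrupt member,
            # or None when every member passes its CRC check
            bad_member = zf.testzip()
            if bad_member is not None:
                fprint(f"corrupt member in {archive}: {bad_member}")
            else:
                fprint(f"verified archive: {archive}")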


if __name__ == "__main__":
    # if os.path.exists('integrated_datasets'):
    #     try:
    #         shutil.rmtree('integrated_datasets')
    #     except OSError:
    #         os.system('rm -rf integrated_datasets')
    #
    # from pyabsa import download_all_available_datasets
    #
    # download_all_available_datasets()
    cascade_zip_datasets()