Spaces:
Build error
Build error
import os | |
import glob | |
import tarfile | |
def split_lome_files(lome_folder, output_folder):
    """Bundle ``*.comm.*`` files into tar archives keyed by document-id prefix.

    Every file below ``lome_folder`` whose name matches ``*.comm.*`` is added
    to ``<output_folder>/block_<key>.tar``. The key is the first two
    characters of the document id, where the document id is the second
    ``_``-separated field of the file name's first ``.``-separated part
    (e.g. ``lome_12345.comm.json`` -> id ``12345`` -> key ``12``).

    Archives are opened in append mode, so running the function again adds
    the files to existing archives rather than replacing them.

    Args:
        lome_folder: Root directory that is searched (recursively) for
            ``*.comm.*`` files.
        output_folder: Directory receiving the ``block_<key>.tar`` archives;
            it is created when missing.

    Raises:
        IndexError: If a matching file name has no ``_`` before its first
            ``.`` (no document id can be extracted).
    """
    # "**" only spans nested directories when recursive=True is passed;
    # without it glob treats "**" like a single "*".
    pattern = f"{lome_folder}/**/*.comm.*"

    # Group first so that each archive is opened once instead of once per
    # file; sorting keeps the member order inside the archives reproducible.
    files_by_key = {}
    for file in sorted(glob.glob(pattern, recursive=True)):
        doc_id = os.path.basename(file).split(".")[0].split("_")[1]
        doc_key = doc_id[:2]
        print(file, "->", doc_key)
        files_by_key.setdefault(doc_key, []).append(file)

    if not files_by_key:
        return

    # tarfile.open(..., "a") creates a missing archive but not its directory.
    os.makedirs(output_folder, exist_ok=True)
    for doc_key, files in files_by_key.items():
        with tarfile.open(f"{output_folder}/block_{doc_key}.tar", "a") as tar_f:
            for file in files:
                tar_f.add(file)
if __name__ == "__main__":
    # Script entry point: split one LOME output folder into block archives.
    # Alternative datasets that can be processed instead (kept for reference):
    #   split_lome_files("output/migration/lome/multilabel/lome_0shot/pavia/", "output/migration/lome/lome_0shot/multilabel_pavia_blocks")
    #   split_lome_files("output/femicides/lome/lome_0shot/multilabel/rai/", "output/femicides/lome/lome_0shot/multilabel_rai_blocks")
    #   split_lome_files("output/femicides/lome/lome_0shot/multilabel/olv/", "output/femicides/lome/lome_0shot/multilabel_olv_blocks")
    #   split_lome_files("output/crashes/lome/lome_0shot/multilabel/thecrashes/", "output/crashes/lome/lome_0shot/multilabel_thecrashes_blocks")
    split_lome_files(
        lome_folder="output/femicides/lome/lome_0shot/multilabel/rai_ALL/",
        output_folder="output/femicides/lome/lome_0shot/multilabel_rai_ALL_blocks",
    )