Spaces:
Build error
Build error
File size: 1,101 Bytes
b11ac48 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
import os
import glob
import tarfile
def split_lome_files(lome_folder: str, output_folder: str) -> None:
    """Distribute ``*.comm.*`` files into tar archives keyed by document id prefix.

    For every matched file, the basename up to its first ``.`` is split on
    ``_``; the second piece is taken as the document id, and the first two
    characters of that id form the block key. The file is then appended to
    ``{output_folder}/block_{key}.tar``.

    Archives are opened in append mode, so running the function again adds
    to archives that already exist instead of replacing them.

    Args:
        lome_folder: Directory whose sub-directories hold the ``*.comm.*``
            files.
        output_folder: Directory that receives the ``block_<key>.tar``
            archives. It is created when missing (only if there is at
            least one file to archive).

    Raises:
        IndexError: If a matched file's name has no ``_`` before its first
            ``.``, so no document id can be extracted.
    """
    # NOTE: glob is called without recursive=True, so "**" behaves like "*"
    # and matches exactly one directory level below lome_folder.
    files_by_key = {}
    for file in glob.glob(f"{lome_folder}/**/*.comm.*"):
        doc_id = os.path.basename(file).split(".")[0].split("_")[1]
        doc_key = doc_id[:2]
        print(file, "->", doc_key)
        files_by_key.setdefault(doc_key, []).append(file)

    if not files_by_key:
        return

    # tarfile.open does not create parent directories on its own.
    os.makedirs(output_folder, exist_ok=True)

    # Open each archive once per key rather than once per file: every
    # append-mode open has to locate the end of the existing archive first.
    for doc_key, files in files_by_key.items():
        with tarfile.open(f"{output_folder}/block_{doc_key}.tar", "a") as tar_f:
            for file in files:
                tar_f.add(file)
if __name__ == "__main__":
    # Script entry point: archive one dataset's output files into blocks.
    # Alternative runs are kept below for reference; exactly one is active.
    #
    # split_lome_files("output/migration/lome/multilabel/lome_0shot/pavia/", "output/migration/lome/lome_0shot/multilabel_pavia_blocks")
    # split_lome_files("output/femicides/lome/lome_0shot/multilabel/rai/", "output/femicides/lome/lome_0shot/multilabel_rai_blocks")
    # split_lome_files("output/femicides/lome/lome_0shot/multilabel/olv/", "output/femicides/lome/lome_0shot/multilabel_olv_blocks")
    # split_lome_files("output/crashes/lome/lome_0shot/multilabel/thecrashes/", "output/crashes/lome/lome_0shot/multilabel_thecrashes_blocks")
    split_lome_files(
        lome_folder="output/femicides/lome/lome_0shot/multilabel/rai_ALL/",
        output_folder="output/femicides/lome/lome_0shot/multilabel_rai_ALL_blocks",
    )