"""Count how often each annotation label occurs in a JSONL dataset.

Each input line is a JSON object with a ``"doc_annotations"`` list of
three-element entries whose third element is the label. Labels equal to
``"--NME--"`` are ignored. The result is written as ``label<TAB>count``
lines, most frequent label first.
"""
from collections import Counter
import json

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic; keep the script usable without it.
    def tqdm(iterable, *args, **kwargs):
        """Fallback used when tqdm is not installed: return *iterable* as is."""
        return iterable


# Sentinel label excluded from the counts.
# NOTE(review): presumably marks mentions with no linked entity -- confirm.
NME_LABEL = "--NME--"
INPUT_PATH = "/media/data/EL/blink/train.alby-format.jsonl"
OUTPUT_PATH = "frequency_blink.txt"


def count_labels(lines, ignored_label=NME_LABEL):
    """Return a ``Counter`` of labels found in an iterable of JSONL lines.

    Args:
        lines: Iterable of strings, each a JSON object holding a
            ``"doc_annotations"`` list of three-element entries.
        ignored_label: Label value that is not counted.

    Returns:
        A ``collections.Counter`` mapping each label to its occurrence count.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a sample has no ``"doc_annotations"`` key.
        ValueError: If an annotation does not have exactly three elements.
    """
    counter = Counter()
    for line in lines:
        # Blank lines (e.g. a trailing newline at end of file) are not samples.
        if not line.strip():
            continue
        sample = json.loads(line)
        # Only the label is used; the three-name unpack still enforces the
        # expected annotation shape.
        for _start, _end, label in sample["doc_annotations"]:
            if label != ignored_label:
                counter[label] += 1
    return counter


def write_frequencies(counter, f_out):
    """Write ``label<TAB>count`` lines to *f_out*, most common label first.

    Args:
        counter: ``collections.Counter`` of label occurrences.
        f_out: Writable text stream.
    """
    f_out.writelines(
        f"{label}\t{count}\n" for label, count in counter.most_common()
    )


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Count labels in *input_path* and write the table to *output_path*."""
    # JSON text is UTF-8; be explicit so the result does not depend on the
    # locale's default encoding.
    with open(input_path, encoding="utf-8") as f_in:
        counter = count_labels(tqdm(f_in))

    with open(output_path, "w", encoding="utf-8") as f_out:
        write_frequencies(counter, f_out)


if __name__ == "__main__":
    main()