File size: 564 Bytes
8197b11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
from collections import Counter
import json

from tqdm import tqdm

def count_labels(lines, ignore_label="--NME--"):
    """Count how often each annotation label occurs in JSON-lines input.

    Args:
        lines: Iterable of strings, each holding one JSON object with a
            ``"doc_annotations"`` list of 3-item entries whose last item
            is the label.
        ignore_label: Label value that is excluded from the counts.

    Returns:
        A ``Counter`` mapping each remaining label to its number of
        occurrences.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a sample has no ``"doc_annotations"`` key.
    """
    counter = Counter()
    for line in lines:
        # Tolerate blank lines (e.g. a trailing newline at the end of the
        # file), which json.loads would otherwise reject.
        if not line.strip():
            continue
        sample = json.loads(line)
        # The first two items of each annotation are not needed here
        # (presumably span start/end -- confirm against the data format).
        counter.update(
            label
            for _start, _end, label in sample["doc_annotations"]
            if label != ignore_label
        )
    return counter


def write_frequencies(counter, f_out):
    """Write one ``label<TAB>count`` line per label, most frequent first.

    Args:
        counter: ``Counter`` of label occurrences.
        f_out: Writable text file object that receives the lines.
    """
    f_out.writelines(
        f"{label}\t{count}\n" for label, count in counter.most_common()
    )


if __name__ == "__main__":
    # Be explicit about UTF-8 so reading the JSON lines and writing the
    # labels does not depend on the platform's default locale encoding.
    with open(
        "/media/data/EL/blink/train.alby-format.jsonl", encoding="utf-8"
    ) as f_in:
        counter = count_labels(tqdm(f_in))

    with open("frequency_blink.txt", "w", encoding="utf-8") as f_out:
        write_frequencies(counter, f_out)