File size: 880 Bytes
0e87828 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
#!/usr/bin/env python3
import os
import json
# Directory whose entries are scanned for vocabulary files.
folder_path = "./vocabs"
# Collects one token-to-id mapping per vocabulary file, keyed by the part of
# the file name that precedes its first ".".
all_dict = dict()
def parse_file(filename):
    """Build a token-to-id mapping from a whitespace-delimited vocabulary file.

    The first whitespace-separated field of every non-blank line is taken as
    the token; any further fields on that line are ignored. Four special
    tokens are pre-seeded with ids 0-3, and tokens read from the file are
    numbered consecutively from 4 in the order they appear.

    Args:
        filename: Path of the vocabulary file to read. The file is decoded
            as UTF-8.

    Returns:
        A dict mapping each token string to its integer id.
    """
    # Ids 0-3 are fixed for the special tokens; file tokens start at 4.
    dictionary = {
        "</s>": 2,
        "<pad>": 0,
        "<s>": 1,
        "<unk>": 3,
    }
    value = 4
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(filename, 'r', encoding="utf-8") as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Blank or whitespace-only line: no token, no id consumed.
                continue
            # NOTE(review): a token that repeats, or that equals one of the
            # special tokens above, has its id overwritten by the later
            # occurrence while the counter still advances -- confirm the
            # input files never contain such tokens.
            dictionary[fields[0]] = value
            value += 1
    return dictionary
# Parse every regular file found in the vocabulary folder. The text before the
# first "." of the file name becomes that file's key in the combined mapping.
for filename in os.listdir(folder_path):
    filepath = os.path.join(folder_path, filename)
    lang = filename.partition(".")[0]
    if not os.path.isfile(filepath):
        # Sub-directories and other non-file entries are ignored.
        continue
    all_dict[lang] = parse_file(filepath)

# Destination of the combined JSON document; edit to write somewhere else.
output_path = "vocab_1.json"
with open(output_path, 'w') as output_file:
    # Sorted keys and a 4-space indent are passed to the JSON encoder.
    json.dump(
        all_dict,
        output_file,
        indent=4,
        sort_keys=True,
    )
|