Spaces:
Runtime error
Runtime error
import os | |
def _write_vocab(path, special_tokens, labels):
    """Write special tokens followed by labels to *path*, one entry per line.

    Labels equal to a special token are dropped so that no entry appears
    twice in the resulting vocabulary file.
    """
    with open(path, 'w', encoding='utf-8') as f_w:
        for token in special_tokens:
            f_w.write(token + '\n')
        for label in labels:
            if label not in special_tokens:
                f_w.write(label + '\n')


def vocab_process(data_dir):
    """Build the intent and slot label vocabulary files for one dataset.

    Reads ``<data_dir>/train/label`` (one intent label per line) and
    ``<data_dir>/train/seq.out`` (whitespace-separated slot labels per
    line), then writes ``<data_dir>/intent_label.txt`` and
    ``<data_dir>/slot_label.txt`` with one label per line, special tokens
    first.

    Args:
        data_dir: Dataset directory containing a ``train`` sub-directory.

    Raises:
        OSError: If an input file cannot be read or an output file cannot
            be written.
    """
    slot_label_vocab = 'slot_label.txt'
    intent_label_vocab = 'intent_label.txt'
    train_dir = os.path.join(data_dir, 'train')

    # intent
    # The input is read completely before the output is opened, so a read
    # failure never leaves a truncated vocabulary file behind.
    with open(os.path.join(train_dir, 'label'), 'r', encoding='utf-8') as f_r:
        intent_vocab = {line.strip() for line in f_r}
    # Blank lines (e.g. a trailing newline) are not labels.
    intent_vocab.discard('')
    _write_vocab(
        os.path.join(data_dir, intent_label_vocab),
        ["UNK"],
        sorted(intent_vocab),
    )

    # slot
    with open(os.path.join(train_dir, 'seq.out'), 'r', encoding='utf-8') as f_r:
        # str.split() with no arguments already ignores blank lines and
        # surrounding whitespace.
        slot_vocab = {slot for line in f_r for slot in line.split()}
    # Order by the text after the first two characters, then by those two
    # characters, so labels that differ only in a two-character prefix
    # (e.g. "B-x" / "I-x") end up next to each other.
    ordered_slots = sorted(slot_vocab, key=lambda x: (x[2:], x[:2]))
    _write_vocab(
        os.path.join(data_dir, slot_label_vocab),
        ["PAD", "UNK"],
        ordered_slots,
    )
if __name__ == "__main__":
    # Regenerate the label vocabulary files for each dataset directory.
    for dataset_dir in ('atis', 'snips'):
        vocab_process(dataset_dir)