# Digit-sequence tokenizer: vocabulary, encode/decode helpers, and
# decoder input/target preparation for seq2seq training.
# Vocabulary: digit characters "0".."9" map to ids 0..9, plus two
# control tokens appended after the digits.
token_to_id = dict((str(digit), digit) for digit in range(10))
token_to_id.update({"<start>": 10, "<finish>": 11})

# Reverse lookup table: integer id -> token string.
id_to_token = {idx: tok for tok, idx in token_to_id.items()}

# Convenience aliases for the control-token ids.
START = token_to_id["<start>"]
FINISH = token_to_id["<finish>"]

# No padding / pad token is needed: the input is a fixed-length
# sequence of 5 labels.
def encode(label_list):
    """Map a sequence of digit labels to their integer token ids."""
    ids = []
    for digit in label_list:
        ids.append(token_to_id[str(digit)])
    return ids
def decode(token_ids):
    """Map a sequence of integer token ids back to their token strings."""
    tokens = []
    for token_id in token_ids:
        tokens.append(id_to_token[token_id])
    return tokens
def prepare_decoder_labels(labels):
    """
    Prepare decoder input and target sequences for training.
    Input labels: [7, 7, 6, 9]
    Output:
    decoder_input = [<start>, 7, 7, 6, 9]
    decoder_target = [7, 7, 6, 9, <finish>]
    """
    ids = encode(labels)
    # Teacher forcing: input is shifted right with <start>, target is
    # the same ids followed by <finish>.
    decoder_input = [START, *ids]
    decoder_target = [*ids, FINISH]
    return decoder_input, decoder_target