Spaces:
Runtime error
Runtime error
import torch | |
class IntCharTokenizer:
    """Character-level tokenizer mapping each distinct character to an integer id.

    The vocabulary is the set of unique characters found in the text passed
    to the constructor, sorted in ascending order. A character's id is its
    index in that sorted list.

    Attributes:
        chars: Sorted list of the unique characters in the vocabulary.
        vocab_size: Number of entries in ``chars``.
        int_to_char: Dict mapping id -> character.
        char_to_int: Dict mapping character -> id.
    """

    def __init__(self, text):
        """Build the vocabulary and both lookup tables from ``text``.

        Args:
            text: String whose distinct characters define the vocabulary.
        """
        self.chars, self.vocab_size = self._get_uniq_chars(text)
        self.int_to_char = dict(enumerate(self.chars))
        self.char_to_int = {c: i for i, c in enumerate(self.chars)}

    def _get_uniq_chars(self, text):
        """Return ``(chars, count)`` for the sorted unique characters of ``text``."""
        # sorted() accepts any iterable, so the set needs no intermediate list.
        chars = sorted(set(text))
        return chars, len(chars)

    def encode(self, text):
        """Convert ``text`` into a list of integer ids, one per character.

        Raises:
            KeyError: If ``text`` contains a character not in the vocabulary.
        """
        return [self.char_to_int[c] for c in text]

    def decode(self, tokens):
        """Convert an iterable of integer ids back into a string.

        Raises:
            KeyError: If ``tokens`` contains an id not in the vocabulary.
        """
        return ''.join(self.int_to_char[i] for i in tokens)