LCM / utils /io_ /writer.py
shivrajanand's picture
Upload folder using huggingface_hub
e8f4897 verified
class Writer(object):
def __init__(self, alphabets):
self.__source_file = None
self.alphabets = alphabets
def start(self, file_path):
self.__source_file = open(file_path, 'w')
def close(self):
self.__source_file.close()
def write(self, word, pos, ner, head, arc, lengths, auto_label=None, symbolic_root=False, symbolic_end=False):
batch_size, _ = word.shape
start = 1 if symbolic_root else 0
end = 1 if symbolic_end else 0
for i in range(batch_size):
for j in range(start, lengths[i] - end):
w = self.alphabets['word_alphabet'].get_instance(word[i, j])
p = self.alphabets['pos_alphabet'].get_instance(pos[i, j])
n = self.alphabets['ner_alphabet'].get_instance(ner[i, j])
t = self.alphabets['arc_alphabet'].get_instance(arc[i, j])
h = head[i, j]
if auto_label is not None:
m = self.alphabets['auto_label_alphabet'].get_instance(auto_label[i, j])
self.__source_file.write('%d\t%s\t%s\t%s\t%d\t%s\t%s\n' % (j, w, p, n, h, t, m))
else:
self.__source_file.write('%d\t%s\t%s\t%s\t%d\t%s\n' % (j, w, p, n, h, t))
self.__source_file.write('\n')
class Index2Instance(object):
def __init__(self, alphabet):
self.__alphabet = alphabet
def index2instance(self, indices, lengths, symbolic_root=False, symbolic_end=False):
batch_size, _ = indices.shape
start = 1 if symbolic_root else 0
end = 1 if symbolic_end else 0
instnaces = []
for i in range(batch_size):
tmp_instances = []
for j in range(start, lengths[i] - end):
instamce = self.__alphabet.get_instance(indices[i, j])
tmp_instances.append(instamce)
instnaces.append(tmp_instances)
return instnaces