File size: 1,955 Bytes
e8f4897
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46

class Writer(object):
    """Write batches of index-encoded token annotations to a tab-separated file.

    Every token becomes one line with the columns
    ``position, word, pos, ner, head, arc`` (plus a trailing auto-label column
    when ``auto_label`` is supplied to :meth:`write`). Each sentence of a batch
    is terminated by an empty line.
    """

    def __init__(self, alphabets):
        """Remember the alphabets used to turn indices back into strings.

        Args:
            alphabets: Mapping indexed by ``'word_alphabet'``,
                ``'pos_alphabet'``, ``'ner_alphabet'``, ``'arc_alphabet'`` and,
                only when auto labels are written, ``'auto_label_alphabet'``.
                Each value must offer ``get_instance(index)``.
        """
        self.__source_file = None
        self.alphabets = alphabets

    def start(self, file_path):
        """Open ``file_path`` in ``'w'`` mode as the current output file.

        A file left open by an earlier ``start()`` is closed first, so its
        handle is released deterministically instead of being dropped.
        """
        self.close()
        self.__source_file = open(file_path, 'w')

    def close(self):
        """Close the current output file; do nothing if none is open."""
        handle = self.__source_file
        # Forget the handle before closing so a failing close() is not retried
        # on a half-closed file by a later call.
        self.__source_file = None
        if handle is not None:
            handle.close()

    def write(self, word, pos, ner, head, arc, lengths, auto_label=None, symbolic_root=False, symbolic_end=False):
        """Append one batch of sentences to the open output file.

        Args:
            word: Two-dimensional index container; ``word.shape`` gives
                ``(batch_size, <ignored>)`` and entries are read as
                ``word[i, j]``.
            pos, ner, arc: Index containers read as ``x[i, j]`` and decoded
                with the matching alphabet.
            head: Container read as ``head[i, j]``; written with ``%d``.
            lengths: ``lengths[i]`` is the number of positions of sentence
                ``i`` (including any symbolic root/end positions).
            auto_label: Optional index container read as ``auto_label[i, j]``;
                when given, an extra decoded column is appended to each line.
            symbolic_root: Skip position 0 of every sentence when true.
            symbolic_end: Skip the last counted position of every sentence
                when true.

        Raises:
            RuntimeError: If no output file is open (``start()`` not called,
                or ``close()`` already called).
        """
        out = self.__source_file
        if out is None:
            raise RuntimeError('Writer.start() must be called before Writer.write()')

        batch_size, _ = word.shape
        start = 1 if symbolic_root else 0
        end = 1 if symbolic_end else 0

        # Resolve the decoders once per batch instead of once per token.
        word_of = self.alphabets['word_alphabet'].get_instance
        pos_of = self.alphabets['pos_alphabet'].get_instance
        ner_of = self.alphabets['ner_alphabet'].get_instance
        arc_of = self.alphabets['arc_alphabet'].get_instance
        # The auto-label alphabet is only required when auto labels are written.
        label_of = None
        if auto_label is not None:
            label_of = self.alphabets['auto_label_alphabet'].get_instance

        for i in range(batch_size):
            lines = []
            for j in range(start, lengths[i] - end):
                row = '%d\t%s\t%s\t%s\t%d\t%s' % (
                    j,
                    word_of(word[i, j]),
                    pos_of(pos[i, j]),
                    ner_of(ner[i, j]),
                    head[i, j],
                    arc_of(arc[i, j]),
                )
                if label_of is not None:
                    row += '\t%s' % label_of(auto_label[i, j])
                lines.append(row + '\n')
            # Blank line marks the end of the sentence.
            lines.append('\n')
            out.writelines(lines)

class Index2Instance(object):
    """Decode batches of indices into instances using one alphabet."""

    def __init__(self, alphabet):
        """Keep the alphabet whose ``get_instance`` performs the decoding."""
        self.__alphabet = alphabet

    def index2instance(self, indices, lengths, symbolic_root=False, symbolic_end=False):
        """Return one list of decoded instances per row of ``indices``.

        Args:
            indices: Two-dimensional container; ``indices.shape`` yields the
                row count and entries are read as ``indices[row, col]``.
            lengths: ``lengths[row]`` is the number of counted positions in
                that row.
            symbolic_root: When true, position 0 of each row is skipped.
            symbolic_end: When true, the last counted position of each row is
                skipped.

        Returns:
            A list with one inner list per row, holding the results of
            ``get_instance`` for the selected positions, in order.
        """
        row_count, _ = indices.shape
        first = int(bool(symbolic_root))
        trim = int(bool(symbolic_end))
        return [
            [
                self.__alphabet.get_instance(indices[row, col])
                for col in range(first, lengths[row] - trim)
            ]
            for row in range(row_count)
        ]