File size: 3,695 Bytes
019ee78
 
 
 
 
 
 
 
 
 
 
 
 
15a824f
 
 
 
019ee78
 
 
15a824f
 
 
 
019ee78
15a824f
 
 
019ee78
 
 
 
 
 
 
e907f96
019ee78
15a824f
 
 
 
019ee78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15a824f
019ee78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15a824f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# -*-coding:utf-8 -*-
import json
import random
import pandas as pd


class Instance(object):
    """
    Pool of samples that can be split into few-shot training groups and an
    evaluation set.

    By default few-shot demonstrations are used for generation and evaluation.
    The train/eval lists stay empty until ``sample`` is called.
    """

    def __init__(self, loader=None):
        """
        :param loader: zero-argument callable returning the full sample
            sequence. When omitted, the pool starts empty instead of failing
            on a call to ``None``.
        """
        self.samples = loader() if loader is not None else []
        self.n_few_shot = 0
        self.n_train = 0
        self.n_eval = 0
        self.train_iter = None
        self.train_samples = []
        self.eval_samples = []

    @property
    def n_sample(self):
        """Number of samples in the pool."""
        return len(self.samples)

    def sample(self, n_train, n_few_shot, n_eval):
        """
        Randomly draw, without replacement, ``n_train`` groups of
        ``n_few_shot`` training samples plus ``n_eval`` evaluation samples.

        :param n_train: number of few-shot groups to draw
        :param n_few_shot: number of samples in each group
        :param n_eval: number of evaluation samples
        :raises ValueError: if the pool holds fewer than
            ``n_train * n_few_shot + n_eval`` samples
        """
        total_train = n_train * n_few_shot
        pool_size = len(self.samples)
        # Validate before touching any attribute so that a rejected request
        # leaves the previous split (counters and sample lists) consistent.
        if total_train + n_eval > pool_size:
            raise ValueError(f'Train + Eval > total samples {len(self.samples)}, decrease them')

        self.n_train = n_train
        self.n_few_shot = n_few_shot
        self.n_eval = n_eval

        # One draw for both sets guarantees train and eval never overlap.
        index = random.sample(range(pool_size), total_train + n_eval)
        train_index, eval_index = index[:total_train], index[total_train:]
        self.train_samples = [self.samples[i] for i in train_index]
        self.eval_samples = [self.samples[i] for i in eval_index]

    def get_train_iter(self):
        """Yield the training samples as ``n_train`` consecutive slices of ``n_few_shot`` items."""
        for i in range(self.n_train):
            start = i * self.n_few_shot
            yield self.train_samples[start:start + self.n_few_shot]

    @staticmethod
    def display(samples):
        """
        Format samples as one ``"<first> >> <second>"`` line per sample.

        :param samples: iterable of indexable items with at least two elements
        :return: the concatenated lines, each terminated by a newline
        """
        return ''.join(f'{i[0]} >> {i[1]}\n' for i in samples)

    @classmethod
    def from_file(cls, loader):
        """Build an Instance from a loader callable that returns the samples."""
        return cls(loader)

    @classmethod
    def from_list(cls, tuple_list):
        """Build an Instance directly from an in-memory (input, output) list."""
        # Wrap the list in a zero-argument callable to match the loader protocol.
        def func():
            return tuple_list

        return cls(func)


def load_event_extraction(file='./ape/data/event_ie_train.json'):
    """
    Load a JSON-lines file: one JSON document per line.

    :param file: path of the file to read
    :return: list with the decoded object of every non-blank line, in file order
    :raises json.JSONDecodeError: if a non-blank line is not valid JSON
    """
    with open(file, 'rb') as f:
        # Stream line by line and skip blank lines (e.g. a trailing empty
        # line), which json.loads would otherwise reject.
        return [json.loads(line) for line in f if line.strip()]


def load_paraphase(file='./ape/data/paraphase_train.csv'):
    """
    Load pairs from a GBK-encoded CSV file.

    :param file: path of the CSV file; its first two columns are used
    :return: list of ``(first column, second column)`` tuples, one per row
    """
    df = pd.read_csv(file, encoding='GBK')
    # Use explicit positional access: indexing a row Series with a bare
    # integer is deprecated when the column labels are not integers.
    return [(row.iloc[0], row.iloc[1]) for _, row in df.iterrows()]


def load_intent(file='./ape/data/intent_train.csv'):
    """
    Load pairs from a UTF-8, tab-separated file.

    :param file: path of the file; its first two columns are used
    :return: list of ``(first column, second column)`` tuples, one per row
    """
    df = pd.read_csv(file, encoding='UTF8', sep='\t')
    # Use explicit positional access: indexing a row Series with a bare
    # integer is deprecated when the column labels are not integers.
    return [(row.iloc[0], row.iloc[1]) for _, row in df.iterrows()]


def upload_file(file):
    """
    Read ``<input> <output>`` pairs from a text file, one pair per line,
    with the two fields separated by a single space.

    :param file: path of the text file to read
    :return: list of ``(input, output)`` string tuples in file order
    :raises ValueError: if a non-blank line does not consist of exactly two
        space-separated fields
    """
    tuple_list = []
    with open(file, 'r') as f:
        for line_no, raw_line in enumerate(f, start=1):
            # Drop the line terminator so it does not leak into the output field.
            line = raw_line.rstrip('\n')
            if not line.strip():
                # Tolerate blank lines, e.g. a trailing empty line.
                continue
            fields = line.split(' ')
            if len(fields) != 2:
                raise ValueError(
                    f'Line {line_no}: expected 2 space-separated fields, got {len(fields)}: {line!r}'
                )
            tuple_list.append((fields[0], fields[1]))
    return tuple_list


# Registry mapping a task name to the loader function for its dataset.
LoadFactory = dict(
    paraphase=load_paraphase,
    event_extract=load_event_extraction,
    search_intent=load_intent,
)

if __name__ == '__main__':
    # Demo: split two pools into 2 groups of 3 few-shot samples plus 2 eval samples.
    n_train, few_shot, n_eval = 2, 3, 2

    # Pool loaded from the bundled paraphrase CSV.
    instance1 = Instance.from_file(load_paraphase)
    instance1.sample(n_train, few_shot, n_eval)
    print(instance1.display(instance1.train_samples))

    # Pool built from an in-memory list of word pairs.
    word_pairs = [
        ('sane', 'insane'),
        ('direct', 'indirect'),
        ('informally', 'formally'),
        ('unpopular', 'popular'),
        ('subtractive', 'additive'),
        ('nonresidential', 'residential'),
        ('inexact', 'exact'),
        ('uptown', 'downtown'),
        ('incomparable', 'comparable'),
        ('powerful', 'powerless'),
        ('gaseous', 'solid'),
        ('evenly', 'unevenly'),
        ('formality', 'informality'),
        ('deliberately', 'accidentally'),
        ('off', 'on'),
    ]
    instance2 = Instance.from_list(word_pairs)
    instance2.sample(n_train, few_shot, n_eval)
    print(instance2.display(instance2.train_samples))

    # Show the first few-shot group produced by the training iterator.
    train_iter = instance2.get_train_iter()
    print(next(train_iter))