# -*- coding: utf-8 -*-

from functools import partial
from itertools import chain

import pandas as pd

from ape.instance import Instance, LoadFactory, upload_file
from ape.llm import LLMGPT

# Shared LLMGPT client, created lazily once an OpenAI key is supplied.
LLM = None
# Currently loaded task instance, set by load_task.
instance = None
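

# Helper factored out of generate/single_test/score_single below, which all
# repeated the same lazy initialisation of the shared client.
def _ensure_llm(openai_key):
    global LLM
    if LLM is None:
        LLM = LLMGPT(openai_key)
    return LLM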


def load_task(task, file):
    """Load samples from a registered task loader, or from an uploaded file."""
    global instance
    if task:
        # A known task name maps to a registered loader.
        loader = LoadFactory[task]
    else:
        # Otherwise fall back to the first uploaded file
        # (file objects exposing a .name path, e.g. from a UI upload widget).
        file = [i.name for i in file]
        print(file)
        loader = partial(upload_file, file=file[0])
    instance = Instance.from_file(loader)
    print(instance.samples[0])
    return instance, f'{instance.n_sample} samples loaded'


def sample_data(instance, n_train, n_few_shot, n_eval):
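    """Split the loaded instance into train / few-shot / eval subsets."""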
    instance.sample(n_train, n_few_shot, n_eval)
    train_str = instance.display(instance.train_samples)
    eval_str = instance.display(instance.eval_samples)
    return train_str, eval_str, instance, 'Sample Done'


def estimate_cost(instance):
    """Estimate the API cost of running generation and evaluation."""
    train_text = ''.join(chain(*instance.train_samples))
    eval_text = ''.join(chain(*instance.eval_samples))
    train_cost = LLMGPT.confirm_cost(train_text, 'train')
    eval_cost = LLMGPT.confirm_cost(eval_text, 'eval')
    return f'Train={train_cost} Eval={eval_cost}'


def generate(gen_prompt, instance, openai_key):
    """Generate one candidate instruction per few-shot group of train samples."""
    llm = _ensure_llm(openai_key)

    instructions = []
    train_iter = instance.get_train_iter()
    for few_shot in train_iter:
        instruction = llm.generate_instruction(gen_prompt, few_shot)
        print(instruction)
        instructions.append(instruction['text'])
    return '\n'.join(instructions)


def single_test(test_prompt, instruction, input_text, openai_key):
    """Run one instruction against a single test input."""
    llm = _ensure_llm(openai_key)
    output = llm.generate_output(test_prompt, instruction, input_text)
    return output['text']


def score_single(eval_prompt, instance, instruction, openai_key):
    """Score one instruction by the log-likelihood of the eval samples."""
    llm = _ensure_llm(openai_key)
    score = llm.generate_logprobs(eval_prompt, instruction, instance.eval_samples)
    return score


def evaluate(eval_prompt, all_instructions, instance, openai_key):
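    """Score every candidate instruction and tabulate the results."""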
    scores = []
    for instruction in all_instructions:
        score = score_single(eval_prompt, instance, instruction, openai_key)
        scores.append(score)
    return pd.DataFrame({'instruction': all_instructions, 'likelihood': scores})
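

if __name__ == '__main__':
    # Minimal end-to-end sketch of how these callbacks compose. The task name,
    # prompts, and API key below are illustrative placeholders, not values
    # shipped with the module.
    inst, status = load_task('my_task', None)  # 'my_task' is a hypothetical registered task
    print(status)
    train_str, eval_str, inst, _ = sample_data(inst, n_train=5, n_few_shot=5, n_eval=20)
    print(estimate_cost(inst))
    candidates = generate('Write an instruction that maps the inputs to the outputs:',
                          inst, openai_key='sk-...')
    results = evaluate('How likely is this instruction to produce the output?',
                       candidates.split('\n'), inst, openai_key='sk-...')
    print(results.sort_values('likelihood', ascending=False))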