File size: 2,429 Bytes
019ee78
84038cf
34e7ce5
64b18b7
15a824f
019ee78
15a824f
34e7ce5
019ee78
 
 
 
 
64b18b7
 
019ee78
84038cf
019ee78
 
15a824f
019ee78
 
 
 
 
 
15a824f
 
 
 
 
 
 
 
 
 
 
1bfaaaf
e998926
15a824f
 
 
 
1bfaaaf
15a824f
e907f96
c9023c8
 
 
1bfaaaf
e998926
3ff2f90
 
 
 
c9023c8
e907f96
1bfaaaf
e998926
1bfaaaf
 
34e7ce5
 
 
 
5c6545c
34e7ce5
 
 
 
5c6545c
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# -*-coding:utf-8 -*-
from glob import glob
import pandas as pd
from ape.instance import Instance, LoadFactory, upload_file, upload_json
from ape.llm import LLMGPT
from functools import partial
from itertools import chain


def load_task(task, file):
    """Build an Instance from a registered task loader or an uploaded file.

    A named *task* takes priority and is resolved through ``LoadFactory``;
    otherwise the uploaded file is routed to the JSON uploader when 'json'
    appears anywhere in its name, falling back to the generic file uploader.
    The built Instance is also stored in the module-level ``instance`` global
    for other callbacks.

    Returns:
        (instance, status-message string) for the UI.
    """
    global instance
    if task:
        # Known task name -> its registered loader.
        loader = LoadFactory[task]
    else:
        # NOTE(review): substring match, so names like 'data.jsonl' also take
        # the JSON path — confirm this is intended.
        uploader = upload_json if 'json' in file.name else upload_file
        loader = partial(uploader, file=file.name)
    instance = Instance.from_file(loader)
    # Echo the first sample so the console shows what was parsed.
    print(instance.samples[0])
    return instance, f'{instance.n_sample} Data Loaded'


def sample_data(instance, n_train, n_few_shot, n_eval):
    """Draw train / few-shot / eval splits on *instance* and render them.

    Delegates the split sizes to ``instance.sample`` and formats the train
    and eval splits with ``instance.display``.

    Returns:
        (train display string, eval display string, instance, status string).
    """
    instance.sample(n_train, n_few_shot, n_eval)
    shown_train = instance.display(instance.train_samples)
    shown_eval = instance.display(instance.eval_samples)
    return shown_train, shown_eval, instance, 'Sample Done'


def esttimate_cost(instance):
    """Estimate the API cost of the train and eval splits.

    Flattens each split's nested samples into a single string and asks
    ``LLMGPT.confirm_cost`` for a per-phase quote.

    NOTE(review): function name has a typo ('esttimate') — kept because
    callers outside this view may reference it.

    Returns:
        A display string 'Train=<cost> Eval=<cost>'.
    """
    costs = {}
    for phase, samples in (('train', instance.train_samples),
                           ('eval', instance.eval_samples)):
        flattened = ''.join(chain(*samples))
        costs[phase] = LLMGPT.confirm_cost(flattened, phase)
    return f"Train={costs['train']} Eval={costs['eval']}"


def generate(gen_prompt, instance, openai_key):
    """Generate one candidate instruction per few-shot batch of the train split.

    Each raw LLM response is printed as it arrives for console visibility;
    only the 'text' field of each response is kept.

    Returns:
        All generated instruction texts joined with newlines.
    """
    llm = LLMGPT(openai_key)
    collected = []
    for few_shot in instance.get_train_iter():
        response = llm.generate_instruction(gen_prompt, few_shot)
        print(response)
        collected.append(response['text'])
    return '\n'.join(collected)


def single_test(test_prompt, instruction, input, openai_key):
    """Apply *instruction* to every newline-separated line of *input*.

    NOTE(review): parameter ``input`` shadows the builtin but is part of the
    callable signature, so it is kept unchanged.

    Returns:
        The model's 'text' outputs, one per input line, joined by newlines.
    """
    llm = LLMGPT(openai_key)
    answers = [
        llm.generate_output(test_prompt, instruction, line)['text']
        for line in input.split('\n')
    ]
    return '\n'.join(answers)


def score_single(eval_prompt, instance, instruction, openai_key):
    """Score one instruction via the LLM's log-probabilities on the eval split.

    Returns:
        Whatever ``LLMGPT.generate_logprobs`` yields (used downstream as a
        sortable likelihood value).
    """
    return LLMGPT(openai_key).generate_logprobs(
        eval_prompt, instruction, instance.eval_samples
    )


def evaluate(eval_prompt, all_instructions, instance, openai_key):
    """Score every newline-separated instruction and rank them.

    Each candidate line in *all_instructions* is scored with
    ``score_single``; results are collected into a DataFrame and sorted
    by likelihood, best first.

    Returns:
        pandas.DataFrame with 'instruction' and 'likelihood' columns.
    """
    candidates = all_instructions.split('\n')
    likelihoods = [
        score_single(eval_prompt, instance, candidate, openai_key)
        for candidate in candidates
    ]
    ranked = pd.DataFrame({'instruction': candidates, 'likelihood': likelihoods})
    ranked.sort_values(by='likelihood', ascending=False, inplace=True)
    return ranked