# -*- coding: utf-8 -*-
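"""Utilities for an instruction-generation workflow built on the local
`ape` package: load a dataset, sample train/few-shot/eval splits,
estimate OpenAI API cost, generate candidate instructions with an LLM,
and rank them by the log-likelihood assigned to the eval samples.

A usage sketch appears at the bottom of the file.
"""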
from functools import partial
from itertools import chain

import pandas as pd

from ape.instance import Instance, LoadFactory, upload_file, upload_json
from ape.llm import LLMGPT


def load_task(task, file):
    """Load a dataset from a registered task name or an uploaded file."""
    global instance
    if task:
        # Named tasks have a registered loader in LoadFactory.
        loader = LoadFactory[task]
    elif 'json' in file.name:
        loader = partial(upload_json, file=file.name)
    else:
        loader = partial(upload_file, file=file.name)
    instance = Instance.from_file(loader)
    print(instance.samples[0])  # sanity-check the first sample
    return instance, f'{instance.n_sample} Data Loaded'


def sample_data(instance, n_train, n_few_shot, n_eval):
    """Split the loaded data into train/few-shot/eval subsets."""
    instance.sample(n_train, n_few_shot, n_eval)
    train_str = instance.display(instance.train_samples)
    eval_str = instance.display(instance.eval_samples)
    return train_str, eval_str, instance, 'Sample Done'


def estimate_cost(instance):
    """Estimate the API cost of the sampled train and eval text."""
    train_text = ''.join(chain(*instance.train_samples))
    eval_text = ''.join(chain(*instance.eval_samples))
    train_cost = LLMGPT.confirm_cost(train_text, 'train')
    eval_cost = LLMGPT.confirm_cost(eval_text, 'eval')
    return f'Train={train_cost} Eval={eval_cost}'


esttimate_cost = estimate_cost  # alias preserving the original (misspelled) name


def generate(gen_prompt, instance, openai_key):
    """Generate one candidate instruction per few-shot batch of train samples."""
    llm = LLMGPT(openai_key)
    instructions = []
    for few_shot in instance.get_train_iter():
        instruction = llm.generate_instruction(gen_prompt, few_shot)
        print(instruction)
        instructions.append(instruction['text'])
    return '\n'.join(instructions)


def single_test(test_prompt, instruction, inputs, openai_key):
    """Apply one instruction to each newline-separated input string."""
    llm = LLMGPT(openai_key)
    outputs = []
    for line in inputs.split('\n'):
        outputs.append(llm.generate_output(test_prompt, instruction, line)['text'])
    return '\n'.join(outputs)


def score_single(eval_prompt, instance, instruction, openai_key):
    """Score one instruction by the log-probability of the eval samples."""
    llm = LLMGPT(openai_key)
    return llm.generate_logprobs(eval_prompt, instruction, instance.eval_samples)


def evaluate(eval_prompt, all_instructions, instance, openai_key):
    """Score every candidate instruction and rank them, best first."""
    all_instructions = all_instructions.split('\n')
    scores = []
    for instruction in all_instructions:
        scores.append(score_single(eval_prompt, instance, instruction, openai_key))
    df = pd.DataFrame({'instruction': all_instructions, 'likelihood': scores})
    df.sort_values(by='likelihood', ascending=False, inplace=True)
    return df
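

# A minimal usage sketch of the pipeline above, assuming the `ape`
# package is importable, a local `samples.json` dataset exists (a
# hypothetical path), and an OpenAI key is set in OPENAI_API_KEY.
# The prompt strings are placeholders, not the app's real templates;
# SimpleNamespace stands in for the uploaded-file object (e.g. from a
# Gradio upload widget) whose `.name` attribute load_task reads.
if __name__ == '__main__':
    import os
    from types import SimpleNamespace

    uploaded = SimpleNamespace(name='samples.json')
    key = os.environ['OPENAI_API_KEY']

    inst, status = load_task(task=None, file=uploaded)
    print(status)

    # Sample sizes are illustrative; see Instance.sample for semantics.
    train_str, eval_str, inst, _ = sample_data(inst, n_train=10, n_few_shot=5, n_eval=20)
    print(estimate_cost(inst))

    # One candidate instruction per few-shot batch, then rank the
    # candidates by eval-set log-likelihood.
    candidates = generate('Write the instruction these examples follow:', inst, key)
    ranking = evaluate('Score the instruction against the examples:', candidates, inst, key)
    print(ranking.head())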