# xl2533: add json file loader (64b18b7)
# -*-coding:utf-8 -*-
from glob import glob
import pandas as pd
from ape.instance import Instance, LoadFactory, upload_file, upload_json
from ape.llm import LLMGPT
from functools import partial
from itertools import chain
def load_task(task, file):
    """Build an ``Instance`` from a built-in task or from an uploaded file.

    The loader is picked in this order: a truthy ``task`` selects
    ``LoadFactory[task]``; otherwise a file whose extension contains
    ``json`` (case-insensitive) is read with ``upload_json``; any other file
    is read with ``upload_file``. The resulting instance is also stored in
    the module-level ``instance`` global.

    Args:
        task: Key into ``LoadFactory``; falsy to load from ``file`` instead.
        file: Object whose ``.name`` is handed to the upload helpers as
            ``file=`` (presumably the uploaded file's path). Only consulted
            when ``task`` is falsy.

    Returns:
        Tuple ``(instance, message)`` where ``message`` is
        ``'<n_sample> Data Loaded'``.

    Raises:
        ValueError: If ``task`` is falsy and ``file`` is ``None``.
    """
    global instance
    import os  # local import: only needed for the extension check below

    if task:
        loader = LoadFactory[task]
    else:
        if file is None:
            raise ValueError('Either a task name or an uploaded file is required')
        # Inspect the extension only: a substring test on the full path would
        # also match directories or base names that merely contain "json".
        extension = os.path.splitext(file.name)[1].lower()
        reader = upload_json if 'json' in extension else upload_file
        loader = partial(reader, file=file.name)

    instance = Instance.from_file(loader)
    print(instance.samples[0])
    return instance, f'{instance.n_sample} Data Loaded'
def sample_data(instance, n_train, n_few_shot, n_eval):
    """Resample ``instance`` and render its train and eval splits.

    Args:
        instance: Object providing ``sample``, ``display``, ``train_samples``
            and ``eval_samples``.
        n_train: Forwarded unchanged to ``instance.sample``.
        n_few_shot: Forwarded unchanged to ``instance.sample``.
        n_eval: Forwarded unchanged to ``instance.sample``.

    Returns:
        Tuple ``(train_str, eval_str, instance, 'Sample Done')`` where the
        two strings are what ``instance.display`` returns for each split.
    """
    instance.sample(n_train, n_few_shot, n_eval)
    # Each split is read only after sampling; train is rendered before eval.
    train_view, eval_view = (
        instance.display(getattr(instance, split))
        for split in ('train_samples', 'eval_samples')
    )
    return train_view, eval_view, instance, 'Sample Done'
def esttimate_cost(instance):
    """Summarise ``LLMGPT.confirm_cost`` for the train and eval splits.

    Every sample in a split is flattened and concatenated into one string,
    which is passed to ``LLMGPT.confirm_cost`` together with the split label
    (``'train'`` or ``'eval'``).

    Args:
        instance: Object exposing ``train_samples`` and ``eval_samples``,
            each an iterable of iterables of strings.

    Returns:
        The string ``'Train=<train_cost> Eval=<eval_cost>'``.
    """
    # Both texts are built first, then costed in train -> eval order.
    text_by_split = {
        'train': ''.join(chain.from_iterable(instance.train_samples)),
        'eval': ''.join(chain.from_iterable(instance.eval_samples)),
    }
    train_cost, eval_cost = (
        LLMGPT.confirm_cost(text, split) for split, text in text_by_split.items()
    )
    return f'Train={train_cost} Eval={eval_cost}'
def generate(gen_prompt, instance, openai_key):
    """Generate one instruction per few-shot batch and join them by newline.

    Args:
        gen_prompt: Prompt forwarded to ``LLMGPT.generate_instruction``.
        instance: Object whose ``get_train_iter()`` yields the few-shot
            batches to generate from.
        openai_key: Key used to construct the ``LLMGPT`` client.

    Returns:
        The ``'text'`` field of every response, joined with ``'\\n'``.
    """
    llm = LLMGPT(openai_key)

    def _instruction_texts():
        # Each raw response is echoed to stdout before its text is kept.
        for demo in instance.get_train_iter():
            response = llm.generate_instruction(gen_prompt, demo)
            print(response)
            yield response['text']

    return '\n'.join(_instruction_texts())
def single_test(test_prompt, instruction, input, openai_key):
    """Run ``instruction`` against each line of ``input``.

    Args:
        test_prompt: Prompt forwarded to ``LLMGPT.generate_output``.
        instruction: Instruction under test.
        input: Newline-separated test inputs; each line (blank ones
            included) is sent separately.
        openai_key: Key used to construct the ``LLMGPT`` client.

    Returns:
        The ``'text'`` field of every response, joined with ``'\\n'`` in
        input order.
    """
    llm = LLMGPT(openai_key)
    answers = [
        llm.generate_output(test_prompt, instruction, line)['text']
        for line in input.split('\n')
    ]
    return '\n'.join(answers)
def score_single(eval_prompt, instance, instruction, openai_key):
    """Score one instruction against the instance's eval samples.

    Args:
        eval_prompt: Prompt forwarded to ``LLMGPT.generate_logprobs``.
        instance: Object exposing ``eval_samples``.
        instruction: Instruction to score.
        openai_key: Key used to construct the ``LLMGPT`` client.

    Returns:
        Whatever ``LLMGPT.generate_logprobs`` returns for this instruction.
    """
    return LLMGPT(openai_key).generate_logprobs(
        eval_prompt, instruction, instance.eval_samples
    )
def evaluate(eval_prompt, all_instructions, instance, openai_key):
    """Score every candidate instruction and rank them by likelihood.

    Args:
        eval_prompt: Prompt forwarded to ``score_single``.
        all_instructions: Newline-separated candidate instructions. Lines
            that are empty or whitespace-only are ignored.
        instance: Forwarded to ``score_single``.
        openai_key: Forwarded to ``score_single``.

    Returns:
        A ``pandas.DataFrame`` with columns ``'instruction'`` and
        ``'likelihood'``, sorted by ``'likelihood'`` in descending order.
        It is empty when no non-blank instruction was supplied.
    """
    # Drop blank lines so an empty textbox or a stray newline does not
    # trigger a score_single call for an empty instruction.
    candidates = [line for line in all_instructions.split('\n') if line.strip()]
    scores = [
        score_single(eval_prompt, instance, candidate, openai_key)
        for candidate in candidates
    ]
    df = pd.DataFrame({'instruction': candidates, 'likelihood': scores})
    df.sort_values(by='likelihood', ascending=False, inplace=True)
    return df