DSXiangLi
a
34e7ce5
raw
history blame
No virus
2.3 kB
# -*-coding:utf-8 -*-
import pandas as pd
from ape.instance import Instance, LoadFactory, upload_file
from ape.llm import LLMGPT
from functools import partial
from itertools import chain
# Module-level cache for the LLMGPT client. It starts empty and is filled in
# on first use by generate / single_test / score_single.
LLM = None
def load_task(task, file):
    """Load a dataset from a registered task or from an uploaded file.

    Args:
        task: Key into ``LoadFactory`` selecting a predefined loader. When
            falsy, ``file`` is used instead.
        file: Sequence of uploaded-file objects exposing a ``.name``
            attribute. Only the first entry is loaded. Ignored when ``task``
            is truthy.

    Returns:
        A ``(instance, message)`` tuple: the ``Instance`` built by
        ``Instance.from_file`` and a status string reporting
        ``instance.n_sample``.

    Raises:
        ValueError: If ``task`` is falsy and ``file`` is ``None`` or empty.
    """
    # The loaded instance is also published as a module-level global, exactly
    # as before, in case other code reads it from there.
    global instance
    if task:
        loader = LoadFactory[task]
    else:
        if not file:
            # Fail early with a readable message instead of the opaque
            # TypeError / IndexError the list handling below would raise.
            raise ValueError('Select a task or upload a file before loading data')
        names = [item.name for item in file]
        print(names)
        loader = partial(upload_file, file=names[0])
    instance = Instance.from_file(loader)
    print(instance.samples[0])
    return instance, f'{instance.n_sample} Data Loaded'
def sample_data(instance, n_train, n_few_shot, n_eval):
    """Draw train/eval samples on ``instance`` and render both splits.

    Calls ``instance.sample(n_train, n_few_shot, n_eval)`` and then formats
    ``instance.train_samples`` and ``instance.eval_samples`` through
    ``instance.display``.

    Returns:
        ``(train_display, eval_display, instance, 'Sample Done')``.
    """
    instance.sample(n_train, n_few_shot, n_eval)
    # Tuple items are evaluated left to right, so the train split is
    # rendered before the eval split.
    return (
        instance.display(instance.train_samples),
        instance.display(instance.eval_samples),
        instance,
        'Sample Done',
    )
def esttimate_cost(instance):
    """Report the cost figures for the sampled train and eval data.

    Each split's samples are flattened and concatenated into one string,
    which is passed to ``LLMGPT.confirm_cost`` together with the split tag
    (``'train'`` or ``'eval'``).

    NOTE: the function name is misspelled ("esttimate"); it is left as-is so
    existing references keep resolving.

    Returns:
        A string of the form ``'Train=<train cost> Eval=<eval cost>'``.
    """
    def _joined(samples):
        # Flatten one level of nesting, then concatenate the pieces.
        return ''.join(chain.from_iterable(samples))

    # Build both texts first, then query the costs, train before eval.
    joined_train = _joined(instance.train_samples)
    joined_eval = _joined(instance.eval_samples)
    cost_of_train = LLMGPT.confirm_cost(joined_train, 'train')
    cost_of_eval = LLMGPT.confirm_cost(joined_eval, 'eval')
    return f'Train={cost_of_train} Eval={cost_of_eval}'
# Key the cached client was built with; lets _get_llm notice a changed key.
_LLM_KEY = None


def _get_llm(openai_key):
    """Return the shared LLMGPT client, rebuilding it when the key changes.

    The client is cached in the module-level ``LLM``. It used to be created
    once and reused forever, so a later call with a different key silently
    kept using the first key.
    """
    global LLM, _LLM_KEY
    if LLM is None or openai_key != _LLM_KEY:
        LLM = LLMGPT(openai_key)
        _LLM_KEY = openai_key
    return LLM


def generate(gen_prompt, instance, openai_key):
    """Generate one candidate instruction per few-shot group.

    Args:
        gen_prompt: Prompt passed to ``generate_instruction``.
        instance: Object whose ``get_train_iter()`` yields the few-shot
            groups to generate from.
        openai_key: Key used to build (or rebuild) the shared client.

    Returns:
        The ``'text'`` field of every generation result, joined with
        newlines.
    """
    llm = _get_llm(openai_key)
    instructions = []
    for few_shot in instance.get_train_iter():
        instruction = llm.generate_instruction(gen_prompt, few_shot)
        print(instruction)
        instructions.append(instruction['text'])
    return '\n'.join(instructions)


def single_test(test_prompt, instruction, input, openai_key):
    """Run a single instruction against a single input.

    Args:
        test_prompt: Prompt passed to ``generate_output``.
        instruction: Instruction under test.
        input: Input to apply the instruction to. The name shadows the
            builtin but is kept because it is part of the signature.
        openai_key: Key used to build (or rebuild) the shared client.

    Returns:
        The ``'text'`` field of the generation result.
    """
    llm = _get_llm(openai_key)
    output = llm.generate_output(test_prompt, instruction, input)
    return output['text']


def score_single(eval_prompt, instance, instruction, openai_key):
    """Score one instruction on ``instance.eval_samples``.

    Args:
        eval_prompt: Prompt passed to ``generate_logprobs``.
        instance: Object providing ``eval_samples``.
        instruction: Instruction to score.
        openai_key: Key used to build (or rebuild) the shared client.

    Returns:
        Whatever ``generate_logprobs`` returns for the evaluation samples.
    """
    llm = _get_llm(openai_key)
    return llm.generate_logprobs(eval_prompt, instruction, instance.eval_samples)
def evaluate(eval_prompt, all_instructions, instance, openai_key):
    """Score every candidate instruction on the evaluation samples.

    Args:
        eval_prompt: Prompt forwarded to ``score_single``.
        all_instructions: Iterable of instruction strings, or a single
            newline-separated string (the format ``generate`` returns).
            Blank lines in a string are skipped.
        instance: Forwarded to ``score_single``.
        openai_key: Forwarded to ``score_single``.

    Returns:
        A ``pandas.DataFrame`` with columns ``'instruction'`` and
        ``'likelihood'``, one row per instruction.
    """
    if isinstance(all_instructions, str):
        # A bare string would otherwise be iterated character by character,
        # scoring each character as if it were an instruction.
        candidates = [
            line for line in all_instructions.splitlines() if line.strip()
        ]
    else:
        # Materialise once so one-shot iterables feed both the scoring loop
        # and the DataFrame column.
        candidates = list(all_instructions)
    scores = [
        score_single(eval_prompt, instance, candidate, openai_key)
        for candidate in candidates
    ]
    return pd.DataFrame({'instruction': candidates, 'likelihood': scores})