# -*-coding:utf-8 -*-
from glob import glob
import pandas as pd
from ape.instance import Instance, LoadFactory, upload_file, upload_json
from ape.llm import LLMGPT
from functools import partial
from itertools import chain


def load_task(task, file):
    """Load a dataset into an ``Instance`` and report how many samples it has.

    The loader is picked in this order: the ``LoadFactory`` entry for ``task``
    when ``task`` is truthy, ``upload_json`` when the uploaded file's name
    contains ``'json'``, and ``upload_file`` otherwise.

    Args:
        task: Key into ``LoadFactory``; any falsy value means "use ``file``".
        file: Object exposing a ``name`` attribute (the path handed to the
            upload loaders). Only consulted when ``task`` is falsy.

    Returns:
        A ``(instance, status_message)`` tuple.
    """
    # NOTE: this rebinds the module-level name ``instance`` as a side effect,
    # in addition to returning the object.
    global instance
    if task:
        loader = LoadFactory[task]
    elif 'json' in file.name:
        loader = partial(upload_json, file=file.name)
    else:
        loader = partial(upload_file, file=file.name)
    instance = Instance.from_file(loader)
    try:
        # Debug preview of the first loaded sample.
        print(instance.samples[0])
    except IndexError:
        # An empty dataset has nothing to preview; still report the count
        # below instead of failing the whole load.
        pass
    return instance, f'{instance.n_sample} Data Loaded'


def sample_data(instance, n_train, n_few_shot, n_eval):
    """Draw train/eval samples on ``instance`` and render them for display.

    Args:
        instance: Loaded ``Instance``; ``instance.sample`` is called on it, so
            it is modified in place.
        n_train: Forwarded to ``instance.sample`` as its first argument.
        n_few_shot: Forwarded to ``instance.sample`` as its second argument.
        n_eval: Forwarded to ``instance.sample`` as its third argument.

    Returns:
        A ``(train_str, eval_str, instance, 'Sample Done')`` tuple, where the
        two strings come from ``instance.display``.
    """
    instance.sample(n_train, n_few_shot, n_eval)
    train_str = instance.display(instance.train_samples)
    eval_str = instance.display(instance.eval_samples)
    return train_str, eval_str, instance, 'Sample Done'


def esttimate_cost(instance):
    """Summarise the cost reported by ``LLMGPT.confirm_cost`` for both splits.

    Every sample is flattened and concatenated into one string per split, so
    each sample must be an iterable of strings. The misspelled function name
    is kept because callers refer to it.

    Args:
        instance: ``Instance`` whose ``train_samples`` and ``eval_samples``
            have already been populated.

    Returns:
        A string of the form ``'Train=<train_cost> Eval=<eval_cost>'``.
    """
    train_text = ''.join(chain.from_iterable(instance.train_samples))
    eval_text = ''.join(chain.from_iterable(instance.eval_samples))
    train_cost = LLMGPT.confirm_cost(train_text, 'train')
    eval_cost = LLMGPT.confirm_cost(eval_text, 'eval')
    return f'Train={train_cost} Eval={eval_cost}'


def generate(gen_prompt, instance, openai_key):
    """Generate one candidate instruction per item of the train iterator.

    Args:
        gen_prompt: Prompt passed to ``LLMGPT.generate_instruction``.
        instance: ``Instance`` providing ``get_train_iter()``.
        openai_key: Key used to construct the ``LLMGPT`` client.

    Returns:
        The ``'text'`` field of every generated instruction, joined with
        newlines.
    """
    LLM = LLMGPT(openai_key)
    instructions = []
    for few_shot in instance.get_train_iter():
        instruction = LLM.generate_instruction(gen_prompt, few_shot)
        print(instruction)  # debug trace of the raw response
        instructions.append(instruction['text'])
    return '\n'.join(instructions)


def single_test(test_prompt, instruction, input, openai_key):
    """Run ``instruction`` against each line of ``input``.

    Args:
        test_prompt: Prompt passed to ``LLMGPT.generate_output``.
        instruction: Instruction under test.
        input: Newline-separated inputs; every line, including an empty one,
            produces exactly one output line so the two stay aligned.
        openai_key: Key used to construct the ``LLMGPT`` client.

    Returns:
        The ``'text'`` field of each output, joined with newlines.
    """
    LLM = LLMGPT(openai_key)
    outputs = [
        LLM.generate_output(test_prompt, instruction, line)['text']
        for line in input.split('\n')
    ]
    return '\n'.join(outputs)


def score_single(eval_prompt, instance, instruction, openai_key):
    """Score one instruction against ``instance.eval_samples``.

    Args:
        eval_prompt: Prompt passed to ``LLMGPT.generate_logprobs``.
        instance: ``Instance`` whose ``eval_samples`` are scored.
        instruction: Instruction to score.
        openai_key: Key used to construct the ``LLMGPT`` client.

    Returns:
        Whatever ``LLMGPT.generate_logprobs`` returns for this instruction.
    """
    LLM = LLMGPT(openai_key)
    return LLM.generate_logprobs(eval_prompt, instruction, instance.eval_samples)


def evaluate(eval_prompt, all_instructions, instance, openai_key):
    """Score every candidate instruction and rank them best-first.

    Args:
        eval_prompt: Prompt forwarded to ``score_single``.
        all_instructions: Newline-separated candidate instructions. Blank or
            whitespace-only lines are skipped.
        instance: ``Instance`` whose ``eval_samples`` are scored.
        openai_key: Key forwarded to ``score_single``.

    Returns:
        A ``pandas.DataFrame`` with columns ``'instruction'`` and
        ``'likelihood'``, sorted by ``'likelihood'`` in descending order.
    """
    # A trailing newline or an empty text box would otherwise be scored as an
    # empty instruction, costing an LLM call and adding a meaningless row.
    candidates = [line for line in all_instructions.split('\n') if line.strip()]
    scores = [
        score_single(eval_prompt, instance, candidate, openai_key)
        for candidate in candidates
    ]
    df = pd.DataFrame({'instruction': candidates, 'likelihood': scores})
    return df.sort_values(by='likelihood', ascending=False)