# -*-coding:utf-8 -*-
"""Gradio front-end with two panels: Automatic Prompt Engineer and Self-Instruct.

The script builds a single ``gr.Blocks`` app (``demo``), wires the button
callbacks, and launches the server at import time.
"""
import os
import gradio as gr
from ape.instance import LoadFactory
from ape.prompt import MyTemplate
# NOTE(review): the APE callbacks used below (load_task, sample_data,
# esttimate_cost, generate, evaluate, single_test, score_single) are presumably
# provided by this star import -- confirm against ape/ape.py.
from ape.ape import *
from self.generate import init_instance, generate_instruction
from self.prompt import self_prompt

with gr.Blocks(title="Automatic Prompt Engineer", theme=gr.themes.Glass()) as demo:
    # ------------------------------------------------------------------
    # APE panel
    # ------------------------------------------------------------------
    gr.Markdown("# Automatic Prompt Engineer")
    with gr.Row().style(equal_height=True):
        # Step 1: API key and sampling parameters.
        with gr.Column(scale=2):
            gr.Markdown("## 第一步:输入参数")
            with gr.Row():
                openai_key = gr.Textbox(type='password', label='输入 API key')
            with gr.Row():
                n_train = gr.Slider(label="训练样本数", minimum=1, maximum=20, step=1, value=5)
                n_few_shot = gr.Slider(label="每组几个样例", minimum=1, maximum=20, step=1, value=5)
            with gr.Row():
                # NOTE(review): with minimum=1 and step=5 the default of 20 is
                # not on the slider grid (1, 6, 11, ...) -- confirm intent.
                n_eval = gr.Slider(label="评估样本数", minimum=1, maximum=30, step=5, value=20)

        # Step 2: choose a built-in task or upload a data file, then sample.
        with gr.Column(scale=3):
            gr.Markdown("## 第二步:加载数据(选任务或上传数据)")
            with gr.Tab("选择数据"):
                with gr.Row().style(equal_height=True):
                    file = gr.File(label='上传txt文件,input[空格]output[换行]')
                with gr.Row().style(equal_height=True):
                    task = gr.Dropdown(label="Choose Existing Task",
                                       choices=list(LoadFactory.keys()), value=None)
                with gr.Row().style(equal_height=True):
                    # Per-session state passed between the APE callbacks.
                    instance = gr.State()
                    load_button = gr.Button("Load Task")
                    load_flag = gr.Textbox()
                    sample_button = gr.Button('sample Data')
                    sample_flag = gr.Textbox()
            with gr.Tab("展示数据"):
                with gr.Row():
                    train_str = gr.Textbox(max_lines=100, lines=10, label="Data for prompt generation")
                    eval_str = gr.Textbox(max_lines=100, lines=10, label="Data for scoring")

    with gr.Row().style(equal_height=True):
        # Step 3: prompt templates; the defaults are shown as placeholders and
        # the boxes must be editable so the user can override them.
        with gr.Column(scale=2):
            gr.Markdown("## 第三步: Run APE(可替换默认指令)")
            gen_prompt = gr.Textbox(max_lines=100, lines=3, interactive=True,
                                    placeholder=MyTemplate['gen_user_prompt'], value='',
                                    label="Prompt for generation")
            eval_prompt = gr.Textbox(max_lines=100, lines=3, interactive=True,
                                     placeholder=MyTemplate['eval_prompt'], value='',
                                     label="Prompt for Evaluation")
            test_prompt = gr.Textbox(max_lines=100, lines=3, interactive=True,
                                     placeholder=MyTemplate['test_prompt'], value='',
                                     label="Prompt for Single Test")
            with gr.Row().style(equal_height=True):
                cost = gr.Textbox(lines=1, value="", label="Estimated Cost ($)")
                cost_button = gr.Button("Estimate Cost")
            with gr.Row().style(equal_height=True):
                gen_button = gr.Button("Generate")
                eval_button = gr.Button("Eval")

        # Step 4: results.
        with gr.Column(scale=3):
            gr.Markdown("## 第四步:APE 结果")
            with gr.Tab("生成指令"):
                all_prompt = gr.Textbox(label='Generated Prompt')
                # Display all generated prompts with their log probs.
                output_df = gr.DataFrame(type='pandas', headers=['Prompt', 'Likelihood'],
                                         wrap=True, interactive=False)
            with gr.Tab("指令单测"):
                # Test the output of the LLM using a given prompt.
                with gr.Row().style(equal_height=True):
                    with gr.Column(scale=1):
                        test_instruction = gr.Textbox(lines=4, value="", label="Prompt to test")
                        test_input = gr.Textbox(lines=4, value="",
                                                label="Inputs used to test prompt[多个输入以换行分割]")
                        test_button = gr.Button("Test")
                    with gr.Column(scale=1):
                        test_output = gr.Textbox(lines=9, value="", label="Model Output")
            with gr.Tab("指令评估"):
                # By default uses the evaluation set in APE.
                with gr.Row().style(equal_height=True):
                    with gr.Column(scale=1):
                        score_instruction = gr.Textbox(lines=3, value="", label="Prompt to Evaluate")
                        score_button = gr.Button("Evaluate")
                    with gr.Column(scale=1):
                        # Output-only field: `interactive=False` is the
                        # supported way to make a Textbox read-only.
                        test_score = gr.Textbox(lines=1, value="", label="Log(p)", interactive=False)

    # ------------------------------------------------------------------
    # Self-Instruct panel
    # ------------------------------------------------------------------
    gr.Markdown('\n\n')
    gr.Markdown('--------')
    gr.Markdown('\n\n')
    gr.Markdown("# SELF INSTRUCT")
    gr.Markdown('## 第一步:确认参数并上传种子指令')
    with gr.Row().style(equal_height=True):
        with gr.Column():
            n_human = gr.Slider(label="人工指令数", minimum=1, maximum=5, step=1, value=2)
            n_machine = gr.Slider(label="机器指令数", minimum=1, maximum=5, step=1, value=1)
            # `info` is the Slider parameter for hint text shown to the user.
            n_instruct = gr.Slider(label="生成指令数", minimum=1, maximum=100, step=1, value=4,
                                   info="生成指令数>人工+机器")
            self_prompt_input = gr.Textbox(max_lines=100, lines=20, interactive=True,
                                           placeholder=self_prompt, value='',
                                           label="Prompt for self-instruct")
        with gr.Column():
            openai_key2 = gr.Textbox(type='password', label='输入 API key')
            seed_file = gr.File(label='上传json文件, 格式参考./self/data/seed_task.json')
            self_submit = gr.Button('上传')
            # Per-session state passed between the Self-Instruct callbacks.
            self_instance = gr.State()

    gr.Markdown('\n\n')
    gr.Markdown('## 第二步:采样并生成新指令,每点一次会重采样并生成,生成结果会累计')
    with gr.Row().style(equal_height=True):
        with gr.Column(scale=1):
            fewshot = gr.Textbox(label='采样few-shot')
        with gr.Column(scale=1):
            gen_data = gr.JSON(label='新生成指令样本')
    with gr.Row().style(equal_height=True):
        with gr.Column(scale=7):
            generate_instruct_button = gr.Button("指令生成")
        with gr.Column(scale=1):
            counter = gr.Textbox()

    # ------------------------------------------------------------------
    # APE callbacks
    # ------------------------------------------------------------------
    # 1. Pick an existing task or upload a file, then instantiate the Instance.
    load_button.click(load_task, [task, file], [instance, load_flag])

    # 2. Sample train / eval data according to the configuration and show it
    #    in the UI. Clicking again resamples.
    sample_button.click(sample_data, [instance, n_train, n_few_shot, n_eval],
                        [train_str, eval_str, instance, sample_flag])

    # 3. Estimate cost for train + eval.
    # NOTE(review): `esttimate_cost` looks misspelled but is kept as-is because
    # its definition is not visible here -- confirm the name in ape/ape.py.
    cost_button.click(esttimate_cost, [instance], [cost])

    # 4. Run APE -> all generated instructions.
    gen_button.click(generate, [gen_prompt, instance, openai_key], [all_prompt])

    # 5. Evaluate -> log prob of every generated instruction.
    eval_button.click(evaluate, [eval_prompt, all_prompt, instance, openai_key], [output_df])

    # 6. Single test of a user-supplied instruction.
    test_button.click(single_test, [test_prompt, test_instruction, test_input, openai_key],
                      [test_output])

    # 7. Score a user-supplied instruction.
    score_button.click(score_single, [eval_prompt, instance, score_instruction, openai_key],
                       [test_score])

    # ------------------------------------------------------------------
    # Self-Instruct callbacks
    # ------------------------------------------------------------------
    # 1. Load the seed file.
    self_submit.click(init_instance,
                      inputs=[seed_file, openai_key2, n_human, n_machine, n_instruct,
                              self_prompt_input],
                      outputs=[self_instance])

    # 2. Generate new instructions.
    generate_instruct_button.click(generate_instruction, inputs=[self_instance],
                                   outputs=[fewshot, gen_data, counter])

demo.launch(show_error=True)