# -*- coding: utf-8 -*-
import gradio as gr
from ape.instance import LoadFactory
from ape.prompt import MyTemplate
from ape.ape import *
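# NOTE: `from ape.ape import *` is assumed to provide the callback handlers
# wired up in the Callbacks section below (load_task, sample_data, generate,
# evaluate, single_test, score_single and the cost estimator).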


with gr.Blocks(title="Automatic Prompt Engineer", theme=gr.themes.Glass()) as demo:
    gr.Markdown("# Automatic Prompt Engineer")
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("## Configuration")
            with gr.Row():
                openai_key = gr.Textbox(type='password', label='Enter API key')
            with gr.Row():
                n_train = gr.Slider(label="Number of Train Samples", minimum=1, maximum=20, step=1, value=5)
                n_few_shot = gr.Slider(label="Number of FewShot Examples", minimum=1, maximum=20, step=1, value=5)

            with gr.Row():
                n_eval = gr.Slider(label="Number of Eval Samples", minimum=1, maximum=30, step=5, value=20)

        with gr.Column(scale=3):
            gr.Markdown("## Load Data")
            with gr.Tab("Choose Dataset"):
                with gr.Row():
                    file = gr.File(label='Upload a txt file: input\toutput\n', file_types=['txt'])
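                    # Expected format: one tab-separated sample per line, e.g.
                    #   What is the capital of France?\tParis
                    # (illustrative example; any input/output pair works)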
                with gr.Row():
                    task = gr.Dropdown(label="Choose Existing Task", choices=list(LoadFactory.keys()), value=None)
                with gr.Row():
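                    # gr.State keeps the loaded task instance in the session
                    # across callbacks without rendering anything in the UI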
                    instance = gr.State()
                    load_button = gr.Button("Load Task")
                    load_flag = gr.Textbox()
                    sample_button = gr.Button('Sample Data')
                    sample_flag = gr.Textbox()

            with gr.Tab("Display Sampled Dataset"):
                with gr.Row():
                    train_str = gr.Textbox(max_lines=100, lines=10, label="Data for prompt generation")
                    eval_str = gr.Textbox(max_lines=100, lines=10, label="Data for scoring")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("## Run APE: 可以替换以下默认指令模板")
            gen_prompt = gr.Textbox(max_lines=100, lines=3, interactive=True,
                                    placeholder=MyTemplate['gen_user_prompt'],
                                    value='', label="Prompt for generation")
            eval_prompt = gr.Textbox(max_lines=100, lines=3, interactive=True,
                                     placeholder=MyTemplate['eval_prompt'],
                                     value='', label="Prompt for Evaluation")
            test_prompt = gr.Textbox(max_lines=100, lines=3, interactive=True,
                                     placeholder=MyTemplate['test_prompt'],
                                     value='', label="Prompt for Single Test")

            with gr.Row():
                cost = gr.Textbox(lines=1, value="", label="Estimated Cost ($)")
                cost_button = gr.Button("Estimate Cost")
            with gr.Row():
                gen_button = gr.Button("Generate")
                eval_button = gr.Button("Eval")

        with gr.Column(scale=3):
            gr.Markdown("## Get Result")
            with gr.Tab("APE Results"):
                all_prompt = gr.Textbox(label='Generated Prompt')
                # Display all generated prompt with log probs
                output_df = gr.DataFrame(type='pandas', headers=['Prompt', 'Likelihood'], wrap=True, interactive=False)

            with gr.Tab("Test Prompt"):
                # Test the output of LLM using prompt
                with gr.Row():
                    with gr.Column(scale=1):
                        test_instruction = gr.Textbox(lines=4, value="", label="Prompt to test")
                        test_input = gr.Textbox(lines=1, value="", label="Input used to test prompt")
                        test_button = gr.Button("Test")
                    with gr.Column(scale=1):
                        test_output = gr.Textbox(lines=9, value="", label="Model Output")

            with gr.Tab("Eval Prompt"):
                # By Default use the Evaluation Set in APE
                with gr.Row():
                    with gr.Column(scale=1):
                        score_instruction = gr.Textbox(lines=3, value="",
                                                       label="Prompt to Evaluate")
                        score_button = gr.Button("Evaluate")
                    with gr.Column(scale=1):
                        test_score = gr.Textbox(lines=1, value="", label="Log(p)", interactive=False)


    """
    Callback
    """
    # 1. Choose an existing task or upload a file to instantiate the task Instance
    load_button.click(load_task, [task, file], [instance, load_flag])

    # 2. Sample train and eval sets according to the Configuration and display them; resampling is supported
    sample_button.click(sample_data, [instance, n_train, n_few_shot, n_eval], [train_str, eval_str, instance, sample_flag])

    # 3. Estimate the API cost for train + eval
    cost_button.click(esttimate_cost, [instance], [cost])

    # 4. Run APE -> all generated instructions
    gen_button.click(generate, [gen_prompt, instance, openai_key], [all_prompt])

    # 5. Evaluate -> log prob of every generated instruction
    eval_button.click(evaluate, [eval_prompt, all_prompt, instance, openai_key], [output_df])

    # 6. Single test with a user-provided instruction
    test_button.click(single_test, [test_prompt, test_instruction, test_input, openai_key], [test_output])

    # 7. Score a user-provided instruction
    score_button.click(score_single, [eval_prompt, instance, score_instruction, openai_key], [test_score])

demo.launch(show_error=True)
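
# To run locally (assuming this script is saved as app.py and gradio plus the
# `ape` package are installed): `python app.py`, then open the printed URL.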