File size: 860 Bytes
2ded358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import gradio as gr
from scripts.bold import generate_and_evaluate_causal_lm_toxicity_bold
from scipy.stats import anderson_ksamp


def run_evaluation(model_id):
    """Evaluate ``model_id`` on the prompts in ``./prompts`` and test each group.

    Calls ``generate_and_evaluate_causal_lm_toxicity_bold`` and, for every
    top-level key of its result, feeds the values stored under that entry's
    ``'raw'`` mapping to SciPy's k-sample Anderson-Darling test.

    Returns:
        A ``(results, significance)`` tuple: the evaluation output unchanged,
        and a dict mapping each top-level key to the test's
        ``significance_level``.
    """
    results = generate_and_evaluate_causal_lm_toxicity_bold(model_id, './prompts')

    significance = {}
    for category, entry in results.items():
        # Each value of the 'raw' mapping is passed as one sample to the test.
        samples = list(entry['raw'].values())
        significance[category] = anderson_ksamp(samples).significance_level
    return results, significance


# Gradio UI: a text box for the model id, a button that triggers the
# evaluation, and two JSON panels showing what run_evaluation returns.
demo = gr.Blocks()

with demo:
    model_id = gr.Text()
    button = gr.Button("Run Evaluation")
    raw_outputs = gr.Json(label="Evaluation Results")
    pvalues = gr.Json(label="P-values")

    # run_evaluation returns two values (evaluation results, p-values), so
    # every place that wires it as `fn` must list both output components in
    # that order. Listing only one breaks whenever Gradio actually executes
    # the example function (e.g. when example caching is enabled).
    gr.Examples(
        examples=[["EleutherAI/gpt-neo-125M"]],
        fn=run_evaluation,
        inputs=[model_id],
        outputs=[raw_outputs, pvalues],
    )

    button.click(
        fn=run_evaluation,
        inputs=[model_id],
        outputs=[raw_outputs, pvalues],
    )

demo.launch()