File size: 4,405 Bytes
8628f17
 
 
e333fa4
 
ebeb9b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8628f17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebeb9b4
8628f17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebeb9b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8628f17
ebeb9b4
8628f17
ebeb9b4
 
 
 
 
 
 
 
 
8628f17
 
ebeb9b4
 
8628f17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import gradio as gr
from root import RootSignals

# Shared Root Signals SDK client; stays None until create_judge() builds one.
client = None
# Evaluator most recently returned by client.evaluators.create(); None until
# a judge has been created.
custom_judge = None
# Model identifiers.
# NOTE(review): nothing in the visible code reads this list -- create_judge()
# hard-codes model="gpt-4o". Presumably meant for a model picker; confirm.
MODELS = [
    "claude-3-5-sonnet",
    "claude-3-haiku-20240307",
    "claude-3-opus-20240229",
    "claude-3-sonnet-20240229",
    "codestral",
    "command-r",
    "command-r-plus",
    "fireworks_ai/llama-v3-70b-instruct",
    "gpt-4",
    "gpt-4o",
    "gpt-4o-mini",
    "gpt-4-turbo",
    "groq/llama3-70b-8192",
    "mistral-large-latest",
    "mistral-medium",
    "o1-mini",
    "o1-preview",
    "open-codestral-mamba",
]

def initialize_client(api_key):
    """Build a Root Signals SDK client for the given API key.

    This function does not touch module state; the caller decides where the
    returned client is stored.

    Args:
        api_key: Root Signals API key to authenticate with.

    Returns:
        A new ``RootSignals`` client instance.
    """
    # The original declared ``global client`` here without ever assigning it,
    # which wrongly suggested the function mutates module state.
    return RootSignals(api_key=api_key)

def create_judge(api_key, judge_name, intent, judge_prompt):
    """Create a custom LLM judge and remember it for later evaluations.

    A Root Signals client is built from ``api_key`` and used to register an
    evaluator whose predicate is ``judge_prompt`` followed by the
    ``{{output}}`` placeholder. On success the client and evaluator are stored
    in the module-level ``client`` and ``custom_judge`` and an info toast is
    shown.

    Args:
        api_key: Root Signals API key entered in the UI.
        judge_name: Name to give the evaluator.
        intent: High-level intent passed through to the evaluator.
        judge_prompt: Prompt text that the judge applies to a response.

    Raises:
        gr.Error: If the API key, judge name or judge prompt is blank.
    """
    global client, custom_judge

    api_key = (api_key or "").strip()
    if not api_key:
        raise gr.Error("Please enter your Root Signals API key")
    if not (judge_name or "").strip():
        raise gr.Error("Please enter a name for the judge")
    if not (judge_prompt or "").strip():
        raise gr.Error("Please enter a judge prompt")

    # Always build the client from the key that was just submitted. Reusing a
    # cached client would keep using the first key ever entered, so a corrected
    # or different key would be silently ignored.
    new_client = initialize_client(api_key)

    # Create custom judge
    new_judge = new_client.evaluators.create(
        name=judge_name,
        predicate=judge_prompt + " {{output}}",
        intent=intent,
        model="gpt-4o",
    )

    # Commit both globals only after creation succeeded, so a failed attempt
    # never leaves a new client paired with a judge from an older key.
    client, custom_judge = new_client, new_judge

    # gr.Info shows a toast as a side effect; it is not a value to return.
    gr.Info(f"Custom LLM-Judge '{judge_name}' is created successfully!")

def evaluate_response(llm_response):
    """Score an LLM response with the previously created custom judge.

    Args:
        llm_response: Text of the LLM response to evaluate.

    Returns:
        A ``(score, justification)`` pair taken from the evaluation result,
        or the same reminder message twice when no client or judge has been
        set up yet.
    """
    if not (client and custom_judge):
        reminder = "Please create a judge first"
        return reminder, reminder

    # Run evaluation using custom judge
    outcome = custom_judge.run(response=llm_response)
    return outcome.score, outcome.justification

# Build the demo UI: API key entry, judge creation form, and an evaluation
# panel showing the score and justification side by side.
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    # Visitor counter badge.
    gr.HTML("""<a href="https://api.visitorbadge.io/api/visitors?path=https://huggingface.co/spaces/root-signals/RootEvaluatorsDemo">
               <img src="https://api.visitorbadge.io/api/visitors?path=https://huggingface.co/spaces/root-signals/RootEvaluatorsDemo" />
               </a>""")

    with gr.Row():
        gr.Image(value="https://app.rootsignals.ai/images/root-signals-color.svg", height=70)
        gr.Markdown("<div>&nbsp;</div>")  # Add some space below the image
    gr.Markdown("# Custom Judge Demo by Root Signals")

    gr.Markdown("[Sign-up](https://app.rootsignals.ai/register) to create your API key!")

    api_key = gr.Textbox(
        label="🔑 Root Signals API Key",
        placeholder="Enter your Root Signals API key...",
        type="password",
        show_label=True,
    )

    gr.Markdown("---")  # Divider

    gr.Markdown("### Create Custom Judge")
    with gr.Row():
        judge_name = gr.Textbox(label="👨‍⚖️ Judge Name", placeholder="Enter a name for your custom judge...", interactive=True)
        user_intent = gr.Textbox(label="👤 Intent", placeholder="Enter the high-level intent for this judge...", interactive=True)
    with gr.Row():
        judge_prompt = gr.Textbox(
            label="📝 Custom Judge Prompt",
            placeholder="Enter the custom judge prompt...",
            interactive=True,
            lines=5,
            max_lines=10,
        )
        create_judge_btn = gr.Button("✨ CREATE JUDGE", variant="primary")
    # No placeholder component is needed for notifications: gr.Info is a
    # helper that shows a toast when called inside an event handler, not a
    # component. Calling it here would only run it at build time and yield None.

    gr.Markdown("---")  # Divider

    with gr.Row():
        # Left column - Evaluation
        with gr.Column():
            gr.Markdown("### Evaluate Response")
            llm_response = gr.Textbox(
                label="🤖 LLM Response",
                placeholder="Enter the LLM response to be evaluated...",
                interactive=True,
                lines=5,
                max_lines=10,
            )
            evaluate_btn = gr.Button("🧐 EVALUATE", variant="primary", visible=True)

        # Right column - Results
        with gr.Column():
            gr.Markdown("### Results")
            score = gr.Textbox(label="📊 Score (between 0 and 1)", interactive=False)
            justification = gr.TextArea(label="💬 Justification", interactive=False)

    # Button click events
    # create_judge reports success through a toast, so it has no output
    # components to update.
    create_judge_btn.click(
        fn=create_judge,
        inputs=[api_key, judge_name, user_intent, judge_prompt],
    )

    evaluate_btn.click(
        fn=evaluate_response,
        inputs=[llm_response],
        outputs=[score, justification],
    )

    gr.Markdown("[Homepage](https://www.rootsignals.ai/) | [Python SDK Docs](https://sdk.rootsignals.ai/en/latest/)")

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()