"""Gradio "LLM Battle" tab: show two stored responses to a prompt and collect a vote.

Responses are read once from ``responses.csv`` (columns used here: ``prompt``,
``model_name``, ``response``). The vote handlers and the prompt-length limits
are expected to come from the wildcard imports below.
"""

# NOTE: the import order is intentionally left untouched. The wildcard imports
# may export names that the later plain imports are meant to override.
import gradio as gr
from .config import *
from .vote import *
from .messages import *
from .llm import *
import random
import pandas as pd

df = pd.read_csv('responses.csv')

# Drop prompts that only appear once because we can't compare them
prompt_counts = df['prompt'].value_counts()
df = df[df['prompt'].isin(prompt_counts[prompt_counts > 1].index)]

with gr.Blocks() as battle:
    battle_useridstate = gr.State()

    gr.Markdown("## 🤖 LLM Battle\nCompare two AI responses and vote for the better one!")

    # Prompt entry: free-text box, a "random prompt" dice button, and submit.
    with gr.Group():
        with gr.Row():
            prompt = gr.Textbox(
                container=False,
                show_label=False,
                placeholder="Enter your prompt here...",
                lines=3,
                max_lines=10,
                scale=9999999,
                min_width=0,
                rtl=True,
            )
            random_prompt = gr.Button('🎲', scale=0, min_width=0, variant='tool')
        btn = gr.Button("Generate Responses", variant='primary')

    # Side-by-side responses; hidden until a pair has been generated.
    with gr.Row(visible=False) as response_row:
        with gr.Column():
            with gr.Group():
                response1 = gr.Textbox(
                    label="Model A Response",
                    lines=8,
                    max_lines=8,
                    interactive=False,
                    rtl=True,
                )
                a_better = gr.Button("A is better", variant='primary')
                model1_name = gr.Textbox(
                    interactive=False,
                    show_label=False,
                    container=False,
                    value="Vote to reveal model A",
                    text_align="center",
                    visible=False,
                )
        with gr.Column():
            with gr.Group():
                response2 = gr.Textbox(
                    label="Model B Response",
                    lines=8,
                    max_lines=8,
                    interactive=False,
                    rtl=True,
                )
                b_better = gr.Button("B is better", variant='primary')
                model2_name = gr.Textbox(
                    interactive=False,
                    show_label=False,
                    container=False,
                    value="Vote to reveal model B",
                    text_align="center",
                    visible=False,
                )

    # Tie votes; the buttons start hidden and are revealed with the responses.
    with gr.Row() as tie_row:
        prefer_both = gr.Button("Prefer Both", variant='secondary', visible=False)
        prefer_none = gr.Button("Prefer None", variant='secondary', visible=False)

    def generate_responses(prompt_text):
        """Pick two stored responses for ``prompt_text`` and reveal the voting UI.

        Args:
            prompt_text: Text from the prompt box. ``None`` is treated as empty.

        Returns:
            An 11-item list matching the ``outputs`` of ``btn.click``: the two
            response texts, the two model names, then visibility/interactivity
            updates for the response row, the four vote buttons and the two
            model-name boxes.

        Raises:
            gr.Error: If the stripped prompt is shorter than
                ``MIN_PROMPT_LENGTH``, longer than ``MAX_PROMPT_LENGTH``, or
                fewer than two stored responses match it.
        """
        raw_prompt = prompt_text or ""
        cleaned_prompt = raw_prompt.strip()

        if len(cleaned_prompt) < MIN_PROMPT_LENGTH:
            raise gr.Error(f"Prompt must be at least {MIN_PROMPT_LENGTH} characters")
        if len(cleaned_prompt) > MAX_PROMPT_LENGTH:
            raise gr.Error(f"Prompt must be less than {MAX_PROMPT_LENGTH} characters")

        # Get responses for this prompt. Length validation works on the
        # stripped text, so the lookup accepts the stripped form too; otherwise
        # stray surrounding whitespace would pass validation and then never
        # match a stored prompt.
        stored_prompts = df['prompt']
        prompt_responses = df[
            (stored_prompts == raw_prompt) | (stored_prompts == cleaned_prompt)
        ]
        if len(prompt_responses) < 2:
            raise gr.Error(f"Not enough responses for prompt: {prompt_text}")

        # Randomly select 2 distinct rows (sampling is without replacement).
        selected_responses = prompt_responses.sample(n=2)
        first_row = selected_responses.iloc[0]
        second_row = selected_responses.iloc[1]

        return [
            first_row['response'],                      # response1
            second_row['response'],                     # response2
            first_row['model_name'],                    # model1_name
            second_row['model_name'],                   # model2_name
            gr.update(visible=True),                    # response_row
            gr.update(interactive=True, visible=True),  # a_better
            gr.update(interactive=True, visible=True),  # b_better
            gr.update(interactive=True, visible=True),  # prefer_both
            gr.update(interactive=True, visible=True),  # prefer_none
            gr.update(visible=False),                   # model1_name visibility
            gr.update(visible=False),                   # model2_name visibility
        ]

    # Event handlers
    # model1_name / model2_name are listed twice on purpose: the first entry
    # receives the model name, the second keeps the box hidden until a vote.
    btn.click(
        fn=generate_responses,
        inputs=[prompt],
        outputs=[
            response1, response2,
            model1_name, model2_name,
            response_row,
            a_better, b_better,
            prefer_both, prefer_none,
            model1_name, model2_name,
        ],
    )

    # All four vote buttons share the same inputs and outputs; only the
    # handler differs. The handlers are not defined in this file (presumably
    # they come from ``.vote`` -- confirm).
    vote_inputs = [model1_name, model2_name, battle_useridstate, prompt, response1, response2]
    vote_outputs = [a_better, b_better, prefer_both, prefer_none, model1_name, model2_name]
    for vote_button, vote_handler in (
        (a_better, a_is_better),
        (b_better, b_is_better),
        (prefer_both, prefer_both_vote),
        (prefer_none, prefer_none_vote),
    ):
        vote_button.click(fn=vote_handler, inputs=vote_inputs, outputs=vote_outputs)

    def get_random_prompt():
        """Return one prompt chosen uniformly from the distinct prompts in ``df``.

        Raises:
            gr.Error: If no prompts are available after filtering.
        """
        # A plain list keeps random.choice away from truth-testing an array
        # object and lets the empty case be reported with a readable message.
        available_prompts = list(df['prompt'].unique())
        if not available_prompts:
            raise gr.Error("No prompts available")
        return random.choice(available_prompts)

    random_prompt.click(
        fn=get_random_prompt,
        outputs=[prompt],
    )