"""Gradio front end for running the DuckDB SQL evaluation against a model."""

import os

import gradio as gr

from eval.predict import PROMPT_FORMATTERS
from evaluation_logic import run_evaluation

# Preset prompt templates, keyed by the format name shown in the dropdown.
PROMPT_TEMPLATES = {
    "duckdbinstgraniteshort": PROMPT_FORMATTERS["duckdbinstgraniteshort"]().PROMPT_TEMPLATE,
    "duckdbinst": PROMPT_FORMATTERS["duckdbinst"]().PROMPT_TEMPLATE,
}


def gradio_run_evaluation(inference_api, model_name, prompt_format, openrouter_token=None, custom_prompt=None):
    """Run the evaluation and stream its accumulated output to the UI.

    Args:
        inference_api: Name of the inference backend (e.g. "openrouter").
        model_name: Model identifier; surrounding whitespace is stripped.
        prompt_format: Name of the selected prompt format.
        openrouter_token: API token exported as ``OPENROUTER_API_KEY`` when
            the OpenRouter backend is selected and a token was entered.
        custom_prompt: Prompt template text forwarded to the evaluation.

    Yields:
        All results produced so far, joined by newlines, after each new
        result from ``run_evaluation``.
    """
    # Only export the token when one was actually supplied; otherwise the
    # literal string "None" (or an empty string) would replace any key that
    # is already configured in the environment.
    if inference_api == "openrouter" and openrouter_token:
        os.environ["OPENROUTER_API_KEY"] = str(openrouter_token)

    # Both the format name and the template content are passed to evaluation.
    lines = []
    for result in run_evaluation(inference_api, str(model_name).strip(), prompt_format, custom_prompt):
        lines.append(result)
        yield "\n".join(lines)


def update_token_visibility(api):
    """Update visibility of the OpenRouter token input."""
    return gr.update(visible=api == "openrouter")


def update_prompt_template(prompt_format):
    """Update the template content when a preset is selected.

    Returns the preset's template text. For any other format (e.g.
    "custom") a no-op update is returned so the text currently in the
    template box is left untouched: the dropdown is switched to "custom"
    programmatically whenever the user edits the template, and clearing the
    box at that point would discard the edit.
    """
    template = PROMPT_TEMPLATES.get(prompt_format)
    if template is None:
        return gr.update()
    return template


def handle_template_edit(prompt_format, new_template):
    """Return the format name that matches the edited template text.

    Args:
        prompt_format: Currently selected format (unused; kept because the
            event wiring passes it).
        new_template: Current content of the template box.

    Returns:
        The name of the preset whose template equals ``new_template``
        (ignoring surrounding whitespace), or "custom" if none matches.
    """
    edited = (new_template or "").strip()
    # If the template matches a preset exactly, keep the preset name.
    for format_name, template in PROMPT_TEMPLATES.items():
        if template.strip() == edited:
            return format_name
    # Otherwise switch to custom.
    return "custom"


with gr.Blocks(gr.themes.Soft()) as demo:
    gr.Markdown("# DuckDB SQL Evaluation App")

    with gr.Row():
        with gr.Column():
            inference_api = gr.Dropdown(
                label="Inference API",
                choices=['openrouter'],
                value="openrouter"
            )

            openrouter_token = gr.Textbox(
                label="OpenRouter API Token",
                placeholder="Enter your OpenRouter API token",
                type="password",
                visible=True
            )

            model_name = gr.Textbox(
                label="Model Name (e.g., qwen/qwen-2.5-72b-instruct)"
            )

            gr.Markdown("[View OpenRouter Models](https://openrouter.ai/models?order=top-weekly)")

    with gr.Row():
        with gr.Column():
            # 'custom' is offered alongside the presets.
            prompt_format = gr.Dropdown(
                label="Prompt Format",
                choices=['duckdbinst', 'duckdbinstgraniteshort', 'custom'],
                value="duckdbinstgraniteshort"
            )

            custom_prompt = gr.TextArea(
                label="Prompt Template Content",
                placeholder="Enter your custom prompt template here or select a preset format above.",
                lines=10,
                value=PROMPT_TEMPLATES['duckdbinstgraniteshort']  # Set initial value
            )

    gr.Examples(
        examples=[
            ["openrouter", "qwen/qwen-2.5-72b-instruct", "duckdbinst", "", PROMPT_TEMPLATES['duckdbinst']],
            ["openrouter", "meta-llama/llama-3.2-3b-instruct:free", "duckdbinstgraniteshort", "", PROMPT_TEMPLATES['duckdbinstgraniteshort']],
            ["openrouter", "mistralai/mistral-nemo", "duckdbinst", "", PROMPT_TEMPLATES['duckdbinst']],
        ],
        inputs=[inference_api, model_name, prompt_format, openrouter_token, custom_prompt],
    )

    start_btn = gr.Button("Start Evaluation")
    output = gr.Textbox(label="Output", lines=20)

    # Update token visibility
    inference_api.change(
        fn=update_token_visibility,
        inputs=[inference_api],
        outputs=[openrouter_token]
    )

    # Update template content when preset is selected
    prompt_format.change(
        fn=update_prompt_template,
        inputs=[prompt_format],
        outputs=[custom_prompt]
    )

    # Update format dropdown when template is edited
    custom_prompt.change(
        fn=handle_template_edit,
        inputs=[prompt_format, custom_prompt],
        outputs=[prompt_format]
    )

    start_btn.click(
        fn=gradio_run_evaluation,
        inputs=[inference_api, model_name, prompt_format, openrouter_token, custom_prompt],
        outputs=output
    )

demo.queue().launch()