# NeonLLM / app.py
import os
import json
from typing import List, Tuple
import gradio as gr
from shared import Client
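
# CONFIG arrives as JSON in the environment. Inferred shape (the example values
# below are hypothetical, not from the source): model name -> the *names* of the
# env vars holding that model's endpoint and key, plus an optional
# persona -> system-prompt mapping:
# {"neon": {"api_url": "NEON_API_URL", "api_key": "NEON_API_KEY",
#           "personas": {"default": "You are a helpful assistant."}}}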
config = json.loads(os.environ['CONFIG'])
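
# One shared.Client per configured model, each wrapping its own vLLM endpoint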
clients = {}
for name in config:
    model_personas = config[name].get("personas", {})
    client = Client(
        api_url=os.environ[config[name]['api_url']],
        api_key=os.environ[config[name]['api_key']],
        personas=model_personas,
    )
    clients[name] = client

model_names = list(config.keys())
radio_infos = [f"{name} ({clients[name].vllm_model_name})" for name in model_names]
accordion_info = "Config"
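

# The per-model radio groups are mutually exclusive: at most one group holds a
# non-None value at a time (enforced by radio_click below), so the first
# non-None entry in the flat radio tuple identifies both the selected model and
# its persona.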
def parse_radio_select(radio_select):
    value_index = next(i for i in range(len(radio_select)) if radio_select[i] is not None)
    model = model_names[value_index]
    persona = radio_select[value_index]
    return model, persona

def respond(
    message,
    history: List[Tuple[str, str]],
    conversational,
    max_tokens,
    *radio_select,
):
    model, persona = parse_radio_select(radio_select)
    client = clients[model]

    messages = []
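    # Resolve the persona's system prompt; an unknown persona surfaces as a
    # Gradio error in the UI rather than an unhandled server exception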
    try:
        system_prompt = client.personas[persona]
    except KeyError:
        supported_personas = list(client.personas.keys())
        raise gr.Error(f"Model '{model}' does not support persona '{persona}', only {supported_personas}")
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})
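
    # In conversational mode, replay the last two exchanges for short-term context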
    if conversational:
        for val in history[-2:]:
            if val[0]:
                messages.append({"role": "user", "content": val[0]})
            if val[1]:
                messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
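
    # temperature=0 with vLLM-specific sampling extras (beam search, best_of=5,
    # repetition penalty) passed through extra_body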
    completion = client.openai.chat.completions.create(
        model=client.vllm_model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=0,
        extra_body={
            "repetition_penalty": 1.05,
            "use_beam_search": True,
            "best_of": 5,
        },
    )
    response = completion.choices[0].message.content
    return response

# Components
radios = [
    gr.Radio(choices=list(clients[name].personas.keys()), value=None, label=info)
    for name, info in zip(model_names, radio_infos)
]
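# Default to the first persona of the first model so one radio group starts selected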
radios[0].value = list(clients[model_names[0]].personas.keys())[0]
conversational_checkbox = gr.Checkbox(value=True, label="conversational")
max_tokens_slider = gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Max new tokens")
with gr.Blocks() as blocks:
    # Events
    radio_state = gr.State([radio.value for radio in radios])
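
    # Keep the groups mutually exclusive: when one radio changes, clear the others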
    @gr.on(
        triggers=[radio.input for radio in radios],
        inputs=[radio_state, *radios],
        outputs=[radio_state, *radios],
    )
    def radio_click(state, *new_state):
        changed_index = next(i for i in range(len(state)) if state[i] != new_state[i])
        changed_value = new_state[changed_index]
        clean_state = [None if i != changed_index else changed_value for i in range(len(state))]
        return clean_state, *clean_state

    # Compile
    with gr.Accordion(label=accordion_info, open=True, render=False) as accordion:
        for radio in radios:
            radio.render()
        conversational_checkbox.render()
        max_tokens_slider.render()

    demo = gr.ChatInterface(
        respond,
        additional_inputs=[
            conversational_checkbox,
            max_tokens_slider,
            *radios,
        ],
        additional_inputs_accordion=accordion,
        title="NeonLLM (v2024-07-03)",
        concurrency_limit=5,
    )

    accordion.render()

if __name__ == "__main__":
    blocks.launch()