# NeonLLM / app.py
import os
import json
from typing import List, Tuple
import gradio as gr
from shared import Client
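# CONFIG is a JSON object mapping model name -> settings. Illustrative shape
# (key names inferred from the lookups below; the concrete values are
# hypothetical):
#   {
#       "neon-llm": {
#           "api_url": "NEON_LLM_API_URL",   # name of the env var holding the endpoint
#           "api_key": "NEON_LLM_API_KEY",   # name of the env var holding the key
#           "personas": {"default": "You are a helpful assistant."}
#       }
#   }
# Client (from shared.py) is assumed to wrap an OpenAI-compatible client for a
# vLLM server, exposing .openai, .vllm_model_name, and .personas
# (persona name -> system prompt).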
config = json.loads(os.environ['CONFIG'])
clients = {}
for name in config:
    model_personas = config[name].get("personas", {})

    client = Client(
        api_url=os.environ[config[name]['api_url']],
        api_key=os.environ[config[name]['api_key']],
        personas=model_personas,
    )
    clients[name] = client
model_names = list(config.keys())
radio_infos = [f"{name} ({clients[name].vllm_model_name})" for name in model_names]
accordion_info = "Persona and LLM Options - Choose one:"
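# The UI exposes one radio group per model, each listing that model's
# personas; selecting a persona in one group implicitly selects the model.
# radio_select below is the tuple of all groups' values, of which at most one
# is non-None (radio_click enforces this).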
def parse_radio_select(radio_select):
    # Exactly one radio group is set; locate it to recover model and persona.
    value_index = next(i for i in range(len(radio_select)) if radio_select[i] is not None)
    model = model_names[value_index]
    persona = radio_select[value_index]
    return model, persona
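# Example (hypothetical values): with model_names == ["alpha", "beta"] and
# radio_select == (None, "teacher"), parse_radio_select returns ("beta", "teacher").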
def respond(
    message,
    history: List[Tuple[str, str]],
    conversational,
    max_tokens,
    *radio_select,
):
    model, persona = parse_radio_select(radio_select)
    client = clients[model]

    messages = []

    try:
        system_prompt = client.personas[persona]
    except KeyError:
        supported_personas = list(client.personas.keys())
        raise gr.Error(f"Model '{model}' does not support persona '{persona}', only {supported_personas}")
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})

    if conversational:
        # Replay only the two most recent (user, assistant) exchanges to keep
        # the prompt short.
        for val in history[-2:]:
            if val[0]:
                messages.append({"role": "user", "content": val[0]})
            if val[1]:
                messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    completion = client.openai.chat.completions.create(
        model=client.vllm_model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=0,
        # vLLM-specific sampling extensions, passed through the OpenAI client.
        extra_body={
            "add_special_tokens": True,
            "repetition_penalty": 1.05,
            "use_beam_search": True,
            "best_of": 5,
        },
    )
    response = completion.choices[0].message.content

    return response
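# For reference, a fully assembled request for a hypothetical persona and a
# one-exchange history looks like:
#   [{"role": "system", "content": "You are ..."},
#    {"role": "user", "content": "Hi"},
#    {"role": "assistant", "content": "Hello!"},
#    {"role": "user", "content": "What's new?"}]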
# Components
radios = [gr.Radio(choices=list(clients[name].personas.keys()), value=None, label=info) for name, info in zip(model_names, radio_infos)]
radios[0].value = list(clients[model_names[0]].personas.keys())[0]  # default: first persona of the first model
conversational_checkbox = gr.Checkbox(value=True, label="conversational")
max_tokens_slider = gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Max new tokens")
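# The components above are instantiated unattached at module scope; they join
# the layout only when .render() is called inside the Blocks context below.
# The accordion is likewise created with render=False so ChatInterface can
# adopt it before it is rendered.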
with gr.Blocks() as blocks:
    # Events
    radio_state = gr.State([radio.value for radio in radios])

    @gr.on(triggers=[radio.input for radio in radios], inputs=[radio_state, *radios], outputs=[radio_state, *radios])
    def radio_click(state, *new_state):
        # Find the group whose value changed, then clear every other group so
        # that only one persona/model selection is active at a time.
        changed_index = next(i for i in range(len(state)) if state[i] != new_state[i])
        changed_value = new_state[changed_index]

        clean_state = [None if i != changed_index else changed_value for i in range(len(state))]
        return clean_state, *clean_state

    # Compile
    with gr.Accordion(label=accordion_info, open=True, render=False) as accordion:
        for radio in radios:
            radio.render()
        conversational_checkbox.render()
        max_tokens_slider.render()

    demo = gr.ChatInterface(
        respond,
        additional_inputs=[
            conversational_checkbox,
            max_tokens_slider,
            *radios,
        ],
        additional_inputs_accordion=accordion,
        title="Neon AI BrainForge Personas and Large Language Models (v2024-07-24)",
        concurrency_limit=5,
    )

    accordion.render()
if __name__ == "__main__":
    blocks.launch()