import os
from typing import List, Tuple

import gradio as gr
from openai import OpenAI

# Client for an OpenAI-compatible endpoint. Both BASE_URL and API_KEY are read
# from the environment; a missing variable raises KeyError at import time.
client = OpenAI(
    base_url=f"{os.environ['BASE_URL']}/v1",
    api_key=os.environ["API_KEY"],
)

# Roles from previous turns that are replayed to the model.
_REPLAYED_ROLES = ("user", "assistant")


def _history_to_messages(history):
    """Convert chat-UI history into a list of chat-completion message dicts.

    Two history layouts are accepted:

    * a sequence of ``(user_text, assistant_text)`` pairs, as declared by the
      type annotation on ``respond``;
    * a sequence of ``{"role": ..., "content": ...}`` dicts.

    Entries whose text is empty or not a plain string are skipped, since
    only textual turns can be forwarded as message content here.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role in _REPLAYED_ROLES and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            user_text, assistant_text = turn
            if isinstance(user_text, str) and user_text:
                messages.append({"role": "user", "content": user_text})
            if isinstance(assistant_text, str) and assistant_text:
                messages.append({"role": "assistant", "content": assistant_text})
    return messages


def respond(
    message,
    history: List[Tuple[str, str]],
    max_tokens,
):
    """Return the model's reply to ``message`` given the prior conversation.

    Args:
        message: The user's newest message.
        history: Earlier turns of the conversation, supplied by the chat UI.
            They are replayed to the model ahead of ``message`` so the reply
            can take the conversation so far into account.
        max_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The text of the first completion choice, or an empty string when the
        server returns no text content.
    """
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    completion = client.chat.completions.create(
        model="neongeckocom/NeonLLM",
        messages=messages,
        max_tokens=max_tokens,
        temperature=0,
        # Non-standard sampling options forwarded verbatim in the request body.
        # NOTE(review): these look like vLLM-style parameters — confirm that
        # the backend behind BASE_URL supports them.
        extra_body={
            "repetition_penalty": 1.05,
            "use_beam_search": True,
            "best_of": 5,
        },
    )

    response = completion.choices[0].message.content
    # ``content`` may be None; always hand the UI a string.
    return response if response is not None else ""


# Chat UI: ``respond`` receives (message, history) plus the slider value as
# ``max_tokens``.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    ],
)


if __name__ == "__main__":
    demo.launch()