import gradio as gr

from text_generation import AsyncClient
from text_generation.types import Message

# URL of a running text-generation-inference (TGI) server.
endpoint_url = "http://127.0.0.1:8080"
client = AsyncClient(endpoint_url)

# Keyword arguments accepted by client.chat(), kept here for reference:
"""
frequency_penalty: Optional[float] = None,
logit_bias: Optional[List[float]] = None,
logprobs: Optional[bool] = None,
top_logprobs: Optional[int] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[float] = None,
stream: bool = False,
seed: Optional[int] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
tools: Optional[List[Tool]] = None,
tool_choice: Optional[str] = None,
"""


def _default_parameters():
    return {
        "max_tokens": 256,
        "stream": True,
        "temperature": 0.9,
    }


def _translate_messages(history):
    # Convert Gradio's [[user, assistant], ...] history into TGI chat messages.
    messages = []
    for user_msg, assistant_msg in history:
        messages.append(Message(role="user", content=user_msg))
        messages.append(Message(role="assistant", content=assistant_msg))
    return messages


async def echo(message, history):
    parameters = _default_parameters()
    messages = _translate_messages(history)
    # Append the new user message so the model actually sees it.
    messages.append(Message(role="user", content=message))

    # Stream the completion and yield the partial response so the UI updates
    # token by token; gr.ChatInterface appends it to the history for us.
    full_resp = ""
    async for resp in client.chat(messages=messages, **parameters):
        content = resp.choices[0].delta.content
        if content:
            full_resp += content
            yield full_resp


demo = gr.ChatInterface(
    fn=echo,
    examples=["hello", "hola", "merhaba"],
    title="Echo Bot",
    multimodal=False,
)

demo.queue().launch(server_name="0.0.0.0", server_port=3000)
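
# Usage note (a sketch, assuming Docker and a TGI-compatible model of your
# choice; the model id below is only an example): this script expects a
# text-generation-inference server listening at endpoint_url, e.g.
#
#   docker run --gpus all --shm-size 1g -p 8080:80 \
#       ghcr.io/huggingface/text-generation-inference:latest \
#       --model-id mistralai/Mistral-7B-Instruct-v0.2
#
# Then run this file and open http://localhost:3000 in a browser.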