from openai import AsyncOpenAI

# TGI exposes an OpenAI-compatible API; the server does not validate the key,
# so a placeholder value is passed.
base_url = "http://127.0.0.1:8080/v1"
client = AsyncOpenAI(base_url=base_url, api_key="-")


def _default_parameters():
    """
    Available generation parameters:

    frequency_penalty: Optional[float] = None,
    logit_bias: Optional[List[float]] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    max_tokens: Optional[int] = None,
    n: Optional[int] = None,
    presence_penalty: Optional[float] = None,
    stream: bool = False,
    seed: Optional[int] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    tools: Optional[List[Tool]] = None,
    tool_choice: Optional[str] = None,
    """
    return {
        "max_tokens": 256,
        "stream": True,
        "temperature": 0.9,
    }


def translate_messages(history):
    """Convert a list of (user, assistant) turn pairs into OpenAI-style messages."""
    messages = []
    for conv in history:
        messages.append({"role": "user", "content": conv[0]})
        messages.append({"role": "assistant", "content": conv[1]})
    return messages


async def gen_chat(messages, parameters=None):
    """Stream a chat completion from the TGI server, yielding text deltas."""
    if parameters is None:
        parameters = _default_parameters()
    responses = await client.chat.completions.create(
        model="tgi",
        messages=messages,
        **parameters,
    )
    async for resp in responses:
        delta = resp.choices[0].delta.content
        # Skip chunks without text, e.g. the final chunk that only carries finish_reason.
        if delta is not None:
            yield delta
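
# A minimal usage sketch (assumptions: a TGI instance is already serving on
# http://127.0.0.1:8080, and the history and prompt below are made-up examples).
import asyncio


async def _demo():
    # History as (user, assistant) pairs, then a new user turn to answer.
    history = [("Hello!", "Hi there, how can I help?")]
    messages = translate_messages(history)
    messages.append({"role": "user", "content": "Write a haiku about rivers."})
    # Print tokens as they stream in.
    async for token in gen_chat(messages):
        print(token, end="", flush=True)
    print()


if __name__ == "__main__":
    asyncio.run(_demo())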