import os

import gradio as gr
from openai import OpenAI

# Client pointed at a dedicated Hugging Face Inference Endpoint (TGI) exposing an
# OpenAI-compatible API; the access token is read from the "hf_token" secret.
client = OpenAI(
    base_url="https://ueecxhqta9umllae.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    api_key=os.environ.get("hf_token"),
)


def respond(
    지자체,  # municipality
    제목,  # title
    질문,  # question
    max_tokens,
    temperature,
    top_p,
):
    # Custom roles (municipality / title / question) are assumed to match the
    # chat template of the fine-tuned model served by the endpoint.
    messages = [
        {"role": "municipality", "content": 지자체},
        {"role": "title", "content": 제목},
        {"role": "question", "content": 질문},
    ]

    response = ""

    chat_completion = client.chat.completions.create(
        model="tgi",
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    # Stream the completion and yield the accumulated text so Gradio updates
    # the output textbox incrementally.
    for message in chat_completion:
        token = message.choices[0].delta.content
        if token:  # the final streamed chunk may carry no content
            response += token
        yield response


demo = gr.Interface(
    respond,
    inputs=["textbox", "textbox", "textbox"],
    outputs=["textbox"],
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.4, step=0.05, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.90,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()