nallm-test / app.py
gyulukeyi's picture
debug
57e1e4b
raw
history blame
1.39 kB
import os
import gradio as gr
from openai import OpenAI
# Inference client pointed at a dedicated Hugging Face Inference Endpoint
# that exposes an OpenAI-compatible chat API (served by TGI).
client = OpenAI(
    base_url="https://ueecxhqta9umllae.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    # NOTE(review): env var name is lowercase "hf_token" — confirm the Space
    # secret uses this exact casing; os.environ.get returns None if unset,
    # which would make every request fail authentication.
    api_key=os.environ.get("hf_token"),
)
def respond(
    지자체,
    제목,
    질문,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a model answer for a municipal Q&A request.

    Args:
        지자체: Municipality name (sent as the "municipality" message).
        제목: Question title (sent as the "title" message).
        질문: Question body (sent as the "question" message).
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        str: The accumulated response text after each streamed chunk, so the
        Gradio textbox updates incrementally.
    """
    # NOTE(review): these roles are non-standard for OpenAI-style chat APIs
    # (usually "system"/"user"/"assistant") — presumably the endpoint's chat
    # template expects them; confirm against the deployed model.
    messages = [
        {"role": "municipality", "content": 지자체},
        {"role": "title", "content": 제목},
        {"role": "question", "content": 질문},
    ]

    chat_completion = client.chat.completions.create(
        model="tgi",
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    response = ""
    for message in chat_completion:
        # Bug fix: the final streamed chunk (and any role-only chunk) carries
        # delta.content == None; the original `response += token` raised
        # TypeError there. Treat None as the empty string.
        token = message.choices[0].delta.content or ""
        response += token
        yield response
# Gradio UI: three free-text inputs (municipality, title, question) mapped to
# respond()'s first three parameters; the generation knobs (max tokens,
# temperature, top-p) are exposed as sliders via additional_inputs and fill
# the remaining three parameters in order.
demo = gr.Interface(
    respond,
    inputs=["textbox", "textbox", "textbox"],
    outputs=["textbox"],
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.4, step=0.05, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.90,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

# Script entry point (how Hugging Face Spaces launches the app).
if __name__ == "__main__":
    demo.launch()