# Spaces: Running
import os

import gradio as gr
import requests
from huggingface_hub import InferenceClient
# Inference API client for the chat model. The access token is read from the
# HF_TOKEN environment variable; os.getenv returns None when it is unset.
hf_client = InferenceClient(
    "mistralai/Mistral-Nemo-Instruct-2407",
    token=os.getenv("HF_TOKEN"),
)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply to ``message`` from ``hf_client``.

    The prompt is a system message (a fixed language-matching prefix followed
    by ``system_message``), then the turns in ``history``, then ``message`` as
    the final user turn.

    Args:
        message: The new user message.
        history: Earlier ``(user, assistant)`` turns; falsy entries are
            skipped.
        system_message: Text placed after the fixed prefix in the system
            prompt.
        max_tokens: Forwarded unchanged to ``chat_completion``.
        temperature: Forwarded unchanged to ``chat_completion``.
        top_p: Forwarded unchanged to ``chat_completion``.

    Yields:
        The reply text accumulated so far, once for every streamed chunk that
        carries a choice.
    """
    # The Korean sentence says: "Do not output two languages at once. Always
    # answer Korean questions in Korean, and English questions only in
    # English." It was stored as mojibake and is restored to proper Hangul.
    system_prefix = (
        "\n"
        "If the input language is Korean, respond in Korean. "
        "If it's English, respond in English.\n"
        "동시에 두가지 언어를 출력하지 말것. "
        "반드시 한국어 질문에는 한국어로 답변하고, "
        "영어 질문에는 영어로만 답변하라\n"
    )

    messages = [
        {"role": "system", "content": f"{system_prefix} {system_message}"}
    ]
    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is named ``chunk`` so it no longer shadows the
    # ``message`` parameter.
    for chunk in hf_client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            # A chunk without choices has no text; skip it instead of raising
            # IndexError on choices[0].
            continue
        token = chunk.choices[0].delta.content
        if token is not None:
            # The former ``token.strip("")`` stripped nothing, so the token is
            # appended as received.
            response += token
        yield response
# Theme identifier passed to the chat interface.
theme = "Nymbo/Nymbo_Theme"

# Custom CSS passed to the chat interface: hides the page footer.
css = """
footer {
visibility: hidden;
}
"""
# Chat UI built around ``respond``. The additional inputs are listed in the
# same order as the parameters ``respond`` declares after ``history``:
# system_message, max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="\nYou are an AI assistant.\n",
            label="System Prompt",
        ),
        gr.Slider(
            minimum=1,
            maximum=2000,
            value=512,
            step=1,
            label="Max new tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    theme=theme,
    css=css,
)
# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()