Spaces:
Running
Running
File size: 2,221 Bytes
e8bac0f 2db0d53 e8bac0f 2db0d53 e8bac0f 8ce49a6 556358f e8bac0f 2db0d53 83746e4 2db0d53 83746e4 2db0d53 8ce49a6 85f74eb 2db0d53 85f74eb 2db0d53 85f74eb 83746e4 85f74eb 2db0d53 cde7a7b 2db0d53 a8032bb 85f74eb a8032bb a6549b1 2db0d53 a6549b1 2db0d53 8ce49a6 2db0d53 a6549b1 2db0d53 8ce49a6 a6549b1 83746e4 2db0d53 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import gradio as gr
from huggingface_hub import InferenceClient
import os
import requests
# Shared inference client bound to the `google/gemma-2-2b-it` model. The
# access token is read from the HF_TOKEN environment variable (None if unset).
hf_client = InferenceClient(
    model="google/gemma-2-2b-it",
    token=os.environ.get("HF_TOKEN"),
)
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream a chat completion for ``message`` as a growing reply string.

    The conversation sent to the module-level ``hf_client`` consists of a
    system message (built-in language-matching instructions followed by
    ``system_message``), every prior turn from ``history``, and finally the
    new user ``message``.

    Args:
        message: The new user message.
        history: Prior turns as ``(user_msg, assistant_msg)`` pairs. A falsy
            assistant entry is skipped.
        system_message: Text appended after the built-in instructions.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.
        top_p: Forwarded to ``chat_completion`` as ``top_p``.

    Yields:
        The reply accumulated so far, once for every streamed chunk that
        carries content. If anything raises while requesting or reading the
        stream, one ``"An error occurred: ..."`` string is yielded and the
        generator ends.
    """
    # Language-matching instructions that always precede the caller's prompt.
    system_prefix = (
        "\n"
        "If the input language is Korean, respond in Korean. "
        "If it's English, respond in English.\n"
        "Do not output in both languages simultaneously. "
        "Always respond in Korean to Korean questions and in English to "
        "English questions.\n"
    )
    messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]

    # Replay earlier turns; an unanswered user turn contributes no assistant
    # entry.
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # The current user message always comes last.
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        stream = hf_client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        # `chunk` (not `message`) so the user-message parameter is not shadowed.
        for chunk in stream:
            # A chunk without choices has no delta to read; skip it instead of
            # failing on choices[0].
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token is not None:
                response += token
                yield response
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any failure is shown
        # to the user as chat text rather than raised.
        yield f"An error occurred: {e}"
# Theme identifier handed to gr.ChatInterface via its `theme` argument.
theme = "Nymbo/Nymbo_Theme"

# Custom CSS handed to gr.ChatInterface via its `css` argument; the single
# rule sets the `footer` element to `visibility: hidden`.
css = "\n".join(["", "footer {", "visibility: hidden;", "}", ""])
# Extra controls for the chat UI, created in the same order as the trailing
# parameters of `respond`: system_message, max_tokens, temperature, top_p.
_system_prompt_box = gr.Textbox(
    value="\nYou are an AI assistant.\n",
    label="System Prompt",
)
_max_tokens_slider = gr.Slider(
    label="Max new tokens",
    minimum=1,
    maximum=2000,
    value=512,
    step=1,
)
_temperature_slider = gr.Slider(
    label="Temperature",
    minimum=0.1,
    maximum=4.0,
    value=0.7,
    step=0.1,
)
_top_p_slider = gr.Slider(
    label="Top-p (nucleus sampling)",
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
)

# Chat interface wired to `respond`, with the module's theme and CSS applied.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_prompt_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    theme=theme,
    css=css,
)
if __name__ == "__main__":
    # Launch the Gradio app only when this file is run as a script, not when
    # it is imported.
    demo.launch()