import gradio as gr
from huggingface_hub import InferenceClient
# Initialize the InferenceClient with the appropriate model
client = InferenceClient("wop/kosmox")
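# Note: the serverless Inference API may require authentication. If requests
# fail with a 401, pass a token explicitly (an assumption about your setup),
# e.g. InferenceClient("wop/kosmox", token=os.environ["HF_TOKEN"]).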
def format_messages(history, user_message):
    # Build a single prompt string following the model's chat template
    formatted_message = "<s>"
    # if system_message:
    #     formatted_message += f"<|system|>\n{system_message}\n"
    for user_msg, assistant_msg in history:
        if user_msg:
            formatted_message += f"<|user|>\n{user_msg}\n"
        if assistant_msg:
            formatted_message += f"<|assistant|>\n{assistant_msg}\n"
    formatted_message += f"<|user|>\n{user_message}\n"
    # Cue the model to generate a reply (assumes the template uses
    # <|assistant|> as the generation prompt, matching the turns above)
    formatted_message += "<|assistant|>\n"
    return formatted_message
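# Example (a sketch of the expected output, given the template above):
# format_messages([("Hi", "Hello!")], "How are you?") returns
# "<s><|user|>\nHi\n<|assistant|>\nHello!\n<|user|>\nHow are you?\n<|assistant|>\n"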
# system_message is omitted from the signature: the System message textbox
# below is commented out, so ChatInterface only passes the three slider values.
def respond(
    message: str,
    history: list[tuple[str, str]],
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    # Format the conversation into a single prompt string
    prompt = format_messages(history, message)
    response = ""
    # Stream tokens from the model. text_generation is used here because
    # chat_completion expects a list of {"role": ..., "content": ...} dicts,
    # not a pre-formatted prompt string.
    for token in client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        response += token
        yield response
# Define the Gradio interface
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        # gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
if __name__ == "__main__":
    demo.launch()
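# When running outside Hugging Face Spaces, a shareable public link can be
# created by launching with demo.launch(share=True) instead.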