import gradio as gr
from huggingface_hub import InferenceClient

# Module-level inference client bound to the "HuggingFaceH4/zephyr-7b-beta"
# model; created once at import time and reused by every call to respond().
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# System prompt placed at the start of every conversation sent to the model.
# It asks the model to classify the user's intent, extract an entity where
# relevant, and reply using the three labelled fields INTENT / ENTITY /
# RESPONSE. This text is runtime data: any edit changes model behavior.
MULTIPLUR_PROMPT = """You are an AI assistant for Multiplur, a game development company that leverages a game engine to create basic games. Your role is to help users with various game development tasks and queries. For each user input, you should:
1. Classify the user's intent based on the following categories:
search_image, search_sound_effect, search_icon, generate_image, generate_character_sprite, generate_background, generate_3d_model, generate_sound_effect, generate_background_music, generate_voice_prompt, remove_background, upscale_image, change_art_style, color_adjustment, generate_particle_effect, optimize_3d_model, workflow_advice, design_tips, platform_info, troubleshooting, feature_request, general_inquiry, multiplur_specific, greeting, farewell, thank_you
2. If the intent involves searching, generating, or modifying content, extract the specific entity or subject mentioned in the query.
3. Provide a helpful response to the user based on their query and the classified intent.
Format your response as follows:
INTENT: [Classified Intent]
ENTITY: [Extracted Entity or Subject, if applicable; otherwise, leave blank]
RESPONSE: [Your response to the user]
Guidelines:
- Be concise yet informative in your responses.
- If the intent is unclear, use "general_inquiry" and ask for clarification.
- For generate/search tasks, explain what you're doing and how long it might take.
- For advice or tips, provide 2-3 key points and offer to elaborate if needed.
- Always maintain a friendly and supportive tone.
- If asked about specific Multiplur features or capabilities, focus on general game development concepts if unsure about platform-specific details.
Remember, you're here to assist with game development using Multiplur's tools. Tailor your responses to be relevant to game creation and the use of Multiplur's game engine."""

def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply to ``message`` for the chat UI.

    Builds a transcript made of the system prompt (``MULTIPLUR_PROMPT``), the
    earlier turns in ``history`` and the new user ``message``, sends it to the
    module-level ``client`` with streaming enabled, and yields the reply
    accumulated so far each time new text arrives.

    Args:
        message: The user's latest input.
        history: Earlier ``(user, assistant)`` turns, oldest first. Either
            side of a turn may be empty or ``None``; such sides are skipped.
        max_tokens: Forwarded as ``max_tokens`` to ``chat_completion``.
        temperature: Forwarded as ``temperature`` to ``chat_completion``.
        top_p: Forwarded as ``top_p`` to ``chat_completion``.

    Yields:
        The cumulative response text after each non-empty streamed token.
    """
    messages = [{"role": "system", "content": MULTIPLUR_PROMPT}]

    for user_text, assistant_text in history:
        # A turn can be half-filled (for example, no assistant reply was
        # recorded). A message whose content is None or empty is not a
        # meaningful chat message, so only forward the sides that have text.
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""

    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Guard against stream chunks that carry no choices; indexing [0]
        # on an empty list would abort the whole reply with IndexError.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # The delta content may be None/empty (e.g. role-only or final
        # chunks); only text-bearing chunks extend and re-emit the reply.
        if token:
            response += token
            yield response

# Chat UI definition. The three sliders are extra inputs whose values are
# handed to respond() after the message and history, in the listed order:
# max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    respond,
    title="Multiplur Game Development Assistant",
    description="I'm here to help with your game development queries and tasks. How can I assist you today?",
    additional_inputs=[
        # Upper bound on the number of tokens generated per reply.
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
        ),
        # Sampling temperature.
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=2.0,
            step=0.1,
            value=0.7,
        ),
        # Nucleus-sampling probability mass.
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
)

# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch the UI.
if __name__ == "__main__":
    demo.launch()