import gradio as gr
from llama_cpp import Llama

# Define available models
# Note: Llama 3.2 ships in 1B and 3B instruct sizes (there is no 1.5B),
# so the second entry points at the 1B GGUF repo.
MODELS = {
    "Llama-3.2-3B": {
        "repo_id": "lmstudio-community/Llama-3.2-3B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf"
    },
    "Llama-3.2-1B": {
        "repo_id": "lmstudio-community/Llama-3.2-1B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf"
    }
}

# Track the loaded model and its dropdown name so we can detect
# when the user switches models.
current_model = None
current_model_name = None


def load_model(model_name):
    global current_model, current_model_name
    model_info = MODELS[model_name]
    current_model = Llama.from_pretrained(
        repo_id=model_info["repo_id"],
        filename=model_info["filename"],
        verbose=True,
        n_ctx=32768,
        n_threads=2,
        chat_format="chatml"
    )
    current_model_name = model_name
    return current_model


# Initialize with the first model
current_model = load_model(list(MODELS.keys())[0])


def respond(
    message,
    history: list[tuple[str, str]],
    model_name,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    global current_model

    # Reload only when the selected model differs from the loaded one.
    # (Comparing against `current_model.model_path` would never match:
    # that attribute holds the local GGUF file path, not the dropdown name.)
    if current_model is None or model_name != current_model_name:
        current_model = load_model(model_name)

    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = current_model.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p
    )

    # Accumulate streamed deltas and yield the growing reply so the
    # ChatInterface renders tokens as they arrive.
    message_repl = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            message_repl += delta["content"]
            yield message_repl


"""
For information on how to customize the ChatInterface, see the Gradio docs:
https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    title="GGUF is a popular format: run it on PC in LM Studio or on tablet/mobile in the PocketPal app",
    description="Try models locally in: 🖥️ [LM Studio AI for PC](https://lmstudio.ai) | 📱 PocketPal AI ([Android](https://play.google.com/store/apps/details?id=com.pocketpalai) & [iOS](https://play.google.com/store/apps/details?id=com.pocketpalai)) on tablet or mobile",
    additional_inputs=[
        gr.Dropdown(
            choices=list(MODELS.keys()),
            value=list(MODELS.keys())[0],
            label="Select Model"
        ),
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
    ),
    css="""
    .message-wrap {
        border: 1px solid #e0e0e0;
        border-radius: 8px;
        padding: 8px;
        margin: 8px 0;
    }
    #component-0, #component-1 {
        border: 4px solid #2196F3;
        border-radius: 12px;
        padding: 15px;
        background-color: #E3F2FD;
        box-shadow: 0 0 10px rgba(33, 150, 243, 0.3);
        margin: 10px 0;
    }
    #component-0:focus-within, #component-1:focus-within {
        border-color: #1976D2;
        box-shadow: 0 0 15px rgba(33, 150, 243, 0.5);
        background-color: #BBDEFB;
    }
    .input-container, .gradio-container .input-container {
        border: 4px solid #2196F3;
        border-radius: 12px;
        padding: 15px;
        background-color: #E3F2FD;
        box-shadow: 0 0 10px rgba(33, 150, 243, 0.3);
        margin: 10px 0;
    }
    .input-container textarea, .input-container input[type="text"] {
        background-color: #E3F2FD;
        border: 2px solid #2196F3;
        border-radius: 8px;
        padding: 10px;
    }
    .input-container textarea:focus, .input-container input[type="text"]:focus {
        background-color: #BBDEFB;
        border-color: #1976D2;
        outline: none;
    }
    """
)

if __name__ == "__main__":
    demo.launch()
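# A quick smoke test for the `respond` generator, kept commented out so it
# does not interfere with the Gradio app above. This is a minimal sketch
# assuming the default model downloads successfully; `_smoke_test` is a
# hypothetical helper, and `history` follows the (user, assistant) tuple
# format that gr.ChatInterface passes in.
#
# def _smoke_test():
#     final = ""
#     for partial in respond(
#         message="Hello!",
#         history=[],
#         model_name=list(MODELS.keys())[0],
#         system_message="You are a friendly Chatbot.",
#         max_tokens=64,
#         temperature=0.7,
#         top_p=0.95,
#     ):
#         final = partial  # each yield is the full reply so far
#     print(final)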