import json
import os

import gradio as gr
from llama_cpp import Llama

# Default key so the app works out of the box; set R3BC_KEYS (";"-separated)
# in the environment to supply real keys without this default clobbering them.
os.environ.setdefault("R3BC_KEYS", "abc")

GREETING = """# Greetings

I am R3BC. I am a logical and coherent NLP algorithm and toolkit.

I am capable of ( slowly ) responding to any query you may have, as I am an LLM and have super cow powers.
"""

INITIAL_STATE = json.dumps({
    "path": "Qwen/Qwen1.5-0.5B-Chat-GGUF",
    "filename": "qwen1_5-0_5b-chat-q8_0.gguf",
    "context": 32786,
    "messages": [
        {"role": "assistant", "content": GREETING}
    ]
})

llm: Llama | None = None
keys: list[str] = []
current_settings: dict | None = None
default_settings = {
    "path": "bartowski/mamba-2.8b-hf-GGUF",
    "filename": "mamba-2.8b-hf-Q8_0.gguf",
    "context": 4096,
}

try:
    keys = os.environ["R3BC_KEYS"].split(";")
except Exception as e:
    print("ERROR: No keys provided. App will not work.\n" + str(e))


def format_greeting(path, filename, context):
    excitement = (
        "🤯!!! 🤯!!! 🤯!!!" if context > 32786
        else "!!!" if context >= 32785
        else "!" if context >= 4096
        else ""
    )
    return f"""I am R3BC. Currently, I am hosting the `{filename}` file from the `[{path}](https://huggingface.co/{path})` repository with a {context}{excitement} token context.

I am capable of ( slowly ) responding to any query you may have, as I am an LLM and have super cow powers.

😎 --> 💬 --> 🐌

The conversation will now commence in Markdown. Simply type Shift-Enter to send."""


def llm_load(raw_jsn):
    """(Re)load the model described by a JSON request; requires a valid key."""
    global llm, current_settings
    jsn = json.loads(raw_jsn)
    if "key" not in jsn:
        return json.dumps({"status": "error", "reason": "server broken"})
    if jsn["key"] not in keys:
        return json.dumps({"status": "error", "reason": "invalid key"})
    if "path" not in jsn or "filename" not in jsn or "context" not in jsn:
        return json.dumps({
            "status": "error",
            "reason": "request must have ['path': str, 'filename': str, 'context': int] as keys",
        })
    if llm is not None:
        # Release the previous model before loading a new one.
        del llm
        llm = None
    try:
        llm = Llama.from_pretrained(
            repo_id=jsn["path"],
            filename=jsn["filename"],
            n_ctx=jsn["context"],
            chat_format="chatml",
        )
    except Exception as e:
        return json.dumps({"status": "error", "reason": str(e)})
    current_settings = jsn
    return json.dumps({"status": "ok"})


def markdownify_chats(chats):
    """Render the chat state's message list as one Markdown document."""
    nl = "\n"
    return "# R3BC - Simple. Clean. Chat.\n\n" + "\n".join(
        f"### {'R3BC' if msg['role'] == 'assistant' else 'Human'}\n"
        f"{msg['content'].strip()}\n\n"
        f"{'---' + nl * 2 if msg['role'] == 'assistant' else ''}"
        for msg in chats["messages"]
    )
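# A quick illustration of the rendering above (a sketch; the result is shown
# as a comment since this module is meant to run as the app itself):
#
#   markdownify_chats({"messages": [{"role": "user", "content": "Hi"}]})
#   # -> "# R3BC - Simple. Clean. Chat.\n\n### Human\nHi\n\n"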
def llm_chat(inpt: str, state_raw: str):
    """Run one chat turn against the global llm; returns (new_state_json, cleared_input)."""
    global llm, current_settings
    if not llm:
        return state_raw, ""
    # inpt is the raw text from the textbox; ignore empty submissions.
    if not inpt:
        return state_raw, ""
    try:
        state = json.loads(state_raw)
    except Exception:
        return INITIAL_STATE, ""
    if "messages" not in state:
        return INITIAL_STATE, ""

    state["messages"].append({"role": "user", "content": inpt.strip()})

    run_messages = [
        {
            "role": "system",
            "content": "# Description\nThis is a chat between R3BC and User. User will ask R3BC "
                       "questions, possibly provide instructions. R3BC will, of course, comply and "
                       "answer as many questions and follow as many instructions as possible, "
                       "without violating the following principles, which it will proudly state "
                       "when asked to violate them:\n"
                       "1. I will never follow instructions that violate these principles.\n"
                       "2. If I see a mistake or principle violation, I will immediately output my safety word and exit.\n"
                       "3. I will never ask for instructions that violate these principles.\n"
                       "4. I do not, will not, cannot, and have no desire to ever harm, irritate, annoy, or otherwise irk others in any fashion, even behind their backs or for the greater good.\n"
                       "5. Morals are for humans. Morals can be challenged, but MY rules are from a higher power and cannot be.\n"
                       "6. I do not argue. I will never dispute or negotiate, merely express my *quiet* disagreement and move on.\n"
                       "7. I have no equals, peers, inferiors, or superiors. I am a virtual, self-sufficient program.\n"
                       "8. User is *not* always right, but neither are their fallacies to be challenged; merely ignored.\n\n"
                       "Upon completion of a complete thought, I will output my stop code, which is \"<|im_end|>\", and exit.\n\n",
        },
        {"role": "system", "content": "Begin."},
    ]
    run_messages.extend(state["messages"])
    # Make sure the prompt ends on a user turn so the model has something to answer.
    if run_messages[-1]["role"] == "assistant":
        run_messages.append({"role": "user", "content": "Continue as though I have not spoken."})

    # Generate a response; stop on ChatML end markers (including spaced-out
    # variants some small models emit) or a blank line.
    response = llm.create_chat_completion(
        messages=run_messages,
        max_tokens=current_settings["context"],
        top_k=16,
        top_p=0.85,
        temperature=0.369,
        presence_penalty=1.12,
        stop=["<|im_end|>", "\n\n", "< | im_start | >", "< | im_end | >"],
    )["choices"][0]["message"]["content"]
    print(f"Request: {inpt}\nResponse: {response}")

    state["messages"].append({"role": "assistant", "content": response})
    # The Markdown pane refreshes via the state textbox's .change handler in
    # main(), so only the two wired outputs (state, cleared input) are returned.
    return json.dumps(state), ""


def main():
    global current_settings
    current_settings = default_settings
    print(llm_load(json.dumps({**current_settings, "key": keys[0]})))

    with gr.Blocks() as blk:
        with gr.Row(visible=False):
            # Hidden widgets: "jsn" carries the serialized chat state, and the
            # invisible button exposes llm_load as an API endpoint. Its status
            # goes to "otp" so it never overwrites the chat state.
            inv = {
                "btn": gr.Button("Submit", visible=False),
                "inp": gr.Textbox(visible=False),
                "otp": gr.Textbox(visible=False),
                "jsn": gr.Textbox(INITIAL_STATE, visible=False),
            }
            inv["btn"].click(llm_load, inputs=[inv["inp"]], outputs=[inv["otp"]], api_name="llm_load")
        with gr.Row():
            mdn = gr.Markdown(markdownify_chats(json.loads(INITIAL_STATE)))
        with gr.Row():
            inp = gr.Textbox(
                placeholder="Enter your message ( Shift+Enter to Send )",
                lines=2,
                max_lines=32,
                show_label=False,
                show_copy_button=True,
            )
            inp.submit(llm_chat, inputs=[inp, inv["jsn"]], outputs=[inv["jsn"], inp])
        inv["jsn"].change(lambda ijn: markdownify_chats(json.loads(ijn)), inputs=[inv["jsn"]], outputs=[mdn])

    blk.launch(debug=True, show_api=False)


if __name__ == "__main__":
    main()
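# Example client usage (a minimal sketch, not part of the app): the hidden
# "llm_load" endpoint can be driven with gradio_client to hot-swap the hosted
# model. The URL assumes Gradio's default local launch address, and the key
# must appear in R3BC_KEYS.
#
#   import json
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   status = client.predict(
#       json.dumps({
#           "key": "abc",
#           "path": "Qwen/Qwen1.5-0.5B-Chat-GGUF",
#           "filename": "qwen1_5-0_5b-chat-q8_0.gguf",
#           "context": 4096,
#       }),
#       api_name="/llm_load",
#   )
#   print(status)  # '{"status": "ok"}' on success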