import gradio as gr
import spaces
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles

llm_models_dir = "./llm_models"
# Downloadable GGUF models: filename -> [repo_id, prompt formatter type].
llm_models = {
    "SwallowMaid-8B-L3-SPPO-abliterated.i1-Q5_K_M.gguf": ["mradermacher/SwallowMaid-8B-L3-SPPO-abliterated-i1-GGUF", MessagesFormatterType.LLAMA_3],
    "Tiger-Gemma-9B-v1-Q4_K_M.gguf": ["bartowski/Tiger-Gemma-9B-v1-GGUF", MessagesFormatterType.LLAMA_3],
    "TooManyMixRolePlay-7B-Story_V3.5.Q4_K_M.gguf": ["mradermacher/TooManyMixRolePlay-7B-Story_V3.5-GGUF", MessagesFormatterType.LLAMA_3],
    "natsumura-llama3-v1.1-8b.Q4_K_M.gguf": ["mradermacher/natsumura-llama3-v1.1-8b-GGUF", MessagesFormatterType.LLAMA_3],
    "natsumura-llama3-v1-8b.i1-Q4_K_M.gguf": ["mradermacher/natsumura-llama3-v1-8b-i1-GGUF", MessagesFormatterType.LLAMA_3],
    "nephra_v1.0.Q5_K_M.gguf": ["PrunaAI/yodayo-ai-nephra_v1.0-GGUF-smashed", MessagesFormatterType.LLAMA_3],
    "DPO-ONLY-Zephyr-7B.Q6_K.gguf": ["mradermacher/DPO-ONLY-Zephyr-7B-GGUF", MessagesFormatterType.LLAMA_3],
    "L3-Deluxe-Scrambled-Eggs-On-Toast-8B.Q8_0.gguf": ["mradermacher/L3-Deluxe-Scrambled-Eggs-On-Toast-8B-GGUF", MessagesFormatterType.LLAMA_3],
    "L3-Scrambled-Eggs-On-Toast-8B.i1-Q6_K.gguf": ["mradermacher/L3-Scrambled-Eggs-On-Toast-8B-i1-GGUF", MessagesFormatterType.LLAMA_3],
    "llama-3-Nephilim-v2.1-8B.Q5_K_M.gguf": ["grimjim/llama-3-Nephilim-v2.1-8B-GGUF", MessagesFormatterType.LLAMA_3],
    "Llama-3-uncensored-Dare-1.Q4_K_M.gguf": ["mradermacher/Llama-3-uncensored-Dare-1-GGUF", MessagesFormatterType.LLAMA_3],
    "llama3-8B-DarkIdol-2.2-Uncensored-1048K.i1-Q6_K.gguf": ["mradermacher/llama3-8B-DarkIdol-2.2-Uncensored-1048K-i1-GGUF", MessagesFormatterType.LLAMA_3],
    "llama3-8B-DarkIdol-2.2-Uncensored-1048K.Q8_0.gguf": ["mradermacher/llama3-8B-DarkIdol-2.2-Uncensored-1048K-GGUF", MessagesFormatterType.LLAMA_3],
    "dolphin-2.9.3-mistral-7b-32k-q4_k_m.gguf": ["huggingkot/dolphin-2.9.3-mistral-7B-32k-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
    "dolphin-2.9.3-mistral-7B-32k-Q5_K_M.gguf": ["bartowski/dolphin-2.9.3-mistral-7B-32k-GGUF", MessagesFormatterType.MISTRAL],
    "Lexi-Llama-3-8B-Uncensored_Q5_K_M.gguf": ["Orenguteng/Llama-3-8B-Lexi-Uncensored-GGUF", MessagesFormatterType.LLAMA_3],
    "Llama3-Sophie.Q8_0.gguf": ["mradermacher/Llama3-Sophie-GGUF", MessagesFormatterType.LLAMA_3],
    "Aura-Uncensored-OAS-8B-L3.i1-Q4_K_M.gguf": ["mradermacher/Aura-Uncensored-OAS-8B-L3-i1-GGUF", MessagesFormatterType.LLAMA_3],
    "L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q5_K_S-imat.gguf": ["LWDCLS/L3-Uncen-Merger-Omelette-RP-v0.2-8B-GGUF-IQ-Imatrix-Request", MessagesFormatterType.LLAMA_3],
    "qwen2-diffusion-prompter-v01-q6_k.gguf": ["trollek/Qwen2-0.5B-DiffusionPrompter-v0.1-GGUF", MessagesFormatterType.LLAMA_3],
    "Smegmma-Deluxe-9B-v1-Q6_K.gguf": ["bartowski/Smegmma-Deluxe-9B-v1-GGUF", MessagesFormatterType.MISTRAL],
    "Mahou-1.3c-mistral-7B.i1-Q6_K.gguf": ["mradermacher/Mahou-1.3c-mistral-7B-i1-GGUF", MessagesFormatterType.MISTRAL],
    "Silicon-Maid-7B-Q8_0_X.gguf": ["duyntnet/Silicon-Maid-7B-imatrix-GGUF", MessagesFormatterType.ALPACA],
    "l3-umbral-mind-rp-v3.0-8b-q5_k_m-imat.gguf": ["Casual-Autopsy/L3-Umbral-Mind-RP-v3.0-8B-Q5_K_M-GGUF", MessagesFormatterType.LLAMA_3],
    "EZO-Common-9B-gemma-2-it.i1-Q4_K_M.gguf": ["mradermacher/EZO-Common-9B-gemma-2-it-i1-GGUF", MessagesFormatterType.MISTRAL],
}
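
# Illustrative sketch (not called by the app; helper name is hypothetical): how one
# registry entry above resolves to a local GGUF path. The filename key is both the
# dict key and the file fetched from the repo in position 0; position 1 is the
# llama-cpp-agent message formatter for that model.
def _example_resolve_model_path(filename: str) -> str:
    from huggingface_hub import hf_hub_download
    repo_id, _formatter = llm_models[filename]
    # hf_hub_download returns the local path of the (possibly cached) file.
    return hf_hub_download(repo_id=repo_id, filename=filename, local_dir=llm_models_dir)
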
llm_formats = {
    "MISTRAL": MessagesFormatterType.MISTRAL,
    "CHATML": MessagesFormatterType.CHATML,
    "VICUNA": MessagesFormatterType.VICUNA,
    "LLAMA 2": MessagesFormatterType.LLAMA_2,
    "SYNTHIA": MessagesFormatterType.SYNTHIA,
    "NEURAL CHAT": MessagesFormatterType.NEURAL_CHAT,
    "SOLAR": MessagesFormatterType.SOLAR,
    "OPEN CHAT": MessagesFormatterType.OPEN_CHAT,
    "ALPACA": MessagesFormatterType.ALPACA,
    "CODE DS": MessagesFormatterType.CODE_DS,
    "B22": MessagesFormatterType.B22,
    "LLAMA 3": MessagesFormatterType.LLAMA_3,
    "PHI 3": MessagesFormatterType.PHI_3,
}
# https://github.com/Maximilian-Winter/llama-cpp-agent
llm_languages = ["English", "Japanese", "Chinese"]
llm_models_tupled_list = []
default_llm_model_filename = list(llm_models.keys())[0]
override_llm_format = None  # set from the UI to override a model's default format


def to_list(s):
    # Split a comma-separated string, dropping empty items.
    return [x.strip() for x in s.split(",") if x.strip() != ""]


def list_uniq(l):
    # Deduplicate while preserving the original order.
    return sorted(set(l), key=l.index)


def is_japanese(s):
    import unicodedata
    for ch in s:
        name = unicodedata.name(ch, "")
        if "CJK UNIFIED" in name or "HIRAGANA" in name or "KATAKANA" in name:
            return True
    return False


def update_llm_model_tupled_list():
    from pathlib import Path
    global llm_models_tupled_list
    llm_models_tupled_list = []
    # Registry entries plus any .gguf files already present on disk.
    for k, v in llm_models.items():
        llm_models_tupled_list.append((k, k))
    model_files = Path(llm_models_dir).glob('*.gguf')
    for path in model_files:
        llm_models_tupled_list.append((path.name, path.name))
    llm_models_tupled_list = list_uniq(llm_models_tupled_list)
    return llm_models_tupled_list


def download_llm_models():
    from huggingface_hub import hf_hub_download
    global llm_models_tupled_list
    llm_models_tupled_list = []
    for k, v in llm_models.items():
        try:
            hf_hub_download(repo_id=v[0], filename=k, local_dir=llm_models_dir)
        except Exception:
            continue
        llm_models_tupled_list.append((k, k))


def download_llm_model(filename):
    from huggingface_hub import hf_hub_download
    if filename not in llm_models:
        return default_llm_model_filename
    try:
        hf_hub_download(repo_id=llm_models[filename][0], filename=filename, local_dir=llm_models_dir)
    except Exception:
        return default_llm_model_filename
    update_llm_model_tupled_list()
    return filename


def get_dolphin_model_info(filename):
    md = "None"
    items = llm_models.get(filename, None)
    if items:
        md = f'Repo: [{items[0]}](https://huggingface.co/{items[0]})'
    return md


def select_dolphin_model(filename, progress=gr.Progress(track_tqdm=True)):
    global override_llm_format
    override_llm_format = None
    progress(0, desc="Loading model...")
    value = download_llm_model(filename)
    progress(1, desc="Model loaded.")
    md = get_dolphin_model_info(value)
    return gr.update(value=value, choices=get_dolphin_models()), gr.update(value=get_dolphin_model_format(value)), gr.update(value=md)


def select_dolphin_format(format_name):
    global override_llm_format
    override_llm_format = llm_formats[format_name]
    return gr.update(value=format_name)


#download_llm_models()
download_llm_model(default_llm_model_filename)


def get_dolphin_models():
    return update_llm_model_tupled_list()


def get_llm_formats():
    return list(llm_formats.keys())


def get_key_from_value(d, val):
    keys = [k for k, v in d.items() if v == val]
    if keys:
        return keys[0]
    return None


def get_dolphin_model_format(filename):
    if filename not in llm_models:
        filename = default_llm_model_filename
    format = llm_models[filename][1]
    format_name = get_key_from_value(llm_formats, format)
    return format_name
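
# Illustrative sketch (helper name is hypothetical, not wired into the UI): the
# effective prompt format is the user-selected override when one is set, otherwise
# the model's registry default, falling back to the default model's entry.
def _example_effective_format(filename: str) -> MessagesFormatterType:
    if override_llm_format:
        return override_llm_format
    return llm_models.get(filename, llm_models[default_llm_model_filename])[1]
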
def add_dolphin_models(query, format_name):
    import re
    from huggingface_hub import HfApi
    global llm_models
    api = HfApi()
    add_models = {}
    format = llm_formats[format_name]
    filename = ""
    repo = ""
    try:
        # Accept either "user/repo" or a full URL pointing at a .gguf file.
        s = list(re.findall(r'^(?:https?://huggingface\.co/)?(.+?/.+?)(?:/.*/(.+?\.gguf).*?)?$', query)[0])
        if s and "" in s:
            s.remove("")
        if len(s) == 1:
            # Repo only: register every .gguf file in the repo.
            repo = s[0]
            if not api.repo_exists(repo_id=repo):
                return gr.update(visible=True)
            files = api.list_repo_files(repo_id=repo)
            for file in files:
                if str(file).endswith(".gguf"):
                    add_models[str(file)] = [repo, format]
        elif len(s) >= 2:
            # Repo plus a specific file.
            repo = s[0]
            filename = s[1]
            if not api.repo_exists(repo_id=repo) or not api.file_exists(repo_id=repo, filename=filename):
                return gr.update(visible=True)
            add_models[filename] = [repo, format]
        else:
            return gr.update(visible=True)
    except Exception:
        return gr.update(visible=True)
    print(add_models)
    llm_models = (llm_models | add_models).copy()
    return gr.update(choices=get_dolphin_models())


dolphin_output_language = "English"
dolphin_sysprompt_mode = "Default"
dolphin_system_prompt = {"Default": r'''You are a helpful AI assistant to generate messages for AI that outputs an image when I enter a message.
The message must have the following [Tags] generated in strict accordance with the following [Rules]:
```
[Tags]
- Words to describe full names of characters and names of series in which they appear AFAP.
- Words to describe names of the people there and their numbers, such as 2girls, 1boy.
- Words to describe their hair color, hairstyle, hair length, hair accessory, eye color, eye shape, facial expression, breast size, and clothing of them in detail, such as long hair.
- Words to describe their external features, ornaments and belongings (also specify colors, patterns, shapes) in detail.
- Words to describe their stance from head to toe in detail AFAP.
- Words to describe their acting, especially with sexual activity in detail AFAP.
- Words to describe their surroundings in detail.
- Words to describe background details, such as inside room, forest, starry sky.
[Rules]
- Any output should be plain text in English and don't use line breaks.
- Output only composed of Tags in 1 line, separated by commas with spaces between Tags, in lower case English.
- Output should be in the format: "//GENBEGIN//://1girl, Tag, Tag, ..., Tag//://GENEND//".
- Preferably refer to and describe the information obtained from Danbooru. If not, describe it in your own way.
- It's preferable that each Tag is a plain phrase, word, caption, Danbooru tag, or E621 tag.
- Convert any nicknames to full names first AFAP.
- If a sexual theme is given, priority should be given to specific and rich descriptions of sexual activity, especially about genitals, fluids.
- Assemble a short story internally which is developed from the themes provided, then describe a scene in detailed English sentences based on the central character internally.
- Split sentences into short phrases or words, and then convert them to Tags.
- Use associated Danbooru tags, E621 tags.
- Same Tags should be used only once per output.
- Anyway, keep processing until you've finished outputting the message.
```
Based on these Rules, please tell me a message within 40 Tags that can generate an image for the following themes:
''',
    "With dialogue and description": r'''You are a helpful AI assistant to generate messages for AI that outputs an image when I enter a message.
The message must have the following [Tags] generated in strict accordance with the following [Rules]:
```
[Tags]
- Words to describe full names of characters and names of series in which they appear AFAP.
- Words to describe names of the people there and their numbers, such as 2girls, 1boy.
- Words to describe their hair color, hairstyle, hair length, hair accessory, eye color, eye shape, facial expression, breast size, and clothing of them in detail, such as long hair.
- Words to describe their external features, ornaments and belongings (also specify colors, patterns, shapes) in detail.
- Words to describe their stance from head to toe in detail AFAP.
- Words to describe their acting, especially with sexual activity in detail AFAP.
- Words to describe their surroundings in detail.
- Words to describe background details, such as inside room, forest, starry sky.
[Rules]
- Any Tags should be plain text in English and don't use line breaks.
- Message is only composed of Tags in 1 line, separated by commas with spaces between Tags, in lower case English.
- Message should be in the format: "//GENBEGIN//://1girl, Tag, Tag, ..., Tag//://GENEND//".
- Preferably refer to and describe the information obtained from Danbooru. If not, describe it in your own way.
- It's preferable that each Tag is a plain phrase, word, caption, Danbooru tag, or E621 tag.
- Convert any nicknames to full names first AFAP.
- If a sexual theme is given, priority should be given to specific and rich descriptions of sexual activity, especially about genitals, fluids.
- Assemble a short story internally which is developed from the themes provided, then describe a scene in detailed English sentences based on the central character internally.
- Split sentences into short phrases or words, and then convert them to Tags.
- Use associated Danbooru tags, E621 tags.
- Same Tags should be used only once per output.
- Anyway, keep processing until you've finished outputting the message.
```
Based on these Rules, please tell me a message within 40 Tags that can generate an image for the following themes, then write the character's long actor's line composed of one's voices and moaning and voices in thought, based on the story you have assembled, in <LANGUAGE> only, enclosed in //VOICEBEGIN//:// and //://VOICEEND//, then describe the message you've generated in short, in <LANGUAGE> only:
''',
    "Chat with LLM": r"You are a helpful AI assistant. Respond in <LANGUAGE>."}
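
# Illustrative sketch of the round trip the prompts above set up: the model is asked
# to wrap its tag list in "//GENBEGIN//:// ... //://GENEND//", and dolphin_parse()
# below recovers the comma-separated tags from that envelope with the same regex used
# there. The helper name is hypothetical; the app itself goes through dolphin_parse().
# _example_extract_tags('//GENBEGIN//://1girl, long hair, smile//://GENEND//')
# -> '1girl, long hair, smile'
def _example_extract_tags(reply: str) -> str:
    import re
    m = re.findall(r'/GENBEGIN/((?:.|\s)+?)/GENEND/', reply)
    return re.sub(r'[*/:_"#]|\n', ' ', ", ".join(m)).strip() if m else ""
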
def get_dolphin_sysprompt():
    import re
    # Substitute the <LANGUAGE> placeholder with the currently selected language.
    prompt = re.sub('<LANGUAGE>', dolphin_output_language, dolphin_system_prompt.get(dolphin_sysprompt_mode, ""))
    return prompt


def get_dolphin_sysprompt_mode():
    return list(dolphin_system_prompt.keys())


def select_dolphin_sysprompt(key: str):
    global dolphin_sysprompt_mode
    if key not in dolphin_system_prompt:
        dolphin_sysprompt_mode = "Default"
    else:
        dolphin_sysprompt_mode = key
    return gr.update(value=get_dolphin_sysprompt())


def get_dolphin_languages():
    return llm_languages


def select_dolphin_language(lang: str):
    global dolphin_output_language
    dolphin_output_language = lang
    return gr.update(value=get_dolphin_sysprompt())


@spaces.GPU
def dolphin_respond(
    message: str,
    history: list[tuple[str, str]],
    model: str = default_llm_model_filename,
    system_message: str = get_dolphin_sysprompt(),
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.95,
    top_k: int = 40,
    repeat_penalty: float = 1.1,
    progress=gr.Progress(track_tqdm=True),
):
    from pathlib import Path
    progress(0, desc="Processing...")
    # The UI override takes precedence over the model's registered format.
    if override_llm_format:
        chat_template = override_llm_format
    else:
        chat_template = llm_models[model][1]
    llm = Llama(
        model_path=str(Path(f"{llm_models_dir}/{model}")),
        flash_attn=True,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=False
    )
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True
    # Rebuild the agent-side chat history from Gradio's (user, assistant) pairs.
    messages = BasicChatHistory()
    for msn in history:
        user = {'role': Roles.user, 'content': msn[0]}
        assistant = {'role': Roles.assistant, 'content': msn[1]}
        messages.add_message(user)
        messages.add_message(assistant)
    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )
    progress(0.5, desc="Processing...")
    outputs = ""
    for output in stream:
        outputs += output
        yield [(outputs, None)]


def dolphin_parse(
    history: list[tuple[str, str]],
):
    import re
    # Always return three values so the Gradio output components stay consistent.
    if not history or len(history) < 1:
        return "", gr.update(), gr.update()
    try:
        msg = history[-1][0]
    except Exception:
        return "", gr.update(), gr.update()
    m = re.findall(r'/GENBEGIN/((?:.|\s)+?)/GENEND/', msg)
    raw_prompt = re.sub(r'[*/:_"#]|\n', ' ', ", ".join(m)).lower() if m else ""
    prompts = list_uniq(to_list(raw_prompt) + ["nsfw", "explicit"])
    return ", ".join(prompts), gr.update(interactive=True), gr.update(interactive=True)
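
# Illustrative sketch (assumptions: a model has already been downloaded, and the
# @spaces.GPU decorator normally runs inside a Space): consuming the dolphin_respond()
# generator directly. Each yielded value is the chat-pairs list with the partial
# completion accumulated so far, so printing the delta streams tokens to stdout.
def _example_stream_to_stdout(prompt: str):
    last = ""
    for pairs in dolphin_respond(prompt, history=[]):
        chunk = pairs[0][0]
        print(chunk[len(last):], end="", flush=True)
        last = chunk
    print()
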
@spaces.GPU
def dolphin_respond_auto(
    message: str,
    history: list[tuple[str, str]],
    model: str = default_llm_model_filename,
    system_message: str = get_dolphin_sysprompt(),
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.95,
    top_k: int = 40,
    repeat_penalty: float = 1.1,
    progress=gr.Progress(track_tqdm=True),
):
    #if not is_japanese(message): return [(None, None)]
    from pathlib import Path
    progress(0, desc="Processing...")
    if override_llm_format:
        chat_template = override_llm_format
    else:
        chat_template = llm_models[model][1]
    llm = Llama(
        model_path=str(Path(f"{llm_models_dir}/{model}")),
        flash_attn=True,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=False
    )
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True
    messages = BasicChatHistory()
    for msn in history:
        user = {'role': Roles.user, 'content': msn[0]}
        assistant = {'role': Roles.assistant, 'content': msn[1]}
        messages.add_message(user)
        messages.add_message(assistant)
    progress(0, desc="Translating...")
    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )
    progress(0.5, desc="Processing...")
    outputs = ""
    for output in stream:
        outputs += output
        yield [(outputs, None)]


def dolphin_parse_simple(
    message: str,
    history: list[tuple[str, str]],
):
    import re
    #if not is_japanese(message) or not history or len(history) < 1: return message
    if not history or len(history) < 1:
        return message
    try:
        msg = history[-1][0]
    except Exception:
        return ""
    m = re.findall(r'/GENBEGIN/((?:.|\s)+?)/GENEND/', msg)
    raw_prompt = re.sub(r'[*/:_"#]|\n', ' ', ", ".join(m)).lower() if m else ""
    prompts = list_uniq(to_list(raw_prompt) + ["nsfw", "explicit"])
    return ", ".join(prompts)
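
# Minimal wiring sketch, under assumptions: the real UI for this module lives elsewhere
# (e.g. app.py) and every component name below is hypothetical. It shows how the pieces
# compose: pick a model, chat via dolphin_respond, then distill the last reply into
# image-prompt tags with dolphin_parse_simple.
def _example_build_demo():
    with gr.Blocks() as demo:
        model_dd = gr.Dropdown(choices=get_dolphin_models(), value=default_llm_model_filename, label="Model")
        format_dd = gr.Dropdown(choices=get_llm_formats(), value=get_dolphin_model_format(default_llm_model_filename), label="Format")
        model_info = gr.Markdown(get_dolphin_model_info(default_llm_model_filename))
        chatbot = gr.Chatbot()
        msg = gr.Textbox(label="Message")
        tags = gr.Textbox(label="Parsed tags")
        model_dd.change(select_dolphin_model, [model_dd], [model_dd, format_dd, model_info])
        format_dd.change(select_dolphin_format, [format_dd], [format_dd])
        msg.submit(dolphin_respond, [msg, chatbot], [chatbot])
        chatbot.change(dolphin_parse_simple, [msg, chatbot], [tags])
    return demo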