import gradio as gr
import spaces
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles

llm_models_dir = "./llm_models"
# Downloadable GGUF models: filename -> [repo_id, prompt formatter type].
llm_models = {
    "SwallowMaid-8B-L3-SPPO-abliterated.i1-Q5_K_M.gguf": ["mradermacher/SwallowMaid-8B-L3-SPPO-abliterated-i1-GGUF", MessagesFormatterType.LLAMA_3],
    "Tiger-Gemma-9B-v1-Q4_K_M.gguf": ["bartowski/Tiger-Gemma-9B-v1-GGUF", MessagesFormatterType.LLAMA_3],
    "TooManyMixRolePlay-7B-Story_V3.5.Q4_K_M.gguf": ["mradermacher/TooManyMixRolePlay-7B-Story_V3.5-GGUF", MessagesFormatterType.LLAMA_3],
    "natsumura-llama3-v1.1-8b.Q4_K_M.gguf": ["mradermacher/natsumura-llama3-v1.1-8b-GGUF", MessagesFormatterType.LLAMA_3],
    "natsumura-llama3-v1-8b.i1-Q4_K_M.gguf": ["mradermacher/natsumura-llama3-v1-8b-i1-GGUF", MessagesFormatterType.LLAMA_3],
    "nephra_v1.0.Q5_K_M.gguf": ["PrunaAI/yodayo-ai-nephra_v1.0-GGUF-smashed", MessagesFormatterType.LLAMA_3],
    "DPO-ONLY-Zephyr-7B.Q6_K.gguf": ["mradermacher/DPO-ONLY-Zephyr-7B-GGUF", MessagesFormatterType.LLAMA_3],
    "L3-Deluxe-Scrambled-Eggs-On-Toast-8B.Q8_0.gguf": ["mradermacher/L3-Deluxe-Scrambled-Eggs-On-Toast-8B-GGUF", MessagesFormatterType.LLAMA_3],
    "L3-Scrambled-Eggs-On-Toast-8B.i1-Q6_K.gguf": ["mradermacher/L3-Scrambled-Eggs-On-Toast-8B-i1-GGUF", MessagesFormatterType.LLAMA_3],
    "llama-3-Nephilim-v2.1-8B.Q5_K_M.gguf": ["grimjim/llama-3-Nephilim-v2.1-8B-GGUF", MessagesFormatterType.LLAMA_3],
    "Llama-3-uncensored-Dare-1.Q4_K_M.gguf": ["mradermacher/Llama-3-uncensored-Dare-1-GGUF", MessagesFormatterType.LLAMA_3],
    "llama3-8B-DarkIdol-2.2-Uncensored-1048K.i1-Q6_K.gguf": ["mradermacher/llama3-8B-DarkIdol-2.2-Uncensored-1048K-i1-GGUF", MessagesFormatterType.LLAMA_3],
    "llama3-8B-DarkIdol-2.2-Uncensored-1048K.Q8_0.gguf": ["mradermacher/llama3-8B-DarkIdol-2.2-Uncensored-1048K-GGUF", MessagesFormatterType.LLAMA_3],
    "dolphin-2.9.3-mistral-7b-32k-q4_k_m.gguf": ["huggingkot/dolphin-2.9.3-mistral-7B-32k-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
    "dolphin-2.9.3-mistral-7B-32k-Q5_K_M.gguf": ["bartowski/dolphin-2.9.3-mistral-7B-32k-GGUF", MessagesFormatterType.MISTRAL],
    "Lexi-Llama-3-8B-Uncensored_Q5_K_M.gguf": ["Orenguteng/Llama-3-8B-Lexi-Uncensored-GGUF", MessagesFormatterType.LLAMA_3],
    "Llama3-Sophie.Q8_0.gguf": ["mradermacher/Llama3-Sophie-GGUF", MessagesFormatterType.LLAMA_3],
    "Aura-Uncensored-OAS-8B-L3.i1-Q4_K_M.gguf": ["mradermacher/Aura-Uncensored-OAS-8B-L3-i1-GGUF", MessagesFormatterType.LLAMA_3],
    "L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q5_K_S-imat.gguf": ["LWDCLS/L3-Uncen-Merger-Omelette-RP-v0.2-8B-GGUF-IQ-Imatrix-Request", MessagesFormatterType.LLAMA_3],
    "qwen2-diffusion-prompter-v01-q6_k.gguf": ["trollek/Qwen2-0.5B-DiffusionPrompter-v0.1-GGUF", MessagesFormatterType.LLAMA_3],
    "Smegmma-Deluxe-9B-v1-Q6_K.gguf": ["bartowski/Smegmma-Deluxe-9B-v1-GGUF", MessagesFormatterType.MISTRAL],
    "Mahou-1.3c-mistral-7B.i1-Q6_K.gguf": ["mradermacher/Mahou-1.3c-mistral-7B-i1-GGUF", MessagesFormatterType.MISTRAL],
    "Silicon-Maid-7B-Q8_0_X.gguf": ["duyntnet/Silicon-Maid-7B-imatrix-GGUF", MessagesFormatterType.ALPACA],
    "l3-umbral-mind-rp-v3.0-8b-q5_k_m-imat.gguf": ["Casual-Autopsy/L3-Umbral-Mind-RP-v3.0-8B-Q5_K_M-GGUF", MessagesFormatterType.LLAMA_3],
    "EZO-Common-9B-gemma-2-it.i1-Q4_K_M.gguf": ["mradermacher/EZO-Common-9B-gemma-2-it-i1-GGUF", MessagesFormatterType.MISTRAL],
}
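
# Illustrative sketch (not called by the app; helper name is hypothetical): how one
# registry entry above resolves to a local GGUF path. The filename key is both the
# dict key and the file fetched from the repo in position 0; position 1 is the
# llama-cpp-agent message formatter for that model.
def _example_resolve_model_path(filename: str) -> str:
    from huggingface_hub import hf_hub_download
    repo_id, _formatter = llm_models[filename]
    # hf_hub_download returns the local path of the (possibly cached) file.
    return hf_hub_download(repo_id=repo_id, filename=filename, local_dir=llm_models_dir)
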
llm_formats = {
    "MISTRAL": MessagesFormatterType.MISTRAL,
    "CHATML": MessagesFormatterType.CHATML,
    "VICUNA": MessagesFormatterType.VICUNA,
    "LLAMA 2": MessagesFormatterType.LLAMA_2,
    "SYNTHIA": MessagesFormatterType.SYNTHIA,
    "NEURAL CHAT": MessagesFormatterType.NEURAL_CHAT,
    "SOLAR": MessagesFormatterType.SOLAR,
    "OPEN CHAT": MessagesFormatterType.OPEN_CHAT,
    "ALPACA": MessagesFormatterType.ALPACA,
    "CODE DS": MessagesFormatterType.CODE_DS,
    "B22": MessagesFormatterType.B22,
    "LLAMA 3": MessagesFormatterType.LLAMA_3,
    "PHI 3": MessagesFormatterType.PHI_3,
}
# https://github.com/Maximilian-Winter/llama-cpp-agent
llm_languages = ["English", "Japanese", "Chinese"]
llm_models_tupled_list = []
default_llm_model_filename = list(llm_models.keys())[0]
override_llm_format = None  # set from the UI to override a model's default format


def to_list(s):
    # Split a comma-separated string, dropping empty items.
    return [x.strip() for x in s.split(",") if x.strip() != ""]


def list_uniq(l):
    # Deduplicate while preserving the original order.
    return sorted(set(l), key=l.index)


def is_japanese(s):
    import unicodedata
    for ch in s:
        name = unicodedata.name(ch, "")
        if "CJK UNIFIED" in name or "HIRAGANA" in name or "KATAKANA" in name:
            return True
    return False


def update_llm_model_tupled_list():
    from pathlib import Path
    global llm_models_tupled_list
    llm_models_tupled_list = []
    # Registry entries plus any .gguf files already present on disk.
    for k, v in llm_models.items():
        llm_models_tupled_list.append((k, k))
    model_files = Path(llm_models_dir).glob('*.gguf')
    for path in model_files:
        llm_models_tupled_list.append((path.name, path.name))
    llm_models_tupled_list = list_uniq(llm_models_tupled_list)
    return llm_models_tupled_list


def download_llm_models():
    from huggingface_hub import hf_hub_download
    global llm_models_tupled_list
    llm_models_tupled_list = []
    for k, v in llm_models.items():
        try:
            hf_hub_download(repo_id=v[0], filename=k, local_dir=llm_models_dir)
        except Exception:
            continue
        llm_models_tupled_list.append((k, k))


def download_llm_model(filename):
    from huggingface_hub import hf_hub_download
    if filename not in llm_models:
        return default_llm_model_filename
    try:
        hf_hub_download(repo_id=llm_models[filename][0], filename=filename, local_dir=llm_models_dir)
    except Exception:
        return default_llm_model_filename
    update_llm_model_tupled_list()
    return filename


def get_dolphin_model_info(filename):
    md = "None"
    items = llm_models.get(filename, None)
    if items:
        md = f'Repo: [{items[0]}](https://huggingface.co/{items[0]})'
    return md


def select_dolphin_model(filename, progress=gr.Progress(track_tqdm=True)):
    global override_llm_format
    override_llm_format = None
    progress(0, desc="Loading model...")
    value = download_llm_model(filename)
    progress(1, desc="Model loaded.")
    md = get_dolphin_model_info(value)
    return gr.update(value=value, choices=get_dolphin_models()), gr.update(value=get_dolphin_model_format(value)), gr.update(value=md)


def select_dolphin_format(format_name):
    global override_llm_format
    override_llm_format = llm_formats[format_name]
    return gr.update(value=format_name)


#download_llm_models()
download_llm_model(default_llm_model_filename)


def get_dolphin_models():
    return update_llm_model_tupled_list()


def get_llm_formats():
    return list(llm_formats.keys())


def get_key_from_value(d, val):
    keys = [k for k, v in d.items() if v == val]
    if keys:
        return keys[0]
    return None


def get_dolphin_model_format(filename):
    if filename not in llm_models:
        filename = default_llm_model_filename
    format = llm_models[filename][1]
    format_name = get_key_from_value(llm_formats, format)
    return format_name
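
# Illustrative sketch (helper name is hypothetical, not wired into the UI): the
# effective prompt format is the user-selected override when one is set, otherwise
# the model's registry default, falling back to the default model's entry.
def _example_effective_format(filename: str) -> MessagesFormatterType:
    if override_llm_format:
        return override_llm_format
    return llm_models.get(filename, llm_models[default_llm_model_filename])[1]
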
def add_dolphin_models(query, format_name):
    import re
    from huggingface_hub import HfApi
    global llm_models
    api = HfApi()
    add_models = {}
    format = llm_formats[format_name]
    filename = ""
    repo = ""
    try:
        # Accept either "user/repo" or a full URL pointing at a .gguf file.
        s = list(re.findall(r'^(?:https?://huggingface\.co/)?(.+?/.+?)(?:/.*/(.+?\.gguf).*?)?$', query)[0])
        if s and "" in s:
            s.remove("")
        if len(s) == 1:
            # Repo only: register every .gguf file in the repo.
            repo = s[0]
            if not api.repo_exists(repo_id=repo):
                return gr.update(visible=True)
            files = api.list_repo_files(repo_id=repo)
            for file in files:
                if str(file).endswith(".gguf"):
                    add_models[str(file)] = [repo, format]
        elif len(s) >= 2:
            # Repo plus a specific file.
            repo = s[0]
            filename = s[1]
            if not api.repo_exists(repo_id=repo) or not api.file_exists(repo_id=repo, filename=filename):
                return gr.update(visible=True)
            add_models[filename] = [repo, format]
        else:
            return gr.update(visible=True)
    except Exception:
        return gr.update(visible=True)
    print(add_models)
    llm_models = (llm_models | add_models).copy()
    return gr.update(choices=get_dolphin_models())


dolphin_output_language = "English"
dolphin_sysprompt_mode = "Default"
dolphin_system_prompt = {"Default": r'''You are a helpful AI assistant to generate messages for AI that outputs an image when I enter a message.
The message must have the following [Tags] generated in strict accordance with the following [Rules]:
```
[Tags]
- Words to describe full names of characters and names of series in which they appear AFAP.
- Words to describe names of the people there and their numbers, such as 2girls, 1boy.
- Words to describe their hair color, hairstyle, hair length, hair accessory, eye color, eye shape, facial expression, breast size, and clothing of them in detail, such as long hair.
- Words to describe their external features, ornaments and belongings (also specify colors, patterns, shapes) in detail.
- Words to describe their stance from head to toe in detail AFAP.
- Words to describe their acting, especially with sexual activity in detail AFAP.
- Words to describe their surroundings in detail.
- Words to describe background details, such as inside room, forest, starry sky.
[Rules]
- Any output should be plain text in English and don't use line breaks.
- Output only composed of Tags in 1 line, separated by commas with spaces between Tags, in lower case English.
- Output should be in the format: "//GENBEGIN//://1girl, Tag, Tag, ..., Tag//://GENEND//".
- Preferably refer to and describe the information obtained from Danbooru. If not, describe it in your own way.
- It's preferable that each Tag is a plain phrase, word, caption, Danbooru tag, or E621 tag.
- Convert any nicknames to full names first AFAP.
- If a sexual theme is given, priority should be given to specific and rich descriptions of sexual activity, especially about genitals, fluids.
- Assemble a short story internally which is developed from the themes provided, then describe a scene in detailed English sentences based on the central character internally.
- Split sentences into short phrases or words, and then convert them to Tags.
- Use associated Danbooru tags, E621 tags.
- Same Tags should be used only once per output.
- Anyway, keep processing until you've finished outputting the message.
```
Based on these Rules, please tell me a message within 40 Tags that can generate an image for the following themes:
''',
    "With dialogue and description": r'''You are a helpful AI assistant to generate messages for AI that outputs an image when I enter a message.
The message must have the following [Tags] generated in strict accordance with the following [Rules]:
```
[Tags]
- Words to describe full names of characters and names of series in which they appear AFAP.
- Words to describe names of the people there and their numbers, such as 2girls, 1boy.
- Words to describe their hair color, hairstyle, hair length, hair accessory, eye color, eye shape, facial expression, breast size, and clothing of them in detail, such as long hair.
- Words to describe their external features, ornaments and belongings (also specify colors, patterns, shapes) in detail.
- Words to describe their stance from head to toe in detail AFAP.
- Words to describe their acting, especially with sexual activity in detail AFAP.
- Words to describe their surroundings in detail.
- Words to describe background details, such as inside room, forest, starry sky.
[Rules]
- Any Tags should be plain text in English and don't use line breaks.
- Message is only composed of Tags in 1 line, separated by commas with spaces between Tags, in lower case English.
- Message should be in the format: "//GENBEGIN//://1girl, Tag, Tag, ..., Tag//://GENEND//".
- Preferably refer to and describe the information obtained from Danbooru. If not, describe it in your own way.
- It's preferable that each Tag is a plain phrase, word, caption, Danbooru tag, or E621 tag.
- Convert any nicknames to full names first AFAP.
- If a sexual theme is given, priority should be given to specific and rich descriptions of sexual activity, especially about genitals, fluids.
- Assemble a short story internally which is developed from the themes provided, then describe a scene in detailed English sentences based on the central character internally.
- Split sentences into short phrases or words, and then convert them to Tags.
- Use associated Danbooru tags, E621 tags.
- Same Tags should be used only once per output.
- Anyway, keep processing until you've finished outputting the message.
```
Based on these Rules, please tell me a message within 40 Tags that can generate an image for the following themes, then write the character's long actor's line composed of one's voices and moaning and voices in thought, based on the story you have assembled, in <LANGUAGE> only, enclosed in //VOICEBEGIN//:// and //://VOICEEND//, then describe the message you've generated in short, in <LANGUAGE> only:
''',
    "Chat with LLM": r"You are a helpful AI assistant. Respond in <LANGUAGE>."}
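
# Illustrative sketch of the round trip the prompts above set up: the model is asked
# to wrap its tag list in "//GENBEGIN//:// ... //://GENEND//", and dolphin_parse()
# below recovers the comma-separated tags from that envelope with the same regex used
# there. The helper name is hypothetical; the app itself goes through dolphin_parse().
# _example_extract_tags('//GENBEGIN//://1girl, long hair, smile//://GENEND//')
# -> '1girl, long hair, smile'
def _example_extract_tags(reply: str) -> str:
    import re
    m = re.findall(r'/GENBEGIN/((?:.|\s)+?)/GENEND/', reply)
    return re.sub(r'[*/:_"#]|\n', ' ', ", ".join(m)).strip() if m else ""
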
def get_dolphin_sysprompt():
    import re
    # Substitute the <LANGUAGE> placeholder with the currently selected language.
    prompt = re.sub('<LANGUAGE>', dolphin_output_language, dolphin_system_prompt.get(dolphin_sysprompt_mode, ""))
    return prompt


def get_dolphin_sysprompt_mode():
    return list(dolphin_system_prompt.keys())


def select_dolphin_sysprompt(key: str):
    global dolphin_sysprompt_mode
    if key not in dolphin_system_prompt:
        dolphin_sysprompt_mode = "Default"
    else:
        dolphin_sysprompt_mode = key
    return gr.update(value=get_dolphin_sysprompt())


def get_dolphin_languages():
    return llm_languages


def select_dolphin_language(lang: str):
    global dolphin_output_language
    dolphin_output_language = lang
    return gr.update(value=get_dolphin_sysprompt())


@spaces.GPU
def dolphin_respond(
    message: str,
    history: list[tuple[str, str]],
    model: str = default_llm_model_filename,
    system_message: str = get_dolphin_sysprompt(),
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.95,
    top_k: int = 40,
    repeat_penalty: float = 1.1,
    progress=gr.Progress(track_tqdm=True),
):
    from pathlib import Path
    progress(0, desc="Processing...")
    # The UI override takes precedence over the model's registered format.
    if override_llm_format:
        chat_template = override_llm_format
    else:
        chat_template = llm_models[model][1]
    llm = Llama(
        model_path=str(Path(f"{llm_models_dir}/{model}")),
        flash_attn=True,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=False
    )
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True
    # Rebuild the agent-side chat history from Gradio's (user, assistant) pairs.
    messages = BasicChatHistory()
    for msn in history:
        user = {'role': Roles.user, 'content': msn[0]}
        assistant = {'role': Roles.assistant, 'content': msn[1]}
        messages.add_message(user)
        messages.add_message(assistant)
    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )
    progress(0.5, desc="Processing...")
    outputs = ""
    for output in stream:
        outputs += output
        yield [(outputs, None)]


def dolphin_parse(
    history: list[tuple[str, str]],
):
    import re
    # Always return three values so the Gradio output components stay consistent.
    if not history or len(history) < 1:
        return "", gr.update(), gr.update()
    try:
        msg = history[-1][0]
    except Exception:
        return "", gr.update(), gr.update()
    m = re.findall(r'/GENBEGIN/((?:.|\s)+?)/GENEND/', msg)
    raw_prompt = re.sub(r'[*/:_"#]|\n', ' ', ", ".join(m)).lower() if m else ""
    prompts = list_uniq(to_list(raw_prompt) + ["nsfw", "explicit"])
    return ", ".join(prompts), gr.update(interactive=True), gr.update(interactive=True)
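
# Illustrative sketch (assumptions: a model has already been downloaded, and the
# @spaces.GPU decorator normally runs inside a Space): consuming the dolphin_respond()
# generator directly. Each yielded value is the chat-pairs list with the partial
# completion accumulated so far, so printing the delta streams tokens to stdout.
def _example_stream_to_stdout(prompt: str):
    last = ""
    for pairs in dolphin_respond(prompt, history=[]):
        chunk = pairs[0][0]
        print(chunk[len(last):], end="", flush=True)
        last = chunk
    print()
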
@spaces.GPU
def dolphin_respond_auto(
    message: str,
    history: list[tuple[str, str]],
    model: str = default_llm_model_filename,
    system_message: str = get_dolphin_sysprompt(),
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.95,
    top_k: int = 40,
    repeat_penalty: float = 1.1,
    progress=gr.Progress(track_tqdm=True),
):
    #if not is_japanese(message): return [(None, None)]
    from pathlib import Path
    progress(0, desc="Processing...")
    if override_llm_format:
        chat_template = override_llm_format
    else:
        chat_template = llm_models[model][1]
    llm = Llama(
        model_path=str(Path(f"{llm_models_dir}/{model}")),
        flash_attn=True,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=False
    )
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True
    messages = BasicChatHistory()
    for msn in history:
        user = {'role': Roles.user, 'content': msn[0]}
        assistant = {'role': Roles.assistant, 'content': msn[1]}
        messages.add_message(user)
        messages.add_message(assistant)
    progress(0, desc="Translating...")
    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )
    progress(0.5, desc="Processing...")
    outputs = ""
    for output in stream:
        outputs += output
        yield [(outputs, None)]


def dolphin_parse_simple(
    message: str,
    history: list[tuple[str, str]],
):
    import re
    #if not is_japanese(message) or not history or len(history) < 1: return message
    if not history or len(history) < 1:
        return message
    try:
        msg = history[-1][0]
    except Exception:
        return ""
    m = re.findall(r'/GENBEGIN/((?:.|\s)+?)/GENEND/', msg)
    raw_prompt = re.sub(r'[*/:_"#]|\n', ' ', ", ".join(m)).lower() if m else ""
    prompts = list_uniq(to_list(raw_prompt) + ["nsfw", "explicit"])
    return ", ".join(prompts)
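
# Minimal wiring sketch, under assumptions: the real UI for this module lives elsewhere
# (e.g. app.py) and every component name below is hypothetical. It shows how the pieces
# compose: pick a model, chat via dolphin_respond, then distill the last reply into
# image-prompt tags with dolphin_parse_simple.
def _example_build_demo():
    with gr.Blocks() as demo:
        model_dd = gr.Dropdown(choices=get_dolphin_models(), value=default_llm_model_filename, label="Model")
        format_dd = gr.Dropdown(choices=get_llm_formats(), value=get_dolphin_model_format(default_llm_model_filename), label="Format")
        model_info = gr.Markdown(get_dolphin_model_info(default_llm_model_filename))
        chatbot = gr.Chatbot()
        msg = gr.Textbox(label="Message")
        tags = gr.Textbox(label="Parsed tags")
        model_dd.change(select_dolphin_model, [model_dd], [model_dd, format_dd, model_info])
        format_dd.change(select_dolphin_format, [format_dd], [format_dd])
        msg.submit(dolphin_respond, [msg, chatbot], [chatbot])
        chatbot.change(dolphin_parse_simple, [msg, chatbot], [tags])
    return demo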