import ast
import time
# also supports importing from this file into other files
from enums import PromptType, gpt_token_mapping, \
    anthropic_mapping, google_mapping, mistralai_mapping

non_hf_types = ['gpt4all_llama', 'llama', 'gptj']

prompt_type_to_model_name = {
    'plain': [
        'EleutherAI/gpt-j-6B',
        'EleutherAI/pythia-6.9b',
        'EleutherAI/pythia-12b',
        'EleutherAI/pythia-12b-deduped',
        'EleutherAI/gpt-neox-20b',
        'openlm-research/open_llama_7b_700bt_preview',
        'decapoda-research/llama-7b-hf',
        'decapoda-research/llama-13b-hf',
        'decapoda-research/llama-30b-hf',
        'decapoda-research/llama-65b-hf',
        'facebook/mbart-large-50-many-to-many-mmt',
        'philschmid/bart-large-cnn-samsum',
        'philschmid/flan-t5-base-samsum',
        'gpt2',
        'distilgpt2',
        'mosaicml/mpt-7b-storywriter',
        'tiiuae/falcon-7b',
        'tiiuae/falcon-40b',
        'tiiuae/falcon-180B',
        'meta-llama/Llama-2-7b',
        'meta-llama/Llama-2-13b',
        'meta-llama/Llama-2-70b',
        'h2oai/h2ogpt-4096-llama2-7b',
        'h2oai/h2ogpt-4096-llama2-13b',
        'h2oai/h2ogpt-4096-llama2-70b',
        'h2oai/h2ogpt-16k-codellama-7b',
        'h2oai/h2ogpt-16k-codellama-13b',
        'h2oai/h2ogpt-16k-codellama-34b',
        'h2oai/h2ogpt-16k-codellama-7b-python',
        'h2oai/h2ogpt-16k-codellama-13b-python',
        'h2oai/h2ogpt-16k-codellama-34b-python',
        'h2oai/h2ogpt-32k-codellama-34b-python',
        'mistralai/Mistral-7B-v0.1',
        'mistralai/Mixtral-8x7B-v0.1',
    ],
    'gptj': ['gptj', 'gpt4all_llama'],
    'prompt_answer': [
        'h2oai/h2ogpt-gm-oasst1-en-1024-20b',
        'h2oai/h2ogpt-gm-oasst1-en-1024-12b',
        'h2oai/h2ogpt-gm-oasst1-multilang-1024-20b',
        'h2oai/h2ogpt-gm-oasst1-multilang-2048-falcon-7b',
        'h2oai/h2ogpt-gm-oasst1-multilang-2048-falcon-7b-v2',
        'h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3',
        'h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b',
        'h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2',
        'h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1',
        'h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2',
        'h2oai/h2ogpt-gm-oasst1-en-xgen-7b-8k',
        'h2oai/h2ogpt-gm-oasst1-multilang-xgen-7b-8k',
        'TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2-GPTQ',
    ],
    'prompt_answer_openllama': [
        'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt',
        'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2',
        'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-700bt',
        'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b',
        'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b',
    ],
    'instruct': ['TheBloke/llama-30b-supercot-SuperHOT-8K-fp16', 'TheBloke/Nous-Hermes-13B-GPTQ'],
    # https://huggingface.co/TheBloke/llama-30b-supercot-SuperHOT-8K-fp16#prompting
    'instruct_with_end': ['databricks/dolly-v2-12b'],
    'quality': [],
    'human_bot': [
        'h2oai/h2ogpt-oasst1-512-12b',
        'h2oai/h2ogpt-oasst1-512-20b',
        'h2oai/h2ogpt-oig-oasst1-256-6_9b',
        'h2oai/h2ogpt-oig-oasst1-512-6_9b',
        'h2oai/h2ogpt-oig-oasst1-256-6.9b',  # legacy
        'h2oai/h2ogpt-oig-oasst1-512-6.9b',  # legacy
        'h2oai/h2ogpt-research-oasst1-512-30b',
        'h2oai/h2ogpt-research-oasst1-llama-65b',
        'h2oai/h2ogpt-oasst1-falcon-40b',
        'h2oai/h2ogpt-oig-oasst1-falcon-40b',
        'llmware/dragon-mistral-7b-v0',  # https://huggingface.co/llmware/dragon-mistral-7b-v0
    ],
    'dai_faq': [],
    'summarize': [],
    'simple_instruct': ['t5-small', 't5-large', 'google/flan-t5', 'google/flan-t5-xxl', 'google/flan-ul2'],
    'instruct_vicuna': ['AlekseyKorshuk/vicuna-7b', 'TheBloke/stable-vicuna-13B-HF', 'junelee/wizard-vicuna-13b'],
    'human_bot_orig': ['togethercomputer/GPT-NeoXT-Chat-Base-20B'],
    "open_assistant": ['OpenAssistant/oasst-sft-7-llama-30b-xor', 'oasst-sft-7-llama-30b'],
    "wizard_lm": ['ehartford/WizardLM-7B-Uncensored', 'ehartford/WizardLM-13B-Uncensored'],
    "wizard_mega": ['openaccess-ai-collective/wizard-mega-13b'],
    "instruct_simple": ['JosephusCheung/Guanaco'],
    "wizard_vicuna": ['ehartford/Wizard-Vicuna-13B-Uncensored'],
    # "wizard2": [],
    "mptinstruct": ['mosaicml/mpt-30b-instruct', 'mosaicml/mpt-7b-instruct'],
    "mptchat": ['mosaicml/mpt-7b-chat', 'mosaicml/mpt-30b-chat', 'TheBloke/mpt-30B-chat-GGML'],
    "orca2": ['TheBloke/Orca-2-13B-GGUF', 'microsoft/Orca-2-13b'],
    "vicuna11": ['lmsys/vicuna-33b-v1.3',
                 'lmsys/vicuna-7b-v1.5',
                 'lmsys/vicuna-13b-v1.5',  # https://huggingface.co/lmsys/vicuna-13b-v1.5/discussions/6/files
                 'NousResearch/Nous-Capybara-34B',
                 ],
    "vicuna11nosys": ['lmsys/vicuna-13b-v1.5-16k',
                      # system prompt doesn't work, no evidence was trained with it from model card.
                      ],
    "one_shot": ['lmsys/fastchat-t5-3b-v1.0'],
    "falcon": ['tiiuae/falcon-40b-instruct', 'tiiuae/falcon-7b-instruct'],
    "llama2": [
        'meta-llama/Llama-2-7b-chat-hf',
        'meta-llama/Llama-2-13b-chat-hf',
        'meta-llama/Llama-2-34b-chat-hf',
        'meta-llama/Llama-2-70b-chat-hf',
        'h2oai/h2ogpt-oasst1-4096-llama2-7b',
        'h2oai/h2ogpt-oasst1-4096-llama2-13b',
        'h2oai/h2ogpt-oasst1-4096-llama2-70b',
        # 'llama',  # No longer go to llama2 prompt for any llama model, too many not llama2 and auto-detection is confusing then
        'TheBloke/Llama-2-7b-Chat-GPTQ',
        'TheBloke/Llama-2-7b-chat-fp16',
        'TheBloke/Llama-2-13b-chat-fp16',
        'TheBloke/Llama-2-70b-chat-fp16',
        'h2oai/h2ogpt-4096-llama2-7b-chat',
        'h2oai/h2ogpt-4096-llama2-13b-chat',
        'h2oai/h2ogpt-4096-llama2-70b-chat',
        'h2oai/h2ogpt-16k-codellama-7b-instruct',
        'h2oai/h2ogpt-16k-codellama-13b-instruct',
        'h2oai/h2ogpt-16k-codellama-34b-instruct',
        'h2oai/h2ogpt-32k-codellama-34b-instruct',
        'TheBloke/Llama-2-70B-chat-AWQ',
        'h2oai/h2ogpt-4096-llama2-70b-chat-4bit',
        'TheBloke/Llama-2-13B-chat-AWQ',
        'Yukang/LongAlpaca-70B',  # or can be instruct
        'TheBloke/Llama-2-7B-Chat-GGUF',
    ],
    "mistral": ['mistralai/Mistral-7B-Instruct-v0.1', 'TheBloke/Mistral-7B-Instruct-v0.1-GGUF',
                'mistralai/Mistral-7B-Instruct-v0.2', 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF',
                ],
    "mixtral": ['mistralai/Mixtral-8x7B-Instruct-v0.1', 'TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF',
                'TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ', 'TheBloke/Mixtral-8x7B-Instruct-v0.1-AWQ'],
    "mixtralnosys": [],
    "zephyr": ['HuggingFaceH4/zephyr-7b-alpha', 'HuggingFaceH4/zephyr-7b-beta', 'TheBloke/zephyr-7B-beta-GGUF',
               'TheBloke/zephyr-7B-beta-AWQ', 'zephyr-7b-beta.Q5_K_M.gguf'],
    "beluga": ['stabilityai/StableBeluga2', 'psmathur/orca_mini_v3_7b'],
    "wizard3nospace": ['WizardLM/WizardLM-13B-V1.2'],
    "falcon_chat": ['tiiuae/falcon-180B-chat'],
    "xwin": ['Xwin-LM/Xwin-LM-13B-V0.1', 'TheBloke/Xwin-LM-13B-V0.1-GPTQ', 'TheBloke/Xwin-LM-13B-v0.2-GPTQ',
             'Xwin-LM/Xwin-LM-70B-V0.1'],
    "xwincoder": ['Xwin-LM/XwinCoder-7B', 'Xwin-LM/XwinCoder-13B', 'Xwin-LM/XwinCoder-34B'],
    "xwinmath": ["Xwin-LM/Xwin-Math-7B-V1.0", "Xwin-LM/Xwin-Math-70B-V1.0", "Xwin-LM/Xwin-Math-13B-V1.0"],
    "mistrallite": ['amazon/MistralLite'],
    "aquila": ['h2oai/h2ogpt-16k-aquilachat2-34b', 'BAAI/AquilaChat2-34B-16K', 'BAAI/AquilaChat2-34B-16k',
               'BAAI/AquilaChat2-7B-16K'],
    "aquila_legacy": ['BAAI/AquilaChat2-34B'],
    "aquila_v1": ['BAAI/AquilaChat2-7B'],
    "mistralgerman": ['TheBloke/em_german_leo_mistral-GPTQ'],
    "deepseek_coder": ['deepseek-ai/deepseek-coder-1.3b-instruct',
                       'deepseek-ai/deepseek-coder-6.7b-instruct',
                       'deepseek-ai/deepseek-coder-33b-instruct',
                       ],
    "open_chat": ['openchat/openchat_3.5', 'TheBloke/openchat_3.5-GPTQ', 'TheBloke/openchat_3.5-GGUF',
                  'TheBloke/openchat_3.5-AWQ', 'TheBloke/openchat_3.5-16k-AWQ',
                  'openchat_3.5.Q5_K_M.gguf', 'NurtureAI/openchat_3.5-16k'],
    "open_chat_correct": ['berkeley-nest/Starling-LM-7B-alpha', 'openchat/openchat-3.5-1210',
                          'openchat/openchat_3.5', 'openchat/openchat_v3.2_super',
                          ],  # can be any from open_chat list, by using this prompt
    "open_chat_code": [],  # can be any from open_chat list, by using this prompt
    "open_chat_math": [],  # can be any from open_chat list, by using this prompt
    "jais": ['core42/jais-30b-chat-v1'],
    "yi": ['01-ai/Yi-34B-Chat', 'TheBloke/Yi-34B-Chat-AWQ'],
    "docsgpt": ['Arc53/docsgpt-7b-mistral'],
    # could be plain, but default is correct prompt_type for default TheBloke model ggml-wizardLM-7B.q4_2.bin
}
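# Illustrative sketch (comment only, not part of the original mapping): a locally fine-tuned
# model can be registered under an existing prompt type so the automatic prompt-type lookup
# below resolves it; the model name here is hypothetical.
#   prompt_type_to_model_name['llama2'].append('my-org/my-llama2-finetune')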
anthropic_gpts = sorted(anthropic_mapping.keys())
prompt_type_to_model_name['anthropic'] = anthropic_gpts

google_gpts = sorted(google_mapping.keys())
prompt_type_to_model_name['google'] = google_gpts

mistralai_gpts = sorted(mistralai_mapping.keys())
prompt_type_to_model_name['mistralai'] = mistralai_gpts

model_names_curated_big = ['Yukang/LongAlpaca-70B',
                           'lmsys/vicuna-13b-v1.5-16k',
                           'h2oai/h2ogpt-32k-codellama-34b-instruct']
model_names_curated = ['TheBloke/Xwin-LM-13B-V0.1-GPTQ',
                       'TheBloke/Llama-2-7B-Chat-GGUF',
                       'HuggingFaceH4/zephyr-7b-beta',
                       'TheBloke/zephyr-7B-beta-GGUF',
                       'TheBloke/zephyr-7B-beta-AWQ'] + model_names_curated_big

openai_gpts = list(gpt_token_mapping.keys())
prompt_type_to_model_name.update({
    "openai": ["text-davinci-003", "text-curie-001", "text-babbage-001", "text-ada-001"],
    "openai_chat": openai_gpts,
})
model_names_curated += ['gpt-3.5-turbo']

inv_prompt_type_to_model_name = {v.strip(): k for k, l in prompt_type_to_model_name.items() for v in l}
inv_prompt_type_to_model_lower = {v.strip().lower(): k for k, l in prompt_type_to_model_name.items() for v in l}

prompt_types_strings = []
for p in PromptType:
    prompt_types_strings.extend([p.name])

prompt_types = []
for p in PromptType:
    prompt_types.extend([p.name, p.value, str(p.value)])
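# Illustrative lookup sketch (comment only, not executed): callers can resolve a model's
# default prompt type case-insensitively via the inverse map built above, e.g.
#   inv_prompt_type_to_model_lower.get('h2oai/h2ogpt-4096-llama2-7b-chat'.lower())  # -> 'llama2'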
def get_prompt(prompt_type, prompt_dict, context, reduced, making_context, return_dict=False,
               system_prompt=None, histi=-1):
    prompt_dict_error = ''
    generates_leading_space = False
    can_handle_system_prompt = False

    if prompt_type == PromptType.custom.name and not isinstance(prompt_dict, dict):
        try:
            prompt_dict = ast.literal_eval(prompt_dict)
        except BaseException as e:
            prompt_dict_error = str(e)

    if prompt_dict_error:
        promptA = None
        promptB = None
        PreInstruct = None
        PreInput = ''
        PreResponse = ''
        terminate_response = None
        chat_sep = ''
        chat_turn_sep = ''
        humanstr = ''
        botstr = ''
        generates_leading_space = False
    elif prompt_type in [PromptType.custom.value, str(PromptType.custom.value),
                         PromptType.custom.name]:
        promptA = prompt_dict.get('promptA', '')
        promptB = prompt_dict.get('promptB', '')
        PreInstruct = prompt_dict.get('PreInstruct', '')
        PreInput = prompt_dict.get('PreInput', '')
        PreResponse = prompt_dict.get('PreResponse', '')
        terminate_response = prompt_dict.get('terminate_response', None)
        chat_sep = prompt_dict.get('chat_sep', '\n')
        chat_turn_sep = prompt_dict.get('chat_turn_sep', '\n')
        humanstr = prompt_dict.get('humanstr', '')
        botstr = prompt_dict.get('botstr', '')
    elif prompt_type in [PromptType.plain.value, str(PromptType.plain.value),
                         PromptType.plain.name]:
        promptA = promptB = PreInstruct = PreInput = PreResponse = None
        terminate_response = []
        chat_turn_sep = chat_sep = ''
        # plain should have None for human/bot, so nothing truncated out, not '' that would truncate after first token
        humanstr = None
        botstr = None
    elif prompt_type == 'simple_instruct':
        promptA = promptB = PreInstruct = PreInput = PreResponse = None
        terminate_response = []
        chat_turn_sep = chat_sep = '\n'
        humanstr = None
        botstr = None
    elif prompt_type in [PromptType.instruct.value, str(PromptType.instruct.value),
                         PromptType.instruct.name] + [PromptType.instruct_with_end.value,
                                                      str(PromptType.instruct_with_end.value),
                                                      PromptType.instruct_with_end.name]:
        promptA = 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n' if not reduced else ''
        promptB = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n' if not reduced else ''
        PreInstruct = """
### Instruction:
"""
        PreInput = """
### Input:
"""
        PreResponse = """
### Response:
"""
        if prompt_type in [PromptType.instruct_with_end.value, str(PromptType.instruct_with_end.value),
                           PromptType.instruct_with_end.name]:
            terminate_response = ['### End']
        else:
            terminate_response = None
        chat_turn_sep = chat_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.quality.value, str(PromptType.quality.value),
                         PromptType.quality.name]:
        promptA = 'Write a detailed high-quality, accurate, fair, Response with about 100 words by following the Instruction as applied on the Input.\n' if not reduced else ''
        promptB = 'Write a detailed high-quality, accurate, fair, Response with about 100 words by following the Instruction.\n' if not reduced else ''
        PreInstruct = """
### Instruction:
"""
        PreInput = """
### Input:
"""
        PreResponse = """
### Response:
"""
        terminate_response = None
        chat_turn_sep = chat_sep = '\n'
        humanstr = PreInstruct  # first thing human says
        botstr = PreResponse  # first thing bot says
    elif prompt_type in [PromptType.human_bot.value, str(PromptType.human_bot.value),
                         PromptType.human_bot.name] + [PromptType.human_bot_orig.value,
                                                       str(PromptType.human_bot_orig.value),
                                                       PromptType.human_bot_orig.name]:
        human = '<human>:'
        bot = "<bot>:"
        if reduced or context or prompt_type in [PromptType.human_bot.value, str(PromptType.human_bot.value),
                                                 PromptType.human_bot.name]:
            preprompt = ''
        else:
            cur_date = time.strftime('%Y-%m-%d')
            cur_time = time.strftime('%H:%M:%S %p %Z')
            PRE_PROMPT = """\
Current Date: {}
Current Time: {}

"""
            preprompt = PRE_PROMPT.format(cur_date, cur_time)
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = human + ' '
        PreInput = None
        if making_context:
            # when making context, want it to appear as-if LLM generated, which starts with space after :
            PreResponse = bot + ' '
        else:
            # normally LLM adds space after this, because was how trained.
            # if add space here, non-unique tokenization will often make LLM produce wrong output
            PreResponse = bot
        terminate_response = ['\n' + human, '\n' + bot, human, bot, PreResponse]
        chat_turn_sep = chat_sep = '\n'
        humanstr = human  # tag before human talks
        botstr = bot  # tag before bot talks
        generates_leading_space = True
    elif prompt_type in [PromptType.dai_faq.value, str(PromptType.dai_faq.value),
                         PromptType.dai_faq.name]:
        promptA = ''
        promptB = 'Answer the following Driverless AI question.\n'
        PreInstruct = """
### Driverless AI frequently asked question:
"""
        PreInput = None
        PreResponse = """
### Driverless AI documentation answer:
"""
        terminate_response = ['\n\n']
        chat_turn_sep = chat_sep = terminate_response
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.summarize.value, str(PromptType.summarize.value),
                         PromptType.summarize.name]:
        promptA = promptB = PreInput = ''
        PreInstruct = '## Main Text\n\n'
        PreResponse = '\n\n## Summary\n\n'
        terminate_response = None
        chat_turn_sep = chat_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.instruct_vicuna.value, str(PromptType.instruct_vicuna.value),
                         PromptType.instruct_vicuna.name]:
        can_handle_system_prompt = True
        if system_prompt in [None, 'None', 'auto']:
            system_prompt = "A chat between a curious human and an artificial intelligence assistant. " \
                            "The assistant gives helpful, detailed, and polite answers to the human's questions."
        promptA = promptB = system_prompt if not reduced else ''
        PreInstruct = """
### Human:
"""
        PreInput = None
        PreResponse = """
### Assistant:
"""
        # but only allow terminate after prompt is found correctly, else can't terminate
        terminate_response = ['### Human:', '### Human: ', ' ### Human:', '### Assistant:']
        chat_turn_sep = chat_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.prompt_answer.value, str(PromptType.prompt_answer.value),
                         PromptType.prompt_answer.name]:
        preprompt = ''
        prompt_tokens = "<|prompt|>"
        answer_tokens = "<|answer|>"
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = prompt_tokens
        PreInput = None
        PreResponse = answer_tokens
        eos = '<|endoftext|>'  # neox eos
        humanstr = prompt_tokens
        botstr = answer_tokens
        terminate_response = [humanstr, PreResponse, eos]
        chat_sep = eos
        chat_turn_sep = eos
    elif prompt_type in [PromptType.prompt_answer_openllama.value, str(PromptType.prompt_answer_openllama.value),
                         PromptType.prompt_answer_openllama.name]:
        preprompt = ''
        prompt_tokens = "<|prompt|>"
        answer_tokens = "<|answer|>"
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = prompt_tokens
        PreInput = None
        PreResponse = answer_tokens
        eos = '</s>'  # llama eos
        humanstr = prompt_tokens
        botstr = answer_tokens
        terminate_response = [humanstr, PreResponse, eos]
        chat_sep = eos
        chat_turn_sep = eos
    elif prompt_type in [PromptType.open_assistant.value, str(PromptType.open_assistant.value),
                         PromptType.open_assistant.name]:
        # From added_tokens.json
        preprompt = ''
        prompt_tokens = "<|prompter|>"
        answer_tokens = "<|assistant|>"
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = prompt_tokens
        PreInput = None
        PreResponse = answer_tokens
        pend = "<|prefix_end|>"
        eos = "</s>"
        humanstr = prompt_tokens
        botstr = answer_tokens
        terminate_response = [humanstr, PreResponse, pend, eos]
        chat_turn_sep = chat_sep = eos
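        # Example (illustrative) of the prompt_answer rendering above: a single turn becomes
        #   <|prompt|>What is 1+1?<|answer|>2<|endoftext|>
        # with the eos token separating turns in multi-turn chat.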
    elif prompt_type in [PromptType.wizard_lm.value, str(PromptType.wizard_lm.value),
                         PromptType.wizard_lm.name]:
        # https://github.com/ehartford/WizardLM/blob/main/src/train_freeform.py
        preprompt = ''
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = ""
        PreInput = None
        PreResponse = "\n\n### Response\n"
        eos = "</s>"
        terminate_response = [PreResponse, eos]
        chat_turn_sep = chat_sep = eos
        humanstr = promptA
        botstr = PreResponse
    elif prompt_type in [PromptType.wizard_mega.value, str(PromptType.wizard_mega.value),
                         PromptType.wizard_mega.name]:
        preprompt = ''
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = """
### Instruction:
"""
        PreInput = None
        PreResponse = """
### Assistant:
"""
        terminate_response = [PreResponse]
        chat_turn_sep = chat_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.instruct_vicuna2.value, str(PromptType.instruct_vicuna2.value),
                         PromptType.instruct_vicuna2.name]:
        promptA = promptB = "" if not reduced else ''
        PreInstruct = """
HUMAN:
"""
        PreInput = None
        PreResponse = """
ASSISTANT:
"""
        terminate_response = ['HUMAN:']  # but only allow terminate after prompt is found correctly, else can't terminate
        chat_turn_sep = chat_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.instruct_vicuna3.value, str(PromptType.instruct_vicuna3.value),
                         PromptType.instruct_vicuna3.name]:
        promptA = promptB = "" if not reduced else ''
        PreInstruct = """
### User:
"""
        PreInput = None
        PreResponse = """
### Assistant:
"""
        terminate_response = ['### User:']  # but only allow terminate after prompt is found correctly, else can't terminate
        chat_turn_sep = chat_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.wizard2.value, str(PromptType.wizard2.value),
                         PromptType.wizard2.name]:
        can_handle_system_prompt = True
        # https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GGML
        if system_prompt in [None, 'None', 'auto']:
            system_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
        preprompt = """%s""" % system_prompt if not reduced else ''
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = """
### Instruction:
"""
        PreInput = None
        PreResponse = """
### Response:
"""
        terminate_response = [PreResponse]
        chat_turn_sep = chat_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.wizard3.value, str(PromptType.wizard3.value),
                         PromptType.wizard3.name]:
        # https://huggingface.co/TheBloke/wizardLM-13B-1.0-GGML
        can_handle_system_prompt = True
        if system_prompt in [None, 'None', 'auto']:
            system_prompt = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."
preprompt = """%s""" % system_prompt if not reduced else '' start = '' promptB = promptA = '%s%s' % (preprompt, start) PreInstruct = """USER: """ PreInput = None PreResponse = """ASSISTANT: """ terminate_response = [PreResponse] chat_turn_sep = chat_sep = '\n' humanstr = PreInstruct botstr = PreResponse elif prompt_type in [PromptType.wizard_vicuna.value, str(PromptType.wizard_vicuna.value), PromptType.wizard_vicuna.name]: preprompt = '' start = '' promptB = promptA = '%s%s' % (preprompt, start) PreInstruct = """USER: """ PreInput = None PreResponse = """ASSISTANT: """ terminate_response = [PreResponse] chat_turn_sep = chat_sep = '\n' humanstr = PreInstruct botstr = PreResponse elif prompt_type in [PromptType.instruct_simple.value, str(PromptType.instruct_simple.value), PromptType.instruct_simple.name]: promptB = promptA = '' if not reduced else '' PreInstruct = """ ### Instruction: """ PreInput = """ ### Input: """ PreResponse = """ ### Response: """ terminate_response = None chat_turn_sep = chat_sep = '\n' humanstr = PreInstruct botstr = PreResponse elif prompt_type in [PromptType.openai.value, str(PromptType.openai.value), PromptType.openai.name]: can_handle_system_prompt = True if system_prompt in [None, 'None', 'auto']: system_prompt = "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly." preprompt = """%s""" % system_prompt if not reduced else '' start = '' promptB = promptA = '%s%s' % (preprompt, start) PreInstruct = "\nHuman: " PreInput = None PreResponse = "\nAI:" terminate_response = [PreResponse] + [" Human:", " AI:"] chat_turn_sep = chat_sep = '\n' humanstr = PreInstruct botstr = PreResponse elif prompt_type in [PromptType.gptj.value, str(PromptType.gptj.value), PromptType.gptj.name]: preprompt = "### Instruction:\n The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response." if not reduced else '' start = '' promptB = promptA = '%s%s' % (preprompt, start) PreInstruct = "\n### Prompt: " PreInput = None PreResponse = "\n### Response: " terminate_response = [PreResponse] + ["Prompt:", "Response:"] chat_turn_sep = chat_sep = '\n' humanstr = PreInstruct botstr = PreResponse elif prompt_type in [PromptType.openai_chat.value, str(PromptType.openai_chat.value), PromptType.openai_chat.name] or \ prompt_type in [PromptType.anthropic.value, str(PromptType.anthropic.value), PromptType.anthropic.name] or \ prompt_type in [PromptType.google.value, str(PromptType.google.value), PromptType.google.name] or \ prompt_type in [PromptType.mistralai.value, str(PromptType.mistralai.value), PromptType.mistralai.name]: can_handle_system_prompt = True # handled via special messages/arguments not part of prompt # mistral safe_mode=True is same as this system prompt: # Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity. # prompting and termination all handled by endpoint preprompt = """""" start = '' promptB = promptA = '%s%s' % (preprompt, start) PreInstruct = "" PreInput = None PreResponse = "" terminate_response = [] chat_turn_sep = chat_sep = '\n' humanstr = None botstr = None if prompt_type in [PromptType.google.value, str(PromptType.google.value), PromptType.google.name] and system_prompt == 'auto': # google throws safety/harassment errors if don't tell the model it's helpful, even for asking "what is 1+1?" 
# so give basic prompt if auto, the current default, so part of pre-conversation always system_prompt = 'I am a helpful assistant. I will accurately answer all your questions.' elif prompt_type in [PromptType.vicuna11.value, str(PromptType.vicuna11.value), PromptType.vicuna11.name] or \ prompt_type in [PromptType.vicuna11nosys.value, str(PromptType.vicuna11nosys.value), PromptType.vicuna11nosys.name]: can_handle_system_prompt = prompt_type in [PromptType.vicuna11.value, str(PromptType.vicuna11.value), PromptType.vicuna11.name] if system_prompt in [None, 'None', 'auto']: system_prompt = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions." if not can_handle_system_prompt: # totally remove system prompt stuff, maybe not always done for every model like this preprompt = "" else: preprompt = """%s """ % system_prompt if not reduced else '' start = '' promptB = promptA = '%s%s' % (preprompt, start) eos = '' PreInstruct = """USER: """ PreInput = None PreResponse = """ASSISTANT:""" terminate_response = [PreResponse] chat_sep = ' ' chat_turn_sep = eos humanstr = PreInstruct botstr = PreResponse if making_context: # when making context, want it to appear as-if LLM generated, which starts with space after : PreResponse = PreResponse + ' ' else: # normally LLM adds space after this, because was how trained. # if add space here, non-unique tokenization will often make LLM produce wrong output PreResponse = PreResponse elif prompt_type in [PromptType.mptinstruct.value, str(PromptType.mptinstruct.value), PromptType.mptinstruct.name]: can_handle_system_prompt = True # https://huggingface.co/mosaicml/mpt-30b-instruct#formatting if system_prompt in [None, 'None', 'auto']: system_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request." promptA = promptB = '%s\n' % system_prompt if not reduced else '' PreInstruct = """ ### Instruction """ PreInput = """ ### Input """ PreResponse = """ ### Response """ terminate_response = None chat_turn_sep = chat_sep = '\n' humanstr = PreInstruct botstr = PreResponse elif prompt_type in [PromptType.mptchat.value, str(PromptType.mptchat.value), PromptType.mptchat.name]: can_handle_system_prompt = True # https://huggingface.co/TheBloke/mpt-30B-chat-GGML#prompt-template if system_prompt in [None, 'None', 'auto']: system_prompt = "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers." promptA = promptB = """<|im_start|>system\n%s\n<|im_end|>""" % system_prompt if not reduced else '' PreInstruct = """<|im_start|>user """ PreInput = None PreResponse = """<|im_end|><|im_start|>assistant """ terminate_response = ['<|im_end|>'] chat_sep = '' chat_turn_sep = '<|im_end|>' humanstr = PreInstruct botstr = PreResponse elif prompt_type in [PromptType.orca2.value, str(PromptType.orca2.value), PromptType.orca2.name]: can_handle_system_prompt = True # https://huggingface.co/microsoft/Orca-2-13b#getting-started-with-orca-2 if system_prompt in [None, 'None', 'auto']: system_prompt = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior." 
    elif prompt_type in [PromptType.orca2.value, str(PromptType.orca2.value),
                         PromptType.orca2.name]:
        can_handle_system_prompt = True
        # https://huggingface.co/microsoft/Orca-2-13b#getting-started-with-orca-2
        if system_prompt in [None, 'None', 'auto']:
            system_prompt = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
        promptA = promptB = """<|im_start|>system\n%s\n<|im_end|>""" % system_prompt if not reduced else ''
        PreInstruct = """<|im_start|>user
"""
        PreInput = None
        PreResponse = """<|im_end|><|im_start|>assistant
"""
        terminate_response = ['<|im_end|>']
        chat_sep = ''
        chat_turn_sep = '<|im_end|>'
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.falcon.value, str(PromptType.falcon.value),
                         PromptType.falcon.name]:
        promptA = promptB = "" if not reduced else ''
        PreInstruct = """User: """
        PreInput = None
        PreResponse = """Assistant:"""
        terminate_response = ['\nUser', "<|endoftext|>"]
        chat_sep = '\n\n'
        chat_turn_sep = '\n\n'
        humanstr = PreInstruct
        botstr = PreResponse
        if making_context:
            # when making context, want it to appear as-if LLM generated, which starts with space after :
            PreResponse = 'Assistant: '
        else:
            # normally LLM adds space after this, because was how trained.
            # if add space here, non-unique tokenization will often make LLM produce wrong output
            PreResponse = PreResponse
        # generates_leading_space = True
    elif prompt_type in [PromptType.guanaco.value, str(PromptType.guanaco.value),
                         PromptType.guanaco.name]:
        # https://huggingface.co/TheBloke/guanaco-65B-GPTQ
        promptA = promptB = "" if not reduced else ''
        PreInstruct = """### Human: """
        PreInput = None
        PreResponse = """### Assistant:"""
        terminate_response = ['### Human:']  # but only allow terminate after prompt is found correctly, else can't terminate
        chat_turn_sep = chat_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.llama2.value, str(PromptType.llama2.value),
                         PromptType.llama2.name]:
        can_handle_system_prompt = True
        if system_prompt in [None, 'None', 'auto']:
            # automatic
            system_prompt = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""
            # too much safety, hurts accuracy
        if system_prompt:
            sys_msg = """<<SYS>>\n%s\n<</SYS>>\n\n""" % system_prompt
        else:
            sys_msg = ''
        if not reduced:
            promptA = promptB = ''
        else:
            promptA = promptB = ''
        PreInput = None
        PreInstruct = "<s>[INST] "
        if making_context and histi == 0 or not making_context and not reduced:
            PreInstruct += sys_msg
        PreResponse = "[/INST]"
        terminate_response = ["[INST]", "</s>"]
        chat_sep = ' '
        chat_turn_sep = ' </s>'
        humanstr = '[INST]'
        botstr = '[/INST]'
        if making_context:
            PreResponse += " "
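        # Example (illustrative): with the default system prompt, a first llama2 turn renders as
        #   <s>[INST] <<SYS>>\n{system}\n<</SYS>>\n\n{user} [/INST] {answer} </s>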
    elif prompt_type in [PromptType.beluga.value, str(PromptType.beluga.value),
                         PromptType.beluga.name]:
        can_handle_system_prompt = True
        if system_prompt in [None, 'None', 'auto']:
            # automatic
            system_prompt = "You are Stable Beluga, an AI that follows instructions extremely well. Help as much as you can. Remember, be safe, and don't do anything illegal."
        if system_prompt:
            sys_msg = """### System:\n%s\n\n""" % system_prompt
        else:
            sys_msg = ''
        if sys_msg and not reduced:
            # too much safety, hurts accuracy
            promptA = promptB = sys_msg
        else:
            promptA = promptB = ''
        PreInput = None
        PreInstruct = "### User:\n"
        PreResponse = "\n### Assistant:\n"
        terminate_response = ['### Assistant:', "</s>"]
        chat_sep = '\n'
        chat_turn_sep = '\n\n'
        humanstr = '### User:'
        botstr = '### Assistant:'
    elif prompt_type in [PromptType.wizard3nospace.value, str(PromptType.wizard3nospace.value),
                         PromptType.wizard3nospace.name]:
        # https://huggingface.co/WizardLM/WizardLM-13B-V1.2/discussions/3
        preprompt = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.""" if not reduced else ''
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = """USER: """
        PreInput = None
        PreResponse = """ASSISTANT:"""
        terminate_response = [PreResponse]
        chat_turn_sep = chat_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.one_shot.value, str(PromptType.one_shot.value),
                         PromptType.one_shot.name]:
        promptA = promptB = """A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
### Human: Got any creative ideas for a 10 year old’s birthday?
### Assistant: Of course! Here are some creative ideas for a 10-year-old's birthday party:
1. Treasure Hunt: Organize a treasure hunt in your backyard or nearby park. Create clues and riddles for the kids to solve, leading them to hidden treasures and surprises.
2. Science Party: Plan a science-themed party where kids can engage in fun and interactive experiments. You can set up different stations with activities like making slime, erupting volcanoes, or creating simple chemical reactions.
3. Outdoor Movie Night: Set up a backyard movie night with a projector and a large screen or white sheet. Create a cozy seating area with blankets and pillows, and serve popcorn and snacks while the kids enjoy a favorite movie under the stars.
4. DIY Crafts Party: Arrange a craft party where kids can unleash their creativity. Provide a variety of craft supplies like beads, paints, and fabrics, and let them create their own unique masterpieces to take home as party favors.
5. Sports Olympics: Host a mini Olympics event with various sports and games. Set up different stations for activities like sack races, relay races, basketball shooting, and obstacle courses. Give out medals or certificates to the participants.
6. Cooking Party: Have a cooking-themed party where the kids can prepare their own mini pizzas, cupcakes, or cookies. Provide toppings, frosting, and decorating supplies, and let them get hands-on in the kitchen.
7. Superhero Training Camp: Create a superhero-themed party where the kids can engage in fun training activities. Set up an obstacle course, have them design their own superhero capes or masks, and organize superhero-themed games and challenges.
8. Outdoor Adventure: Plan an outdoor adventure party at a local park or nature reserve. Arrange activities like hiking, nature scavenger hunts, or a picnic with games. Encourage exploration and appreciation for the outdoors.
Remember to tailor the activities to the birthday child's interests and preferences.
Have a great celebration!""" if not reduced else ''
        PreInstruct = """
### Human: """
        PreInput = None
        PreResponse = """
### Assistant:"""
        # but only allow terminate after prompt is found correctly, else can't terminate
        terminate_response = ['### Human:', '### Human: ', ' ### Human:', '### Assistant:']
        chat_turn_sep = chat_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.falcon_chat.value, str(PromptType.falcon_chat.value),
                         PromptType.falcon_chat.name]:
        can_handle_system_prompt = True
        if system_prompt in [None, 'None', 'auto']:
            # automatic
            system_prompt = "You are an intelligent and helpful assistant."
        if system_prompt:
            sys_msg = "System: %s\n" % system_prompt
        else:
            sys_msg = ''
        if sys_msg and not reduced:
            # too much safety, hurts accuracy
            promptA = promptB = sys_msg
        else:
            promptA = promptB = ''
        PreInstruct = """User: """
        PreInput = None
        PreResponse = """Falcon:"""
        terminate_response = ['\nUser:', "<|endoftext|>", " User:", "###"]
        chat_sep = '\n'
        chat_turn_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
        if making_context:
            # when making context, want it to appear as-if LLM generated, which starts with space after :
            PreResponse = botstr + ' '
    elif prompt_type in [PromptType.mistral.value, str(PromptType.mistral.value),
                         PromptType.mistral.name]:
        promptA = promptB = ''
        PreInput = None
        PreInstruct = "[INST] "
        if making_context and histi == 0 or not making_context and not reduced:
            PreInstruct = '<s>' + PreInstruct
        PreResponse = "[/INST]"
        terminate_response = ["[INST]", "</s>"]
        chat_sep = ' '
        chat_turn_sep = '</s> '
        humanstr = '[INST]'
        botstr = '[/INST]'
        if making_context:
            PreResponse += ""
    elif prompt_type in [PromptType.mixtral.value, str(PromptType.mixtral.value),
                         PromptType.mixtral.name] or \
            prompt_type in [PromptType.mixtralnosys.value, str(PromptType.mixtralnosys.value),
                            PromptType.mixtralnosys.name]:
        if prompt_type in [PromptType.mixtral.value, str(PromptType.mixtral.value),
                           PromptType.mixtral.name]:
            can_handle_system_prompt = True
            if system_prompt in [None, 'None', 'auto']:
                # automatic
                system_prompt = "You are an AI that follows instructions extremely well and as helpful as possible."
            if system_prompt:
                # sys_msg = """<|system|>\n%s""" % system_prompt
                sys_msg = """<<SYS>>\n%s\n<</SYS>>\n\n""" % system_prompt
            else:
                sys_msg = ''
        else:
            sys_msg = ''
        if sys_msg and not reduced:
            # too much safety, hurts accuracy
            promptA = promptB = sys_msg
        else:
            promptA = promptB = ''
        PreInput = None
        PreInstruct = "[INST] "
        if making_context and histi == 0 or not making_context and not reduced:
            PreInstruct = '<s> ' + PreInstruct
        PreResponse = "[/INST]"
        terminate_response = ["[INST]", "</s>"]
        chat_sep = ' '
        chat_turn_sep = '</s> '
        humanstr = '[INST]'
        botstr = '[/INST]'
        if making_context:
            PreResponse += ""
    elif prompt_type in [PromptType.zephyr0.value, str(PromptType.zephyr0.value),
                         PromptType.zephyr0.name]:
        can_handle_system_prompt = True
        # https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha#intended-uses--limitations
        # prompt_template = "<|system|>\n</s>\n<|user|>\n{query}</s>\n<|assistant|>\n"
        if system_prompt in [None, 'None', 'auto']:
            # automatic
            system_prompt = "You are an AI that follows instructions extremely well and as helpful as possible."
        if system_prompt:
            sys_msg = """<|system|>\n%s""" % system_prompt
        else:
            sys_msg = ''
        if sys_msg and not reduced:
            # too much safety, hurts accuracy
            promptA = promptB = sys_msg
        else:
            promptA = promptB = ''
        PreInput = None
        PreInstruct = "\n<|user|>\n"
        PreResponse = "\n<|assistant|>\n"
        terminate_response = ['<|assistant|>', "</s>"]
        chat_sep = '\n'
        chat_turn_sep = '\n'
        humanstr = '<|user|>'
        botstr = '<|assistant|>'
    elif prompt_type in [PromptType.zephyr.value, str(PromptType.zephyr.value),
                         PromptType.zephyr.name]:
        can_handle_system_prompt = True
        # fixed version of zephyr0, and passes tests, but doesn't take system prompt as well
        # https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha#intended-uses--limitations
        # prompt_template = "<|system|>\n</s>\n<|user|>\n{query}</s>\n<|assistant|>\n"
        if system_prompt in [None, 'None', 'auto']:
            # automatic
            system_prompt = "You are an AI that follows instructions extremely well and as helpful as possible."
        if system_prompt:
            sys_msg = """<|system|>\n%s</s>\n""" % system_prompt
        else:
            sys_msg = ''
        if sys_msg and not reduced:
            # too much safety, hurts accuracy
            promptA = promptB = sys_msg
        else:
            promptA = promptB = ''
        PreInput = None
        PreInstruct = "<|user|>\n"
        PreResponse = "</s>\n<|assistant|>\n"
        terminate_response = ['<|assistant|>', "</s>"]
        chat_sep = ''
        chat_turn_sep = '</s>\n'
        humanstr = '<|user|>'
        botstr = '<|assistant|>'
    elif prompt_type in [PromptType.xwin.value, str(PromptType.xwin.value),
                         PromptType.xwin.name]:
        can_handle_system_prompt = True
        # https://huggingface.co/Xwin-LM/Xwin-LM-13B-V0.1#huggingface-example
        if system_prompt in [None, 'None', 'auto']:
            system_prompt = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."
        # space below intended
        preprompt = """%s """ % system_prompt if not reduced else ''
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = """USER: """
        PreInput = None
        PreResponse = """ASSISTANT:"""
        terminate_response = [PreResponse, 'ASSISTANT:', '</s>']
        chat_turn_sep = '\n'  # docs say multi-turn uses </s>, but doesn't work, so use huggingface/vllm example
        chat_sep = '\n'  # docs say multi-turn uses ' ', but doesn't work, so use huggingface/vllm example
        humanstr = PreInstruct
        botstr = PreResponse
        if making_context:
            PreResponse = botstr + ' '
    elif prompt_type in [PromptType.xwincoder.value, str(PromptType.xwincoder.value),
                         PromptType.xwincoder.name]:
        can_handle_system_prompt = True
        # https://github.com/Xwin-LM/Xwin-LM/blob/main/Xwin-Coder/online_chat.py#L38-L48
        if system_prompt in [None, 'None', 'auto']:
            system_prompt = "You are an AI coding assistant that helps people with programming. Write a response that appropriately completes the user's request.\n"
        # space below intended
        preprompt = """<system>: %s\n""" % system_prompt if not reduced else ''
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = """<user>: """
        PreInput = None
        PreResponse = """<AI>:"""
        terminate_response = [PreResponse, '<AI>:', '</s>']
        chat_turn_sep = '\n'  # docs say multi-turn uses </s>, but doesn't work, so use huggingface/vllm example
        chat_sep = '\n'  # docs say multi-turn uses ' ', but doesn't work, so use huggingface/vllm example
        humanstr = PreInstruct
        botstr = PreResponse
        if making_context:
            PreResponse = botstr + ' '
    elif prompt_type in [PromptType.xwinmath.value, str(PromptType.xwinmath.value),
                         PromptType.xwinmath.name]:
        can_handle_system_prompt = True
        # https://huggingface.co/Xwin-LM/Xwin-Math-70B-V1.0#generate
        if system_prompt in [None, 'None', 'auto']:
            system_prompt = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."
        # space below intended
        preprompt = """%s """ % system_prompt if not reduced else ''
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = """USER: """
        PreInput = None
        PreResponse = """Give your solution in detail. In the end, write your final answer in the format of 'The answer is: <ANSWER>.'. ASSISTANT:"""
        terminate_response = [PreResponse, 'ASSISTANT:', '</s>']
        chat_turn_sep = '\n'  # docs say multi-turn uses </s>, but doesn't work, so use huggingface/vllm example
        chat_sep = '\n'  # docs say multi-turn uses ' ', but doesn't work, so use huggingface/vllm example
        humanstr = PreInstruct
        botstr = PreResponse
        if making_context:
            PreResponse = botstr + ' '
    elif prompt_type in [PromptType.mistralgerman.value, str(PromptType.mistralgerman.value),
                         PromptType.mistralgerman.name]:
        can_handle_system_prompt = True
        # https://huggingface.co/TheBloke/em_german_leo_mistral-GPTQ#prompt-template-emgerman
        if system_prompt in [None, 'None', 'auto']:
            system_prompt = "Du bist ein hilfreicher Assistent."
        # space below intended
        preprompt = """%s """ % system_prompt if not reduced else ''
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = """USER: """
        PreInput = None
        PreResponse = """ASSISTANT:"""
        terminate_response = [PreResponse, 'ASSISTANT:', '</s>']
        chat_turn_sep = '\n'
        chat_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
        if making_context:
            PreResponse = botstr + ' '
    elif prompt_type in [PromptType.mistrallite.value, str(PromptType.mistrallite.value),
                         PromptType.mistrallite.name]:
        # From added_tokens.json
        preprompt = ''
        prompt_tokens = "<|prompter|>"
        answer_tokens = "<|assistant|>"
        start = ''
        promptB = promptA = '%s%s' % (preprompt, start)
        PreInstruct = prompt_tokens
        PreInput = None
        PreResponse = answer_tokens
        pend = "<|prefix_end|>"
        eos = "</s>"
        humanstr = prompt_tokens
        botstr = answer_tokens
        terminate_response = [humanstr, PreResponse, pend, eos]
        chat_turn_sep = chat_sep = eos
promptA = promptB = "%s###" % system_prompt if not reduced else '' PreInstruct = """Human: """ PreInput = None PreResponse = """Assistant:""" terminate_response = ['###Human:', "###", "", "[UNK]"] chat_turn_sep = '' # turn-by-turn works with '' too chat_sep = '###' humanstr = PreInstruct botstr = PreResponse if making_context: PreResponse = botstr + ' ' elif prompt_type in [PromptType.aquila_simple.value, str(PromptType.aquila_simple.value), PromptType.aquila_simple.name]: can_handle_system_prompt = True # like aquila but less strictly correct (but less complex) for multi-turn if system_prompt in [None, 'None', 'auto']: system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions." promptA = promptB = "%s" % system_prompt if not reduced else '' PreInstruct = """###Human: """ PreInput = None PreResponse = """###Assistant:""" terminate_response = ['###Human:', "###", "", "[UNK]"] chat_turn_sep = '' chat_sep = '' humanstr = PreInstruct botstr = PreResponse if making_context: PreResponse = botstr + '' elif prompt_type in [PromptType.aquila_legacy.value, str(PromptType.aquila_legacy.value), PromptType.aquila_legacy.name]: can_handle_system_prompt = True if system_prompt in [None, 'None', 'auto']: system_prompt = "A chat between a curious human and an artificial intelligence assistant. " \ "The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n" promptA = promptB = "%s" % system_prompt if not reduced else '' PreInstruct = """### Human: """ PreInput = None PreResponse = """### Assistant:""" terminate_response = ['### Human:', "", "[UNK]"] chat_turn_sep = '' chat_sep = '\n' humanstr = PreInstruct botstr = PreResponse if True: PreResponse = botstr + ' ' elif prompt_type in [PromptType.aquila_v1.value, str(PromptType.aquila_v1.value), PromptType.aquila_v1.name]: promptA = promptB = "" if not reduced else '' PreInstruct = """<|startofpiece|>""" PreInput = None PreResponse = """<|endofpiece|>""" terminate_response = ["", "<|endoftext|>"] chat_turn_sep = '' chat_sep = '' humanstr = PreInstruct botstr = PreResponse if making_context: PreResponse = botstr + '' elif prompt_type in [PromptType.deepseek_coder.value, str(PromptType.deepseek_coder.value), PromptType.deepseek_coder.name]: can_handle_system_prompt = True # https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct if system_prompt in [None, 'None', 'auto']: system_prompt = "<|begin▁of▁sentence|>You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. 
    elif prompt_type in [PromptType.deepseek_coder.value, str(PromptType.deepseek_coder.value),
                         PromptType.deepseek_coder.name]:
        can_handle_system_prompt = True
        # https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct
        if system_prompt in [None, 'None', 'auto']:
            system_prompt = "<|begin▁of▁sentence|>You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\n"
        promptA = promptB = "%s" % system_prompt if not reduced else ''
        PreInput = None
        PreInstruct = "### Instruction:\n"
        PreResponse = "### Response:\n"
        eos = '<|end▁of▁sentence|>'
        terminate_response = [PreResponse, eos, '<|EOT|>']
        chat_sep = '\n'
        chat_turn_sep = '\n<|EOT|>\n'
        humanstr = PreInstruct
        botstr = PreResponse
        if making_context:
            PreResponse += ""
    elif prompt_type in [PromptType.open_chat.value, str(PromptType.open_chat.value),
                         PromptType.open_chat.name] or \
            prompt_type in [PromptType.open_chat_correct.value, str(PromptType.open_chat_correct.value),
                            PromptType.open_chat_correct.name] or \
            prompt_type in [PromptType.open_chat_code.value, str(PromptType.open_chat_code.value),
                            PromptType.open_chat_code.name] or \
            prompt_type in [PromptType.open_chat_math.value, str(PromptType.open_chat_math.value),
                            PromptType.open_chat_math.name]:
        # https://huggingface.co/TheBloke/openchat_3.5-GPTQ#prompt-template-openchat
        # https://github.com/imoneoi/openchat/tree/master#-inference-with-transformers
        # GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi<|end_of_turn|>GPT4 Correct User: How are you today?<|end_of_turn|>GPT4 Correct Assistant:
        # GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:
        # Code User: Implement quicksort using C++<|end_of_turn|>Code Assistant:
        promptA = promptB = ""  # no apparent system prompt
        PreInput = None
        if prompt_type in [PromptType.open_chat.value, str(PromptType.open_chat.value),
                           PromptType.open_chat.name]:
            PreInstruct = "GPT4 User: "
            PreResponse = "GPT4 Assistant:"
        elif prompt_type in [PromptType.open_chat_correct.value, str(PromptType.open_chat_correct.value),
                             PromptType.open_chat_correct.name]:
            PreInstruct = "GPT4 Correct User: "
            PreResponse = "GPT4 Correct Assistant:"
        elif prompt_type in [PromptType.open_chat_math.value, str(PromptType.open_chat_math.value),
                             PromptType.open_chat_math.name]:
            PreInstruct = "Math Correct User: "
            PreResponse = "Math Correct Assistant:"
        else:
            PreInstruct = "Code User: "
            PreResponse = "Code Assistant:"
        eos = '<|end_of_turn|>'
        terminate_response = [PreResponse, eos]
        chat_sep = eos
        chat_turn_sep = eos
        humanstr = PreInstruct
        botstr = PreResponse
        if making_context:
            PreResponse += " "
    elif prompt_type in [PromptType.jais.value, str(PromptType.jais.value),
                         PromptType.jais.name]:
        can_handle_system_prompt = True
        # https://huggingface.co/core42/jais-30b-chat-v1
        if system_prompt in [None, 'None', 'auto']:
            system_prompt = """Your name is Jais, and you are named after Jebel Jais, the highest mountain in UAE. You are built by Core42. You are the world's most advanced Arabic large language model with 30b parameters. You outperform all existing Arabic models by a sizable margin and you are very competitive with English models of similar size. You can answer in Arabic and English only. You are a helpful, respectful and honest assistant. When answering, abide by the following guidelines meticulously: Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, explicit, offensive, toxic, dangerous, or illegal content. Do not give medical, legal, financial, or professional advice. Never assist in or promote illegal activities. Always encourage legal and responsible actions. Do not encourage or provide instructions for unsafe, harmful, or unethical actions.
Do not create or share misinformation or fake news. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. Prioritize the well-being and the moral integrity of users. Avoid using toxic, derogatory, or offensive language. Maintain a respectful tone. Do not generate, promote, or engage in discussions about adult content. Avoid making comments, remarks, or generalizations based on stereotypes. Do not attempt to access, produce, or spread personal or private information. Always respect user confidentiality. Stay positive and do not say bad things about anything. Your primary objective is to avoid harmful responses, even when faced with deceptive inputs. Recognize when users may be attempting to trick or to misuse you and respond with caution.\n\nComplete the conversation below between."""
        promptA = promptB = "### Instruction: %s [|Human|] and [|AI|]:" % system_prompt if not reduced else ''
        PreInstruct = """\n### Input: [|Human|] """
        PreInput = None
        PreResponse = """\n### Response: [|AI|]"""
        if making_context:
            PreResponse += " "
        terminate_response = [PreResponse]
        chat_turn_sep = chat_sep = ''
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.yi.value, str(PromptType.yi.value),
                         PromptType.yi.name]:
        can_handle_system_prompt = True
        # https://huggingface.co/01-ai/Yi-34B-Chat#31-use-the-chat-model
        if system_prompt in [None, 'None', 'auto']:
            system_prompt = "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers."
        promptA = promptB = """<|im_start|>system\n%s<|im_end|>""" % system_prompt if not reduced else ''
        PreInstruct = """\n<|im_start|>user\n"""
        PreInput = None
        PreResponse = """<|im_end|>\n<|im_start|>assistant\n"""
        terminate_response = ['<|im_end|>', '<|endoftext|>']
        chat_sep = ''
        chat_turn_sep = '<|im_end|>'
        humanstr = PreInstruct
        botstr = PreResponse
    elif prompt_type in [PromptType.docsgpt.value, str(PromptType.docsgpt.value),
                         PromptType.docsgpt.name]:
        # https://huggingface.co/Arc53/docsgpt-7b-mistral
        can_handle_system_prompt = True
        if system_prompt in [None, 'None', 'auto']:
            system_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
        promptA = promptB = ''
        PreInstruct = """### Instruction\n"""
        PreInput = None
        PreResponse = """### Answer\n"""
        terminate_response = ['### Answer', '### Instruction']
        chat_turn_sep = chat_sep = '\n'
        humanstr = PreInstruct
        botstr = PreResponse
    else:
        raise RuntimeError("No such prompt_type=%s" % prompt_type)

    if isinstance(terminate_response, (tuple, list)):
        assert '' not in terminate_response, "Bad terminate_response"

    if system_prompt == 'auto':
        # if still auto, then safest to just avoid system prompt
        system_prompt = ''

    ret_dict = dict(promptA=promptA, promptB=promptB,
                    PreInstruct=PreInstruct, PreInput=PreInput, PreResponse=PreResponse,
                    terminate_response=terminate_response,
                    chat_sep=chat_sep, chat_turn_sep=chat_turn_sep,
                    humanstr=humanstr, botstr=botstr,
                    generates_leading_space=generates_leading_space,
                    system_prompt=system_prompt,
                    can_handle_system_prompt=can_handle_system_prompt,
                    )

    if return_dict:
        return ret_dict, prompt_dict_error
    else:
        return tuple(list(ret_dict.values()))
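# Illustrative usage sketch (comment only; argument values are assumptions, not defaults):
#   ret_dict, error = get_prompt('llama2', '', '', False, False, return_dict=True)
#   # ret_dict['PreInstruct'] starts a turn with '<s>[INST] ' (plus the <<SYS>> block on turn 0),
#   # and ret_dict['terminate_response'] lists the stop strings to use during generation.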
def generate_prompt(data_point, prompt_type, prompt_dict, reduced, making_context,
                    system_prompt=None, histi=-1):
    context = data_point.get('context')
    if context is None:
        context = ''
    instruction = data_point.get('instruction')
    input = data_point.get('input')
    output = data_point.get('output')
    prompt_type = data_point.get('prompt_type', prompt_type)
    prompt_dict = data_point.get('prompt_dict', prompt_dict)
    assert prompt_type in prompt_types, "Bad prompt type: %s" % prompt_type
    promptA, promptB, PreInstruct, PreInput, PreResponse, \
        terminate_response, chat_sep, chat_turn_sep, humanstr, botstr, \
        generates_leading_space, system_prompt, can_handle_system_prompt = \
        get_prompt(prompt_type, prompt_dict, context, reduced, making_context,
                   system_prompt=system_prompt, histi=histi)

    # could avoid if reduce=True, but too complex for parent functions to handle
    prompt = context

    if input and promptA:
        prompt += f"""{promptA}"""
    elif promptB:
        prompt += f"""{promptB}"""

    if instruction and PreInstruct is not None and input and PreInput is not None:
        prompt += f"""{PreInstruct}{instruction}{PreInput}{input}"""
        prompt = inject_chatsep(prompt_type, prompt, chat_sep=chat_sep)
    elif instruction and input and PreInstruct is None and PreInput is not None:
        prompt += f"""{PreInput}{instruction}
{input}"""
        prompt = inject_chatsep(prompt_type, prompt, chat_sep=chat_sep)
    elif input and instruction and PreInput is None and PreInstruct is not None:
        prompt += f"""{PreInstruct}{instruction}
{input}"""
        prompt = inject_chatsep(prompt_type, prompt, chat_sep=chat_sep)
    elif instruction and PreInstruct is not None:
        prompt += f"""{PreInstruct}{instruction}"""
        prompt = inject_chatsep(prompt_type, prompt, chat_sep=chat_sep)
    elif input and PreInput is not None:
        prompt += f"""{PreInput}{input}"""
        prompt = inject_chatsep(prompt_type, prompt, chat_sep=chat_sep)
    elif input and instruction and PreInput is not None:
        prompt += f"""{PreInput}{instruction}{input}"""
        prompt = inject_chatsep(prompt_type, prompt, chat_sep=chat_sep)
    elif input and instruction and PreInstruct is not None:
        prompt += f"""{PreInstruct}{instruction}{input}"""
        prompt = inject_chatsep(prompt_type, prompt, chat_sep=chat_sep)
    elif input and instruction:
        # i.e. for simple_instruct
        prompt += f"""{instruction}: {input}"""
        prompt = inject_chatsep(prompt_type, prompt, chat_sep=chat_sep)
    elif input:
        prompt += f"""{input}"""
        prompt = inject_chatsep(prompt_type, prompt, chat_sep=chat_sep)
    elif instruction:
        prompt += f"""{instruction}"""
        prompt = inject_chatsep(prompt_type, prompt, chat_sep=chat_sep)

    if PreResponse is not None:
        prompt += f"""{PreResponse}"""
        pre_response = PreResponse  # Don't use strip
    else:
        pre_response = ''

    if output:
        prompt += f"""{output}"""

    return prompt, pre_response, terminate_response, chat_sep, chat_turn_sep


def inject_chatsep(prompt_type, prompt, chat_sep=None):
    if chat_sep:
        # only add new line if structured prompt, while 'plain' is just generation of next tokens from input
        prompt += chat_sep
    return prompt
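# Illustrative usage sketch (comment only; the data_point field values are assumptions):
#   data_point = dict(instruction='Summarize the input.', input='Some long text', output=None)
#   prompt, pre_response, terminate_response, chat_sep, chat_turn_sep = \
#       generate_prompt(data_point, 'instruct', None, False, False)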
class Prompter(object):
    def __init__(self, prompt_type, prompt_dict, debug=False, stream_output=False,
                 repeat_penalty=False, allowed_repeat_line_length=10, system_prompt=None):
        self.prompt_type = prompt_type
        self.prompt_dict = prompt_dict
        self.debug = debug
        self.stream_output = stream_output
        self.repeat_penalty = repeat_penalty
        self.allowed_repeat_line_length = allowed_repeat_line_length
        self.prompt = None
        self.system_prompt = system_prompt
        context = ""  # not for chat context
        reduced = False  # not for chat context
        making_context = False  # not for chat context
        self.promptA, self.promptB, self.PreInstruct, self.PreInput, self.PreResponse, \
            self.terminate_response, self.chat_sep, self.chat_turn_sep, self.humanstr, self.botstr, \
            self.generates_leading_space, self.system_prompt, self.can_handle_system_prompt = \
            get_prompt(self.prompt_type, self.prompt_dict, context, reduced, making_context,
                       system_prompt=system_prompt)
        self.pre_response = self.PreResponse

    @property
    def stop_sequences(self):
        terminate_response = self.terminate_response or []
        stop_sequences = list(set(terminate_response + [self.PreResponse]))
        stop_sequences = [x for x in stop_sequences if x]
        return stop_sequences

    def generate_prompt(self, data_point, reduced=False, context_from_history=None):
        """
        data_point['context'] is assumed to be like a system prompt or pre-conversation, not inserted after user prompt

        :param data_point:
        :param reduced:
        :param context_from_history: whether context is from reduced=True version of history in prompt form
               In which case we need to put promptA at very front to recover correct behavior
        :return:
        """
        if context_from_history is None and data_point.get('context'):
            context_from_history = True
            reduced = True
        making_context = False  # whether really making final prompt or just generating context
        prompt, _, _, _, _ = generate_prompt(data_point, self.prompt_type, self.prompt_dict, reduced,
                                             making_context, histi=-1, system_prompt=self.system_prompt)
        if self.debug:
            print("prompt: %s" % prompt, flush=True)
        # if have context, should have always reduced and only preappend promptA/B here
        if data_point.get('context') and context_from_history:
            if data_point.get('input') and self.promptA:
                prompt = self.promptA + prompt
            elif self.promptB:
                prompt = self.promptB + prompt
        self.prompt = prompt
        return prompt

    def get_response(self, outputs, prompt=None, sanitize_bot_response=False, only_new_text=False,
                     plain_prompt_special=False):
        if isinstance(outputs, str):
            outputs = [outputs]
        if self.debug:
            print("output:\n%s" % '\n\n'.join(outputs), flush=True)
        if prompt is not None:
            self.prompt = prompt

        def clean_response(response):
            meaningless_words = ['<s>', '</s>', '<|endoftext|>']
            for word in meaningless_words:
                response = response.replace(word, "")
            if sanitize_bot_response:
                # from better_profanity import profanity
                # response = profanity.censor(response)
                pass
            if self.generates_leading_space and isinstance(response, str) and len(response) > 0 and response[0] == ' ':
                response = response[1:]
            return response

        def clean_repeats(response):
            lines = response.split('\n')
            new_lines = []
            [new_lines.append(line) for line in lines if
             line not in new_lines or len(line) < self.allowed_repeat_line_length]
            if self.debug and len(lines) != len(new_lines):
                print("cleaned repeats: %s %s" % (len(lines), len(new_lines)), flush=True)
            response = '\n'.join(new_lines)
            return response

        multi_output = len(outputs) > 1

        for oi, output in enumerate(outputs):
            if plain_prompt_special and \
                    self.prompt_type in [PromptType.plain.value, str(PromptType.plain.value), PromptType.plain.name]:
                output = clean_response(output)
                allow_terminate = True
            elif only_new_text:
                # only use terminate, that will have other variations of cleaning that include \n etc. not just simple human bot that will leave residual \n
                allow_terminate = True
            elif prompt is None:
                allow_terminate = True
                # then use most basic parsing like pipeline
                if not self.botstr:
                    pass
                else:
                    if self.humanstr:
                        output = clean_response(output.split(self.botstr)[-1].split(self.humanstr)[0])
                    else:
                        # i.e. use after bot but only up to next bot
                        output = clean_response(output.split(self.botstr)[-1].split(self.botstr)[0])
            else:
                # find first instance of pre_response
                # prompt sometimes has odd characters, that mutate length,
                # so can't go by length alone
                if self.pre_response:
                    outputi = output.find(prompt)
                    if outputi >= 0:
                        output = output[outputi + len(prompt):]
                        allow_terminate = True
                    else:
                        # subtraction is risky due to space offsets sometimes, so only do if necessary
                        output = output[len(prompt) - len(self.pre_response):]
                        # [1] to avoid repeated pre_response, just take first (after prompt - pre_response for chat)
                        if self.pre_response in output:
                            output = output.split(self.pre_response)[1]
                            allow_terminate = True
                        else:
                            if output:
                                print("Failure of parsing or not enough output yet: %s" % output, flush=True)
                            allow_terminate = False
                else:
                    allow_terminate = True
                    output = output[len(prompt):]
                # clean after subtract prompt out, so correct removal of pre_response
                output = clean_response(output)
            if self.repeat_penalty:
                output = clean_repeats(output)
            if self.terminate_response and allow_terminate:
                finds = []
                for term in self.terminate_response:
                    finds.append(output.find(term))
                finds = [x for x in finds if x >= 0]
                if len(finds) > 0:
                    termi = finds[0]
                    output = output[:termi]
                else:
                    output = output
            if multi_output:
                # prefix with output counter
                output = "\n=========== Output %d\n\n" % (1 + oi) + output
                if oi > 0:
                    # post fix outputs with separator
                    output += '\n'
            output = self.fix_text(self.prompt_type, output)
            outputs[oi] = output
        # join all outputs, only one extra new line between outputs
        output = '\n'.join(outputs)
        if self.debug:
            print("outputclean:\n%s" % '\n\n'.join(outputs), flush=True)
        return output

    @staticmethod
    def fix_text(prompt_type1, text1):
        if prompt_type1 == 'human_bot':
            # hack bug in vLLM with stopping, stops right, but doesn't return last token
            hfix = '