import os

# ! UI Markdown information

MODEL_TITLE = """
SeaLMMM - Large Multilingual Multimodal Models for Southeast Asia
"""

MODEL_DESC = f"""
SeaLMMM-7B - a multilingual multimodal assistant for Southeast Asia. It handles both text-only (LLM) and vision (LVM) instructions. Note that SeaLMMM-7B has not finished training.
This UI is powered by the Multipurpose-Chatbot project. The chatbot may produce false and harmful content!
By using our service, you agree to our Terms of Use, which includes not using our service to generate any harmful, inappropriate or illegal content. The service collects user dialogue data for testing and improvement under a (CC-BY) or similar license, so do not enter any personal information!
""".strip()
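# Usage sketch (an assumption, not part of this module): the UI layer of the
# Multipurpose-Chatbot project presumably renders these constants as Gradio
# Markdown components, roughly like:
#   import gradio as gr
#   with gr.Blocks() as demo:
#       gr.Markdown(MODEL_TITLE)
#       gr.Markdown(MODEL_DESC)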
# """ MODEL_INFO = "" CITE_MARKDOWN = """ ## Citation If you find our project useful, hope you can star our repo and cite our paper as follows: ``` @article{damonlpsg2023seallm, author = {Xuan-Phi Nguyen*, Wenxuan Zhang*, Xin Li*, Mahani Aljunied*, Zhiqiang Hu, Chenhui Shen^, Yew Ken Chia^, Xingxuan Li, Jianyu Wang, Qingyu Tan, Liying Cheng, Guanzheng Chen, Yue Deng, Sen Yang, Chaoqun Liu, Hang Zhang, Lidong Bing}, title = {SeaLLMs - Large Language Models for Southeast Asia}, year = 2023, } ``` """ CSS = """ .message-wrap.svelte-1lcyrx4>div.svelte-1lcyrx4 img { min-width: 200px; min-height: 150px; max-height: 600px; max-width; 90%; width: auto; object-fit: contain; } .panel-full-width.svelte-1lcyrx4.svelte-1lcyrx4.svelte-1lcyrx4 { padding: calc(var(--spacing-xxl) * 1); width: 100% } .panel-full-width { padding: calc(var(--spacing-xxl) * 1); width: 100% } img.tag { max-height: 1.5em; width: auto; } span.prose { font-size: var(--text-lg); } """ # .message-wrap.svelte-1lcyrx4>div.svelte-1lcyrx4 img { # min-width: 200px; # } # .panel-full-width.svelte-1lcyrx4.svelte-1lcyrx4.svelte-1lcyrx4 { # padding: calc(var(--spacing-xxl) * 1); # width: 100% # } USE_PANEL = bool(int(os.environ.get("USE_PANEL", "1"))) CHATBOT_HEIGHT = int(os.environ.get("CHATBOT_HEIGHT", "500")) ALLOWED_PATHS = ["seammm_2.png"] DEMOS = os.environ.get("DEMOS", "") DEMOS = DEMOS.split(",") if DEMOS.strip() != "" else [ "DocChatInterfaceDemo", "ChatInterfaceDemo", "TextCompletionDemo", # "RagChatInterfaceDemo", # "VisionChatInterfaceDemo", # "VisionDocChatInterfaceDemo", ] # DEMOS=DocChatInterfaceDemo,ChatInterfaceDemo,RagChatInterfaceDemo,TextCompletionDemo # ! server info DELETE_FOLDER = os.environ.get("DELETE_FOLDER", "") PORT = int(os.environ.get("PORT", "7860")) PROXY = os.environ.get("PROXY", "").strip() # ! backend info BACKEND = os.environ.get("BACKEND", "debug") # ! model information # for RAG RAG_EMBED_MODEL_NAME = os.environ.get("RAG_EMBED_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2") CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "1024")) CHUNK_OVERLAP = int(os.environ.get("CHUNK_SIZE", "50")) SYSTEM_PROMPT = os.environ.get("SYSTEM_PROMPT", """You are a helpful, respectful, honest and safe AI assistant.""") MAX_TOKENS = int(os.environ.get("MAX_TOKENS", "2048")) TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.1")) # ! 
# ! these values are currently not used
FREQUENCE_PENALTY = float(os.environ.get("FREQUENCE_PENALTY", "0.0"))
PRESENCE_PENALTY = float(os.environ.get("PRESENCE_PENALTY", "0.0"))

# Transformers or vllm
MODEL_PATH = os.environ.get("MODEL_PATH", "mistralai/Mistral-7B-Instruct-v0.2")
MODEL_NAME = os.environ.get("MODEL_NAME", "Cool-Chatbot")
DTYPE = os.environ.get("DTYPE", "bfloat16")
DEVICE = os.environ.get("DEVICE", "cuda")

# vllm
GPU_MEMORY_UTILIZATION = float(os.environ.get("GPU_MEMORY_UTILIZATION", "0.9"))
TENSOR_PARALLEL = int(os.environ.get("TENSOR_PARALLEL", "1"))
QUANTIZATION = str(os.environ.get("QUANTIZATION", ""))
STREAM_YIELD_MULTIPLE = int(os.environ.get("STREAM_YIELD_MULTIPLE", "1"))
# how many iterations to perform safety check on response
STREAM_CHECK_MULTIPLE = int(os.environ.get("STREAM_CHECK_MULTIPLE", "0"))

# llama.cpp
DEFAULT_CHAT_TEMPLATE = os.environ.get("DEFAULT_CHAT_TEMPLATE", "chatml")
N_CTX = int(os.environ.get("N_CTX", "4096"))
N_GPU_LAYERS = int(os.environ.get("N_GPU_LAYERS", "-1"))

# llava.llama.cpp
# Multimodal
# IMAGE_TOKEN = os.environ.get("IMAGE_TOKEN", "[IMAGE]<|image|>[/IMAGE]")
IMAGE_TOKEN = os.environ.get("IMAGE_TOKEN", "<|image|>")
IMAGE_TOKEN_INTERACTIVE = bool(int(os.environ.get("IMAGE_TOKEN_INTERACTIVE", "0")))
IMAGE_TOKEN_LENGTH = int(os.environ.get("IMAGE_TOKEN_LENGTH", "576"))
MAX_PACHES = int(os.environ.get("MAX_PACHES", "1"))
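# Illustrative sketch (an assumption, not the project's actual loader) of how a
# Transformers backend might consume MODEL_PATH / DTYPE / DEVICE above:
#   import torch
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#   tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
#   model = AutoModelForCausalLM.from_pretrained(
#       MODEL_PATH, torch_dtype=getattr(torch, DTYPE)
#   ).to(DEVICE)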