|
|
|
import os |
|
|
|
|
|
|
|
# Title markup for the demo: a single <h1> element.
MODEL_TITLE = "<h1>Multi-Purpose Chatbot</h1>"

# HTML description: a GitHub badge link, a one-line summary of the features,
# and a notice that the demo runs with BACKEND=debug (no LLM behind it).
# A plain string is used because the text contains no placeholders; the
# surrounding newlines are removed by .strip().
MODEL_DESC = """
<div style='display:flex; gap: 0.25rem; '>
<a href='https://github.com/DAMO-NLP-SG/Multipurpose-Chatbot'><img src='https://img.shields.io/badge/Github-Code-success'></a>
</div>
<span style="font-size: larger">
A multi-purpose helpful assistant with multiple functionalities (Chat, text-completion, RAG chat, batch inference).
</span>
<br>
<span>
This DEMO is on `export BACKEND=debug` to demonstrate the UI - there is NO LLM running and it always return "Wow that's very very cool, please try again."
</span>
""".strip()
|
|
|
|
|
|
|
# Template for the model-name banner. It contains a single ``{model_path}``
# placeholder; presumably filled in with ``str.format`` by the UI code
# (the caller is not visible in this section - confirm).
MODEL_INFO = """
<h4>Model Name: {model_path} (debug mode)</h4>
"""

# Markdown citation section with a fenced BibTeX entry. The braces here are
# literal BibTeX syntax, so this string must NOT be passed through
# ``str.format``.
CITE_MARKDOWN = """
## Citation
If you find our project useful, hope you can star our repo and cite our repo as follows:
```
@article{multipurpose_chatbot_2024,
author = {Xuan-Phi Nguyen, },
title = {Multipurpose Chatbot},
year = 2024,
}
```
"""
|
|
|
# UI options, each overridable through an environment variable of the same
# name.
# USE_PANEL is parsed as an integer first and then converted to bool, so the
# variable must hold a numeric string ("0" disables, any non-zero enables);
# values such as "true" raise ValueError at import time.
USE_PANEL = bool(int(os.environ.get("USE_PANEL", "1")))
# Integer height setting for the chatbot widget (default 500; the unit is not
# stated in this section - presumably pixels, confirm against the UI code).
CHATBOT_HEIGHT = int(os.environ.get("CHATBOT_HEIGHT", "500"))

# Starts empty; nothing in this section adds entries to it.
ALLOWED_PATHS = []
|
|
|
|
|
# Names of the demos to enable, taken from the comma-separated DEMOS
# environment variable. Each entry is stripped of surrounding whitespace and
# empty entries are dropped, so values like "A, B" or "A,,B" yield clean
# names. When the variable is unset, blank, or contains no usable name, the
# built-in default list below is used instead.
DEMOS = [
    demo_name.strip()
    for demo_name in os.environ.get("DEMOS", "").split(",")
    if demo_name.strip()
] or [
    "DocChatInterfaceDemo",
    "ChatInterfaceDemo",
    "TextCompletionDemo",
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Integer port number from the PORT environment variable (default 7860).
PORT = int(os.environ.get("PORT", "7860"))
# PROXY value with surrounding whitespace removed; empty string when unset.
PROXY = os.environ.get("PROXY", "").strip()

# Name of the backend to use. Defaults to "debug"; the set of other accepted
# values is not defined in this section.
BACKEND = os.environ.get("BACKEND", "debug")
|
|
|
|
|
|
|
# Retrieval (RAG) settings, each overridable through an environment variable
# of the same name.
RAG_EMBED_MODEL_NAME = os.environ.get("RAG_EMBED_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2")
CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "1024"))
# Read from CHUNK_OVERLAP, not CHUNK_SIZE: reading CHUNK_SIZE here would make
# the overlap silently equal to the chunk size whenever CHUNK_SIZE is set, and
# would leave no way to configure the overlap on its own.
CHUNK_OVERLAP = int(os.environ.get("CHUNK_OVERLAP", "50"))
|
|
|
|
|
# Default system prompt text; replaceable via the SYSTEM_PROMPT environment
# variable.
SYSTEM_PROMPT = os.environ.get(
    "SYSTEM_PROMPT",
    "You are a helpful, respectful, honest and safe AI assistant.",
)

# Generation defaults, parsed from environment variables of the same name.
# A non-numeric value raises ValueError at import time.
MAX_TOKENS = int(os.environ.get("MAX_TOKENS", "2048"))
TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.7"))

# NOTE: the spelling "FREQUENCE_PENALTY" is kept as-is for both the constant
# and the environment variable, since code outside this section may refer to
# either name.
FREQUENCE_PENALTY = float(os.environ.get("FREQUENCE_PENALTY", "0.0"))
PRESENCE_PENALTY = float(os.environ.get("PRESENCE_PENALTY", "0.0"))
|
|
|
|
|
|
|
# Model selection and runtime settings. Every value comes from an environment
# variable of the same name, with the default shown; numeric values are parsed
# at import time and raise ValueError when malformed. How each setting is
# consumed is decided by code outside this section.
MODEL_PATH = os.environ.get("MODEL_PATH", "teknium/OpenHermes-2.5-Mistral-7B")
MODEL_NAME = os.environ.get("MODEL_NAME", "Cool-Chatbot")
DTYPE = os.environ.get("DTYPE", "bfloat16")
DEVICE = os.environ.get("DEVICE", "cuda")

GPU_MEMORY_UTILIZATION = float(os.environ.get("GPU_MEMORY_UTILIZATION", "0.9"))
TENSOR_PARALLEL = int(os.environ.get("TENSOR_PARALLEL", "1"))
# os.environ.get with a str default already returns a str, so no extra
# conversion is needed; empty string when unset.
QUANTIZATION = os.environ.get("QUANTIZATION", "")
STREAM_YIELD_MULTIPLE = int(os.environ.get("STREAM_YIELD_MULTIPLE", "1"))

STREAM_CHECK_MULTIPLE = int(os.environ.get("STREAM_CHECK_MULTIPLE", "0"))

DEFAULT_CHAT_TEMPLATE = os.environ.get("DEFAULT_CHAT_TEMPLATE", "chatml")
N_CTX = int(os.environ.get("N_CTX", "4096"))
# Default is -1; NOTE(review): presumably a sentinel for "all layers" in the
# consuming backend - confirm there.
N_GPU_LAYERS = int(os.environ.get("N_GPU_LAYERS", "-1"))
|
|
|
|
|
|
|
|
|
|
|
|
|
# Image-input settings, each overridable through an environment variable of
# the same name.
# Placeholder string for an image (default "<image>").
IMAGE_TOKEN = os.environ.get("IMAGE_TOKEN", "<image>")
# Parsed as an integer and then converted to bool: "0" disables, any non-zero
# numeric string enables; non-numeric values raise ValueError at import time.
IMAGE_TOKEN_INTERACTIVE = bool(int(os.environ.get("IMAGE_TOKEN_INTERACTIVE", "0")))

IMAGE_TOKEN_LENGTH = int(os.environ.get("IMAGE_TOKEN_LENGTH", "576"))

# NOTE: the spelling "MAX_PACHES" is kept as-is for both the constant and the
# environment variable, since code outside this section may refer to either.
MAX_PACHES = int(os.environ.get("MAX_PACHES", "1"))
|
|