tuxedocat commited on
Commit
a7566b2
0 Parent(s):
Files changed (8) hide show
  1. .dockerignore +6 -0
  2. .gitattributes +35 -0
  3. .gitignore +1 -0
  4. Dockerfile +28 -0
  5. README.md +17 -0
  6. app.py +175 -0
  7. const.py +79 -0
  8. requirements.txt +3 -0
.dockerignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .env
2
+ .env.*
3
+ *.json
4
+ .venv/
5
+ __pycache__/
6
+ *.pyc
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv/
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Stage 1: fetch the prebuilt app asset from a private GCS bucket ---
FROM google/cloud-sdk:slim AS retriever
# Get service account key from HF Spaces' secrets
# https://huggingface.co/docs/hub/spaces-sdks-docker#buildtime
# Secrets are mounted only for this RUN step and the key file is deleted
# afterwards, so it never persists in this stage's filesystem.
RUN --mount=type=secret,id=BUILD_CREDENTIALS,mode=0444,required=true \
    --mount=type=secret,id=BUILD_ASSET_BUCKET,mode=0444,required=true \
    --mount=type=secret,id=BUILD_ASSET_NAME,mode=0444,required=true \
    cat /run/secrets/BUILD_CREDENTIALS > /tmp/creds.json && \
    /bin/gcloud auth activate-service-account --key-file=/tmp/creds.json > /dev/null 2>&1 && \
    GOOGLE_APPLICATION_CREDENTIALS=/tmp/creds.json /bin/gcloud storage cp gs://$(cat /run/secrets/BUILD_ASSET_BUCKET)/$(cat /run/secrets/BUILD_ASSET_NAME) /tmp/ > /dev/null 2>&1 && \
    rm /tmp/creds.json

# --- Stage 2: runtime image serving the Gradio app as a non-root user ---
FROM python:3.11-slim AS gradio
RUN useradd -m -u 1000 app
USER app
ENV HOME=/home/app \
    PATH=/home/app/.local/bin:$PATH
WORKDIR ${HOME}
# Only the downloaded release tarball is carried over from stage 1.
COPY --from=retriever /tmp/*_linux_amd64.tar.gz ${HOME}/
RUN tar -xf *_linux_amd64.tar.gz && rm *_linux_amd64.tar.gz

COPY . .
RUN pip install --no-cache-dir -r requirements.txt
# NOTE(review): this writes the LLM credential into an image layer at build
# time; confirm this is acceptable for the Space (a runtime secret would
# avoid baking it into the image).
RUN --mount=type=secret,id=LLM_CREDENTIALS,mode=0444,required=true \
    cat /run/secrets/LLM_CREDENTIALS > ${HOME}/credentials.json
EXPOSE 7860

# Bind to all interfaces so the Space's proxy can reach the server.
ENV GRADIO_SERVER_NAME="0.0.0.0"
CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: CALM3-22B-Chat-Demo
3
+ emoji: 😌
4
+ colorFrom: green
5
+ colorTo: yellow
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ ---
10
+
11
+ # Model
12
+
13
+ See [CyberAgentLM3-22B-Chat](https://huggingface.co/cyberagent/calm3-22b-chat) for details and licensing information to use the model.
14
+
15
+ # Terms of Use
16
+
17
+ See the App tab.
app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+ import gradio as gr
3
+ import httpx
4
+ import subprocess
5
+ import os
6
+ from openai import OpenAI
7
+
8
+ from const import (
9
+ LLM_BASE_URL,
10
+ AUTH_CMD,
11
+ SYSTEM_PROMPTS,
12
+ EXAMPLES,
13
+ CSS,
14
+ HEADER,
15
+ FOOTER,
16
+ PLACEHOLDER,
17
+ ModelInfo,
18
+ MODELS,
19
+ )
20
+
21
+
22
def get_token() -> str:
    """Return the bearer token printed by the external ``AUTH_CMD`` command.

    The command is run with the current process environment (const.py exports
    the SECRET_PREFIX-namespaced variables it needs).

    Returns:
        The token string with surrounding whitespace stripped.

    Raises:
        ValueError: if the command cannot be run, exits non-zero, or prints
            nothing.
    """
    try:
        proc = subprocess.run(
            AUTH_CMD,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            env=os.environ.copy(),
            # Original ignored the exit status, so a failed command silently
            # produced an empty token; surface it instead.
            check=True,
        )
    except (OSError, subprocess.SubprocessError) as err:
        # Chain the cause so the real failure is visible in tracebacks.
        raise ValueError("Failed to get auth token") from err
    token = proc.stdout.decode("utf-8").strip()
    if not token:
        # Explicit check: `assert` is stripped under `python -O`.
        raise ValueError("Failed to get auth token")
    return token
38
+
39
+
40
def get_headers(host: str) -> dict:
    """Build the headers for a proxied LLM request: JSON content negotiation,
    an explicit Host override, and a freshly fetched bearer token."""
    headers = {
        "Host": host,
        "Accept": "application/json",
        "Content-Type": "application/json",
    }
    # Fetch the token last so header construction stays cheap if it raises.
    headers["Authorization"] = f"Bearer {get_token()}"
    return headers
47
+
48
+
49
def proxy(request: httpx.Request, model_info: ModelInfo) -> httpx.Request:
    """httpx request hook: retarget an outgoing request at the model's
    endpoint path and attach the auth/Host headers it requires."""
    rewritten = request.url.copy_with(path=model_info.endpoint)
    request.url = rewritten
    for key, value in get_headers(host=model_info.host).items():
        request.headers[key] = value
    return request
53
+
54
+
55
def call_llm(
    message: str,
    history: list,
    model_name: str,
    system_prompt: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream an assistant reply for *message* given the chat *history*.

    Yields the accumulated reply text after each streamed chunk so the UI
    renders the response progressively.

    Args:
        message: Latest user message.
        history: Prior turns; supports both gradio's tuple pairs
            ``(user, assistant)`` and ``{"role", "content"}`` dicts.
        model_name: Key into ``MODELS``.
        system_prompt: Key into ``SYSTEM_PROMPTS``.
        max_tokens / temperature / top_p: Sampling parameters.
    """
    system_prompt_text = SYSTEM_PROMPTS[system_prompt]
    # Always lead with the system prompt. The original only sent it when
    # history was empty, so the model lost its instructions on every
    # follow-up turn.
    messages = [{"role": "system", "content": system_prompt_text}]
    for turn in history:
        if isinstance(turn, dict):
            # Messages-style history entry, already role/content shaped.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Tuple-style history entry: (user_text, assistant_text).
            human, assistant = turn
            messages.append({"role": "user", "content": human})
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    model_info = MODELS[model_name]
    client = OpenAI(
        api_key="",  # auth is injected per-request by the proxy hook below
        base_url=LLM_BASE_URL,
        http_client=httpx.Client(
            event_hooks={
                # Rewrites path and headers of every outgoing request.
                "request": [partial(proxy, model_info=model_info)],
            },
            # NOTE(review): TLS verification is disabled — presumably the
            # gateway uses an internal certificate; confirm, and prefer a
            # custom CA bundle over verify=False if possible.
            verify=False,
        ),
    )

    stream = client.chat.completions.create(
        model=f"/data/cyberagent/{model_info.name}",
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        n=1,
        stream=True,
        extra_body={"repetition_penalty": 1.1},
    )

    # Accumulate chunks into the full reply; a separate name avoids
    # shadowing the `message` parameter as the original did.
    reply = ""
    for chunk in stream:
        reply += chunk.choices[0].delta.content or ""
        yield reply
107
+
108
+
109
def run():
    """Build and launch the Gradio chat UI.

    Wires `call_llm` into a ChatInterface with hidden model/system-prompt
    selectors and user-tunable sampling sliders, then serves it.
    """
    chatbot = gr.Chatbot(
        elem_id="chatbot",  # matched by the #chatbot rule in CSS
        scale=1,
        show_copy_button=True,
        placeholder=PLACEHOLDER,
        layout="panel",
    )
    # Fix: CSS was imported and targets #chatbot but was never passed to
    # Blocks, so the custom chatbot sizing rules were silently dropped.
    with gr.Blocks(fill_height=True, css=CSS) as demo:
        gr.Markdown(HEADER)
        gr.ChatInterface(
            fn=call_llm,
            stop_btn="Stop Generation",
            examples=EXAMPLES,
            cache_examples=False,
            multimodal=False,
            chatbot=chatbot,
            additional_inputs_accordion=gr.Accordion(
                label="Parameters", open=False, render=False
            ),
            additional_inputs=[
                # Model and system prompt are fixed (single choice) and
                # hidden from the user; they still feed call_llm's signature.
                gr.Dropdown(
                    choices=list(MODELS.keys()),
                    value=list(MODELS.keys())[0],
                    label="Model",
                    visible=False,
                ),
                gr.Dropdown(
                    choices=list(SYSTEM_PROMPTS.keys()),
                    value=list(SYSTEM_PROMPTS.keys())[0],
                    label="System Prompt",
                    visible=False,
                ),
                gr.Slider(
                    minimum=32,
                    maximum=4096,
                    step=1,
                    value=1024,
                    label="Max tokens",
                    render=False,
                ),
                gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=0.3,
                    label="Temperature",
                    render=False,
                ),
                gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=1.0,
                    label="Top-p",
                    render=False,
                ),
            ],
            analytics_enabled=False,
        )
        gr.Markdown(FOOTER)
    # Bound queue keeps memory in check under load; API surface stays closed.
    demo.queue(max_size=256, api_open=False)
    demo.launch(share=False, quiet=True)
172
+
173
+
174
if __name__ == "__main__":
    # Launch the Gradio demo when executed as a script (Dockerfile CMD).
    run()
const.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from dataclasses import dataclass
4
+
5
# SECRET_PREFIX namespaces the environment variables re-exported below for
# the external auth command (see AUTH_CMD).
SECRET_PREFIX = os.environ.get("SECRET_PREFIX")
if SECRET_PREFIX is None:
    raise ValueError("SECRET_PREFIX is not set")

PROJECT_ID = os.environ.get("PROJECT_ID")
ROLE_SUBJECT = os.environ.get("ROLE_SUBJECT")
CREDENTIALS = os.environ.get("CREDENTIALS")
# Fail fast with a clear message: assigning None into os.environ would
# otherwise raise an opaque TypeError at import time.
for _name, _value in (
    ("PROJECT_ID", PROJECT_ID),
    ("ROLE_SUBJECT", ROLE_SUBJECT),
    ("CREDENTIALS", CREDENTIALS),
):
    if _value is None:
        raise ValueError(f"{_name} is not set")
    os.environ[SECRET_PREFIX + _name] = _value
del _name, _value

ROOT_DIR = Path(__file__).parent.absolute()
# Tokenized command line run by app.get_token(); an unset AUTH_CMD yields
# the degenerate [""] command.
AUTH_CMD = os.environ.get("AUTH_CMD", "").split(" ")

# Upstream LLM gateway configuration; may be None if unset (consumed by
# app.py when building requests).
LLM_BASE_URL = os.environ.get("LLM_BASE_URL")
LLM_ENDPOINT = os.environ.get("LLM_ENDPOINT")
LLM_HOST = os.environ.get("LLM_HOST")
22
+
23
+
24
@dataclass
class ModelInfo:
    """Routing metadata for one served model."""

    # Model identifier; also used as the dropdown label and model-path suffix.
    name: str
    # Request path substituted into outgoing requests (see app.proxy).
    endpoint: str
    # Host header value the serving gateway expects.
    host: str
29
+
30
+
31
# All deployable models; _MODELS is the single source of truth.
_MODELS = [
    ModelInfo(
        name="calm3-22b-chat",
        endpoint=LLM_ENDPOINT,
        host=LLM_HOST,
    ),
]
# Lookup table used by the UI: model name -> ModelInfo.
MODELS = {info.name: info for info in _MODELS}
33
+
34
# Selectable system prompts: dropdown label -> prompt text.
SYSTEM_PROMPTS = {
    "assistant": "あなたは親切なAIアシスタントです。",
}

# Example prompts shown under the chat box (one-element rows, text-only).
EXAMPLES = [
    [
        "サイバーエージェントってどんな会社?",
    ],
    [
        "AIの進化で人類の暮らしはどうなると思いますか?",
    ],
    [
        "大規模言語モデルの仕組みについて詳しく説明して。",
    ],
    [
        "大規模言語モデルの仕組みについて、子供でもわかるように易しく説明して。",
    ],
]

# Markdown rendered above the chat interface.
HEADER = """
# CALM3-22B-Chat
"""

# Markdown rendered below the chat interface.
FOOTER = """
## Terms of Use
Please note that by using this service, you agree to the following terms: This model is provided for research purposes only. CyberAgent expressly disclaim any liability for direct, indirect, special, incidental, or consequential damages, as well as for any losses that may result from using this model, regardless of the outcomes. It is essential for users to fully understand these limitations before employing the model.

## License
The service is a research preview intended for non-commercial use only.
"""

# HTML placeholder shown in the empty chatbot panel.
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<img src="https://d23iyfk1a359di.cloudfront.net/files/topics/26317_ext_03_0.jpg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">CALM3-22B-Chat</h1>
</div>
"""

# Custom styling for the chatbot element (elem_id="chatbot" in app.py).
# Fix: the original used `max_height`, which is not a CSS property and was
# silently ignored; the correct property is `max-height`.
CSS = """
#chatbot {
    height: auto !important;
    max-height: none !important;
    overflow: auto !important;
    flex-grow: 1 !important;
}
"""
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ httpx
3
+ openai