tuxedocat commited on
Commit
a7566b2
0 Parent(s):
Files changed (8) hide show
  1. .dockerignore +6 -0
  2. .gitattributes +35 -0
  3. .gitignore +1 -0
  4. Dockerfile +28 -0
  5. README.md +17 -0
  6. app.py +175 -0
  7. const.py +79 -0
  8. requirements.txt +3 -0
.dockerignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .env
2
+ .env.*
3
+ *.json
4
+ .venv/
5
+ __pycache__/
6
+ *.pyc
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv/
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Stage 1: fetch the prebuilt app asset from a private GCS bucket ---
FROM google/cloud-sdk:slim AS retriever
# Get service account key from HF Spaces' secrets
# https://huggingface.co/docs/hub/spaces-sdks-docker#buildtime
# Secrets are mounted only for this RUN step and the key file is deleted
# afterwards, so it never persists in this stage's filesystem.
RUN --mount=type=secret,id=BUILD_CREDENTIALS,mode=0444,required=true \
    --mount=type=secret,id=BUILD_ASSET_BUCKET,mode=0444,required=true \
    --mount=type=secret,id=BUILD_ASSET_NAME,mode=0444,required=true \
    cat /run/secrets/BUILD_CREDENTIALS > /tmp/creds.json && \
    /bin/gcloud auth activate-service-account --key-file=/tmp/creds.json > /dev/null 2>&1 && \
    GOOGLE_APPLICATION_CREDENTIALS=/tmp/creds.json /bin/gcloud storage cp gs://$(cat /run/secrets/BUILD_ASSET_BUCKET)/$(cat /run/secrets/BUILD_ASSET_NAME) /tmp/ > /dev/null 2>&1 && \
    rm /tmp/creds.json

# --- Stage 2: runtime image serving the Gradio app as a non-root user ---
FROM python:3.11-slim AS gradio
RUN useradd -m -u 1000 app
USER app
ENV HOME=/home/app \
    PATH=/home/app/.local/bin:$PATH
WORKDIR ${HOME}
# Only the downloaded release tarball is carried over from stage 1.
COPY --from=retriever /tmp/*_linux_amd64.tar.gz ${HOME}/
RUN tar -xf *_linux_amd64.tar.gz && rm *_linux_amd64.tar.gz

COPY . .
RUN pip install --no-cache-dir -r requirements.txt
# NOTE(review): this writes the LLM credential into an image layer at build
# time; confirm this is acceptable for the Space (a runtime secret would
# avoid baking it into the image).
RUN --mount=type=secret,id=LLM_CREDENTIALS,mode=0444,required=true \
    cat /run/secrets/LLM_CREDENTIALS > ${HOME}/credentials.json
EXPOSE 7860

# Bind to all interfaces so the Space's proxy can reach the server.
ENV GRADIO_SERVER_NAME="0.0.0.0"
CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: CALM3-22B-Chat-Demo
3
+ emoji: 😌
4
+ colorFrom: green
5
+ colorTo: yellow
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ ---
10
+
11
+ # Model
12
+
13
+ See [CyberAgentLM3-22B-Chat](https://huggingface.co/cyberagent/calm3-22b-chat) for details and licensing information to use the model.
14
+
15
+ # Terms of Use
16
+
17
+ See the App tab.
app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+ import gradio as gr
3
+ import httpx
4
+ import subprocess
5
+ import os
6
+ from openai import OpenAI
7
+
8
+ from const import (
9
+ LLM_BASE_URL,
10
+ AUTH_CMD,
11
+ SYSTEM_PROMPTS,
12
+ EXAMPLES,
13
+ CSS,
14
+ HEADER,
15
+ FOOTER,
16
+ PLACEHOLDER,
17
+ ModelInfo,
18
+ MODELS,
19
+ )
20
+
21
+
22
def get_token() -> str:
    """Return the bearer token printed by the external ``AUTH_CMD`` command.

    The command is run with the current process environment (const.py exports
    the SECRET_PREFIX-namespaced variables it needs).

    Returns:
        The token string with surrounding whitespace stripped.

    Raises:
        ValueError: if the command cannot be run, exits non-zero, or prints
            nothing.
    """
    try:
        proc = subprocess.run(
            AUTH_CMD,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            env=os.environ.copy(),
            # Original ignored the exit status, so a failed command silently
            # produced an empty token; surface it instead.
            check=True,
        )
    except (OSError, subprocess.SubprocessError) as err:
        # Chain the cause so the real failure is visible in tracebacks.
        raise ValueError("Failed to get auth token") from err
    token = proc.stdout.decode("utf-8").strip()
    if not token:
        # Explicit check: `assert` is stripped under `python -O`.
        raise ValueError("Failed to get auth token")
    return token
38
+
39
+
40
def get_headers(host: str) -> dict:
    """Build the headers for a proxied LLM request: JSON content negotiation,
    an explicit Host override, and a freshly fetched bearer token."""
    headers = {
        "Host": host,
        "Accept": "application/json",
        "Content-Type": "application/json",
    }
    # Fetch the token last so header construction stays cheap if it raises.
    headers["Authorization"] = f"Bearer {get_token()}"
    return headers
47
+
48
+
49
def proxy(request: httpx.Request, model_info: ModelInfo) -> httpx.Request:
    """httpx request hook: retarget an outgoing request at the model's
    endpoint path and attach the auth/Host headers it requires."""
    rewritten = request.url.copy_with(path=model_info.endpoint)
    request.url = rewritten
    for key, value in get_headers(host=model_info.host).items():
        request.headers[key] = value
    return request
53
+
54
+
55
def call_llm(
    message: str,
    history: list,
    model_name: str,
    system_prompt: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream an assistant reply for *message* given the chat *history*.

    Yields the accumulated reply text after each streamed chunk so the UI
    renders the response progressively.

    Args:
        message: Latest user message.
        history: Prior turns; supports both gradio's tuple pairs
            ``(user, assistant)`` and ``{"role", "content"}`` dicts.
        model_name: Key into ``MODELS``.
        system_prompt: Key into ``SYSTEM_PROMPTS``.
        max_tokens / temperature / top_p: Sampling parameters.
    """
    system_prompt_text = SYSTEM_PROMPTS[system_prompt]
    # Always lead with the system prompt. The original only sent it when
    # history was empty, so the model lost its instructions on every
    # follow-up turn.
    messages = [{"role": "system", "content": system_prompt_text}]
    for turn in history:
        if isinstance(turn, dict):
            # Messages-style history entry, already role/content shaped.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Tuple-style history entry: (user_text, assistant_text).
            human, assistant = turn
            messages.append({"role": "user", "content": human})
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    model_info = MODELS[model_name]
    client = OpenAI(
        api_key="",  # auth is injected per-request by the proxy hook below
        base_url=LLM_BASE_URL,
        http_client=httpx.Client(
            event_hooks={
                # Rewrites path and headers of every outgoing request.
                "request": [partial(proxy, model_info=model_info)],
            },
            # NOTE(review): TLS verification is disabled — presumably the
            # gateway uses an internal certificate; confirm, and prefer a
            # custom CA bundle over verify=False if possible.
            verify=False,
        ),
    )

    stream = client.chat.completions.create(
        model=f"/data/cyberagent/{model_info.name}",
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        n=1,
        stream=True,
        extra_body={"repetition_penalty": 1.1},
    )

    # Accumulate chunks into the full reply; a separate name avoids
    # shadowing the `message` parameter as the original did.
    reply = ""
    for chunk in stream:
        reply += chunk.choices[0].delta.content or ""
        yield reply
107
+
108
+
109
def run():
    """Build and launch the Gradio chat UI.

    Wires `call_llm` into a ChatInterface with hidden model/system-prompt
    selectors and user-tunable sampling sliders, then serves it.
    """
    chatbot = gr.Chatbot(
        elem_id="chatbot",  # matched by the #chatbot rule in CSS
        scale=1,
        show_copy_button=True,
        placeholder=PLACEHOLDER,
        layout="panel",
    )
    # Fix: CSS was imported and targets #chatbot but was never passed to
    # Blocks, so the custom chatbot sizing rules were silently dropped.
    with gr.Blocks(fill_height=True, css=CSS) as demo:
        gr.Markdown(HEADER)
        gr.ChatInterface(
            fn=call_llm,
            stop_btn="Stop Generation",
            examples=EXAMPLES,
            cache_examples=False,
            multimodal=False,
            chatbot=chatbot,
            additional_inputs_accordion=gr.Accordion(
                label="Parameters", open=False, render=False
            ),
            additional_inputs=[
                # Model and system prompt are fixed (single choice) and
                # hidden from the user; they still feed call_llm's signature.
                gr.Dropdown(
                    choices=list(MODELS.keys()),
                    value=list(MODELS.keys())[0],
                    label="Model",
                    visible=False,
                ),
                gr.Dropdown(
                    choices=list(SYSTEM_PROMPTS.keys()),
                    value=list(SYSTEM_PROMPTS.keys())[0],
                    label="System Prompt",
                    visible=False,
                ),
                gr.Slider(
                    minimum=32,
                    maximum=4096,
                    step=1,
                    value=1024,
                    label="Max tokens",
                    render=False,
                ),
                gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=0.3,
                    label="Temperature",
                    render=False,
                ),
                gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=1.0,
                    label="Top-p",
                    render=False,
                ),
            ],
            analytics_enabled=False,
        )
        gr.Markdown(FOOTER)
    # Bound queue keeps memory in check under load; API surface stays closed.
    demo.queue(max_size=256, api_open=False)
    demo.launch(share=False, quiet=True)
172
+
173
+
174
if __name__ == "__main__":
    # Launch the Gradio demo when executed as a script (Dockerfile CMD).
    run()
const.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from dataclasses import dataclass
4
+
5
# SECRET_PREFIX namespaces the environment variables re-exported below for
# the external auth command (see AUTH_CMD).
SECRET_PREFIX = os.environ.get("SECRET_PREFIX")
if SECRET_PREFIX is None:
    raise ValueError("SECRET_PREFIX is not set")

PROJECT_ID = os.environ.get("PROJECT_ID")
ROLE_SUBJECT = os.environ.get("ROLE_SUBJECT")
CREDENTIALS = os.environ.get("CREDENTIALS")
# Fail fast with a clear message: assigning None into os.environ would
# otherwise raise an opaque TypeError at import time.
for _name, _value in (
    ("PROJECT_ID", PROJECT_ID),
    ("ROLE_SUBJECT", ROLE_SUBJECT),
    ("CREDENTIALS", CREDENTIALS),
):
    if _value is None:
        raise ValueError(f"{_name} is not set")
    os.environ[SECRET_PREFIX + _name] = _value
del _name, _value

ROOT_DIR = Path(__file__).parent.absolute()
# Tokenized command line run by app.get_token(); an unset AUTH_CMD yields
# the degenerate [""] command.
AUTH_CMD = os.environ.get("AUTH_CMD", "").split(" ")

# Upstream LLM gateway configuration; may be None if unset (consumed by
# app.py when building requests).
LLM_BASE_URL = os.environ.get("LLM_BASE_URL")
LLM_ENDPOINT = os.environ.get("LLM_ENDPOINT")
LLM_HOST = os.environ.get("LLM_HOST")
22
+
23
+
24
@dataclass
class ModelInfo:
    """Routing metadata for one served model."""

    # Model identifier; also used as the dropdown label and model-path suffix.
    name: str
    # Request path substituted into outgoing requests (see app.proxy).
    endpoint: str
    # Host header value the serving gateway expects.
    host: str
29
+
30
+
31
# All deployable models; _MODELS is the single source of truth.
_MODELS = [
    ModelInfo(
        name="calm3-22b-chat",
        endpoint=LLM_ENDPOINT,
        host=LLM_HOST,
    ),
]
# Lookup table used by the UI: model name -> ModelInfo.
MODELS = {info.name: info for info in _MODELS}
33
+
34
# Selectable system prompts: dropdown label -> prompt text.
SYSTEM_PROMPTS = {
    "assistant": "あなたは親切なAIアシスタントです。",
}

# Example prompts shown under the chat box (one-element rows, text-only).
EXAMPLES = [
    [
        "サイバーエージェントってどんな会社?",
    ],
    [
        "AIの進化で人類の暮らしはどうなると思いますか?",
    ],
    [
        "大規模言語モデルの仕組みについて詳しく説明して。",
    ],
    [
        "大規模言語モデルの仕組みについて、子供でもわかるように易しく説明して。",
    ],
]

# Markdown rendered above the chat interface.
HEADER = """
# CALM3-22B-Chat
"""

# Markdown rendered below the chat interface.
FOOTER = """
## Terms of Use
Please note that by using this service, you agree to the following terms: This model is provided for research purposes only. CyberAgent expressly disclaim any liability for direct, indirect, special, incidental, or consequential damages, as well as for any losses that may result from using this model, regardless of the outcomes. It is essential for users to fully understand these limitations before employing the model.

## License
The service is a research preview intended for non-commercial use only.
"""

# HTML placeholder shown in the empty chatbot panel.
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<img src="https://d23iyfk1a359di.cloudfront.net/files/topics/26317_ext_03_0.jpg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">CALM3-22B-Chat</h1>
</div>
"""

# Custom styling for the chatbot element (elem_id="chatbot" in app.py).
# Fix: the original used `max_height`, which is not a CSS property and was
# silently ignored; the correct property is `max-height`.
CSS = """
#chatbot {
    height: auto !important;
    max-height: none !important;
    overflow: auto !important;
    flex-grow: 1 !important;
}
"""
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ httpx
3
+ openai