nam pham committed on
Commit
4e5c5cb
1 Parent(s): ef64aec

feat: first update

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .env
2
+ models/**
3
+ !models/.gitkeep
Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ---- Build stage: install Python dependencies into a project-local venv ----
ARG PYTHON_VERSION=3.12
FROM python:$PYTHON_VERSION-slim as builder

RUN pip install poetry==1.8.2

# Poetry settings: no prompts; create the venv at /app/.venv so the runtime
# stage can copy it wholesale; put the cache in /tmp so it can be purged below.
ENV POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_IN_PROJECT=1 \
    POETRY_VIRTUALENVS_CREATE=1 \
    POETRY_CACHE_DIR=/tmp/poetry_cache

WORKDIR /app

# Copy only the lockfiles first so dependency install is layer-cached.
COPY pyproject.toml poetry.lock ./

# --no-root: install dependencies only; project source arrives in the runtime stage.
RUN poetry install --no-root && rm -rf $POETRY_CACHE_DIR

# ---- Runtime stage: slim image holding just the venv and the app source ----
FROM python:$PYTHON_VERSION-slim as runtime

####### Add your own installation commands here #######
# RUN pip install some-package
# RUN wget https://path/to/some/data/or/weights
# RUN apt-get update && apt-get install -y <package-name>


# Put the copied venv first on PATH so `python` resolves inside it.
ENV VIRTUAL_ENV=/app/.venv \
    PATH="/app/.venv/bin:$PATH"

COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}

COPY . /app

# Install litserve and requirements
EXPOSE 8000
CMD ["sh", "/app/bin/start-api.sh"]
app/server.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import litserve as ls
3
+ from huggingface_hub import login
4
+ import os
5
+
6
# Authenticate with the Hugging Face Hub at import time (needed for
# private/gated model repos).
# NOTE(review): if HF_TOKEN is unset this calls login(token=None) — confirm
# the deployment always provides the token.
login(token=os.getenv("HF_TOKEN"))

# Deployment configuration injected via environment variables.
DATA_PATH = os.getenv("DATA_PATH")  # base directory holding downloaded models
RETRIEVAL_MODEL_NAME = os.getenv("RETRIEVAL_MODEL_NAME")
SIMILARITY_MODEL_NAME = os.getenv("SIMILARITY_MODEL_NAME")
11
+
12
class EmbeddingModelAPI(ls.LitAPI):
    """LitServe API that serves two local ONNX sentence-embedding models.

    Each request selects a model via its ``"type"`` field:
      * ``"default"``    -> retrieval model
      * ``"similarity"`` -> similarity model
    """

    def setup(self, device):
        """Load both ONNX models from DATA_PATH once per worker."""
        self.retrieval_model = SentenceTransformer(
            os.path.join(DATA_PATH, RETRIEVAL_MODEL_NAME),
            backend="onnx",
            model_kwargs={"file_name": "onnx/model.onnx"},
            trust_remote_code=True,
        )
        self.similarity_model = SentenceTransformer(
            os.path.join(DATA_PATH, SIMILARITY_MODEL_NAME),
            backend="onnx",
            model_kwargs={"file_name": "onnx/model.onnx"},
            trust_remote_code=True,
        )

    def decode_request(self, request, **kwargs):
        """Extract ``(sentences, request_type)`` from the JSON payload."""
        # `request_type` instead of `type` — avoids shadowing the builtin.
        return request["sentences"], request["type"]

    def predict(self, x, **kwargs):
        """Encode the chunks with the model selected by the request type.

        Raises:
            ValueError: if the request type is neither "default" nor
                "similarity" (previously this silently returned None).
        """
        chunks, request_type = x
        if request_type == "default":
            return self.retrieval_model.encode(chunks).tolist()
        if request_type == "similarity":
            return self.similarity_model.encode(chunks).tolist()
        raise ValueError(f"unknown embedding type: {request_type!r}")

    def encode_response(self, output, **kwargs):
        """Wrap the embedding list in the response envelope."""
        return {"data": output}
41
+
42
+
43
if __name__ == "__main__":
    # Serve the embedding API over HTTP on port 8000.
    ls.LitServer(EmbeddingModelAPI()).run(port=8000)
bin/start-api.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
#!/bin/sh
# Fixed shebang: was `#!bin/sh`, a relative path the kernel cannot resolve.
# Fail fast: do not start the API if a model download fails.
set -e

# Pre-fetch the ONNX-quantized embedding models into DATA_PATH.
python /app/scripts/hf_model_download.py -i "BookingCare/gte-multilingual-base-v2.1-onnx-quantized" -m "snapshot"
python /app/scripts/hf_model_download.py -i "BookingCare/gte-multilingual-base-onnx-quantized" -m "snapshot"

# Start the LitServe embedding API.
python /app/app/server.py
client.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Copyright The Lightning AI team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
import requests

# Payload must match EmbeddingModelAPI.decode_request, which reads the
# "sentences" and "type" keys ({"input": 4.0} would raise KeyError server-side).
# "type" selects the model: "default" -> retrieval, "similarity" -> similarity.
response = requests.post(
    "http://127.0.0.1:8000/predict",
    json={"sentences": ["hello world"], "type": "default"},
)
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
models/.gitkeep ADDED
File without changes
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "embedding"
3
+ version = "0.1.0"
4
+ description = "embedding model serving"
5
+ authors = ["nam pham <contact.nampham.work@gmail.com>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.12"
10
+ sentence-transformers = {extras = ["onnx"], version = "^3.3.1"}
11
+ litserve = "^0.2.5"
12
+ torch = {version = "^2.5.1+cpu", source = "pytorch_cpu"}
13
+
14
+
15
+ [[tool.poetry.source]]
16
+ name = "pytorch_cpu"
17
+ url = "https://download.pytorch.org/whl/cpu"
18
+ priority = "explicit"
19
+
20
+ [build-system]
21
+ requires = ["poetry-core"]
22
+ build-backend = "poetry.core.masonry.api"
scripts/hf_model_download.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ from huggingface_hub import login, snapshot_download
4
+ from typing import Literal
5
+ from sentence_transformers import SentenceTransformer
6
+ from pydantic import BaseModel
7
+
8
# Destination root for downloaded models and the Hub auth token, from env.
DATA_PATH = os.getenv('DATA_PATH')
HUGGINGFACE_ACCESS_TOKEN = os.getenv('HF_TOKEN')

# Log in at import time so both download modes can reach gated/private repos.
login(token=HUGGINGFACE_ACCESS_TOKEN)
12
+
13
class ModelConfig(BaseModel):
    """Validated download options for one Hugging Face model."""

    # Hub repo id, e.g. "org/model-name".
    model_id : str
    # 'snapshot' mirrors the raw repo files; 'model' loads the model through
    # SentenceTransformer and re-saves it locally.
    mode: Literal['snapshot', 'model'] = 'model'

    class Config:
        # Permit the `model_` field-name prefix (pydantic reserves that
        # namespace by default and would warn on `model_id`).
        protected_namespaces = ()
19
+
20
def download(config: ModelConfig):
    """Fetch one model into ``DATA_PATH/<model_id>``.

    In 'snapshot' mode the raw repo files are mirrored; in any other mode the
    model is loaded through SentenceTransformer and re-saved locally.

    Raises:
        Whatever huggingface_hub / sentence_transformers raise on failure
        (the previous ``except Exception as e: raise e`` was a no-op and
        has been removed).
    """
    # NOTE(review): DATA_PATH is None when the env var is unset, which would
    # make os.path.join raise TypeError — confirm deployments always set it.
    target_dir = os.path.join(DATA_PATH, config.model_id)
    if config.mode == 'snapshot':
        snapshot_download(
            config.model_id,
            revision='main',
            ignore_patterns=['*.git*', '*README.md'],
            local_dir=target_dir,
        )
    else:
        model = SentenceTransformer(
            config.model_id,
            trust_remote_code=True,
        )
        model.save(target_dir)
38
+
39
def run():
    """Parse CLI arguments and download the requested model."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-i', '--input',
        help='model id to download',
        required=True,
    )
    parser.add_argument(
        '-m', '--mode',
        help='mode to download',
        # Reject invalid modes with a clear CLI error instead of letting
        # pydantic raise a ValidationError traceback later.
        choices=['snapshot', 'model'],
        default='model',
    )

    args = parser.parse_args()
    config = ModelConfig(
        model_id=args.input,
        mode=args.mode
    )

    download(config)
60
+
61
# CLI entry point: `python hf_model_download.py -i <repo_id> -m <mode>`.
if __name__ == '__main__':
    run()