nam pham committed on
Commit
4e5c5cb
1 Parent(s): ef64aec

feat: first update

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .env
2
+ models/**
3
+ !models/.gitkeep
Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ---- Build stage: install Python dependencies into a project-local venv ----
ARG PYTHON_VERSION=3.12
FROM python:$PYTHON_VERSION-slim as builder

RUN pip install poetry==1.8.2

# Poetry settings: no prompts; create the venv at /app/.venv so the runtime
# stage can copy it wholesale; put the cache in /tmp so it can be purged below.
ENV POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_IN_PROJECT=1 \
    POETRY_VIRTUALENVS_CREATE=1 \
    POETRY_CACHE_DIR=/tmp/poetry_cache

WORKDIR /app

# Copy only the lockfiles first so dependency install is layer-cached.
COPY pyproject.toml poetry.lock ./

# --no-root: install dependencies only; project source arrives in the runtime stage.
RUN poetry install --no-root && rm -rf $POETRY_CACHE_DIR

# ---- Runtime stage: slim image holding just the venv and the app source ----
FROM python:$PYTHON_VERSION-slim as runtime

####### Add your own installation commands here #######
# RUN pip install some-package
# RUN wget https://path/to/some/data/or/weights
# RUN apt-get update && apt-get install -y <package-name>


# Put the copied venv first on PATH so `python` resolves inside it.
ENV VIRTUAL_ENV=/app/.venv \
    PATH="/app/.venv/bin:$PATH"

COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}

COPY . /app

# Install litserve and requirements
EXPOSE 8000
CMD ["sh", "/app/bin/start-api.sh"]
app/server.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import litserve as ls
3
+ from huggingface_hub import login
4
+ import os
5
+
6
# Authenticate with the Hugging Face Hub at import time (needed for
# private/gated model repos).
# NOTE(review): if HF_TOKEN is unset this calls login(token=None) — confirm
# the deployment always provides the token.
login(token=os.getenv("HF_TOKEN"))

# Deployment configuration injected via environment variables.
DATA_PATH = os.getenv("DATA_PATH")  # base directory holding downloaded models
RETRIEVAL_MODEL_NAME = os.getenv("RETRIEVAL_MODEL_NAME")
SIMILARITY_MODEL_NAME = os.getenv("SIMILARITY_MODEL_NAME")
11
+
12
class EmbeddingModelAPI(ls.LitAPI):
    """LitServe API that serves two local ONNX sentence-embedding models.

    Each request selects a model via its ``"type"`` field:
      * ``"default"``    -> retrieval model
      * ``"similarity"`` -> similarity model
    """

    def setup(self, device):
        """Load both ONNX models from DATA_PATH once per worker."""
        self.retrieval_model = SentenceTransformer(
            os.path.join(DATA_PATH, RETRIEVAL_MODEL_NAME),
            backend="onnx",
            model_kwargs={"file_name": "onnx/model.onnx"},
            trust_remote_code=True,
        )
        self.similarity_model = SentenceTransformer(
            os.path.join(DATA_PATH, SIMILARITY_MODEL_NAME),
            backend="onnx",
            model_kwargs={"file_name": "onnx/model.onnx"},
            trust_remote_code=True,
        )

    def decode_request(self, request, **kwargs):
        """Extract ``(sentences, request_type)`` from the JSON payload."""
        # `request_type` instead of `type` — avoids shadowing the builtin.
        return request["sentences"], request["type"]

    def predict(self, x, **kwargs):
        """Encode the chunks with the model selected by the request type.

        Raises:
            ValueError: if the request type is neither "default" nor
                "similarity" (previously this silently returned None).
        """
        chunks, request_type = x
        if request_type == "default":
            return self.retrieval_model.encode(chunks).tolist()
        if request_type == "similarity":
            return self.similarity_model.encode(chunks).tolist()
        raise ValueError(f"unknown embedding type: {request_type!r}")

    def encode_response(self, output, **kwargs):
        """Wrap the embedding list in the response envelope."""
        return {"data": output}
41
+
42
+
43
if __name__ == "__main__":
    # Serve the embedding API over HTTP on port 8000.
    ls.LitServer(EmbeddingModelAPI()).run(port=8000)
bin/start-api.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
#!/bin/sh
# Fixed shebang: was `#!bin/sh`, a relative path the kernel cannot resolve.
# Fail fast: do not start the API if a model download fails.
set -e

# Pre-fetch the ONNX-quantized embedding models into DATA_PATH.
python /app/scripts/hf_model_download.py -i "BookingCare/gte-multilingual-base-v2.1-onnx-quantized" -m "snapshot"
python /app/scripts/hf_model_download.py -i "BookingCare/gte-multilingual-base-onnx-quantized" -m "snapshot"

# Start the LitServe embedding API.
python /app/app/server.py
client.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Copyright The Lightning AI team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
import requests

# Payload must match EmbeddingModelAPI.decode_request, which reads the
# "sentences" and "type" keys ({"input": 4.0} would raise KeyError server-side).
# "type" selects the model: "default" -> retrieval, "similarity" -> similarity.
response = requests.post(
    "http://127.0.0.1:8000/predict",
    json={"sentences": ["hello world"], "type": "default"},
)
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
models/.gitkeep ADDED
File without changes
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "embedding"
3
+ version = "0.1.0"
4
+ description = "embedding model serving"
5
+ authors = ["nam pham <contact.nampham.work@gmail.com>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.12"
10
+ sentence-transformers = {extras = ["onnx"], version = "^3.3.1"}
11
+ litserve = "^0.2.5"
12
+ torch = {version = "^2.5.1+cpu", source = "pytorch_cpu"}
13
+
14
+
15
+ [[tool.poetry.source]]
16
+ name = "pytorch_cpu"
17
+ url = "https://download.pytorch.org/whl/cpu"
18
+ priority = "explicit"
19
+
20
+ [build-system]
21
+ requires = ["poetry-core"]
22
+ build-backend = "poetry.core.masonry.api"
scripts/hf_model_download.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ from huggingface_hub import login, snapshot_download
4
+ from typing import Literal
5
+ from sentence_transformers import SentenceTransformer
6
+ from pydantic import BaseModel
7
+
8
# Destination root for downloaded models and the Hub auth token, from env.
DATA_PATH = os.getenv('DATA_PATH')
HUGGINGFACE_ACCESS_TOKEN = os.getenv('HF_TOKEN')

# Log in at import time so both download modes can reach gated/private repos.
login(token=HUGGINGFACE_ACCESS_TOKEN)
12
+
13
class ModelConfig(BaseModel):
    """Validated download options for one Hugging Face model."""

    # Hub repo id, e.g. "org/model-name".
    model_id : str
    # 'snapshot' mirrors the raw repo files; 'model' loads the model through
    # SentenceTransformer and re-saves it locally.
    mode: Literal['snapshot', 'model'] = 'model'

    class Config:
        # Permit the `model_` field-name prefix (pydantic reserves that
        # namespace by default and would warn on `model_id`).
        protected_namespaces = ()
19
+
20
def download(config: ModelConfig):
    """Fetch one model into ``DATA_PATH/<model_id>``.

    In 'snapshot' mode the raw repo files are mirrored; in any other mode the
    model is loaded through SentenceTransformer and re-saved locally.

    Raises:
        Whatever huggingface_hub / sentence_transformers raise on failure
        (the previous ``except Exception as e: raise e`` was a no-op and
        has been removed).
    """
    # NOTE(review): DATA_PATH is None when the env var is unset, which would
    # make os.path.join raise TypeError — confirm deployments always set it.
    target_dir = os.path.join(DATA_PATH, config.model_id)
    if config.mode == 'snapshot':
        snapshot_download(
            config.model_id,
            revision='main',
            ignore_patterns=['*.git*', '*README.md'],
            local_dir=target_dir,
        )
    else:
        model = SentenceTransformer(
            config.model_id,
            trust_remote_code=True,
        )
        model.save(target_dir)
38
+
39
def run():
    """Parse CLI arguments and download the requested model."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-i', '--input',
        help='model id to download',
        required=True,
    )
    parser.add_argument(
        '-m', '--mode',
        help='mode to download',
        # Reject invalid modes with a clear CLI error instead of letting
        # pydantic raise a ValidationError traceback later.
        choices=['snapshot', 'model'],
        default='model',
    )

    args = parser.parse_args()
    config = ModelConfig(
        model_id=args.input,
        mode=args.mode
    )

    download(config)
60
+
61
# CLI entry point: `python hf_model_download.py -i <repo_id> -m <mode>`.
if __name__ == '__main__':
    run()