Spaces:
Running
Running
nam pham
committed on
Commit
•
4e5c5cb
1
Parent(s):
ef64aec
feat: first update
Browse files- .gitignore +3 -0
- Dockerfile +35 -0
- app/server.py +46 -0
- bin/start-api.sh +6 -0
- client.py +18 -0
- models/.gitkeep +0 -0
- poetry.lock +0 -0
- pyproject.toml +22 -0
- scripts/hf_model_download.py +62 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# Local environment variables (HF_TOKEN, DATA_PATH, ...) — never commit secrets.
.env
# Downloaded model weights live here; too large to track in git.
models/**
# Keep the empty models/ directory present in the repository.
!models/.gitkeep
|
Dockerfile
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Multi-stage build: install dependencies with Poetry in `builder`,
# then copy only the resulting virtualenv into a slim `runtime` image.
# ARG before the first FROM is visible to every FROM line below.
ARG PYTHON_VERSION=3.12
FROM python:$PYTHON_VERSION-slim AS builder

RUN pip install poetry==1.8.2

# Create the venv inside /app/.venv so it can be copied into the runtime stage.
ENV POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_IN_PROJECT=1 \
    POETRY_VIRTUALENVS_CREATE=1 \
    POETRY_CACHE_DIR=/tmp/poetry_cache

WORKDIR /app

# Copy only the dependency manifests first so this layer is cached
# until pyproject.toml / poetry.lock actually change.
COPY pyproject.toml poetry.lock ./

# --no-root: install dependencies only (the project itself is copied later).
RUN poetry install --no-root && rm -rf $POETRY_CACHE_DIR

FROM python:$PYTHON_VERSION-slim AS runtime

####### Add your own installation commands here #######
# RUN pip install some-package
# RUN wget https://path/to/some/data/or/weights
# RUN apt-get update && apt-get install -y <package-name>

# Put the copied virtualenv first on PATH so `python` resolves to it.
ENV VIRTUAL_ENV=/app/.venv \
    PATH="/app/.venv/bin:$PATH"

COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}

WORKDIR /app
COPY . /app

# Install litserve and requirements
EXPOSE 8000
CMD ["sh", "/app/bin/start-api.sh"]
|
app/server.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer
import litserve as ls
from huggingface_hub import login
import os

# Authenticate with the Hugging Face Hub (needed for gated/private models).
# NOTE(review): if HF_TOKEN is unset this calls login(token=None) — confirm
# anonymous access is acceptable in that case.
login(token=os.getenv("HF_TOKEN"))

# Root directory containing the downloaded models
# (populated by scripts/hf_model_download.py; see bin/start-api.sh).
DATA_PATH = os.getenv("DATA_PATH")
# Model folder names under DATA_PATH; any of these may be None when the
# corresponding environment variable is unset.
RETRIEVAL_MODEL_NAME = os.getenv("RETRIEVAL_MODEL_NAME")
SIMILARITY_MODEL_NAME = os.getenv("SIMILARITY_MODEL_NAME")
|
11 |
+
|
12 |
+
class EmbeddingModelAPI(ls.LitAPI):
    """LitServe API exposing two sentence-embedding models.

    Requests are JSON objects with:
      - "sentences": text(s) to embed
      - "type": "default" (retrieval model) or "similarity" (similarity model)

    Responses are ``{"data": <list of embedding vectors>}``.
    """

    def setup(self, device):
        """Load both models from DATA_PATH with the ONNX backend.

        NOTE(review): assumes DATA_PATH / *_MODEL_NAME env vars are set;
        os.path.join raises TypeError if any is None — confirm the
        deployment always provides them.
        """
        self.retrieval_model = SentenceTransformer(
            os.path.join(DATA_PATH, RETRIEVAL_MODEL_NAME),
            backend="onnx",
            model_kwargs={"file_name": "onnx/model.onnx"},
            trust_remote_code=True,
        )
        self.similarity_model = SentenceTransformer(
            os.path.join(DATA_PATH, SIMILARITY_MODEL_NAME),
            backend="onnx",
            model_kwargs={"file_name": "onnx/model.onnx"},
            trust_remote_code=True,
        )

    def decode_request(self, request, **kwargs):
        """Extract (sentences, embedding_type) from the JSON body.

        Raises KeyError when "sentences" or "type" is missing.
        """
        # Renamed from `type`, which shadowed the builtin.
        embedding_type = request["type"]
        return request["sentences"], embedding_type

    def predict(self, x, **kwargs):
        """Embed the sentences with the model selected by the request type."""
        chunks, embedding_type = x
        if embedding_type == "default":
            return self.retrieval_model.encode(chunks).tolist()
        if embedding_type == "similarity":
            return self.similarity_model.encode(chunks).tolist()
        # Previously an unknown type silently fell through and returned None,
        # producing {"data": null}; fail loudly instead.
        raise ValueError(f"unknown embedding type: {embedding_type!r}")

    def encode_response(self, output, **kwargs):
        """Wrap the embedding list in the response envelope."""
        return {"data": output}
|
41 |
+
|
42 |
+
|
43 |
+
if __name__ == "__main__":
    # Serve the embedding API on port 8000 (the port exposed by the Dockerfile).
    server = ls.LitServer(EmbeddingModelAPI())
    server.run(port=8000)
|
bin/start-api.sh
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/sh
# Download the ONNX-quantized embedding models, then launch the LitServe API.
# Abort immediately if any download fails instead of starting a broken server.
set -e

python /app/scripts/hf_model_download.py -i "BookingCare/gte-multilingual-base-v2.1-onnx-quantized" -m "snapshot"
python /app/scripts/hf_model_download.py -i "BookingCare/gte-multilingual-base-onnx-quantized" -m "snapshot"

python /app/app/server.py
|
client.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests

# Smoke-test client for the embedding server.
# The payload must match EmbeddingModelAPI.decode_request, which reads
# request["sentences"] and request["type"] ("default" or "similarity");
# the template payload {"input": 4.0} would KeyError server-side.
response = requests.post(
    "http://127.0.0.1:8000/predict",
    json={"sentences": ["hello world"], "type": "default"},
    timeout=30,  # don't hang forever if the server is down
)
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
|
models/.gitkeep
ADDED
File without changes
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
name = "embedding"
version = "0.1.0"
description = "embedding model serving"
authors = ["nam pham <contact.nampham.work@gmail.com>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.12"
# The "onnx" extra enables the ONNX backend used by app/server.py.
sentence-transformers = {extras = ["onnx"], version = "^3.3.1"}
litserve = "^0.2.5"
# CPU-only torch wheel, resolved from the explicit pytorch_cpu source below.
torch = {version = "^2.5.1+cpu", source = "pytorch_cpu"}


# Extra package index for CPU-only PyTorch wheels; "explicit" means it is
# consulted only for dependencies that name it via `source = ...`.
[[tool.poetry.source]]
name = "pytorch_cpu"
url = "https://download.pytorch.org/whl/cpu"
priority = "explicit"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
|
scripts/hf_model_download.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import argparse
from huggingface_hub import login, snapshot_download
from typing import Literal
from sentence_transformers import SentenceTransformer
from pydantic import BaseModel

# Destination root for downloaded models; may be None if DATA_PATH is unset.
DATA_PATH = os.getenv('DATA_PATH')
HUGGINGFACE_ACCESS_TOKEN = os.getenv('HF_TOKEN')

# Authenticate with the Hugging Face Hub (required for gated/private repos).
login(token=HUGGINGFACE_ACCESS_TOKEN)
|
12 |
+
|
13 |
+
class ModelConfig(BaseModel):
    """Validated download request: which model to fetch and how."""

    # Hugging Face repository id, e.g. "org/model-name".
    model_id : str
    # 'snapshot': mirror the raw repo files; 'model': load through
    # SentenceTransformer and re-save locally.
    mode: Literal['snapshot', 'model'] = 'model'

    class Config:
        # Allow the "model_" field-name prefix, which pydantic reserves by default.
        protected_namespaces = ()
|
19 |
+
|
20 |
+
def download(config: ModelConfig):
    """Download a Hugging Face model into DATA_PATH/<model_id>.

    In 'snapshot' mode the raw repository files are mirrored with
    snapshot_download; in 'model' mode the model is loaded through
    SentenceTransformer (trust_remote_code=True) and re-saved locally.

    Errors propagate to the caller. (The previous
    ``except Exception as e: raise e`` wrapper was a no-op that only
    truncated the traceback, so it was removed.)
    """
    # NOTE(review): DATA_PATH may be None if the env var is unset, in which
    # case os.path.join raises TypeError — confirm the deployment sets it.
    target_dir = os.path.join(DATA_PATH, config.model_id)
    if config.mode == 'snapshot':
        snapshot_download(
            config.model_id,
            revision='main',
            ignore_patterns=['*.git*', '*README.md'],
            local_dir=target_dir,
        )
    else:
        model = SentenceTransformer(
            config.model_id,
            trust_remote_code=True,
        )
        model.save(target_dir)
|
38 |
+
|
39 |
+
def run():
    """Parse CLI arguments and download the requested model.

    Usage: hf_model_download.py -i <model_id> [-m snapshot|model]
    """
    parser = argparse.ArgumentParser(description='Download a model from the Hugging Face Hub')
    parser.add_argument(
        '-i', '--input',
        help='model id to download',
        required=True,
    )
    parser.add_argument(
        '-m', '--mode',
        help='mode to download',
        # Reject invalid modes at the CLI with a clear message instead of
        # failing later with a pydantic ValidationError.
        choices=['snapshot', 'model'],
        default='model',
    )

    args = parser.parse_args()
    config = ModelConfig(
        model_id=args.input,
        mode=args.mode,
    )

    download(config)
|
60 |
+
|
61 |
+
# CLI entry point.
if __name__ == '__main__':
    run()
|