Upload 17 files
Browse files- docker/mmclassification/Dockerfile +29 -0
- docker/mmclassification/app/__init__.py +0 -0
- docker/mmclassification/app/main.py +91 -0
- docker/mmclassification/app/pipelines/__init__.py +3 -0
- docker/mmclassification/app/pipelines/image_classification.py +51 -0
- docker/mmclassification/prestart.sh +1 -0
- docker/mmclassification/requirements.txt +5 -0
- docker/mmclassification/tests/__init__.py +0 -0
- docker/mmclassification/tests/samples/malformed.flac +0 -0
- docker/mmclassification/tests/samples/plane.jpg +0 -0
- docker/mmclassification/tests/samples/plane2.jpg +0 -0
- docker/mmclassification/tests/samples/sample1.flac +0 -0
- docker/mmclassification/tests/samples/sample1.webm +0 -0
- docker/mmclassification/tests/samples/sample1_dual.ogg +0 -0
- docker/mmclassification/tests/test_api.py +59 -0
- docker/mmclassification/tests/test_api_image_classification.py +81 -0
- docker/mmclassification/tests/test_docker_build.py +23 -0
docker/mmclassification/Dockerfile
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM tiangolo/uvicorn-gunicorn:python3.8
|
2 |
+
LABEL maintainer="me <me@example.com>"
|
3 |
+
|
4 |
+
# Add any system dependency here
|
5 |
+
# RUN apt-get update -y && apt-get install libXXX -y
|
6 |
+
|
7 |
+
COPY ./requirements.txt /app
|
8 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
9 |
+
COPY ./prestart.sh /app/
|
10 |
+
|
11 |
+
|
12 |
+
# Most DL models are quite large in terms of memory, using workers is a HUGE
|
13 |
+
# slowdown because of the fork and GIL with python.
|
14 |
+
# Using multiple pods seems like a better default strategy.
|
15 |
+
# Feel free to override if it does not make sense for your library.
|
16 |
+
ARG max_workers=1
|
17 |
+
ENV MAX_WORKERS=$max_workers
|
18 |
+
ENV HUGGINGFACE_HUB_CACHE=/data
|
19 |
+
|
20 |
+
# Necessary on GPU environment docker.
|
21 |
+
# TIMEOUT env variable is used by nvcr.io/nvidia/pytorch:xx for another purpose
|
22 |
+
# rendering TIMEOUT defined by uvicorn impossible to use correctly
|
23 |
+
# We're overriding it to be renamed UVICORN_TIMEOUT
|
24 |
+
# UVICORN_TIMEOUT is a useful variable for very large models that take more
|
25 |
+
# than 30s (the default) to load in memory.
|
26 |
+
# If UVICORN_TIMEOUT is too low, uvicorn will simply never load, as it will
|
27 |
+
# kill workers all the time before they finish.
|
28 |
+
RUN sed -i 's/TIMEOUT/UVICORN_TIMEOUT/g' /gunicorn_conf.py
|
29 |
+
COPY ./app /app/app
|
docker/mmclassification/app/__init__.py
ADDED
File without changes
|
docker/mmclassification/app/main.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import functools
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
from typing import Dict, Type
|
5 |
+
|
6 |
+
from api_inference_community.routes import pipeline_route, status_ok
|
7 |
+
from app.pipelines import ImageClassificationPipeline, Pipeline
|
8 |
+
from starlette.applications import Starlette
|
9 |
+
from starlette.middleware import Middleware
|
10 |
+
from starlette.middleware.gzip import GZipMiddleware
|
11 |
+
from starlette.routing import Route
|
12 |
+
|
13 |
+
|
14 |
+
TASK = os.getenv("TASK")
|
15 |
+
MODEL_ID = os.getenv("MODEL_ID")
|
16 |
+
|
17 |
+
|
18 |
+
logger = logging.getLogger(__name__)
|
19 |
+
|
20 |
+
|
21 |
+
# Add the allowed tasks
|
22 |
+
# Supported tasks are:
|
23 |
+
# - text-generation
|
24 |
+
# - text-classification
|
25 |
+
# - token-classification
|
26 |
+
# - translation
|
27 |
+
# - summarization
|
28 |
+
# - automatic-speech-recognition
|
29 |
+
# - ...
|
30 |
+
# For instance
|
31 |
+
# from app.pipelines import AutomaticSpeechRecognitionPipeline
|
32 |
+
# ALLOWED_TASKS = {"automatic-speech-recognition": AutomaticSpeechRecognitionPipeline}
|
33 |
+
# You can check the requirements and expectations of each pipeline in its respective
|
34 |
+
# directories. Implement directly within the directories.
|
35 |
+
ALLOWED_TASKS: Dict[str, Type[Pipeline]] = {
|
36 |
+
"image-classification": ImageClassificationPipeline
|
37 |
+
}
|
38 |
+
|
39 |
+
|
40 |
+
@functools.lru_cache()
def get_pipeline() -> Pipeline:
    """Build the pipeline selected by the ``TASK`` and ``MODEL_ID`` env variables.

    The call is memoized with ``functools.lru_cache``, so once a pipeline has
    been built successfully the same instance is returned on later calls.

    Returns:
        The ``ALLOWED_TASKS`` entry registered for ``TASK``, called with
        ``MODEL_ID`` as its only argument.

    Raises:
        KeyError: if ``TASK`` or ``MODEL_ID`` is missing from the environment.
        EnvironmentError: if ``TASK`` is not a key of ``ALLOWED_TASKS``.
    """
    requested_task = os.environ["TASK"]
    requested_model = os.environ["MODEL_ID"]

    if requested_task in ALLOWED_TASKS:
        pipeline_factory = ALLOWED_TASKS[requested_task]
        return pipeline_factory(requested_model)

    raise EnvironmentError(
        f"{requested_task} is not a valid pipeline for model : {requested_model}"
    )
|
47 |
+
|
48 |
+
|
49 |
+
# Both routes match every path: the first is registered without an explicit
# method list and serves `status_ok`, the second handles POST requests with
# `pipeline_route`.
routes = [
    Route("/{whatever:path}", status_ok),
    Route("/{whatever:path}", pipeline_route, methods=["POST"]),
]

# Responses are always eligible for gzip (minimum_size=1000).
middleware = [Middleware(GZipMiddleware, minimum_size=1000)]

# A fully permissive CORS policy is added only when DEBUG is exactly "1".
if os.getenv("DEBUG", "") == "1":
    from starlette.middleware.cors import CORSMiddleware

    permissive_cors = Middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_headers=["*"],
        allow_methods=["*"],
    )
    middleware.append(permissive_cors)

app = Starlette(routes=routes, middleware=middleware)
|
68 |
+
|
69 |
+
|
70 |
+
@app.on_event("startup")
async def startup_event():
    """Set up access logging and try to build the pipeline at application start."""
    # The "uvicorn.access" logger ends up with exactly one handler: a stream
    # handler that prefixes each record with a timestamp and the level name.
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    )
    logging.getLogger("uvicorn.access").handlers = [stream_handler]

    # Link between `api-inference-community` and framework code.
    app.get_pipeline = get_pipeline

    # Eagerly build the pipeline. A failure here is ignored on purpose so the
    # exception can be shown later instead of aborting startup.
    try:
        get_pipeline()
    except Exception:
        pass
|
84 |
+
|
85 |
+
|
86 |
+
if __name__ == "__main__":
    # Script entry point: attempt to build the pipeline once.
    # Any failure is ignored here on purpose so the exception can be shown
    # later rather than making this script fail.
    try:
        get_pipeline()
    except Exception:
        pass
|
docker/mmclassification/app/pipelines/__init__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
from app.pipelines.base import Pipeline, PipelineException # isort:skip
|
2 |
+
|
3 |
+
from app.pipelines.image_classification import ImageClassificationPipeline
|
docker/mmclassification/app/pipelines/image_classification.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, Dict, List
|
2 |
+
|
3 |
+
import torch
|
4 |
+
from huggingface_hub import hf_hub_download
|
5 |
+
from app.pipelines import Pipeline
|
6 |
+
from PIL import Image
|
7 |
+
from mmcv.parallel import collate, scatter
|
8 |
+
from mmcv.runner import load_checkpoint
|
9 |
+
|
10 |
+
from mmcls.datasets.pipelines import Compose
|
11 |
+
from mmcls.apis import init_model
|
12 |
+
|
13 |
+
CONFIG_FILENAME = "config.py"
|
14 |
+
CHECKPOINT_FILENAME = "model.pth"
|
15 |
+
|
16 |
+
def inference_model_hf(model, img):
    """Run ``model`` on a single image and return the score of every class.

    Note that ``model.cfg.data.test.pipeline`` is edited in place so that its
    first step matches the kind of input received.

    Args:
        model: Classifier exposing ``cfg``, ``parameters()`` and ``CLASSES``
            and callable as ``model(return_loss=False, **data)``.
        img: A ``str`` is treated as an image filename to be loaded by a
            ``LoadImageFromFile`` step; anything else is handed to the test
            pipeline directly under the ``img`` key.

    Returns:
        A list with one ``{"score": float, "label": ...}`` dict per entry of
        ``model.CLASSES``, ordered from highest to lowest score.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # device the model lives on

    # Make the head of the test pipeline agree with the input type.
    loader_is_first = cfg.data.test.pipeline[0]['type'] == 'LoadImageFromFile'
    if isinstance(img, str):
        if not loader_is_first:
            cfg.data.test.pipeline.insert(0, dict(type='LoadImageFromFile'))
        data = dict(img_info=dict(filename=img), img_prefix=None)
    else:
        if loader_is_first:
            cfg.data.test.pipeline.pop(0)
        data = dict(img=img)

    # Preprocess the sample and wrap it into a batch of one.
    preprocess = Compose(cfg.data.test.pipeline)
    data = collate([preprocess(data)], samples_per_gpu=1)
    if first_param.is_cuda:
        # scatter to the GPU that holds the model
        data = scatter(data, [device])[0]

    # Forward pass without gradient tracking.
    with torch.no_grad():
        scores = model(return_loss=False, **data)

    ranked = [
        {"score": float(scores[0][index]), "label": label}
        for index, label in enumerate(model.CLASSES)
    ]
    ranked.sort(key=lambda entry: entry["score"], reverse=True)
    return ranked
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
class ImageClassificationPipeline(Pipeline):
    """Image-classification pipeline backed by an mmcls model stored on the Hub."""

    def __init__(self, model_id: str):
        """Download the config and checkpoint of ``model_id`` and load the model on CPU.

        Args:
            model_id: Hub repository that contains ``CONFIG_FILENAME`` and
                ``CHECKPOINT_FILENAME``.
        """
        config = hf_hub_download(model_id, filename=CONFIG_FILENAME)
        ckpt = hf_hub_download(model_id, filename=CHECKPOINT_FILENAME)
        self.model = init_model(config, ckpt, device="cpu")

    def __call__(self, inputs: Image.Image) -> List[Dict[str, Any]]:
        """Classify ``inputs`` and return the scored labels.

        Args:
            inputs: The image to classify.

        Returns:
            The list produced by ``inference_model_hf``: one
            ``{"score": float, "label": ...}`` dict per class, best first.
        """
        # An image opened from a path carries that path in `filename`; keep
        # handing it to the file-based loader exactly as before.
        source_path = getattr(inputs, "filename", "")
        if source_path:
            return inference_model_hf(self.model, source_path)

        # An image decoded from in-memory bytes has no usable filename, so
        # there is nothing on disk to load: feed the pixels directly instead.
        import numpy as np

        # NOTE(review): the channels are flipped RGB -> BGR on the assumption
        # that the test pipeline expects the same layout as mmcv's file
        # loader (BGR by default) -- confirm against the model config.
        rgb_pixels = np.array(inputs.convert("RGB"))
        bgr_pixels = np.ascontiguousarray(rgb_pixels[:, :, ::-1])
        return inference_model_hf(self.model, bgr_pixels)
|
docker/mmclassification/prestart.sh
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python app/main.py
|
docker/mmclassification/requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
starlette==0.25.0
|
2 |
+
api-inference-community==0.0.25
|
3 |
+
huggingface_hub==0.11.0
|
4 |
+
mmcv==1.7.0
|
5 |
+
mmcls==0.25.0
|
docker/mmclassification/tests/__init__.py
ADDED
File without changes
|
docker/mmclassification/tests/samples/malformed.flac
ADDED
Binary file (1.02 kB). View file
|
|
docker/mmclassification/tests/samples/plane.jpg
ADDED
docker/mmclassification/tests/samples/plane2.jpg
ADDED
docker/mmclassification/tests/samples/sample1.flac
ADDED
Binary file (282 kB). View file
|
|
docker/mmclassification/tests/samples/sample1.webm
ADDED
Binary file (135 kB). View file
|
|
docker/mmclassification/tests/samples/sample1_dual.ogg
ADDED
Binary file (83.3 kB). View file
|
|
docker/mmclassification/tests/test_api.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import Dict
|
3 |
+
from unittest import TestCase, skipIf
|
4 |
+
|
5 |
+
from app.main import ALLOWED_TASKS, get_pipeline
|
6 |
+
|
7 |
+
|
8 |
+
# Must contain at least one example of each implemented pipeline
|
9 |
+
# Tests do not check the actual values of the model output, so small dummy
|
10 |
+
# models are recommended for faster tests.
|
11 |
+
TESTABLE_MODELS: Dict[str, str] = {
|
12 |
+
"image-classification": "porntech/outdoor"
|
13 |
+
}
|
14 |
+
|
15 |
+
|
16 |
+
# Task names the test-suite knows about. `test_unsupported_tasks` checks that
# every one of them that is absent from ALLOWED_TASKS is rejected.
# Each name is listed once; repeating an entry in a set literal has no effect.
ALL_TASKS = {
    "audio-classification",
    "audio-to-audio",
    "automatic-speech-recognition",
    "feature-extraction",
    "image-classification",
    "question-answering",
    "sentence-similarity",
    "speech-segmentation",
    "tabular-classification",
    "tabular-regression",
    "text-to-image",
    "text-to-speech",
    "token-classification",
    "conversational",
    "fill-mask",
    "table-question-answering",
    "summarization",
    "text2text-generation",
    "text-classification",
    "zero-shot-classification",
}
|
40 |
+
|
41 |
+
|
42 |
+
class PipelineTestCase(TestCase):
    """Checks on the task registry (``ALLOWED_TASKS``) and on ``get_pipeline``."""

    def setUp(self):
        # test_unsupported_tasks overwrites TASK and MODEL_ID; remember the
        # current values so tearDown can put them back.
        self._saved_env = {
            name: os.environ.get(name) for name in ("TASK", "MODEL_ID")
        }
        # get_pipeline is memoized: a pipeline cached by an earlier test would
        # be returned as-is and the expected EnvironmentError would never fire.
        get_pipeline.cache_clear()

    def tearDown(self):
        # Restore the environment exactly as it was before the test ran.
        for name, previous in self._saved_env.items():
            if previous is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = previous

    @skipIf(
        os.path.dirname(os.path.dirname(__file__)).endswith("common"),
        "common is a special case",
    )
    def test_has_at_least_one_task_enabled(self):
        """At least one task must be registered in ``ALLOWED_TASKS``."""
        self.assertGreater(
            len(ALLOWED_TASKS), 0, "You need to implement at least one task"
        )

    def test_unsupported_tasks(self):
        """Every known task missing from ``ALLOWED_TASKS`` must be rejected."""
        unsupported_tasks = ALL_TASKS - ALLOWED_TASKS.keys()
        # Sorted so the sub-tests run in a reproducible order.
        for unsupported_task in sorted(unsupported_tasks):
            with self.subTest(msg=unsupported_task, task=unsupported_task):
                os.environ["TASK"] = unsupported_task
                os.environ["MODEL_ID"] = "XX"
                with self.assertRaises(EnvironmentError):
                    get_pipeline()
|
docker/mmclassification/tests/test_api_image_classification.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
from unittest import TestCase, skipIf
|
4 |
+
|
5 |
+
from app.main import ALLOWED_TASKS
|
6 |
+
from starlette.testclient import TestClient
|
7 |
+
from tests.test_api import TESTABLE_MODELS
|
8 |
+
|
9 |
+
|
10 |
+
@skipIf(
    "image-classification" not in ALLOWED_TASKS,
    "image-classification not implemented",
)
class ImageClassificationTestCase(TestCase):
    """Posts sample images to the app and checks the shape of the JSON answer."""

    @classmethod
    def setUpClass(cls):
        from app.main import get_pipeline

        # Forget any pipeline memoized before this test case started.
        get_pipeline.cache_clear()

    def setUp(self):
        model_id = TESTABLE_MODELS["image-classification"]
        # Keep the previous values so tearDown can restore them.
        self.old_model_id = os.getenv("MODEL_ID")
        self.old_task = os.getenv("TASK")
        os.environ["MODEL_ID"] = model_id
        os.environ["TASK"] = "image-classification"
        from app.main import app

        self.app = app

    def tearDown(self):
        # Put back (or remove) the variables overwritten by setUp.
        saved = (("MODEL_ID", self.old_model_id), ("TASK", self.old_task))
        for variable, previous in saved:
            if previous is None:
                del os.environ[variable]
            else:
                os.environ[variable] = previous

    def read(self, filename: str) -> bytes:
        """Return the raw bytes of ``filename`` from the ``samples`` directory."""
        here = os.path.dirname(os.path.abspath(__file__))
        sample_path = os.path.join(here, "samples", filename)
        with open(sample_path, "rb") as sample:
            return sample.read()

    def _post_sample(self, sample_name):
        """POST a sample image and return the decoded list-of-dicts response."""
        bpayload = self.read(sample_name)

        with TestClient(self.app) as client:
            response = client.post("/", data=bpayload)

        self.assertEqual(response.status_code, 200)
        content = json.loads(response.content)
        self.assertEqual(type(content), list)
        self.assertEqual(set(type(el) for el in content), {dict})
        return content

    def test_simple(self):
        content = self._post_sample("plane.jpg")
        key_types = set((k, type(v)) for el in content for (k, v) in el.items())
        self.assertEqual(key_types, {("label", str), ("score", float)})

    def test_different_resolution(self):
        content = self._post_sample("plane2.jpg")
        seen_keys = set(k for el in content for k in el.keys())
        self.assertEqual(seen_keys, {"label", "score"})
|
docker/mmclassification/tests/test_docker_build.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import subprocess
|
3 |
+
from unittest import TestCase
|
4 |
+
|
5 |
+
|
6 |
+
class cd:
    """Context manager for changing the current working directory.

    The working directory in effect when the block is entered is restored
    when the block exits, whether or not an exception was raised.
    """

    def __init__(self, newPath):
        """Remember the target directory; a leading ``~`` is expanded."""
        self.newPath = os.path.expanduser(newPath)

    def __enter__(self):
        """Switch to the target directory and return this manager."""
        self.savedPath = os.getcwd()
        os.chdir(self.newPath)
        # Returning self lets `with cd(path) as ctx:` bind something useful
        # instead of None.
        return self

    def __exit__(self, etype, value, traceback):
        """Go back to the directory that was current on entry."""
        os.chdir(self.savedPath)
|
18 |
+
|
19 |
+
|
20 |
+
class DockerBuildTestCase(TestCase):
    """Checks that the Docker image of this directory can be built."""

    def test_can_build_docker_image(self):
        """Run ``docker build .`` from the parent directory of ``tests``."""
        # Resolve __file__ first: when it is a relative path, taking dirname
        # twice can yield "" and changing into it would fail.
        project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        with cd(project_dir):
            # check_output raises CalledProcessError on a non-zero exit code,
            # which is what makes the test fail when the build breaks.
            subprocess.check_output(["docker", "build", "."])
|