porntech committed on
Commit 00982f7 · 1 Parent(s): f807d8d

Upload 17 files
docker/mmclassification/Dockerfile ADDED
@@ -0,0 +1,29 @@
+ FROM tiangolo/uvicorn-gunicorn:python3.8
+ LABEL maintainer="me <me@example.com>"
+
+ # Add any system dependency here
+ # RUN apt-get update -y && apt-get install libXXX -y
+
+ COPY ./requirements.txt /app
+ RUN pip install --no-cache-dir -r requirements.txt
+ COPY ./prestart.sh /app/
+
+
+ # Most DL models are quite large in terms of memory, so using multiple workers
+ # is a HUGE slowdown because of the fork and the Python GIL.
+ # Using multiple pods seems like a better default strategy.
+ # Feel free to override if it does not make sense for your library.
+ ARG max_workers=1
+ ENV MAX_WORKERS=$max_workers
+ ENV HUGGINGFACE_HUB_CACHE=/data
+
+ # Necessary on GPU docker environments.
+ # The TIMEOUT env variable is used by nvcr.io/nvidia/pytorch:xx for another
+ # purpose, which makes the TIMEOUT variable read by uvicorn impossible to
+ # use correctly, so we rename it to UVICORN_TIMEOUT.
+ # UVICORN_TIMEOUT is a useful variable for very large models that take more
+ # than 30s (the default) to load in memory.
+ # If UVICORN_TIMEOUT is too low, the server will simply never come up, as
+ # workers are killed before they finish loading.
+ RUN sed -i 's/TIMEOUT/UVICORN_TIMEOUT/g' /gunicorn_conf.py
+ COPY ./app /app/app
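
For context, a minimal sketch of how this image might be built and run; the image tag is a placeholder, and the TASK/MODEL_ID values are the ones used by the tests below, not something fixed by the Dockerfile (the tiangolo/uvicorn-gunicorn base image serves on port 80 by default):

docker build -t mmclassification-api --build-arg max_workers=1 .
docker run -p 80:80 -e TASK=image-classification -e MODEL_ID=porntech/outdoor mmclassification-api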
docker/mmclassification/app/__init__.py ADDED
File without changes
docker/mmclassification/app/main.py ADDED
@@ -0,0 +1,91 @@
+ import functools
+ import logging
+ import os
+ from typing import Dict, Type
+
+ from api_inference_community.routes import pipeline_route, status_ok
+ from app.pipelines import ImageClassificationPipeline, Pipeline
+ from starlette.applications import Starlette
+ from starlette.middleware import Middleware
+ from starlette.middleware.gzip import GZipMiddleware
+ from starlette.routing import Route
+
+
+ TASK = os.getenv("TASK")
+ MODEL_ID = os.getenv("MODEL_ID")
+
+
+ logger = logging.getLogger(__name__)
+
+
+ # Add the allowed tasks
+ # Supported tasks are:
+ # - text-generation
+ # - text-classification
+ # - token-classification
+ # - translation
+ # - summarization
+ # - automatic-speech-recognition
+ # - ...
+ # For instance
+ # from app.pipelines import AutomaticSpeechRecognitionPipeline
+ # ALLOWED_TASKS = {"automatic-speech-recognition": AutomaticSpeechRecognitionPipeline}
+ # You can check the requirements and expectations of each pipeline in its
+ # respective directory. Implement your pipeline directly within those directories.
+ ALLOWED_TASKS: Dict[str, Type[Pipeline]] = {
+     "image-classification": ImageClassificationPipeline
+ }
+
+
+ @functools.lru_cache()
+ def get_pipeline() -> Pipeline:
+     task = os.environ["TASK"]
+     model_id = os.environ["MODEL_ID"]
+     if task not in ALLOWED_TASKS:
+         raise EnvironmentError(f"{task} is not a valid pipeline for model: {model_id}")
+     return ALLOWED_TASKS[task](model_id)
+
+
+ routes = [
+     Route("/{whatever:path}", status_ok),
+     Route("/{whatever:path}", pipeline_route, methods=["POST"]),
+ ]
+
+ middleware = [Middleware(GZipMiddleware, minimum_size=1000)]
+ if os.environ.get("DEBUG", "") == "1":
+     from starlette.middleware.cors import CORSMiddleware
+
+     middleware.append(
+         Middleware(
+             CORSMiddleware,
+             allow_origins=["*"],
+             allow_headers=["*"],
+             allow_methods=["*"],
+         )
+     )
+
+ app = Starlette(routes=routes, middleware=middleware)
+
+
+ @app.on_event("startup")
+ async def startup_event():
+     logger = logging.getLogger("uvicorn.access")
+     handler = logging.StreamHandler()
+     handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
+     logger.handlers = [handler]
+
+     # Link between `api-inference-community` and framework code.
+     app.get_pipeline = get_pipeline
+     try:
+         get_pipeline()
+     except Exception:
+         # We can fail here so that the exception can be shown later.
+         pass
+
+
+ if __name__ == "__main__":
+     try:
+         get_pipeline()
+     except Exception:
+         # We can fail here so that the exception can be shown later.
+         pass
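
As a usage sketch (not part of the commit): the catch-all POST route accepts raw image bytes, so a running container can be queried as below. The host and port are assumptions, and plane.jpg is one of the test samples.

import requests

# Assumes the container built from the Dockerfile above is serving on localhost:80.
with open("tests/samples/plane.jpg", "rb") as f:
    response = requests.post("http://localhost:80/", data=f.read())
# Expected shape, per ImageClassificationPipeline: a list of
# {"label": str, "score": float} dicts sorted by descending score.
print(response.json())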
docker/mmclassification/app/pipelines/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from app.pipelines.base import Pipeline, PipelineException  # isort:skip
+
+ from app.pipelines.image_classification import ImageClassificationPipeline
docker/mmclassification/app/pipelines/image_classification.py ADDED
@@ -0,0 +1,58 @@
+ from typing import Any, Dict, List
+
+ import numpy as np
+ import torch
+ from app.pipelines import Pipeline
+ from huggingface_hub import hf_hub_download
+ from mmcv.parallel import collate, scatter
+ from PIL import Image
+
+ from mmcls.apis import init_model
+ from mmcls.datasets.pipelines import Compose
+
+ CONFIG_FILENAME = "config.py"
+ CHECKPOINT_FILENAME = "model.pth"
+
+
+ def inference_model_hf(model, img):
+     cfg = model.cfg
+     device = next(model.parameters()).device  # model device
+     # build the data pipeline
+     if isinstance(img, str):
+         if cfg.data.test.pipeline[0]["type"] != "LoadImageFromFile":
+             cfg.data.test.pipeline.insert(0, dict(type="LoadImageFromFile"))
+         data = dict(img_info=dict(filename=img), img_prefix=None)
+     else:
+         if cfg.data.test.pipeline[0]["type"] == "LoadImageFromFile":
+             cfg.data.test.pipeline.pop(0)
+         data = dict(img=img)
+     test_pipeline = Compose(cfg.data.test.pipeline)
+     data = test_pipeline(data)
+     data = collate([data], samples_per_gpu=1)
+     if next(model.parameters()).is_cuda:
+         # scatter to specified GPU
+         data = scatter(data, [device])[0]
+
+     # forward the model
+     with torch.no_grad():
+         scores = model(return_loss=False, **data)
+     result = [
+         {"score": float(scores[0][i]), "label": model.CLASSES[i]}
+         for i in range(len(model.CLASSES))
+     ]
+     return sorted(result, key=lambda x: x["score"], reverse=True)
+
+
+ class ImageClassificationPipeline(Pipeline):
+     def __init__(self, model_id: str):
+         config = hf_hub_download(model_id, filename=CONFIG_FILENAME)
+         ckpt = hf_hub_download(model_id, filename=CHECKPOINT_FILENAME)
+         self.model = init_model(config, ckpt, device="cpu")
+
+     def __call__(self, inputs: Image.Image) -> List[Dict[str, Any]]:
+         # `inputs` is decoded from the request body, so it usually has no
+         # backing file on disk; pass pixel data instead of `inputs.filename`.
+         # (mmcv works with BGR arrays, hence the channel flip.)
+         img = np.ascontiguousarray(np.asarray(inputs.convert("RGB"))[:, :, ::-1])
+         labels = inference_model_hf(self.model, img)
+         return labels
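
A hedged sketch of exercising the pipeline directly, outside the server; the model id is the one the tests use, and this assumes the mmcv/mmcls dependencies from requirements.txt are installed:

from PIL import Image

from app.pipelines import ImageClassificationPipeline

# Downloads config.py and model.pth from the porntech/outdoor repo on the Hub.
pipe = ImageClassificationPipeline("porntech/outdoor")
preds = pipe(Image.open("tests/samples/plane.jpg"))
print(preds[0])  # best label first, shaped like {"score": ..., "label": ...}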
docker/mmclassification/prestart.sh ADDED
@@ -0,0 +1 @@
+ python app/main.py
docker/mmclassification/requirements.txt ADDED
@@ -0,0 +1,5 @@
+ starlette==0.25.0
+ api-inference-community==0.0.25
+ huggingface_hub==0.11.0
+ mmcv==1.7.0
+ mmcls==0.25.0
docker/mmclassification/tests/__init__.py ADDED
File without changes
docker/mmclassification/tests/samples/malformed.flac ADDED
Binary file (1.02 kB)
docker/mmclassification/tests/samples/plane.jpg ADDED
docker/mmclassification/tests/samples/plane2.jpg ADDED
docker/mmclassification/tests/samples/sample1.flac ADDED
Binary file (282 kB)
docker/mmclassification/tests/samples/sample1.webm ADDED
Binary file (135 kB)
docker/mmclassification/tests/samples/sample1_dual.ogg ADDED
Binary file (83.3 kB)
docker/mmclassification/tests/test_api.py ADDED
@@ -0,0 +1,57 @@
+ import os
+ from typing import Dict
+ from unittest import TestCase, skipIf
+
+ from app.main import ALLOWED_TASKS, get_pipeline
+
+
+ # Must contain at least one example of each implemented pipeline
+ # Tests do not check the actual values of the model output, so small dummy
+ # models are recommended for faster tests.
+ TESTABLE_MODELS: Dict[str, str] = {
+     "image-classification": "porntech/outdoor"
+ }
+
+
+ ALL_TASKS = {
+     "audio-classification",
+     "audio-to-audio",
+     "automatic-speech-recognition",
+     "feature-extraction",
+     "image-classification",
+     "question-answering",
+     "sentence-similarity",
+     "speech-segmentation",
+     "tabular-classification",
+     "tabular-regression",
+     "text-to-image",
+     "text-to-speech",
+     "token-classification",
+     "conversational",
+     "fill-mask",
+     "table-question-answering",
+     "summarization",
+     "text2text-generation",
+     "text-classification",
+     "zero-shot-classification",
+ }
+
+
+ class PipelineTestCase(TestCase):
+     @skipIf(
+         os.path.dirname(os.path.dirname(__file__)).endswith("common"),
+         "common is a special case",
+     )
+     def test_has_at_least_one_task_enabled(self):
+         self.assertGreater(
+             len(ALLOWED_TASKS.keys()), 0, "You need to implement at least one task"
+         )
+
+     def test_unsupported_tasks(self):
+         unsupported_tasks = ALL_TASKS - ALLOWED_TASKS.keys()
+         for unsupported_task in unsupported_tasks:
+             with self.subTest(msg=unsupported_task, task=unsupported_task):
+                 os.environ["TASK"] = unsupported_task
+                 os.environ["MODEL_ID"] = "XX"
+                 with self.assertRaises(EnvironmentError):
+                     get_pipeline()
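
A hedged note on running these tests: nothing in the commit pins a test runner, but with the layout above, standard unittest discovery from docker/mmclassification should work:

python -m unittest discover

(The image-classification test case sets TASK and MODEL_ID itself in setUp, so no environment variables are required up front.)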
docker/mmclassification/tests/test_api_image_classification.py ADDED
@@ -0,0 +1,81 @@
+ import json
+ import os
+ from unittest import TestCase, skipIf
+
+ from app.main import ALLOWED_TASKS
+ from starlette.testclient import TestClient
+ from tests.test_api import TESTABLE_MODELS
+
+
+ @skipIf(
+     "image-classification" not in ALLOWED_TASKS,
+     "image-classification not implemented",
+ )
+ class ImageClassificationTestCase(TestCase):
+     def setUp(self):
+         model_id = TESTABLE_MODELS["image-classification"]
+         self.old_model_id = os.getenv("MODEL_ID")
+         self.old_task = os.getenv("TASK")
+         os.environ["MODEL_ID"] = model_id
+         os.environ["TASK"] = "image-classification"
+         from app.main import app
+
+         self.app = app
+
+     @classmethod
+     def setUpClass(cls):
+         from app.main import get_pipeline
+
+         get_pipeline.cache_clear()
+
+     def tearDown(self):
+         if self.old_model_id is not None:
+             os.environ["MODEL_ID"] = self.old_model_id
+         else:
+             del os.environ["MODEL_ID"]
+         if self.old_task is not None:
+             os.environ["TASK"] = self.old_task
+         else:
+             del os.environ["TASK"]
+
+     def read(self, filename: str) -> bytes:
+         dirname = os.path.dirname(os.path.abspath(__file__))
+         filename = os.path.join(dirname, "samples", filename)
+         with open(filename, "rb") as f:
+             bpayload = f.read()
+         return bpayload
+
+     def test_simple(self):
+         bpayload = self.read("plane.jpg")
+
+         with TestClient(self.app) as client:
+             response = client.post("/", data=bpayload)
+
+         self.assertEqual(
+             response.status_code,
+             200,
+         )
+         content = json.loads(response.content)
+         self.assertEqual(type(content), list)
+         self.assertEqual(set(type(el) for el in content), {dict})
+         self.assertEqual(
+             set((k, type(v)) for el in content for (k, v) in el.items()),
+             {("label", str), ("score", float)},
+         )
+
+     def test_different_resolution(self):
+         bpayload = self.read("plane2.jpg")
+
+         with TestClient(self.app) as client:
+             response = client.post("/", data=bpayload)
+
+         self.assertEqual(
+             response.status_code,
+             200,
+         )
+         content = json.loads(response.content)
+         self.assertEqual(type(content), list)
+         self.assertEqual(set(type(el) for el in content), {dict})
+         self.assertEqual(
+             set(k for el in content for k in el.keys()), {"label", "score"}
+         )
docker/mmclassification/tests/test_docker_build.py ADDED
@@ -0,0 +1,23 @@
+ import os
+ import subprocess
+ from unittest import TestCase
+
+
+ class cd:
+     """Context manager for changing the current working directory"""
+
+     def __init__(self, newPath):
+         self.newPath = os.path.expanduser(newPath)
+
+     def __enter__(self):
+         self.savedPath = os.getcwd()
+         os.chdir(self.newPath)
+
+     def __exit__(self, etype, value, traceback):
+         os.chdir(self.savedPath)
+
+
+ class DockerBuildTestCase(TestCase):
+     def test_can_build_docker_image(self):
+         with cd(os.path.dirname(os.path.dirname(__file__))):
+             subprocess.check_output(["docker", "build", "."])