fthor committed on
Commit 4b86165
1 Parent(s): 4f40293

Added handler.py and requirements.txt

Files changed (3)
  1. handler.py +146 -0
  2. requirements.txt +153 -0
  3. test_handler.py +63 -0
handler.py ADDED
@@ -0,0 +1,146 @@
+ import io
+ from typing import Dict, List, Any
+
+ import torch
+ from transformers import AutoProcessor, BitsAndBytesConfig, LlavaForConditionalGeneration
+ from sentence_transformers import SentenceTransformer
+ from PIL import Image
+
+
+ def _fake_generate(n: int = 3):
+     # Deterministic stand-in for model.generate() so the handler can be exercised without a GPU.
+     generate = list()
+     for _ in range(n):
+         generate.append(torch.IntTensor([103, 23, 48, 498, 536]))
+     return torch.stack(generate, dim=0)
+
+
+ class EndpointHandler:
+     def __init__(self, use_cuda: bool = False, test_mode: bool = False):
+         # Preload all the elements needed at inference time.
+         self.test_mode = test_mode
+         # Upper bound on the total number of pixel values that fits in one generate() call on a 16GB T4.
+         self.MAXIMUM_PIXEL_VALUES = 3725568
+         self.quantization_config = BitsAndBytesConfig(
+             load_in_4bit=True,
+             bnb_4bit_compute_dtype=torch.float16
+         )
+
+         self.embedder = SentenceTransformer('all-mpnet-base-v2')
+         self.model_id = "llava-hf/llava-1.5-7b-hf"
+         self.processor = AutoProcessor.from_pretrained(self.model_id)
+         if use_cuda:
+             self.model = LlavaForConditionalGeneration.from_pretrained(
+                 self.model_id,
+                 quantization_config=self.quantization_config,
+                 device_map="auto",
+                 low_cpu_mem_usage=True,
+             )
+         else:
+             # Testing without a CUDA device does not allow quantization.
+             self.model = LlavaForConditionalGeneration.from_pretrained(
+                 self.model_id,
+                 device_map="auto",
+                 low_cpu_mem_usage=True,
+             )
+
+     def text_to_image(self, image_batch, prompt):
+         prompt = f'USER: <image>\n{prompt}\nASSISTANT:'
+         prompt_batch = [prompt for _ in range(len(image_batch))]
+
+         inputs = self.processor(prompt_batch, images=image_batch, padding=True, return_tensors="pt")
+
+         batched_inputs: List[Dict[str, torch.Tensor]] = list()
+         if inputs['pixel_values'].flatten().shape[0] > self.MAXIMUM_PIXEL_VALUES:
+             # Too many pixels for a single forward pass: split the inputs into smaller batches.
+             batch = dict(input_ids=list(), attention_mask=list(), pixel_values=list())
+             i = 0
+             while i < len(inputs['pixel_values']):
+                 batch['input_ids'].append(inputs['input_ids'][i])
+                 batch['attention_mask'].append(inputs['attention_mask'][i])
+                 batch['pixel_values'].append(inputs['pixel_values'][i])
+
+                 if torch.cat(batch['pixel_values'], dim=0).flatten().shape[0] > self.MAXIMUM_PIXEL_VALUES:
+                     print(f'[{i}/{len(inputs["pixel_values"])}] - Reached max pixel values for batch prediction '
+                           f'on a T4 16GB GPU. Splitting into more batches.')
+                     # Remove the last added image: it pushes the batch over the limit and is retried
+                     # in the next batch.
+                     batch['input_ids'].pop()
+                     batch['attention_mask'].pop()
+                     batch['pixel_values'].pop()
+
+                     # Transform the lists into tensors.
+                     batch['input_ids'] = torch.stack(batch['input_ids'], dim=0)
+                     batch['attention_mask'] = torch.stack(batch['attention_mask'], dim=0)
+                     batch['pixel_values'] = torch.stack(batch['pixel_values'], dim=0)
+
+                     # Add to the batched_inputs and start a new batch.
+                     batched_inputs.append(batch)
+                     batch = dict(input_ids=list(), attention_mask=list(), pixel_values=list())
+                 else:
+                     i += 1
+                     if i >= len(inputs['pixel_values']) and len(batch['input_ids']) > 0:
+                         batch['input_ids'] = torch.stack(batch['input_ids'], dim=0)
+                         batch['attention_mask'] = torch.stack(batch['attention_mask'], dim=0)
+                         batch['pixel_values'] = torch.stack(batch['pixel_values'], dim=0)
+
+                         # Add the final, partially filled batch.
+                         batched_inputs.append(batch)
+                         batch = dict(input_ids=list(), attention_mask=list(), pixel_values=list())
+         else:
+             batched_inputs.append(inputs)
+
+         maurice_description = list()
+         maurice_embeddings = list()
+         for batch in batched_inputs:
+             # Load the batch on the model's device.
+             batch['input_ids'] = batch['input_ids'].to(self.model.device)
+             batch['attention_mask'] = batch['attention_mask'].to(self.model.device)
+             batch['pixel_values'] = batch['pixel_values'].to(self.model.device)
+             # output = self.model.generate(**batch, max_new_tokens=500, temperature=0.3)
+             if self.test_mode:
+                 output = _fake_generate(n=len(batch['input_ids']))
+             else:
+                 output = self.model.generate(**batch, max_new_tokens=500)
+             # Move the batch back to CPU to free GPU memory (`.to` returns a new tensor, so reassign).
+             batch['input_ids'] = batch['input_ids'].to('cpu')
+             batch['attention_mask'] = batch['attention_mask'].to('cpu')
+             batch['pixel_values'] = batch['pixel_values'].to('cpu')
+
+             generated_text = self.processor.batch_decode(output, skip_special_tokens=True)
+             output = output.to('cpu')
+
+             for text in generated_text:
+                 text_output = text.split("ASSISTANT:")[-1]
+                 text_embeddings = self.embedder.encode(text_output)
+                 maurice_description.append(text_output)
+                 maurice_embeddings.append(text_embeddings)
+
+         return maurice_description, maurice_embeddings
+
+     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+         """
+         data args:
+             inputs (:obj:`list` of :obj:`bytes`): raw encoded images
+             prompt (:obj:`str`): instruction applied to every image
+         Return:
+             A :obj:`list` of :obj:`dict` with one description/embedding pair per image,
+             serialized and returned to the caller.
+         """
+         images = data['inputs']
+         prompt = data['prompt']
+
+         pil_images = list()
+         for image in images:
+             pil_images.append(Image.open(io.BytesIO(image)))
+
+         output_text, output_embedded = self.text_to_image(pil_images, prompt)
+
+         result = list()
+         for text, embed in zip(output_text, output_embedded):
+             result.append(
+                 dict(
+                     maurice_description=text,
+                     maurice_embedding=embed
+                 )
+             )
+         return result
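The handler returns one maurice_description / maurice_embedding pair per input image, with embeddings produced by all-mpnet-base-v2, so they can be compared downstream with cosine similarity. A minimal sketch of such a comparison (the `results` variable and the choice of comparing the first two entries are only illustrative, not part of this commit):

    from sentence_transformers import util

    # `results` is assumed to hold the list returned by EndpointHandler.__call__ for two or more images.
    emb_a = results[0]['maurice_embedding']
    emb_b = results[1]['maurice_embedding']

    # cos_sim accepts numpy arrays or tensors and returns a 1x1 similarity matrix here.
    similarity = util.cos_sim(emb_a, emb_b)
    print(float(similarity))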
requirements.txt ADDED
@@ -0,0 +1,153 @@
+ accelerate==0.25.0
+ aiofiles==23.2.1
+ altair==5.2.0
+ annotated-types==0.6.0
+ anyio==3.7.1
+ appnope==0.1.3
+ argon2-cffi==23.1.0
+ argon2-cffi-bindings==21.2.0
+ arrow==1.3.0
+ asttokens==2.4.1
+ async-lru==2.0.4
+ attrs==23.1.0
+ Babel==2.14.0
+ beautifulsoup4==4.12.2
+ bitsandbytes==0.41.3.post2
+ bleach==6.1.0
+ certifi==2023.11.17
+ cffi==1.16.0
+ charset-normalizer==3.3.2
+ click==8.1.7
+ colorama==0.4.6
+ comm==0.2.0
+ contourpy==1.2.0
+ cycler==0.12.1
+ debugpy==1.8.0
+ decorator==5.1.1
+ defusedxml==0.7.1
+ executing==2.0.1
+ fastapi==0.105.0
+ fastjsonschema==2.19.0
+ ffmpy==0.3.1
+ filelock==3.13.1
+ fonttools==4.46.0
+ fqdn==1.5.1
+ fsspec==2023.12.2
+ gradio==4.10.0
+ gradio_client==0.7.3
+ h11==0.14.0
+ httpcore==1.0.2
+ httpx==0.25.2
+ huggingface-hub==0.19.4
+ idna==3.6
+ importlib-resources==6.1.1
+ ipykernel==6.27.1
+ ipython==8.18.1
+ ipywidgets==8.1.1
+ isoduration==20.11.0
+ jedi==0.19.1
+ Jinja2==3.1.2
+ json5==0.9.14
+ jsonpointer==2.4
+ jsonschema==4.20.0
+ jsonschema-specifications==2023.11.2
+ jupyter==1.0.0
+ jupyter-console==6.6.3
+ jupyter-events==0.9.0
+ jupyter-lsp==2.2.1
+ jupyter_client==8.6.0
+ jupyter_core==5.5.1
+ jupyter_server==2.12.1
+ jupyter_server_terminals==0.5.0
+ jupyterlab==4.0.9
+ jupyterlab-widgets==3.0.9
+ jupyterlab_pygments==0.3.0
+ jupyterlab_server==2.25.2
+ kiwisolver==1.4.5
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.3
+ matplotlib==3.8.2
+ matplotlib-inline==0.1.6
+ mdurl==0.1.2
+ mistune==3.0.2
+ mpmath==1.3.0
+ nbclient==0.9.0
+ nbconvert==7.13.0
+ nbformat==5.9.2
+ nest-asyncio==1.5.8
+ networkx==3.2.1
+ notebook==7.0.6
+ notebook_shim==0.2.3
+ numpy==1.26.2
+ orjson==3.9.10
+ overrides==7.4.0
+ packaging==23.2
+ pandas==2.1.4
+ pandocfilters==1.5.0
+ parso==0.8.3
+ pexpect==4.9.0
+ Pillow==10.1.0
+ platformdirs==4.1.0
+ prometheus-client==0.19.0
+ prompt-toolkit==3.0.43
+ psutil==5.9.6
+ ptyprocess==0.7.0
+ pure-eval==0.2.2
+ pycparser==2.21
+ pydantic==2.5.2
+ pydantic_core==2.14.5
+ pydub==0.25.1
+ Pygments==2.17.2
+ pyparsing==3.1.1
+ python-dateutil==2.8.2
+ python-json-logger==2.0.7
+ python-multipart==0.0.6
+ pytz==2023.3.post1
+ PyYAML==6.0.1
+ pyzmq==25.1.2
+ qtconsole==5.5.1
+ QtPy==2.4.1
+ referencing==0.32.0
+ regex==2023.10.3
+ requests==2.31.0
+ rfc3339-validator==0.1.4
+ rfc3986-validator==0.1.1
+ rich==13.7.0
+ rpds-py==0.13.2
+ safetensors==0.4.1
+ scipy==1.11.4
+ semantic-version==2.10.0
+ Send2Trash==1.8.2
+ shellingham==1.5.4
+ six==1.16.0
+ sniffio==1.3.0
+ soupsieve==2.5
+ stack-data==0.6.3
+ starlette==0.27.0
+ sympy==1.12
+ terminado==0.18.0
+ tinycss2==1.2.1
+ tokenizers==0.15.0
+ tomlkit==0.12.0
+ toolz==0.12.0
+ torch==2.1.2
+ torchaudio==2.1.2
+ torchvision==0.16.2
+ tornado==6.4
+ tqdm==4.66.1
+ traitlets==5.14.0
+ transformers
+ typer==0.9.0
+ types-python-dateutil==2.8.19.14
+ typing_extensions==4.9.0
+ tzdata==2023.3
+ uri-template==1.3.0
+ urllib3==2.1.0
+ uvicorn==0.24.0.post1
+ wcwidth==0.2.12
+ webcolors==1.13
+ webencodings==0.5.1
+ websocket-client==1.7.0
+ websockets==11.0.3
+ widgetsnbextension==4.0.9
+ sentence_transformers
test_handler.py ADDED
@@ -0,0 +1,63 @@
+ from handler import EndpointHandler
+ from PIL import Image
+ import requests, json
+
+ # init handler
+ my_handler = EndpointHandler(use_cuda=False, test_mode=True)
+
+
+ # API_URL = "https://oncm9ojdmjwesag2.eu-west-1.aws.endpoints.huggingface.cloud"
+
+ # headers = {
+ #     "Authorization": "Bearer MY_API_TOKEN",
+ #     "Content-Type": "image/jpg"
+ # }
+
+ # def query(filename):
+ #     with open(filename, "rb") as f:
+ #         data = f.read()
+ #     response = requests.request("POST", API_URL, headers=headers, data=data)
+ #     return json.loads(response.content.decode("utf-8"))
+
+ # output = query("food.jpg")
+
+ # prepare sample payload
+ image_path = '/Users/francois/Documents/dev/Maurice/maurice/test_602.jpg'
+
+ with open(image_path, 'rb') as f:
+     img = f.read()
+
+ single_image = {
+     'inputs': [
+         img
+     ],
+     'prompt': 'Describe the image'
+ }
+
+ multiple_images = {
+     'inputs': [
+         img, img, img
+     ],
+     'prompt': 'Describe the image'
+ }
+
+ # test the handler
+ print(my_handler(single_image))
+ print(my_handler(multiple_images))
+
+
+ # non_holiday_payload = {"inputs": "I am quite excited how this will turn out", "date": "2022-08-08"}
+ # holiday_payload = {"inputs": "Today is a though day", "date": "2022-07-04"}
+ #
+ #
+ #
+ # # test the handler
+ # non_holiday_pred = my_handler(non_holiday_payload)
+ # holiday_payload = my_handler(holiday_payload)
+ #
+ # # show results
+ # print("non_holiday_pred", non_holiday_pred)
+ # print("holiday_payload", holiday_payload)
+ #
+ # # non_holiday_pred [{'label': 'joy', 'score': 0.9985942244529724}]
+ # # holiday_payload [{'label': 'happy', 'score': 1}]
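The local test reads raw bytes from a file on disk; the same payload shape can also be built entirely in memory, which is handy when no sample image is at hand. A small sketch reusing `my_handler` from above (the solid-color 256x256 JPEG is a made-up stand-in for a real photo):

    import io
    from PIL import Image

    # Encode an in-memory JPEG and pass its raw bytes, exactly like the bytes read from disk above.
    buffer = io.BytesIO()
    Image.new('RGB', (256, 256), color=(120, 180, 90)).save(buffer, format='JPEG')
    in_memory_payload = {'inputs': [buffer.getvalue()], 'prompt': 'Describe the image'}
    print(my_handler(in_memory_payload))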