adbrebs committed on
Commit
c9c4226
1 Parent(s): fa2a93b

Upload 2 files

Files changed (2)
  1. handler.py +53 -0
  2. requirements.txt +1 -0
handler.py ADDED
@@ -0,0 +1,53 @@
+ import re
+ from typing import Any, Dict
+ from unsloth import FastLanguageModel
+
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         # Load the fine-tuned model and tokenizer once, at endpoint startup.
+         max_seq_length = 2048
+         dtype = None  # auto-detect (float16 / bfloat16)
+         load_in_4bit = True  # 4-bit quantization to reduce memory use
+         self.model, self.tokenizer = FastLanguageModel.from_pretrained(
+             model_name=path,  # the model you used for training
+             max_seq_length=max_seq_length,
+             dtype=dtype,
+             load_in_4bit=load_in_4bit,
+         )
+         FastLanguageModel.for_inference(self.model)  # enable native 2x faster inference
+
+     def __call__(self, data: Dict[str, Any]) -> str:
+         """
+         data args:
+             inputs (:obj:`list`): chat messages, e.g.
+                 [{"from": "human", "value": "What is a famous tall tower in Paris?"}]
+         Return:
+             A :obj:`str`: the decoded model reply, which is serialized and returned.
+         """
+         messages = data.pop("inputs", data)
+
+         inputs = self.tokenizer.apply_chat_template(
+             messages,
+             tokenize=True,
+             add_generation_prompt=True,  # must be added for generation
+             return_tensors="pt",
+         ).to("cuda")
+
+         outputs = self.model.generate(input_ids=inputs, max_new_tokens=1000, use_cache=True)
+         # batch_decode returns a list of strings; there is a single sequence here.
+         content = self.tokenizer.batch_decode(outputs)[0]
+
+         # Strip the echoed prompt ([INST] ... [/INST]) and the special tokens.
+         pattern = r'\[INST\].*?\[/INST\]'
+         content = re.sub(pattern, '', content, flags=re.DOTALL)
+         content = content.replace('<s>', '').replace('</s>', '').strip()
+
+         return content
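
For reference, a minimal local smoke test of this handler might look like the sketch below. The model id is a placeholder (not part of this commit), and a CUDA GPU is assumed, since the handler moves its inputs to "cuda":

from handler import EndpointHandler

# "your-account/your-model" is a hypothetical placeholder model id.
handler = EndpointHandler(path="your-account/your-model")
messages = [
    {"from": "human", "value": "What is a famous tall tower in Paris?"},
]
# The handler pops "inputs" from the payload and returns the decoded reply.
print(handler({"inputs": messages}))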
requirements.txt ADDED
@@ -0,0 +1 @@
+ unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
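
Once deployed as an Inference Endpoint, the handler receives the deserialized JSON request body as its data dict. A client-side sketch, assuming that setup (the endpoint URL and token below are placeholders, not values from this commit):

import requests

response = requests.post(
    "https://<endpoint-url>",  # placeholder for the deployed endpoint URL
    headers={
        "Authorization": "Bearer <hf-token>",  # placeholder access token
        "Content-Type": "application/json",
    },
    json={"inputs": [{"from": "human", "value": "What is a famous tall tower in Paris?"}]},
)
print(response.json())  # the cleaned generation returned by the handler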