hdnh2006 committed on
Commit
539ede7
1 Parent(s): 4f267c9

handler uses LlamaForCausalLM

Browse files
Files changed (1) hide show
  1. handler.py +5 -2
handler.py CHANGED
@@ -13,7 +13,7 @@ for text generation, leveraging the capabilities of the Llama 2 model.
13
  """
14
 
15
  import torch
16
- from transformers import pipeline, BitsAndBytesConfig
17
  from typing import Dict, List, Any
18
  import logging
19
  import sys
@@ -51,7 +51,10 @@ class EndpointHandler:
51
  bnb_4bit_compute_dtype=torch.bfloat16
52
  )
53
 
54
- self.pipeline = pipeline('text-generation', model=path, quantization_config=self.bnb_config)
 
 
 
55
 
56
 
57
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
 
13
  """
14
 
15
  import torch
16
+ from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline, BitsAndBytesConfig
17
  from typing import Dict, List, Any
18
  import logging
19
  import sys
 
51
  bnb_4bit_compute_dtype=torch.bfloat16
52
  )
53
 
54
+ tokenizer = LlamaTokenizer.from_pretrained(path)
55
+ model = LlamaForCausalLM.from_pretrained(path, device_map=0, quantization_config=self.bnb_config)
56
+
57
+ self.pipeline = pipeline('text-generation', model=model, tokenizer=tokenizer)
58
 
59
 
60
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]: