YasaminAbb committed
Commit
43d5442
1 Parent(s): 1227d51

Update handler.py

Files changed (1)
  1. handler.py +4 -4
handler.py CHANGED
@@ -5,22 +5,22 @@ from peft import PeftConfig, PeftModel
 
 class EndpointHandler:
     def __init__(self,path=""):
+        dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
+
         bnb_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_use_double_quant=True,
             bnb_4bit_quant_type="nf4",
-            bnb_4bit_compute_dtype=torch.bfloat16
+            bnb_4bit_compute_dtype=dtype
         )
         config = PeftConfig.from_pretrained(path)
-        dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
         model = AutoModelForCausalLM.from_pretrained(
             config.base_model_name_or_path,
             return_dict=True,
             quantization_config=bnb_config,
-            device_map="auto" , #"auto",
+            device_map="auto" ,
             torch_dtype=dtype,
             trust_remote_code=True,
-            load_in_8bit=True
         )
         tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
         tokenizer.pad_token = tokenizer.eos_token
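
For readability, here is the handler setup as it stands after this commit, flattened into a single runnable sketch. It assumes torch, transformers, bitsandbytes and peft are installed, a CUDA GPU is available, and that `path` points at a real PEFT adapter (the value below is a placeholder). One deliberate difference: the capability check is written as `>= 8` here rather than the commit's `== 8`, so GPUs newer than Ampere also get bfloat16; that is a suggested tweak, not part of the commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftConfig

path = "your-peft-adapter"  # placeholder: repo id or local path of the PEFT adapter

# bfloat16 is only supported on compute capability >= 8 (Ampere and newer);
# older GPUs fall back to float16.
dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16

# 4-bit NF4 quantization; after this commit the compute dtype follows the GPU
# instead of being hard-coded to bfloat16, and the conflicting load_in_8bit
# flag is gone.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=dtype,
)

config = PeftConfig.from_pretrained(path)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=dtype,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token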