ammarnasr
/

codegen-350M-mono-java

Text Generation

Inference Endpoints

Model card | Files and versions | Community

ammarnasr committed on Dec 9, 2023

Commit

f2fdfe8

•

1 Parent(s): 1366ad0

handler

Files changed (1) hide show

handler.py +3 -2

handler.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftConfig, PeftModel
 import torch.cuda
 from typing import Any, Dict
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -7,7 +8,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 class EndpointHandler():
     def __init__(self, path=""):
         config = PeftConfig.from_pretrained(path)
-        model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, load_in_8bit=True, device_map='auto')
         self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
         # Load the Lora model
         self.model = PeftModel.from_pretrained(model, path)

 from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftConfig
+from peft import PeftModel
 import torch.cuda
 from typing import Any, Dict
 device = "cuda" if torch.cuda.is_available() else "cpu"
 class EndpointHandler():
     def __init__(self, path=""):
         config = PeftConfig.from_pretrained(path)
+        model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
         self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
         # Load the Lora model
         self.model = PeftModel.from_pretrained(model, path)