fwittel committed on
Commit
22bc6be
1 Parent(s): d4725fd

switch to AutoModelForCausalLM

Browse files
Files changed (2) hide show
  1. handler.py +6 -6
  2. test.py +0 -13
handler.py CHANGED
@@ -1,21 +1,21 @@
1
  import torch
2
  from typing import Dict, List, Any
3
- from transformers import AutoModel, AutoTokenizer, pipeline
4
 
5
  # check for GPU
6
  device = 0 if torch.cuda.is_available() else -1
7
 
 
8
  class EndpointHandler:
9
  def __init__(self, path=""):
10
  # load the model
11
  tokenizer = AutoTokenizer.from_pretrained(path)
12
- model = AutoModel.from_pretrained(path, low_cpu_mem_usage=True)
 
 
13
  # create inference pipeline
14
- # Do I have to check device?
15
- self.pipeline = pipeline(
16
- "text-generation", model=model, tokenizer=tokenizer, device=device)
17
 
18
- # (Might have to adjust typing)
19
  def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
20
  inputs = data.pop("inputs", data)
21
  parameters = data.pop("parameters", None)
 
1
  import torch
2
  from typing import Dict, List, Any
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
 
5
  # check for GPU
6
  device = 0 if torch.cuda.is_available() else -1
7
 
8
+
9
  class EndpointHandler:
10
  def __init__(self, path=""):
11
  # load the model
12
  tokenizer = AutoTokenizer.from_pretrained(path)
13
+ # model = AutoModel.from_pretrained(path, low_cpu_mem_usage=True)
14
+ model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True)
15
+ # model = AutoModelForSeq2SeqLM.from_pretrained(path, low_cpu_mem_usage=True)
16
  # create inference pipeline
17
+ self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
 
 
18
 
 
19
  def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
20
  inputs = data.pop("inputs", data)
21
  parameters = data.pop("parameters", None)
test.py DELETED
@@ -1,13 +0,0 @@
1
- from handler import EndpointHandler
2
-
3
- # init handler
4
- my_handler = EndpointHandler(path=".")
5
-
6
- # prepare sample payload
7
- payload = {"inputs": "I am Bob and I want to "}
8
-
9
- # test the handler
10
- pred=my_handler(payload)
11
-
12
- # show results
13
- print("pred", pred)