fwittel committed on
Commit
0af1b4a
·
1 Parent(s): 9ff7017

Switch to AutoModelForSeq2SeqLM

Browse files
Files changed (1) hide show
  1. handler.py +4 -3
handler.py CHANGED
@@ -1,6 +1,7 @@
1
  import torch
2
  from typing import Dict, List, Any
3
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
4
 
5
  # check for GPU
6
  device = 0 if torch.cuda.is_available() else -1
@@ -11,8 +12,8 @@ class EndpointHandler:
11
  # load the model
12
  tokenizer = AutoTokenizer.from_pretrained(path)
13
  # model = AutoModel.from_pretrained(path, low_cpu_mem_usage=True)
14
- model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True)
15
- # model = AutoModelForSeq2SeqLM.from_pretrained(path, low_cpu_mem_usage=True)
16
  # create inference pipeline
17
  self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
18
 
 
1
  import torch
2
  from typing import Dict, List, Any
3
+ # from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
5
 
6
  # check for GPU
7
  device = 0 if torch.cuda.is_available() else -1
 
12
  # load the model
13
  tokenizer = AutoTokenizer.from_pretrained(path)
14
  # model = AutoModel.from_pretrained(path, low_cpu_mem_usage=True)
15
+ # model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True)
16
+ model = AutoModelForSeq2SeqLM.from_pretrained(path, low_cpu_mem_usage=True)
17
  # create inference pipeline
18
  self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
19