virginie-d committed
Commit b320464 · 1 Parent(s): e7a8a00

changing xformers

Files changed (1)
  1. handler.py +88 -7
handler.py CHANGED
@@ -1,10 +1,14 @@
-from typing import Dict, List, Any
-from transformers import pipeline
-import torch, PIL, transformers, triton, sentencepiece, protobuf
-import torchvision, einops
-import xformers, accelerate
-from transformers import AutoModelForCausalLM, LlamaTokenizer
+from typing import Dict, List, Any, Optional, Tuple, Literal
+# from transformers import pipeline
+import torch, PIL, triton, protobuf
+from torchvision import transforms
+# import torchvision, einops
+# import xformers, accelerate
+from transformers import AutoModelForCausalLM, LlamaTokenizer, PretrainedConfig
 
+LANGUAGE_TOKEN_TYPE = 0
+VISION_TOKEN_TYPE = 1
+config = PretrainedConfig.from_json_file('config.json')
 
 class EndpointHandler():
     def __init__(self, path=""):
@@ -17,7 +21,7 @@ class EndpointHandler():
         )
         self.tokenizer = LlamaTokenizer.from_pretrained('lmsys/vicuna-7b-v1.5')
         # create inference pipeline
-        # self.pipeline = pipeline(model=model, tokenizer=tokenizer)
+        # self.pipeline = pipeline("text-generation", model="THUDM/cogvlm-chat-hf", trust_remote_code=True)
 
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
@@ -29,9 +33,86 @@ class EndpointHandler():
             - "label": A string representing what the label/class is. There can be multiple labels.
             - "score": A score between 0 and 1 describing how confident the model is for this label/class.
         """
+
+        def _history_to_prompt(signal_type, history, query):
+            if signal_type == 'base':
+                return query
+            elif signal_type == 'vqa':
+                answer_format = 'Short answer:'
+            elif signal_type == 'chat':
+                answer_format = 'Answer:'
+            else:
+                assert False, f"Unknown signal type {signal_type}"
+
+            prompt = ''
+            for i, (old_query, response) in enumerate(history):
+                prompt += 'Question: ' + old_query + " {} ".format(answer_format) + response + "\n"
+            prompt += 'Question: {} {}'.format(query, answer_format)
+            return prompt
+
+        def build_conversation_input_ids(
+            tokenizer: "PreTrainedTokenizer",
+            *,
+            query: str,
+            history: Optional[List[Tuple[str, str]]] = None,
+            images: Optional[List["PIL.Image"]] = None,
+            template_version: Optional[Literal["base", "chat", "vqa"]] = None,
+            config=config
+        ):
+            image_size: int = config.vision_config['image_size']
+            patch_size: int = config.vision_config['patch_size']
+            template_version = template_version or config.template_version
+            assert images is None or len(images) <= 1, f"not support multi images by now."
+            history = history or []
+            text = _history_to_prompt(template_version, history, query)
+
+            input_ids = [tokenizer.bos_token_id]
+            token_type_ids = [LANGUAGE_TOKEN_TYPE]
+            if images is not None and len(images) == 1:
+                # vision
+                transform = transforms.Compose(
+                    [
+                        transforms.Resize(
+                            (image_size, image_size), interpolation=transforms.InterpolationMode.BICUBIC
+                        ),
+                        transforms.ToTensor(),
+                        transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
+                    ]
+                )
+                images = [transform(images[0])]
+                # language
+                vision_token_num = (image_size // patch_size) * (image_size // patch_size) + 2
+                input_ids += [tokenizer.pad_token_id] * vision_token_num
+                token_type_ids += [VISION_TOKEN_TYPE] * vision_token_num
+            text_ids = tokenizer.encode(text, add_special_tokens=False)
+
+            input_ids += text_ids
+            token_type_ids += [LANGUAGE_TOKEN_TYPE] * len(text_ids)
+            attention_mask = [1] * len(input_ids)
+
+            return {
+                'input_ids': torch.tensor(input_ids, dtype=torch.long),
+                'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
+                'attention_mask': torch.tensor(attention_mask, dtype=torch.long),
+                'images': images,
+            }
+
         inputs = data.pop("inputs", data)
+        query = inputs.pop("query", data)
+        image = inputs.pop("image", data)
+
         gen_kwargs = {"max_length": 2048, "do_sample": False}
 
+        inputs = build_conversation_input_ids(self.tokenizer, query=query, history=[], images=[image],
+                                              template_version='vqa')
+
+        inputs = {'inputs': {
+            'input_ids': inputs['input_ids'].unsqueeze(0).to('cuda'),
+            'token_type_ids': inputs['token_type_ids'].unsqueeze(0).to('cuda'),
+            'attention_mask': inputs['attention_mask'].unsqueeze(0).to('cuda'),
+            'images': [[inputs['images'][0].to('cuda').to(torch.bfloat16)]],
+        }}
+
         # pass inputs with all kwargs in data
         # prediction = self.pipeline(inputs)
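For reference, a minimal local invocation sketch of the updated handler. This is not part of the commit: it assumes handler.py and config.json sit in the working directory, a CUDA device is available, and "example.jpg" is a placeholder image path; what __call__ returns at this stage is not shown in the diff.

# Hypothetical local smoke test for the updated handler (not part of this commit).
from PIL import Image

from handler import EndpointHandler

handler = EndpointHandler(path=".")

# The payload mirrors what __call__ expects: an "inputs" dict carrying a text
# "query" and a PIL "image". "example.jpg" is a placeholder path.
payload = {
    "inputs": {
        "query": "Describe this image.",
        "image": Image.open("example.jpg"),
    }
}

result = handler(payload)
print(result)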