alessandro trinca tornidor committed
Commit accca77 · 1 Parent(s): bbb9bc0

feat: support new ZeroGPU version, adding device_map/device/device2 arguments to app_helpers.py functions

Files changed (1): lisa_on_cuda/utils/app_helpers.py (+43 −12)
lisa_on_cuda/utils/app_helpers.py CHANGED
```diff
@@ -22,6 +22,13 @@ from . import constants, utils
 placeholders = utils.create_placeholder_variables()
 
 
+def get_device_map_kwargs(device_map="auto", device="cuda"):
+    kwargs = {"device_map": device_map}
+    if device != "cuda":
+        kwargs['device_map'] = {"": device}
+    return kwargs
+
+
 def parse_args(args_to_parse, internal_logger=None):
     if internal_logger is None:
         internal_logger = app_logger
```
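The new `get_device_map_kwargs` helper is the heart of the change: `device_map="auto"` leaves module placement to accelerate, while `device_map={"": device}` pins the whole model (the root module, keyed by the empty string) to a single device, so CUDA stays uninitialized in the main process on ZeroGPU. A minimal sketch of what it returns, assuming the module is importable from this repo:

```python
# Illustration only, not part of the commit; import path taken from the changed file.
from lisa_on_cuda.utils.app_helpers import get_device_map_kwargs

print(get_device_map_kwargs())              # {'device_map': 'auto'} -> accelerate places modules
print(get_device_map_kwargs(device="cpu"))  # {'device_map': {'': 'cpu'}} -> whole model on CPU
```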
```diff
@@ -118,12 +125,13 @@ def preprocess(
 
 def load_model_for_causal_llm_pretrained(
         version, torch_dtype, load_in_8bit, load_in_4bit, seg_token_idx, vision_tower,
-        internal_logger: logging = None, device_map="auto"
+        internal_logger: logging = None, device_map="auto", device="cuda"
 ):
     if internal_logger is None:
         internal_logger = app_logger
     internal_logger.debug(f"prepare kwargs, 4bit:{load_in_4bit}, 8bit:{load_in_8bit}.")
-    kwargs = {"torch_dtype": torch_dtype}
+    kwargs_device_map = get_device_map_kwargs(device_map=device_map, device=device)
+    kwargs = {"torch_dtype": torch_dtype, **kwargs_device_map}
     if load_in_4bit:
         kwargs.update(
             {
```
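The helper's result is spread into `kwargs`, which is why the explicit `device_map=device_map` argument disappears from the `from_pretrained` call further down. A sketch of the merged dict, assuming fp16 weights and a CPU-first load as a ZeroGPU Space would use (values are illustrative):

```python
import torch

from lisa_on_cuda.utils.app_helpers import get_device_map_kwargs

# Assumed values for illustration: float16 dtype, device="cpu" to keep CUDA untouched.
kwargs_device_map = get_device_map_kwargs(device_map="auto", device="cpu")
kwargs = {"torch_dtype": torch.float16, **kwargs_device_map}
assert kwargs == {"torch_dtype": torch.float16, "device_map": {"": "cpu"}}
```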
```diff
@@ -154,15 +162,27 @@ def load_model_for_causal_llm_pretrained(
         low_cpu_mem_usage=True,
         vision_tower=vision_tower,
         seg_token_idx=seg_token_idx,
-        # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
-        device_map=device_map,
+        # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware (injected into kwargs)
         **kwargs
     )
     internal_logger.debug("model loaded!")
     return _model
 
 
-def get_model(args_to_parse, internal_logger: logging = None, inference_decorator: Callable = None, device_map="auto"):
+def get_model(args_to_parse, internal_logger: logging = None, inference_decorator: Callable = None, device_map="auto", device="cpu", device2="cuda"):
+    """Load the model and its preprocessing helpers. Compatible with ZeroGPU (spaces 0.30.2).
+
+    Args:
+        args_to_parse: default input arguments
+        internal_logger: logger
+        inference_decorator: inference decorator (the ZeroGPU spaces.GPU decorator is now supported and tested)
+        device_map: device map needed for ZeroGPU cuda hardware
+        device: device the model weights are loaded on; defaults to "cpu" to avoid a bug when loading the model on ZeroGPU
+        device2: device the vision tower is moved to after loading; defaults to "cuda"
+
+    Returns:
+        tuple of (model, clip image processor, tokenizer, image transform)
+    """
     if internal_logger is None:
         internal_logger = app_logger
     internal_logger.info(f"starting model preparation, folder creation for path: {args_to_parse.vis_save_path}.")
```
```diff
@@ -201,7 +221,8 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
         load_in_4bit=args_to_parse.load_in_4bit,
         seg_token_idx=args_to_parse.seg_token_idx,
         vision_tower=args_to_parse.vision_tower,
-        device_map=device_map  # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
+        device_map=device_map,  # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
+        device=device
     )) if inference_decorator else load_model_for_causal_llm_pretrained(
         args_to_parse.version,
         torch_dtype=torch_dtype,
```
```diff
@@ -226,10 +247,8 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
     )
     internal_logger.debug(f"_model type:{type(_model)} => {_model}.")
     internal_logger.debug(f"vision_tower type:{type(vision_tower)} => {vision_tower}.")
-    # set device to device_map try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
-    device = device_map if device_map else args_to_parse.local_rank
-    internal_logger.debug(f"device to use with vision tower:{device}, device_map:{device_map}, local_rank:{args_to_parse.local_rank}.")
-    vision_tower.to(device=device)
+    # move the vision tower to device2 ("cuda" by default) to try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
+    vision_tower.to(device=device2)
     internal_logger.debug("vision tower loaded, prepare clip image processor...")
     _clip_image_processor = CLIPImageProcessor.from_pretrained(_model.config.vision_tower)
    internal_logger.debug("clip image processor done.")
```
```diff
@@ -274,12 +293,24 @@ def prepare_model_vision_tower(_model, args_to_parse, torch_dtype, internal_logg
 
 
 def get_inference_model_by_args(
-        args_to_parse, internal_logger0: logging = None, inference_decorator: Callable = None, device_map="auto"
+        args_to_parse, internal_logger0: logging = None, inference_decorator: Callable = None, device_map="auto", device="cuda"
 ):
+    """Load the model and build the inference function. Compatible with ZeroGPU (spaces 0.30.2).
+
+    Args:
+        args_to_parse: default input arguments
+        internal_logger0: logger
+        inference_decorator: inference decorator (the ZeroGPU spaces.GPU decorator is now supported and tested)
+        device_map: device map needed for ZeroGPU cuda hardware
+        device: device needed for ZeroGPU cuda hardware, forwarded to get_model
+
+    Returns:
+        inference function with LISA model
+    """
     if internal_logger0 is None:
         internal_logger0 = app_logger
     internal_logger0.info(f"args_to_parse:{args_to_parse}, creating model...")
-    model, clip_image_processor, tokenizer, transform = get_model(args_to_parse, device_map=device_map)
+    model, clip_image_processor, tokenizer, transform = get_model(args_to_parse, device_map=device_map, device=device)
     internal_logger0.info("created model, preparing inference function")
     no_seg_out = placeholders["no_seg_out"]
 
```
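Putting it together, a Space entry point would ask `get_inference_model_by_args` for the decorated inference callable. A hypothetical bootstrap, with argument values that are illustrative rather than taken from the diff (`parse_args([])` is assumed to yield the defaults):

```python
import spaces
from lisa_on_cuda.utils import app_helpers

# Hypothetical ZeroGPU Space bootstrap; values are illustrative.
args = app_helpers.parse_args([])
inference_fn = app_helpers.get_inference_model_by_args(
    args,
    inference_decorator=spaces.GPU,
    device_map="auto",
    device="cpu",  # forwarded to get_model(); keeps the initial load off CUDA
)
```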