alessandro trinca tornidor committed · accca77
1 parent: bbb9bc0
feat: support new ZeroGPU version, adding device_map/device/device2 to app_helpers.py functions
lisa_on_cuda/utils/app_helpers.py
CHANGED
@@ -22,6 +22,13 @@ from . import constants, utils
 placeholders = utils.create_placeholder_variables()
 
 
+def get_device_map_kwargs(device_map="auto", device="cuda"):
+    kwargs = {"device_map": device_map}
+    if device != "cuda":
+        kwargs['device_map'] = {"": device}
+    return kwargs
+
+
 def parse_args(args_to_parse, internal_logger=None):
     if internal_logger is None:
         internal_logger = app_logger
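For context (not part of the commit), the new helper just normalizes the device_map argument: on plain CUDA it passes device_map through; otherwise it pins the whole model to a single device via the accelerate-style {"": device} mapping. A minimal sketch, assuming the import path implied by the file name above:

    from lisa_on_cuda.utils.app_helpers import get_device_map_kwargs

    print(get_device_map_kwargs())                                 # {'device_map': 'auto'}
    print(get_device_map_kwargs(device_map="auto", device="cpu"))  # {'device_map': {'': 'cpu'}}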
@@ -118,12 +125,13 @@ def preprocess(
 
 def load_model_for_causal_llm_pretrained(
         version, torch_dtype, load_in_8bit, load_in_4bit, seg_token_idx, vision_tower,
-        internal_logger: logging = None, device_map="auto"
+        internal_logger: logging = None, device_map="auto", device="cuda"
 ):
     if internal_logger is None:
         internal_logger = app_logger
     internal_logger.debug(f"prepare kwargs, 4bit:{load_in_4bit}, 8bit:{load_in_8bit}.")
-    kwargs = {"torch_dtype": torch_dtype}
+    kwargs_device_map = get_device_map_kwargs(device_map=device_map, device=device)
+    kwargs = {"torch_dtype": torch_dtype, **kwargs_device_map}
     if load_in_4bit:
         kwargs.update(
             {
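Downstream, the kwargs for from_pretrained are now built from this helper instead of hard-coding device_map. Illustrative values (torch.float16 here is an assumption, not from the diff):

    import torch
    from lisa_on_cuda.utils.app_helpers import get_device_map_kwargs

    kwargs = {"torch_dtype": torch.float16, **get_device_map_kwargs(device_map="auto", device="cpu")}
    # kwargs == {'torch_dtype': torch.float16, 'device_map': {'': 'cpu'}}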
@@ -154,15 +162,27 @@ def load_model_for_causal_llm_pretrained
         low_cpu_mem_usage=True,
         vision_tower=vision_tower,
         seg_token_idx=seg_token_idx,
-        # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
-        device_map=device_map,
+        # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware (injected into kwargs)
         **kwargs
     )
     internal_logger.debug("model loaded!")
     return _model
 
 
-def get_model(args_to_parse, internal_logger: logging = None, inference_decorator: Callable = None, device_map="auto"):
+def get_model(args_to_parse, internal_logger: logging = None, inference_decorator: Callable = None, device_map="auto", device="cpu", device2="cuda"):
+    """Load model and inference function with arguments. Compatible with ZeroGPU (spaces 0.30.2)
+
+    Args:
+        args_to_parse: default input arguments
+        internal_logger: logger
+        inference_decorator: inference decorator (now it's supported and tested ZeroGPU spaces.GPU decorator)
+        device_map: device type needed for ZeroGPU cuda hw
+        device: device type needed for ZeroGPU cuda hw
+        device2: device type needed for ZeroGPU cuda hw, default to cpu to avoid bug on loading model
+
+    Returns:
+        inference function with LISA model
+    """
     if internal_logger is None:
         internal_logger = app_logger
     internal_logger.info(f"starting model preparation, folder creation for path: {args_to_parse.vis_save_path}.")
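The device="cpu" / device2="cuda" split follows the usual ZeroGPU rule: CUDA must not be initialized at import time, only inside a spaces.GPU-decorated function. A minimal sketch of that pattern, under stated assumptions (the model name and function below are hypothetical, not from this repo):

    import spaces
    import torch
    from transformers import AutoModelForCausalLM

    # load on CPU at startup; ZeroGPU hardware exposes CUDA only inside decorated calls
    model = AutoModelForCausalLM.from_pretrained(
        "org/some-model", torch_dtype=torch.float16, device_map={"": "cpu"}
    )

    @spaces.GPU
    def run(prompt: str):
        model.to("cuda")  # safe here: the decorator has attached a GPU
        ...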
@@ -201,7 +221,8 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
             load_in_4bit=args_to_parse.load_in_4bit,
             seg_token_idx=args_to_parse.seg_token_idx,
             vision_tower=args_to_parse.vision_tower,
-            device_map=device_map  # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
+            device_map=device_map,  # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
+            device=device
         )) if inference_decorator else load_model_for_causal_llm_pretrained(
             args_to_parse.version,
             torch_dtype=torch_dtype,
@@ -226,10 +247,8 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
     )
     internal_logger.debug(f"_model type:{type(_model)} => {_model}.")
     internal_logger.debug(f"vision_tower type:{type(vision_tower)} => {vision_tower}.")
-    # set device to
-    device
-    internal_logger.debug(f"device to use with vision tower:{device}, device_map:{device_map}, local_rank:{args_to_parse.local_rank}.")
-    vision_tower.to(device=device)
+    # set device to "cuda" try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
+    vision_tower.to(device=device2)
     internal_logger.debug("vision tower loaded, prepare clip image processor...")
     _clip_image_processor = CLIPImageProcessor.from_pretrained(_model.config.vision_tower)
     internal_logger.debug("clip image processor done.")
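Note the vision tower now moves with the separate device2 argument while the main weights stay wherever device_map put them. A toy stand-in showing the resulting split (plain nn.Linear modules instead of the real model; CUDA assumed available):

    import torch

    model = torch.nn.Linear(4, 4)           # stand-in for the causal LLM kept on CPU
    vision_tower = torch.nn.Linear(4, 4)    # stand-in for the CLIP vision tower
    vision_tower.to(device="cuda")          # what get_model now does via device2
    print(next(model.parameters()).device)         # cpu
    print(next(vision_tower.parameters()).device)  # cuda:0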
@@ -274,12 +293,24 @@ def prepare_model_vision_tower(_model, args_to_parse, torch_dtype, internal_logg
 
 
 def get_inference_model_by_args(
-        args_to_parse, internal_logger0: logging = None, inference_decorator: Callable = None, device_map="auto"
+        args_to_parse, internal_logger0: logging = None, inference_decorator: Callable = None, device_map="auto", device="cuda"
 ):
+    """Load model and inference function with arguments. Compatible with ZeroGPU (spaces 0.30.2)
+
+    Args:
+        args_to_parse: default input arguments
+        internal_logger0: logger
+        inference_decorator: inference decorator (now it's supported and tested ZeroGPU spaces.GPU decorator)
+        device_map: device type needed for ZeroGPU cuda hw
+        device: device type needed for ZeroGPU cuda hw
+
+    Returns:
+        inference function with LISA model
+    """
     if internal_logger0 is None:
         internal_logger0 = app_logger
     internal_logger0.info(f"args_to_parse:{args_to_parse}, creating model...")
-    model, clip_image_processor, tokenizer, transform = get_model(args_to_parse, device_map=device_map)
+    model, clip_image_processor, tokenizer, transform = get_model(args_to_parse, device_map=device_map, device=device)
     internal_logger0.info("created model, preparing inference function")
     no_seg_out = placeholders["no_seg_out"]
 
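Put together, a ZeroGPU Space would wire the new entry point roughly like this (hypothetical app.py snippet; the argument values are assumptions, not from the commit):

    import spaces
    from lisa_on_cuda.utils import app_helpers

    args = app_helpers.parse_args([])
    inference_fn = app_helpers.get_inference_model_by_args(
        args,
        inference_decorator=spaces.GPU,
        device_map="auto",
        device="cuda",
    )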