finalf0 committed
Commit
f002ad9
1 Parent(s): b806a3a
Files changed (1)
app.py +2 -3
app.py CHANGED
@@ -42,13 +42,12 @@ if 'int4' in model_path:
         exit()
     model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
 else:
-    if True: #args.multi_gpus:
+    if False: #args.multi_gpus:
         from accelerate import load_checkpoint_and_dispatch, init_empty_weights, infer_auto_device_map
         with init_empty_weights():
             #model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
             model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16)
-        #device_map = infer_auto_device_map(model, max_memory={0: "10GB", 1: "10GB"},
-        device_map = infer_auto_device_map(model,
+        device_map = infer_auto_device_map(model, max_memory={0: "10GB", 1: "10GB"},
             no_split_module_classes=['SiglipVisionTransformer', 'Qwen2DecoderLayer'])
         device_id = device_map["llm.model.embed_tokens"]
         device_map["llm.lm_head"] = device_id # first and last layer should be on the same device
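For context, the branch this commit disables is accelerate's big-model loading recipe: instantiate the model with empty (meta) weights, infer a device map that never splits an indivisible module, pin the tied embeddings and LM head to one device, then dispatch the real weights. Below is a minimal sketch of that recipe; the 10GB-per-GPU budget, the no-split module classes, and the llm.* key names come from the diff above, while the repo id and the snapshot-download step are illustrative assumptions, not part of the commit.

import torch
from transformers import AutoModel
from huggingface_hub import snapshot_download
from accelerate import load_checkpoint_and_dispatch, init_empty_weights, infer_auto_device_map

# Hypothetical repo id, for illustration only; load_checkpoint_and_dispatch
# needs a local checkpoint directory, so we download a snapshot first.
model_path = snapshot_download("openbmb/MiniCPM-V-2_6")

# Build the module tree on the meta device: no memory is allocated for weights yet.
with init_empty_weights():
    model = AutoModel.from_pretrained(model_path, trust_remote_code=True,
                                      torch_dtype=torch.bfloat16)

# Plan a split across GPUs 0 and 1, capped at 10GB each, without slicing the
# vision tower or any single decoder layer across devices.
device_map = infer_auto_device_map(
    model,
    max_memory={0: "10GB", 1: "10GB"},
    no_split_module_classes=['SiglipVisionTransformer', 'Qwen2DecoderLayer'],
)

# The input embeddings and the LM head are tied, so they must share a device.
device_map["llm.lm_head"] = device_map["llm.model.embed_tokens"]

# Stream the real weights from disk onto the planned devices.
model = load_checkpoint_and_dispatch(model, model_path, device_map=device_map).eval()

Flipping the guard to if False: keeps this code in the file for reference while skipping it at runtime, and restoring the max_memory cap preserves the original two-GPU budget for whenever the branch is re-enabled.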