Hritik committed
Commit cfe5653
1 Parent(s): 6ab097e

Load on CPU to reduce memory requirements

Files changed (1): app.py (+9, -6)
app.py CHANGED
@@ -35,11 +35,14 @@ processor = MplugOwlProcessor(image_processor, tokenizer)
 model = MplugOwlForConditionalGeneration.from_pretrained(
     pretrained_ckpt,
     torch_dtype=torch.bfloat16,
-    device_map={'':0}
+    device_map={'': 'cpu'}
+    # device_map={'':0}
 )
 
-for name, param in model.named_parameters():
-    param.requires_grad = False
+# for name, param in model.named_parameters():
+#     print(param.device)
+#     break
+
 peft_config = LoraConfig(
     target_modules=r'.*language_model.*\.(q_proj|v_proj|k_proj|o_proj|gate_proj|down_proj|up_proj)',
     inference_mode=True,
@@ -50,14 +53,14 @@ peft_config = LoraConfig(
 model = get_peft_model(model, peft_config)
 model.print_trainable_parameters()
 with open(trained_ckpt, 'rb') as f:
-    ckpt = torch.load(f, map_location = torch.device(f"cuda:0"))
+    ckpt = torch.load(f, map_location = torch.device("cpu"))
 model.load_state_dict(ckpt)
-model = model.to(torch.bfloat16)
+model = model.to("cuda:0").to(torch.bfloat16)
 print('Model Loaded')
 
 PROMPT = """The following is a conversation between a curious human and AI assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
 Human: <|video|>
-Human: Does this video entail the description: ""A basketball team walking off the field while the audience claps.""?
+Human: Does this video entail the description: ""A soccer team walking off the field while the audience claps.""?
 AI: """
 
 valid_data = MultiModalDataset("examples/y5xuvHpDPZQ_000005_000015.mp4", PROMPT, tokenizer, processor, max_length = 256, loss_objective = 'sequential')
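The change above follows a common pattern for keeping peak GPU memory low: materialize the base weights and the fine-tuned checkpoint on CPU first, and only move the fully assembled model to the GPU (with the desired dtype) at the end. The `device_map={'': 'cpu'}` argument places every submodule on CPU at load time, and `map_location=torch.device("cpu")` does the same for the saved state dict. Below is a minimal, self-contained sketch of that pattern; the toy `nn.Linear` module and the `toy_ckpt.pt` file are illustrative stand-ins, not part of this repository.

import torch
import torch.nn as nn

# Toy stand-in for the real model (illustrative only, not mPLUG-Owl).
model = nn.Linear(16, 16)

# Save a checkpoint so the sketch is fully self-contained.
torch.save(model.state_dict(), "toy_ckpt.pt")

# 1) Load the checkpoint onto CPU so no GPU memory is touched yet.
with open("toy_ckpt.pt", "rb") as f:
    ckpt = torch.load(f, map_location=torch.device("cpu"))
model.load_state_dict(ckpt)

# 2) Move the assembled model to the GPU and cast it, mirroring
#    `model = model.to("cuda:0").to(torch.bfloat16)` in the diff.
if torch.cuda.is_available():
    model = model.to("cuda:0").to(torch.bfloat16)

print('Model Loaded')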