pizb committed on
Commit
812fd7c
1 Parent(s): 01247ee

feat: clean remarks

Browse files
article_base_train_no_qlora_test.py CHANGED
@@ -31,50 +31,28 @@ def main():
31
 
32
  train_ds = train_val_split['train']
33
  val_ds = train_val_split['test']
34
- # train_ds = load_custom_dataset_from_csv('dataset/train_samples.csv', 'dataset/images')
35
- # val_ds = load_custom_dataset_from_csv('dataset/val.csv', 'dataset/images')
36
 
37
  model_id = "google/paligemma-3b-pt-224"
38
  processor = PaliGemmaProcessor.from_pretrained(model_id)
39
  device = "cuda"
40
 
41
- # bnb_config = BitsAndBytesConfig(
42
- # load_in_4bit=True,
43
- # bnb_4bit_quant_type="nf4",
44
- # # bnb_4bit_compute_type=torch.bfloat16,
45
- # # bnb_4bit_compute_type=torch.float16
46
- # bnb_4bit_compute_dtype=torch.bfloat16
47
- # # bnb_4bit_use_double_quant=True,
48
- # )
49
- # lora_config = LoraConfig(
50
- # r=8,
51
- # target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
52
- # task_type="CAUSAL_LM"
53
- # )
54
-
55
- # model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, quantization_config=bnb_config, device_map={"": 0})
56
- # model.gradient_checkpointing_enable()
57
  model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)
58
  for param in model.vision_tower.parameters():
59
  param.requires_grad = False
60
 
61
  for param in model.multi_modal_projector.parameters():
62
  param.requires_grad = True
63
-
64
- # model.print_trainable_parameters()
65
 
66
  args = TrainingArguments(
67
  output_dir=f"./output/{math.floor(time.time())}",
68
  num_train_epochs=2,
69
  remove_unused_columns=False,
70
- # per_device_train_batch_size=16,
71
  per_device_train_batch_size=1,
72
  gradient_accumulation_steps=4,
73
  warmup_steps=2,
74
  learning_rate=2e-5,
75
  weight_decay=1e-6,
76
  logging_steps=100,
77
- # optim="paged_adamw_8bit",
78
  optim="adamw_hf",
79
  save_strategy="steps",
80
  save_steps=1000,
@@ -86,10 +64,8 @@ def main():
86
 
87
  # Custom collate function
88
  def collate_fn(examples):
89
- # texts = ["answer " + example["question"] for example in examples]
90
  texts = [example["question"] for example in examples]
91
  labels = [example['answer'] for example in examples]
92
- # images = [Image.open(image_path).convert("RGB") for image_path in examples['image']]
93
  images = [Image.open(example['image']).convert("RGB") for example in examples]
94
  tokens = processor(text=texts, images=images, suffix=labels, return_tensors="pt", padding="longest")
95
  tokens = tokens.to(torch.bfloat16).to(device)
 
31
 
32
  train_ds = train_val_split['train']
33
  val_ds = train_val_split['test']
 
 
34
 
35
  model_id = "google/paligemma-3b-pt-224"
36
  processor = PaliGemmaProcessor.from_pretrained(model_id)
37
  device = "cuda"
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)
40
  for param in model.vision_tower.parameters():
41
  param.requires_grad = False
42
 
43
  for param in model.multi_modal_projector.parameters():
44
  param.requires_grad = True
 
 
45
 
46
  args = TrainingArguments(
47
  output_dir=f"./output/{math.floor(time.time())}",
48
  num_train_epochs=2,
49
  remove_unused_columns=False,
 
50
  per_device_train_batch_size=1,
51
  gradient_accumulation_steps=4,
52
  warmup_steps=2,
53
  learning_rate=2e-5,
54
  weight_decay=1e-6,
55
  logging_steps=100,
 
56
  optim="adamw_hf",
57
  save_strategy="steps",
58
  save_steps=1000,
 
64
 
65
  # Custom collate function
66
  def collate_fn(examples):
 
67
  texts = [example["question"] for example in examples]
68
  labels = [example['answer'] for example in examples]
 
69
  images = [Image.open(example['image']).convert("RGB") for example in examples]
70
  tokens = processor(text=texts, images=images, suffix=labels, return_tensors="pt", padding="longest")
71
  tokens = tokens.to(torch.bfloat16).to(device)
article_base_train_test.py CHANGED
@@ -31,8 +31,6 @@ def main():
31
 
32
  train_ds = train_val_split['train']
33
  val_ds = train_val_split['test']
34
- # train_ds = load_custom_dataset_from_csv('dataset/train_samples.csv', 'dataset/images')
35
- # val_ds = load_custom_dataset_from_csv('dataset/val.csv', 'dataset/images')
36
 
37
  model_id = "google/paligemma-3b-pt-224"
38
  processor = PaliGemmaProcessor.from_pretrained(model_id)
@@ -41,10 +39,7 @@ def main():
41
  bnb_config = BitsAndBytesConfig(
42
  load_in_4bit=True,
43
  bnb_4bit_quant_type="nf4",
44
- # bnb_4bit_compute_type=torch.bfloat16,
45
- # bnb_4bit_compute_type=torch.float16
46
  bnb_4bit_compute_dtype=torch.bfloat16
47
- # bnb_4bit_use_double_quant=True,
48
  )
49
  lora_config = LoraConfig(
50
  r=8,
@@ -53,7 +48,6 @@ def main():
53
  )
54
 
55
  model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, quantization_config=bnb_config, device_map={"": 0})
56
- # model.gradient_checkpointing_enable()
57
  model = get_peft_model(model, lora_config)
58
  model.print_trainable_parameters()
59
 
@@ -61,7 +55,6 @@ def main():
61
  output_dir=f"./output/{math.floor(time.time())}",
62
  num_train_epochs=2,
63
  remove_unused_columns=False,
64
- # per_device_train_batch_size=16,
65
  per_device_train_batch_size=4,
66
  gradient_accumulation_steps=4,
67
  warmup_steps=2,
@@ -79,10 +72,8 @@ def main():
79
 
80
  # Custom collate function
81
  def collate_fn(examples):
82
- # texts = ["answer " + example["question"] for example in examples]
83
  texts = [example["question"] for example in examples]
84
  labels = [example['answer'] for example in examples]
85
- # images = [Image.open(image_path).convert("RGB") for image_path in examples['image']]
86
  images = [Image.open(example['image']).convert("RGB") for example in examples]
87
  tokens = processor(text=texts, images=images, suffix=labels, return_tensors="pt", padding="longest")
88
  tokens = tokens.to(torch.bfloat16).to(device)
 
31
 
32
  train_ds = train_val_split['train']
33
  val_ds = train_val_split['test']
 
 
34
 
35
  model_id = "google/paligemma-3b-pt-224"
36
  processor = PaliGemmaProcessor.from_pretrained(model_id)
 
39
  bnb_config = BitsAndBytesConfig(
40
  load_in_4bit=True,
41
  bnb_4bit_quant_type="nf4",
 
 
42
  bnb_4bit_compute_dtype=torch.bfloat16
 
43
  )
44
  lora_config = LoraConfig(
45
  r=8,
 
48
  )
49
 
50
  model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, quantization_config=bnb_config, device_map={"": 0})
 
51
  model = get_peft_model(model, lora_config)
52
  model.print_trainable_parameters()
53
 
 
55
  output_dir=f"./output/{math.floor(time.time())}",
56
  num_train_epochs=2,
57
  remove_unused_columns=False,
 
58
  per_device_train_batch_size=4,
59
  gradient_accumulation_steps=4,
60
  warmup_steps=2,
 
72
 
73
  # Custom collate function
74
  def collate_fn(examples):
 
75
  texts = [example["question"] for example in examples]
76
  labels = [example['answer'] for example in examples]
 
77
  images = [Image.open(example['image']).convert("RGB") for example in examples]
78
  tokens = processor(text=texts, images=images, suffix=labels, return_tensors="pt", padding="longest")
79
  tokens = tokens.to(torch.bfloat16).to(device)