feat: clean up commented-out code
Browse files
article_base_train_no_qlora_test.py
CHANGED
@@ -31,50 +31,28 @@ def main():
|
|
31 |
|
32 |
train_ds = train_val_split['train']
|
33 |
val_ds = train_val_split['test']
|
34 |
-
# train_ds = load_custom_dataset_from_csv('dataset/train_samples.csv', 'dataset/images')
|
35 |
-
# val_ds = load_custom_dataset_from_csv('dataset/val.csv', 'dataset/images')
|
36 |
|
37 |
model_id = "google/paligemma-3b-pt-224"
|
38 |
processor = PaliGemmaProcessor.from_pretrained(model_id)
|
39 |
device = "cuda"
|
40 |
|
41 |
-
# bnb_config = BitsAndBytesConfig(
|
42 |
-
# load_in_4bit=True,
|
43 |
-
# bnb_4bit_quant_type="nf4",
|
44 |
-
# # bnb_4bit_compute_type=torch.bfloat16,
|
45 |
-
# # bnb_4bit_compute_type=torch.float16
|
46 |
-
# bnb_4bit_compute_dtype=torch.bfloat16
|
47 |
-
# # bnb_4bit_use_double_quant=True,
|
48 |
-
# )
|
49 |
-
# lora_config = LoraConfig(
|
50 |
-
# r=8,
|
51 |
-
# target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
|
52 |
-
# task_type="CAUSAL_LM"
|
53 |
-
# )
|
54 |
-
|
55 |
-
# model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, quantization_config=bnb_config, device_map={"": 0})
|
56 |
-
# model.gradient_checkpointing_enable()
|
57 |
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)
|
58 |
for param in model.vision_tower.parameters():
|
59 |
param.requires_grad = False
|
60 |
|
61 |
for param in model.multi_modal_projector.parameters():
|
62 |
param.requires_grad = True
|
63 |
-
|
64 |
-
# model.print_trainable_parameters()
|
65 |
|
66 |
args = TrainingArguments(
|
67 |
output_dir=f"./output/{math.floor(time.time())}",
|
68 |
num_train_epochs=2,
|
69 |
remove_unused_columns=False,
|
70 |
-
# per_device_train_batch_size=16,
|
71 |
per_device_train_batch_size=1,
|
72 |
gradient_accumulation_steps=4,
|
73 |
warmup_steps=2,
|
74 |
learning_rate=2e-5,
|
75 |
weight_decay=1e-6,
|
76 |
logging_steps=100,
|
77 |
-
# optim="paged_adamw_8bit",
|
78 |
optim="adamw_hf",
|
79 |
save_strategy="steps",
|
80 |
save_steps=1000,
|
@@ -86,10 +64,8 @@ def main():
|
|
86 |
|
87 |
# Custom collate function
|
88 |
def collate_fn(examples):
|
89 |
-
# texts = ["answer " + example["question"] for example in examples]
|
90 |
texts = [example["question"] for example in examples]
|
91 |
labels = [example['answer'] for example in examples]
|
92 |
-
# images = [Image.open(image_path).convert("RGB") for image_path in examples['image']]
|
93 |
images = [Image.open(example['image']).convert("RGB") for example in examples]
|
94 |
tokens = processor(text=texts, images=images, suffix=labels, return_tensors="pt", padding="longest")
|
95 |
tokens = tokens.to(torch.bfloat16).to(device)
|
|
|
31 |
|
32 |
train_ds = train_val_split['train']
|
33 |
val_ds = train_val_split['test']
|
|
|
|
|
34 |
|
35 |
model_id = "google/paligemma-3b-pt-224"
|
36 |
processor = PaliGemmaProcessor.from_pretrained(model_id)
|
37 |
device = "cuda"
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)
|
40 |
for param in model.vision_tower.parameters():
|
41 |
param.requires_grad = False
|
42 |
|
43 |
for param in model.multi_modal_projector.parameters():
|
44 |
param.requires_grad = True
|
|
|
|
|
45 |
|
46 |
args = TrainingArguments(
|
47 |
output_dir=f"./output/{math.floor(time.time())}",
|
48 |
num_train_epochs=2,
|
49 |
remove_unused_columns=False,
|
|
|
50 |
per_device_train_batch_size=1,
|
51 |
gradient_accumulation_steps=4,
|
52 |
warmup_steps=2,
|
53 |
learning_rate=2e-5,
|
54 |
weight_decay=1e-6,
|
55 |
logging_steps=100,
|
|
|
56 |
optim="adamw_hf",
|
57 |
save_strategy="steps",
|
58 |
save_steps=1000,
|
|
|
64 |
|
65 |
# Custom collate function
|
66 |
def collate_fn(examples):
|
|
|
67 |
texts = [example["question"] for example in examples]
|
68 |
labels = [example['answer'] for example in examples]
|
|
|
69 |
images = [Image.open(example['image']).convert("RGB") for example in examples]
|
70 |
tokens = processor(text=texts, images=images, suffix=labels, return_tensors="pt", padding="longest")
|
71 |
tokens = tokens.to(torch.bfloat16).to(device)
|
article_base_train_test.py
CHANGED
@@ -31,8 +31,6 @@ def main():
|
|
31 |
|
32 |
train_ds = train_val_split['train']
|
33 |
val_ds = train_val_split['test']
|
34 |
-
# train_ds = load_custom_dataset_from_csv('dataset/train_samples.csv', 'dataset/images')
|
35 |
-
# val_ds = load_custom_dataset_from_csv('dataset/val.csv', 'dataset/images')
|
36 |
|
37 |
model_id = "google/paligemma-3b-pt-224"
|
38 |
processor = PaliGemmaProcessor.from_pretrained(model_id)
|
@@ -41,10 +39,7 @@ def main():
|
|
41 |
bnb_config = BitsAndBytesConfig(
|
42 |
load_in_4bit=True,
|
43 |
bnb_4bit_quant_type="nf4",
|
44 |
-
# bnb_4bit_compute_type=torch.bfloat16,
|
45 |
-
# bnb_4bit_compute_type=torch.float16
|
46 |
bnb_4bit_compute_dtype=torch.bfloat16
|
47 |
-
# bnb_4bit_use_double_quant=True,
|
48 |
)
|
49 |
lora_config = LoraConfig(
|
50 |
r=8,
|
@@ -53,7 +48,6 @@ def main():
|
|
53 |
)
|
54 |
|
55 |
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, quantization_config=bnb_config, device_map={"": 0})
|
56 |
-
# model.gradient_checkpointing_enable()
|
57 |
model = get_peft_model(model, lora_config)
|
58 |
model.print_trainable_parameters()
|
59 |
|
@@ -61,7 +55,6 @@ def main():
|
|
61 |
output_dir=f"./output/{math.floor(time.time())}",
|
62 |
num_train_epochs=2,
|
63 |
remove_unused_columns=False,
|
64 |
-
# per_device_train_batch_size=16,
|
65 |
per_device_train_batch_size=4,
|
66 |
gradient_accumulation_steps=4,
|
67 |
warmup_steps=2,
|
@@ -79,10 +72,8 @@ def main():
|
|
79 |
|
80 |
# Custom collate function
|
81 |
def collate_fn(examples):
|
82 |
-
# texts = ["answer " + example["question"] for example in examples]
|
83 |
texts = [example["question"] for example in examples]
|
84 |
labels = [example['answer'] for example in examples]
|
85 |
-
# images = [Image.open(image_path).convert("RGB") for image_path in examples['image']]
|
86 |
images = [Image.open(example['image']).convert("RGB") for example in examples]
|
87 |
tokens = processor(text=texts, images=images, suffix=labels, return_tensors="pt", padding="longest")
|
88 |
tokens = tokens.to(torch.bfloat16).to(device)
|
|
|
31 |
|
32 |
train_ds = train_val_split['train']
|
33 |
val_ds = train_val_split['test']
|
|
|
|
|
34 |
|
35 |
model_id = "google/paligemma-3b-pt-224"
|
36 |
processor = PaliGemmaProcessor.from_pretrained(model_id)
|
|
|
39 |
bnb_config = BitsAndBytesConfig(
|
40 |
load_in_4bit=True,
|
41 |
bnb_4bit_quant_type="nf4",
|
|
|
|
|
42 |
bnb_4bit_compute_dtype=torch.bfloat16
|
|
|
43 |
)
|
44 |
lora_config = LoraConfig(
|
45 |
r=8,
|
|
|
48 |
)
|
49 |
|
50 |
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, quantization_config=bnb_config, device_map={"": 0})
|
|
|
51 |
model = get_peft_model(model, lora_config)
|
52 |
model.print_trainable_parameters()
|
53 |
|
|
|
55 |
output_dir=f"./output/{math.floor(time.time())}",
|
56 |
num_train_epochs=2,
|
57 |
remove_unused_columns=False,
|
|
|
58 |
per_device_train_batch_size=4,
|
59 |
gradient_accumulation_steps=4,
|
60 |
warmup_steps=2,
|
|
|
72 |
|
73 |
# Custom collate function
|
74 |
def collate_fn(examples):
|
|
|
75 |
texts = [example["question"] for example in examples]
|
76 |
labels = [example['answer'] for example in examples]
|
|
|
77 |
images = [Image.open(example['image']).convert("RGB") for example in examples]
|
78 |
tokens = processor(text=texts, images=images, suffix=labels, return_tensors="pt", padding="longest")
|
79 |
tokens = tokens.to(torch.bfloat16).to(device)
|