Upload folder using huggingface_hub
Browse files
main.py
CHANGED
@@ -64,9 +64,9 @@ class ORPO(object):
|
|
64 |
test = self.data[test_split].filter(self.filter_dataset)
|
65 |
self.test = test.map(self.preprocess_dataset, batched=True, num_proc=self.args.num_proc, remove_columns=self.data[test_split].column_names)
|
66 |
|
67 |
-
train = self.data[train_split].filter(self.filter_dataset)[self.args.max_samples]
|
68 |
print(f"\n\n>>> {len(train)} / {len(self.data[train_split])} rows left after filtering by prompt length.")
|
69 |
-
self.train = train.map(self.preprocess_dataset, batched=True, num_proc=self.args.num_proc, remove_columns=self.data[
|
70 |
|
71 |
# Set WANDB & Logging Configurations
|
72 |
self.run_name = f"{self.args.model_name.split('/')[-1]}-{self.args.data_name.split('/')[-1]}-lambda{self.args.alpha}-ORPO-{self.start.tm_mday}-{self.start.tm_hour}-{self.start.tm_min}"
|
|
|
64 |
test = self.data[test_split].filter(self.filter_dataset)
|
65 |
self.test = test.map(self.preprocess_dataset, batched=True, num_proc=self.args.num_proc, remove_columns=self.data[test_split].column_names)
|
66 |
|
67 |
+
train = self.data[train_split].filter(self.filter_dataset)[:self.args.max_samples]
|
68 |
print(f"\n\n>>> {len(train)} / {len(self.data[train_split])} rows left after filtering by prompt length.")
|
69 |
+
self.train = train.map(self.preprocess_dataset, batched=True, num_proc=self.args.num_proc, remove_columns=self.data[train_split].column_names)
|
70 |
|
71 |
# Set WANDB & Logging Configurations
|
72 |
self.run_name = f"{self.args.model_name.split('/')[-1]}-{self.args.data_name.split('/')[-1]}-lambda{self.args.alpha}-ORPO-{self.start.tm_mday}-{self.start.tm_hour}-{self.start.tm_min}"
|