pietrolesci committed on
Commit
7806f40
1 Parent(s): 253cf88

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Run info
2
+ - complete_hash: b97998c098e0d34fa00d918d670b8f9b
3
+ - short_hash: b9799b8f9b
4
+
5
+ ### Configuration
6
+ ```yaml
7
+ data:
8
+ batch_size: 32
9
+ data_seed: 42
10
+ drop_last: false
11
+ eval_batch_size: 128
12
+ max_length: 512
13
+ multiprocessing_context: null
14
+ num_workers: 8
15
+ persistent_workers: false
16
+ pin_memory: true
17
+ replacement: false
18
+ shuffle: true
19
+ dataset: mnli
20
+ estimator:
21
+ accelerator: gpu
22
+ convert_to_bettertransformer: false
23
+ deterministic: true
24
+ precision: bf16-true
25
+ tf32_mode: high
26
+ fit:
27
+ enable_progress_bar: true
28
+ limit_train_batches: null
29
+ log_interval: 100
30
+ max_epochs: 20
31
+ min_epochs: null
32
+ optimizer_kwargs:
33
+ init_kwargs:
34
+ fused: true
35
+ lr: 3.0e-05
36
+ name: adamw
37
+ scheduler_kwargs:
38
+ name: constant_schedule_with_warmup
39
+ num_warmup_steps: 2000
40
+ model:
41
+ base_model: roberta-base
42
+ name: roberta-base
43
+ revision: null
44
+ seed: 42
45
+ seed: 42
46
+ ```
hparams.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_group: training
2
+ run_name: roberta-base_2024-04-23T15-11-06
3
+ seed: 42
4
+ model:
5
+ name: roberta-base
6
+ revision: null
7
+ seed: 42
8
+ base_model: roberta-base
9
+ estimator:
10
+ accelerator: gpu
11
+ precision: bf16-true
12
+ deterministic: true
13
+ tf32_mode: high
14
+ convert_to_bettertransformer: false
15
+ fit:
16
+ max_epochs: 20
17
+ min_epochs: null
18
+ optimizer_kwargs:
19
+ name: adamw
20
+ lr: 3.0e-05
21
+ init_kwargs:
22
+ fused: true
23
+ scheduler_kwargs:
24
+ name: constant_schedule_with_warmup
25
+ num_warmup_steps: 2000
26
+ log_interval: 100
27
+ enable_progress_bar: true
28
+ limit_train_batches: null
29
+ data:
30
+ batch_size: 32
31
+ eval_batch_size: 128
32
+ shuffle: true
33
+ replacement: false
34
+ data_seed: 42
35
+ drop_last: false
36
+ num_workers: 8
37
+ pin_memory: true
38
+ persistent_workers: false
39
+ multiprocessing_context: null
40
+ max_length: 512
41
+ root_path: /home/pl487/coreset-project
42
+ data_path: /home/pl487/coreset-project/data/processed
43
+ dataset: mnli
44
+ dataset_split: train
45
+ evaluation: null
46
+ loggers:
47
+ tensorboard:
48
+ _target_: energizer.loggers.TensorBoardLogger
49
+ root_dir: ./
50
+ name: tb_logs
51
+ version: null
52
+ callbacks:
53
+ timer:
54
+ _target_: energizer.active_learning.callbacks.Timer
55
+ lr_monitor:
56
+ _target_: energizer.callbacks.lr_monitor.LearningRateMonitor
57
+ model_checkpoint:
58
+ _target_: energizer.callbacks.model_checkpoint.ModelCheckpoint
59
+ dirpath: .checkpoints
60
+ stage: train
61
+ frequency: 1:epoch
62
+ user:
63
+ id: pl487
main.log ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2024-04-23 15:11:06,594][hydra][INFO] -
2
+ experiment_group: training
3
+ run_name: roberta-base_2024-04-23T15-11-06
4
+ seed: 42
5
+ model:
6
+ name: roberta-base
7
+ revision: null
8
+ seed: 42
9
+ base_model: roberta-base
10
+ estimator:
11
+ accelerator: gpu
12
+ precision: bf16-true
13
+ deterministic: true
14
+ tf32_mode: high
15
+ convert_to_bettertransformer: false
16
+ fit:
17
+ max_epochs: 20
18
+ min_epochs: null
19
+ optimizer_kwargs:
20
+ name: adamw
21
+ lr: 3.0e-05
22
+ init_kwargs:
23
+ fused: true
24
+ scheduler_kwargs:
25
+ name: constant_schedule_with_warmup
26
+ num_warmup_steps: 2000
27
+ log_interval: 100
28
+ enable_progress_bar: true
29
+ limit_train_batches: null
30
+ data:
31
+ batch_size: 32
32
+ eval_batch_size: 128
33
+ shuffle: true
34
+ replacement: false
35
+ data_seed: 42
36
+ drop_last: false
37
+ num_workers: 8
38
+ pin_memory: true
39
+ persistent_workers: false
40
+ multiprocessing_context: null
41
+ max_length: 512
42
+ root_path: /home/pl487/coreset-project
43
+ data_path: /home/pl487/coreset-project/data/processed
44
+ dataset: mnli
45
+ dataset_split: train
46
+ evaluation: null
47
+ loggers:
48
+ tensorboard:
49
+ _target_: energizer.loggers.TensorBoardLogger
50
+ root_dir: ./
51
+ name: tb_logs
52
+ version: null
53
+ callbacks:
54
+ timer:
55
+ _target_: energizer.active_learning.callbacks.Timer
56
+ lr_monitor:
57
+ _target_: energizer.callbacks.lr_monitor.LearningRateMonitor
58
+ model_checkpoint:
59
+ _target_: energizer.callbacks.model_checkpoint.ModelCheckpoint
60
+ dirpath: .checkpoints
61
+ stage: train
62
+ frequency: 1:epoch
63
+ user:
64
+ id: pl487
65
+
66
+ ======================================================================
67
+ [2024-04-23 15:11:06,595][hydra][INFO] - Seed enabled: 42
68
+ [2024-04-23 15:11:06,963][hydra][INFO] - Label distribution:
69
+ {<RunningStage.TRAIN: 'train'>: {'0-(entailment)': 130899, '1-(neutral)': 130900, '2-(contradiction)': 130903}}
70
+ [2024-04-23 15:11:19,109][hydra][INFO] - Loggers: [<energizer.loggers.tensorboard.TensorBoardLogger object at 0x7f86f05beb00>]
71
+ [2024-04-23 15:11:19,110][hydra][INFO] - Callbacks: [<energizer.active_learning.callbacks.Timer object at 0x7f86deae60b0>, <energizer.callbacks.lr_monitor.LearningRateMonitor object at 0x7f86deae6110>, <energizer.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f86deae6620>]
72
+ [2024-04-23 15:11:19,113][hydra][INFO] - Model summary:
73
+ Total num params: 124.6M
74
+ Of which trainable: 124.6M
75
+ With a memory footprint of 0.25GB
76
+ Total memory allocated 0.77GB
77
+ [2024-04-23 15:11:19,754][hydra][INFO] - Dataloading params:
78
+ SequenceClassificationDataloaderArgs(batch_size=32, eval_batch_size=128, num_workers=8, pin_memory=True, drop_last=False, persistent_workers=False, shuffle=True, replacement=False, data_seed=42, multiprocessing_context=None, max_length=512)
79
+ [2024-04-23 15:11:19,760][hydra][INFO] - Batch:
80
+ {<InputKeys.INPUT_IDS: 'input_ids'>: tensor([[ 0, 3056, 37463, 14, 18, 761, 9, 4678, 939, 1266,
81
+ 51, 214, 190, 3406, 24, 7, 7, 147, 122, 37463,
82
+ 14, 51, 26112, 15, 1012, 47, 216, 114, 110, 114,
83
+ 47, 37463, 47, 216, 33, 626, 42, 50, 114, 47,
84
+ 240, 42, 37463, 37463, 52, 581, 14811, 13, 47, 8,
85
+ 47, 218, 75, 33, 7, 582, 201, 3867, 47, 53,
86
+ 172, 99, 51, 218, 75, 1137, 47, 16, 14, 114,
87
+ 47, 114, 51, 339, 47, 492, 106, 23, 513, 10,
88
+ 371, 9, 5, 9, 5, 631, 14, 51, 339, 98,
89
+ 1437, 939, 218, 75, 216, 24, 16, 37463, 24, 18,
90
+ 562, 7, 28, 55, 265, 122, 1195, 87, 37463, 888,
91
+ 37463, 4098, 19, 5, 1846, 87, 19, 37463, 7252, 5,
92
+ 37463, 8653, 51, 5, 5, 3969, 32, 95, 11, 24,
93
+ 13, 5, 418, 1437, 939, 437, 939, 437, 7013, 939,
94
+ 216, 939, 939, 2854, 19, 47, 939, 206, 47, 214,
95
+ 588, 47, 214, 182, 235, 14, 5, 3770, 197, 939,
96
+ 206, 51, 197, 33, 41, 3871, 1280, 9, 47, 216,
97
+ 2085, 51, 64, 33, 10, 367, 53, 939, 206, 144,
98
+ 9, 106, 197, 28, 45, 37463, 3969, 11, 5, 754,
99
+ 8, 14, 18, 169, 169, 51, 348, 5335, 88, 2302,
100
+ 24, 18, 142, 9, 5, 5, 488, 383, 47, 216,
101
+ 5, 37482, 8, 960, 53, 37463, 53, 117, 939, 216,
102
+ 52, 24, 7252, 52, 214, 11, 4788, 8, 37463, 52,
103
+ 33, 5, 276, 631, 22002, 154, 8, 8, 37463, 51,
104
+ 32, 6901, 106, 66, 939, 1266, 95, 5, 1675, 2878,
105
+ 3645, 631, 51, 905, 106, 66, 142, 9, 51, 218,
106
+ 75, 33, 143, 317, 7, 489, 7, 342, 106, 98,
107
+ 4909, 14, 115, 47, 216, 37463, 3867, 37463, 57, 10,
108
+ 538, 2970, 53, 37463, 37463, 190, 5, 181, 6502, 352,
109
+ 410, 2682, 939, 1266, 5, 5, 1669, 14, 13585, 452,
110
+ 11, 730, 16, 5, 1802, 8, 14051, 14, 189, 28,
111
+ 549, 24, 18, 10, 6279, 50, 2196, 50, 3046, 1493,
112
+ 7252, 51, 32, 5, 1980, 14, 32, 164, 7, 582,
113
+ 8, 51, 32, 5, 65, 14, 32, 164, 7, 6297,
114
+ 8, 5, 97, 621, 47, 216, 114, 51, 582, 114,
115
+ 51, 114, 51, 37463, 2237, 106, 37463, 8, 37463, 172,
116
+ 5, 47, 216, 86, 5, 488, 1239, 81, 37463, 457,
117
+ 5, 86, 37463, 51, 1169, 905, 106, 213, 50, 51,
118
+ 120, 160, 19, 10, 10, 3645, 142, 51, 348, 56,
119
+ 10, 2470, 14, 47, 216, 2653, 14, 14, 51, 3559,
120
+ 75, 70, 561, 77, 51, 222, 24, 2, 2, 100,
121
+ 206, 14, 89, 197, 28, 41, 3871, 8985, 9, 14218,
122
+ 11, 84, 3770, 4, 2]]), <InputKeys.ATT_MASK: 'attention_mask'>: tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
123
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
124
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
125
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
126
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
127
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
139
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]), <InputKeys.LABELS: 'labels'>: tensor([0]), <InputKeys.ON_CPU: 'on_cpu'>: {<SpecialKeys.ID: 'uid'>: [221950]}}
140
+ [2024-04-23 20:57:35,566][hydra][INFO] - Training complete
tb_logs/version_0/events.out.tfevents.1713881489.dev-gpu-pl487.1865581.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce788d1e3351858236cf312c2923a9c037c088eb82249cdb01fed9520a256f5f
3
+ size 1855634