jdorairaj committed
Commit
296cc80
1 Parent(s): 3f490c1

sst2 progress

Files changed (38)
  1. outputs/args.json +2 -2
  2. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/logfile_la.log +846 -0
  3. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  4. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  5. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_0/gpu_stats_la.json +130 -0
  6. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  7. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  8. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_1999/gpu_stats_la.json +130 -0
  9. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  10. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  11. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_3999/gpu_stats_la.json +130 -0
  12. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  13. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  14. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_5999/gpu_stats_la.json +130 -0
  15. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  16. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  17. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_7999/gpu_stats_la.json +130 -0
  18. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  19. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  20. outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_9999/gpu_stats_la.json +130 -0
  21. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_0/f_mu_kron_all_homo_1000.pt +3 -0
  22. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_0/f_var_kron_all_homo_1000.pt +3 -0
  23. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_0/prior_precision_kron_all_homo_1000.pt +3 -0
  24. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_1999/f_mu_kron_all_homo_1000.pt +3 -0
  25. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_1999/f_var_kron_all_homo_1000.pt +3 -0
  26. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_1999/prior_precision_kron_all_homo_1000.pt +3 -0
  27. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_3999/f_mu_kron_all_homo_1000.pt +3 -0
  28. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_3999/f_var_kron_all_homo_1000.pt +3 -0
  29. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_3999/prior_precision_kron_all_homo_1000.pt +3 -0
  30. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_5999/f_mu_kron_all_homo_1000.pt +3 -0
  31. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_5999/f_var_kron_all_homo_1000.pt +3 -0
  32. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_5999/prior_precision_kron_all_homo_1000.pt +3 -0
  33. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_7999/f_mu_kron_all_homo_1000.pt +3 -0
  34. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_7999/f_var_kron_all_homo_1000.pt +3 -0
  35. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_7999/prior_precision_kron_all_homo_1000.pt +3 -0
  36. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_9999/f_mu_kron_all_homo_1000.pt +3 -0
  37. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_9999/f_var_kron_all_homo_1000.pt +3 -0
  38. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_9999/prior_precision_kron_all_homo_1000.pt +3 -0
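Note on the `outputs_laplace` artifacts: each `step_*` directory pairs a predictive mean `f_mu` (shape `[872, 2]`, one row per SST-2 validation example) with a per-example predictive covariance `f_var` (shape `[872, 2, 2]`), as logged in `logfile_la.log` below. A minimal sketch of how these tensors could be loaded and turned into Monte-Carlo-averaged class probabilities (the `mc_corr_1000` suffix on the result files suggests 1000 MC samples; the script's exact procedure is not part of this commit, so the steps here are assumptions):

```python
import torch

# Hypothetical usage of the saved tensors; paths follow the naming in this commit.
base = "outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_9999"
f_mu = torch.load(f"{base}/f_mu_kron_all_homo_1000.pt")    # [872, 2] mean logits
f_var = torch.load(f"{base}/f_var_kron_all_homo_1000.pt")  # [872, 2, 2] logit covariances

# MC-corrected predictive: sample logits from N(f_mu, f_var), then average the softmax.
dist = torch.distributions.MultivariateNormal(f_mu, covariance_matrix=f_var)
logits = dist.sample((1000,))                      # [1000, 872, 2]; 1000 is an assumption
probs = torch.softmax(logits, dim=-1).mean(dim=0)  # [872, 2] predictive probabilities
preds = probs.argmax(dim=-1)                       # hard labels for downstream metrics
```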
outputs/args.json CHANGED
@@ -1,11 +1,11 @@
  {
- "task_name": "mrpc",
+ "task_name": "sst2",
  "train_file": null,
  "validation_file": null,
  "max_length": 300,
  "pad_to_max_length": false,
  "model_name_or_path": "roberta-base",
- "use_slow_tokenizer": false,
+ "use_slow_tokenizer": true,
  "per_device_train_batch_size": 8,
  "per_device_eval_batch_size": 8,
  "learning_rate": 0.0001,
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/logfile_la.log CHANGED
@@ -110,3 +110,849 @@
  06/02/2024 09:39:42 - INFO - __main__ - Sample 41905 of the training set: {'input_ids': [0, 17615, 1899, 385, 16314, 2156, 10985, 25, 7, 549, 47, 128, 548, 450, 15570, 50, 6717, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
  06/02/2024 09:39:42 - INFO - __main__ - Sample 7296 of the training set: {'input_ids': [0, 1250, 5, 5567, 23959, 9, 41259, 11605, 23, 63, 275, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
  06/02/2024 09:39:42 - INFO - __main__ - Sample 1639 of the training set: {'input_ids': [0, 506, 11791, 98, 16894, 15, 358, 9164, 672, 14, 24, 35499, 103, 761, 9, 36302, 2821, 16235, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 10:31:49 - INFO - __main__ - Number of labels detected = 2
+ 06/02/2024 10:31:50 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+ 06/02/2024 10:31:50 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_0/adapter_config.json
+ 06/02/2024 10:31:50 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'.
+ 06/02/2024 10:31:50 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_0/pytorch_adapter.bin
+ 06/02/2024 10:31:50 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_0/head_config.json
+ 06/02/2024 10:31:50 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+ 06/02/2024 10:31:50 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_0/pytorch_model_head.bin
+ 06/02/2024 10:31:50 - INFO - __main__ - Adapter Name = sst2
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - heads.sst2.1.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - heads.sst2.1.bias
+ 06/02/2024 10:31:50 - INFO - __main__ - heads.sst2.4.weight
+ 06/02/2024 10:31:50 - INFO - __main__ - heads.sst2.4.bias
+ 06/02/2024 10:32:04 - INFO - __main__ - Sample 41905 of the training set: {'input_ids': [0, 17615, 1899, 385, 16314, 2156, 10985, 25, 7, 549, 47, 128, 548, 450, 15570, 50, 6717, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 10:32:04 - INFO - __main__ - Sample 7296 of the training set: {'input_ids': [0, 1250, 5, 5567, 23959, 9, 41259, 11605, 23, 63, 275, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+ 06/02/2024 10:32:04 - INFO - __main__ - Sample 1639 of the training set: {'input_ids': [0, 506, 11791, 98, 16894, 15, 358, 9164, 672, 14, 24, 35499, 103, 761, 9, 36302, 2821, 16235, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 11:03:38 - INFO - __main__ - f_mu shape : torch.Size([872, 2])
+ 06/02/2024 11:03:38 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2])
+ 06/02/2024 11:03:38 - INFO - __main__ - tensor([[ 0.0365, -0.0280],
+ [ 0.1001, -0.0578],
+ [-0.1319, -0.0868],
+ ...,
+ [-0.0384, -0.0989],
+ [ 0.0418, 0.0235],
+ [-0.1502, -0.0896]], device='cuda:0')
+ 06/02/2024 11:03:38 - INFO - __main__ - tensor([[[3.8495, 3.8092],
+ [3.8092, 3.8473]],
+
+ [[4.0645, 4.0188],
+ [4.0188, 4.0531]],
+
+ [[3.7370, 3.7325],
+ [3.7325, 3.7369]],
+
+ ...,
+
+ [[4.2868, 4.2499],
+ [4.2499, 4.2829]],
+
+ [[4.5908, 4.4313],
+ [4.4313, 4.5912]],
+
+ [[3.9085, 3.9041],
+ [3.9041, 3.9098]]], device='cuda:0')
+ 06/02/2024 11:03:38 - INFO - __main__ - ***** Completed training *****
+ 06/02/2024 11:03:41 - INFO - __main__ - Number of labels detected = 2
+ 06/02/2024 11:03:42 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+ 06/02/2024 11:03:42 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_1999/adapter_config.json
+ 06/02/2024 11:03:42 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'.
+ 06/02/2024 11:03:42 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_1999/pytorch_adapter.bin
+ 06/02/2024 11:03:42 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_1999/head_config.json
+ 06/02/2024 11:03:42 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+ 06/02/2024 11:03:42 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_1999/pytorch_model_head.bin
+ 06/02/2024 11:03:42 - INFO - __main__ - Adapter Name = sst2
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - heads.sst2.1.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - heads.sst2.1.bias
+ 06/02/2024 11:03:42 - INFO - __main__ - heads.sst2.4.weight
+ 06/02/2024 11:03:42 - INFO - __main__ - heads.sst2.4.bias
+ 06/02/2024 11:03:57 - INFO - __main__ - Sample 41905 of the training set: {'input_ids': [0, 17615, 1899, 385, 16314, 2156, 10985, 25, 7, 549, 47, 128, 548, 450, 15570, 50, 6717, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 11:03:57 - INFO - __main__ - Sample 7296 of the training set: {'input_ids': [0, 1250, 5, 5567, 23959, 9, 41259, 11605, 23, 63, 275, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+ 06/02/2024 11:03:57 - INFO - __main__ - Sample 1639 of the training set: {'input_ids': [0, 506, 11791, 98, 16894, 15, 358, 9164, 672, 14, 24, 35499, 103, 761, 9, 36302, 2821, 16235, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 11:37:04 - INFO - __main__ - f_mu shape : torch.Size([872, 2])
+ 06/02/2024 11:37:04 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2])
+ 06/02/2024 11:37:04 - INFO - __main__ - tensor([[-4.6105, 4.5437],
+ [ 0.2353, -0.3928],
+ [-1.6659, 1.4105],
+ ...,
+ [ 0.4902, -0.7562],
+ [ 2.4113, -2.6370],
+ [-2.5681, 2.2276]], device='cuda:0')
+ 06/02/2024 11:37:04 - INFO - __main__ - tensor([[[3.3568, 3.0713],
+ [3.0713, 3.4248]],
+
+ [[1.3207, 0.5219],
+ [0.5219, 1.3381]],
+
+ [[1.5923, 0.8295],
+ [0.8295, 1.6030]],
+
+ ...,
+
+ [[1.1830, 0.3497],
+ [0.3497, 1.1694]],
+
+ [[3.5182, 0.3294],
+ [0.3294, 3.3632]],
+
+ [[2.5374, 1.2292],
+ [1.2292, 2.5529]]], device='cuda:0')
+ 06/02/2024 11:37:04 - INFO - __main__ - ***** Completed training *****
+ 06/02/2024 11:37:06 - INFO - __main__ - Number of labels detected = 2
+ 06/02/2024 11:37:07 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+ 06/02/2024 11:37:08 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_3999/adapter_config.json
+ 06/02/2024 11:37:08 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'.
+ 06/02/2024 11:37:08 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_3999/pytorch_adapter.bin
+ 06/02/2024 11:37:08 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_3999/head_config.json
+ 06/02/2024 11:37:08 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+ 06/02/2024 11:37:08 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_3999/pytorch_model_head.bin
+ 06/02/2024 11:37:08 - INFO - __main__ - Adapter Name = sst2
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - heads.sst2.1.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - heads.sst2.1.bias
+ 06/02/2024 11:37:08 - INFO - __main__ - heads.sst2.4.weight
+ 06/02/2024 11:37:08 - INFO - __main__ - heads.sst2.4.bias
+ 06/02/2024 11:37:23 - INFO - __main__ - Sample 41905 of the training set: {'input_ids': [0, 17615, 1899, 385, 16314, 2156, 10985, 25, 7, 549, 47, 128, 548, 450, 15570, 50, 6717, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 11:37:23 - INFO - __main__ - Sample 7296 of the training set: {'input_ids': [0, 1250, 5, 5567, 23959, 9, 41259, 11605, 23, 63, 275, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+ 06/02/2024 11:37:23 - INFO - __main__ - Sample 1639 of the training set: {'input_ids': [0, 506, 11791, 98, 16894, 15, 358, 9164, 672, 14, 24, 35499, 103, 761, 9, 36302, 2821, 16235, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 12:10:28 - INFO - __main__ - f_mu shape : torch.Size([872, 2])
+ 06/02/2024 12:10:28 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2])
+ 06/02/2024 12:10:28 - INFO - __main__ - tensor([[-3.3808, 3.1699],
+ [ 0.6090, -0.8818],
+ [-1.8422, 1.5469],
+ ...,
+ [ 1.0225, -1.2842],
+ [ 2.4053, -2.6460],
+ [-2.3469, 1.9917]], device='cuda:0')
+ 06/02/2024 12:10:28 - INFO - __main__ - tensor([[[3.2091, 3.0468],
+ [3.0468, 3.2406]],
+
+ [[1.5145, 0.6457],
+ [0.6457, 1.4886]],
+
+ [[2.1047, 1.3565],
+ [1.3565, 2.0765]],
+
+ ...,
+
+ [[1.5240, 0.6244],
+ [0.6244, 1.4904]],
+
+ [[2.5651, 1.6968],
+ [1.6968, 2.5272]],
+
+ [[3.1113, 1.9239],
+ [1.9239, 3.1397]]], device='cuda:0')
+ 06/02/2024 12:10:28 - INFO - __main__ - ***** Completed training *****
+ 06/02/2024 12:10:31 - INFO - __main__ - Number of labels detected = 2
+ 06/02/2024 12:10:33 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+ 06/02/2024 12:10:34 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_5999/adapter_config.json
+ 06/02/2024 12:10:34 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'.
+ 06/02/2024 12:10:34 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_5999/pytorch_adapter.bin
+ 06/02/2024 12:10:34 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_5999/head_config.json
+ 06/02/2024 12:10:34 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+ 06/02/2024 12:10:34 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_5999/pytorch_model_head.bin
+ 06/02/2024 12:10:34 - INFO - __main__ - Adapter Name = sst2
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias
603
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight
604
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias
605
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight
606
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias
607
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight
608
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias
609
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight
610
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias
611
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight
612
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias
613
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight
614
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias
615
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight
616
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias
617
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight
618
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias
619
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight
620
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias
621
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight
622
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias
623
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight
624
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias
625
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight
626
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias
627
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight
628
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias
629
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight
630
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias
631
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight
632
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias
633
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight
634
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias
635
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight
636
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias
637
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight
638
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias
639
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight
640
+ 06/02/2024 12:10:34 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias
641
+ 06/02/2024 12:10:34 - INFO - __main__ - heads.sst2.1.weight
642
+ 06/02/2024 12:10:34 - INFO - __main__ - heads.sst2.1.bias
643
+ 06/02/2024 12:10:34 - INFO - __main__ - heads.sst2.4.weight
644
+ 06/02/2024 12:10:34 - INFO - __main__ - heads.sst2.4.bias
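The listing above enumerates every trainable tensor after adapter insertion: eight bottleneck tensors per encoder layer (adapter_down/adapter_up weight and bias, once after attention and once after the FFN output, across all 12 layers) plus the four classification-head tensors. A minimal sketch of how such a dump is typically produced; `model` and `logger` are assumed names, not taken from the repository's script:

    for name, param in model.named_parameters():
        # After adding an adapter, only adapter and head weights keep requires_grad=True.
        if param.requires_grad:
            logger.info(name)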
+ 06/02/2024 12:10:48 - INFO - __main__ - Sample 41905 of the training set: {'input_ids': [0, 17615, 1899, 385, 16314, 2156, 10985, 25, 7, 549, 47, 128, 548, 450, 15570, 50, 6717, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 12:10:48 - INFO - __main__ - Sample 7296 of the training set: {'input_ids': [0, 1250, 5, 5567, 23959, 9, 41259, 11605, 23, 63, 275, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+ 06/02/2024 12:10:48 - INFO - __main__ - Sample 1639 of the training set: {'input_ids': [0, 506, 11791, 98, 16894, 15, 358, 9164, 672, 14, 24, 35499, 103, 761, 9, 36302, 2821, 16235, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 12:43:40 - INFO - __main__ - f_mu shape : torch.Size([872, 2])
+ 06/02/2024 12:43:40 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2])
+ 06/02/2024 12:43:40 - INFO - __main__ - tensor([[-3.4760, 3.4337],
+ [ 1.0837, -1.3577],
+ [-2.4944, 2.2840],
+ ...,
+ [ 1.6618, -1.9399],
+ [ 3.0225, -3.2862],
+ [-3.0789, 2.8654]], device='cuda:0')
+ 06/02/2024 12:43:40 - INFO - __main__ - tensor([[[2.5527, 2.3429],
+ [2.3429, 2.5670]],
+
+ [[1.8293, 0.6629],
+ [0.6629, 1.8090]],
+
+ [[2.7591, 1.7970],
+ [1.7970, 2.8230]],
+
+ ...,
+
+ [[2.0569, 0.8383],
+ [0.8383, 2.0088]],
+
+ [[2.9821, 2.4184],
+ [2.4184, 2.9587]],
+
+ [[2.8792, 2.6094],
+ [2.6094, 2.9325]]], device='cuda:0')
+ 06/02/2024 12:43:40 - INFO - __main__ - ***** Completed training *****
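f_mu ([872, 2]) and f_var ([872, 2, 2]) are the mean and full per-sentence covariance of the Laplace-approximated logits over the 872 SST-2 validation examples. The `mc_corr` tag on the result files suggests the predictive probabilities come from Monte Carlo sampling that keeps the off-diagonal logit correlations; a sketch of that step under that assumption (standard PyTorch; `n_samples=1000` is a guess matching the `1000` in the filenames):

    import torch

    def mc_predictive(f_mu, f_var, n_samples=1000):
        # Per-example Cholesky factor; a small jitter guards near-singular covariances.
        jitter = 1e-6 * torch.eye(f_var.size(-1), device=f_var.device)
        scale = torch.linalg.cholesky(f_var + jitter)                  # [N, C, C]
        eps = torch.randn(n_samples, *f_mu.shape, device=f_mu.device)  # [S, N, C]
        # f_mu + L @ eps draws correlated logit samples.
        logits = f_mu + torch.einsum("nij,snj->sni", scale, eps)       # [S, N, C]
        return logits.softmax(dim=-1).mean(dim=0)                      # predictive probs [N, C]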
+ 06/02/2024 12:43:43 - INFO - __main__ - Number of labels detected = 2
+ 06/02/2024 12:43:44 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+ 06/02/2024 12:43:45 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_7999/adapter_config.json
+ 06/02/2024 12:43:45 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'.
+ 06/02/2024 12:43:45 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_7999/pytorch_adapter.bin
+ 06/02/2024 12:43:45 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_7999/head_config.json
+ 06/02/2024 12:43:45 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+ 06/02/2024 12:43:45 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_7999/pytorch_model_head.bin
+ 06/02/2024 12:43:45 - INFO - __main__ - Adapter Name = sst2
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - heads.sst2.1.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - heads.sst2.1.bias
+ 06/02/2024 12:43:45 - INFO - __main__ - heads.sst2.4.weight
+ 06/02/2024 12:43:45 - INFO - __main__ - heads.sst2.4.bias
+ 06/02/2024 12:44:00 - INFO - __main__ - Sample 41905 of the training set: {'input_ids': [0, 17615, 1899, 385, 16314, 2156, 10985, 25, 7, 549, 47, 128, 548, 450, 15570, 50, 6717, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 12:44:00 - INFO - __main__ - Sample 7296 of the training set: {'input_ids': [0, 1250, 5, 5567, 23959, 9, 41259, 11605, 23, 63, 275, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+ 06/02/2024 12:44:00 - INFO - __main__ - Sample 1639 of the training set: {'input_ids': [0, 506, 11791, 98, 16894, 15, 358, 9164, 672, 14, 24, 35499, 103, 761, 9, 36302, 2821, 16235, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 13:17:21 - INFO - __main__ - f_mu shape : torch.Size([872, 2])
+ 06/02/2024 13:17:21 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2])
+ 06/02/2024 13:17:21 - INFO - __main__ - tensor([[-3.5407, 3.4897],
+ [ 1.0327, -1.3232],
+ [-2.7013, 2.5161],
+ ...,
+ [ 1.7484, -2.0215],
+ [ 3.0637, -3.3128],
+ [-2.9782, 2.7441]], device='cuda:0')
+ 06/02/2024 13:17:21 - INFO - __main__ - tensor([[[2.8754, 2.6784],
+ [2.6784, 2.8978]],
+
+ [[1.9772, 0.6767],
+ [0.6767, 1.9429]],
+
+ [[2.6757, 2.0673],
+ [2.0673, 2.7422]],
+
+ ...,
+
+ [[2.5259, 0.9129],
+ [0.9129, 2.4381]],
+
+ [[3.0168, 2.6318],
+ [2.6318, 2.9878]],
+
+ [[2.8295, 2.5386],
+ [2.5386, 2.8820]]], device='cuda:0')
+ 06/02/2024 13:17:21 - INFO - __main__ - ***** Completed training *****
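Each round of this log starts by rebuilding the base model and reloading the adapter and head from the next checkpoint directory (step_9999 follows below). A sketch of that reload using the AdapterHub `adapters` API; the calls below are standard for that library, but the repository's actual loading code may differ:

    from adapters import AutoAdapterModel

    ckpt = "./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_9999"
    model = AutoAdapterModel.from_pretrained("roberta-base")
    name = model.load_adapter(ckpt)   # reads adapter_config.json + pytorch_adapter.bin
    model.set_active_adapters(name)   # the head saved alongside (head_config.json) is restored too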
+ 06/02/2024 13:17:24 - INFO - __main__ - Number of labels detected = 2
+ 06/02/2024 13:17:25 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+ 06/02/2024 13:17:26 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_9999/adapter_config.json
+ 06/02/2024 13:17:26 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'.
+ 06/02/2024 13:17:26 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_9999/pytorch_adapter.bin
+ 06/02/2024 13:17:26 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_9999/head_config.json
+ 06/02/2024 13:17:26 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+ 06/02/2024 13:17:26 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_9999/pytorch_model_head.bin
+ 06/02/2024 13:17:26 - INFO - __main__ - Adapter Name = sst2
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - heads.sst2.1.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - heads.sst2.1.bias
+ 06/02/2024 13:17:26 - INFO - __main__ - heads.sst2.4.weight
+ 06/02/2024 13:17:26 - INFO - __main__ - heads.sst2.4.bias
+ 06/02/2024 13:17:41 - INFO - __main__ - Sample 41905 of the training set: {'input_ids': [0, 17615, 1899, 385, 16314, 2156, 10985, 25, 7, 549, 47, 128, 548, 450, 15570, 50, 6717, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 13:17:41 - INFO - __main__ - Sample 7296 of the training set: {'input_ids': [0, 1250, 5, 5567, 23959, 9, 41259, 11605, 23, 63, 275, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+ 06/02/2024 13:17:41 - INFO - __main__ - Sample 1639 of the training set: {'input_ids': [0, 506, 11791, 98, 16894, 15, 358, 9164, 672, 14, 24, 35499, 103, 761, 9, 36302, 2821, 16235, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 13:50:46 - INFO - __main__ - f_mu shape : torch.Size([872, 2])
+ 06/02/2024 13:50:46 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2])
+ 06/02/2024 13:50:46 - INFO - __main__ - tensor([[-3.6560, 3.6338],
+ [ 1.0281, -1.3132],
+ [-2.8401, 2.6913],
+ ...,
+ [ 1.6054, -1.8725],
+ [ 2.8347, -3.0855],
+ [-3.1332, 2.9356]], device='cuda:0')
+ 06/02/2024 13:50:46 - INFO - __main__ - tensor([[[2.9387, 2.7454],
+ [2.7454, 2.9648]],
+
+ [[1.8944, 0.7329],
+ [0.7329, 1.8677]],
+
+ [[2.7856, 2.1121],
+ [2.1121, 2.8775]],
+
+ ...,
+
+ [[2.2402, 0.9416],
+ [0.9416, 2.1762]],
+
+ [[2.9031, 2.5499],
+ [2.5499, 2.8769]],
+
+ [[2.8708, 2.5867],
+ [2.5867, 2.9321]]], device='cuda:0')
+ 06/02/2024 13:50:46 - INFO - __main__ - ***** Completed training *****
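The f_mu/f_var pairs logged above are also serialized under outputs_laplace/ (the f_mu_kron_all_homo_1000.pt and f_var_kron_all_homo_1000.pt files added in this commit), so the predictive step can be replayed offline. A hypothetical replay using the mc_predictive sketch above; `labels` is an assumed tensor of gold validation labels, not something this commit provides:

    import torch

    base = "./outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_5999"
    f_mu = torch.load(base + "/f_mu_kron_all_homo_1000.pt")
    f_var = torch.load(base + "/f_var_kron_all_homo_1000.pt")
    probs = mc_predictive(f_mu, f_var)  # sketch defined earlier
    acc = (probs.argmax(-1) == labels).float().mean().item()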
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.48509174311926606}
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_0/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
+ {
+     "memory_allocated": 1031100416,
+     "max_memory_allocated": 5066555904,
+     "memory_reserved": 5360320512,
+     "max_memory_reserved": 15634268160,
+     "memory_stats": {
+         "active.all.allocated": 14684790,
+         "active.all.current": 1189,
+         "active.all.freed": 14683601,
+         "active.all.peak": 1441,
+         "active.large_pool.allocated": 2773752,
+         "active.large_pool.current": 284,
+         "active.large_pool.freed": 2773468,
+         "active.large_pool.peak": 457,
+         "active.small_pool.allocated": 11911038,
+         "active.small_pool.current": 905,
+         "active.small_pool.freed": 11910133,
+         "active.small_pool.peak": 1107,
+         "active_bytes.all.allocated": 12193528267264,
+         "active_bytes.all.current": 1031100416,
+         "active_bytes.all.freed": 12192497166848,
+         "active_bytes.all.peak": 5066555904,
+         "active_bytes.large_pool.allocated": 7530574299648,
+         "active_bytes.large_pool.current": 1013856256,
+         "active_bytes.large_pool.freed": 7529560443392,
+         "active_bytes.large_pool.peak": 5042234880,
+         "active_bytes.small_pool.allocated": 4662953967616,
+         "active_bytes.small_pool.current": 17244160,
+         "active_bytes.small_pool.freed": 4662936723456,
+         "active_bytes.small_pool.peak": 137228800,
+         "allocated_bytes.all.allocated": 12193528267264,
+         "allocated_bytes.all.current": 1031100416,
+         "allocated_bytes.all.freed": 12192497166848,
+         "allocated_bytes.all.peak": 5066555904,
+         "allocated_bytes.large_pool.allocated": 7530574299648,
+         "allocated_bytes.large_pool.current": 1013856256,
+         "allocated_bytes.large_pool.freed": 7529560443392,
+         "allocated_bytes.large_pool.peak": 5042234880,
+         "allocated_bytes.small_pool.allocated": 4662953967616,
+         "allocated_bytes.small_pool.current": 17244160,
+         "allocated_bytes.small_pool.freed": 4662936723456,
+         "allocated_bytes.small_pool.peak": 137228800,
+         "allocation.all.allocated": 14684790,
+         "allocation.all.current": 1189,
+         "allocation.all.freed": 14683601,
+         "allocation.all.peak": 1441,
+         "allocation.large_pool.allocated": 2773752,
+         "allocation.large_pool.current": 284,
+         "allocation.large_pool.freed": 2773468,
+         "allocation.large_pool.peak": 457,
+         "allocation.small_pool.allocated": 11911038,
+         "allocation.small_pool.current": 905,
+         "allocation.small_pool.freed": 11910133,
+         "allocation.small_pool.peak": 1107,
+         "inactive_split.all.allocated": 7202822,
+         "inactive_split.all.current": 159,
+         "inactive_split.all.freed": 7202663,
+         "inactive_split.all.peak": 244,
+         "inactive_split.large_pool.allocated": 1392589,
+         "inactive_split.large_pool.current": 49,
+         "inactive_split.large_pool.freed": 1392540,
+         "inactive_split.large_pool.peak": 101,
+         "inactive_split.small_pool.allocated": 5810233,
+         "inactive_split.small_pool.current": 110,
+         "inactive_split.small_pool.freed": 5810123,
+         "inactive_split.small_pool.peak": 153,
+         "inactive_split_bytes.all.allocated": 14528689113600,
+         "inactive_split_bytes.all.current": 166373376,
+         "inactive_split_bytes.all.freed": 14528522740224,
+         "inactive_split_bytes.all.peak": 1919711232,
+         "inactive_split_bytes.large_pool.allocated": 9696670192640,
+         "inactive_split_bytes.large_pool.current": 106022912,
+         "inactive_split_bytes.large_pool.freed": 9696564169728,
+         "inactive_split_bytes.large_pool.peak": 1888642048,
+         "inactive_split_bytes.small_pool.allocated": 4832018920960,
+         "inactive_split_bytes.small_pool.current": 60350464,
+         "inactive_split_bytes.small_pool.freed": 4831958570496,
+         "inactive_split_bytes.small_pool.peak": 110444544,
+         "max_split_size": -1,
+         "num_alloc_retries": 1,
+         "num_device_alloc": 471,
+         "num_device_free": 302,
+         "num_ooms": 0,
+         "num_sync_all_streams": 2,
+         "oversize_allocations.allocated": 0,
+         "oversize_allocations.current": 0,
+         "oversize_allocations.freed": 0,
+         "oversize_allocations.peak": 0,
+         "oversize_segments.allocated": 0,
+         "oversize_segments.current": 0,
+         "oversize_segments.freed": 0,
+         "oversize_segments.peak": 0,
+         "requested_bytes.all.allocated": 11934048984819,
+         "requested_bytes.all.current": 1027774812,
+         "requested_bytes.all.freed": 11933021210007,
+         "requested_bytes.all.peak": 5016531548,
+         "requested_bytes.large_pool.allocated": 7271698310208,
+         "requested_bytes.large_pool.current": 1010670088,
+         "requested_bytes.large_pool.freed": 7270687640120,
+         "requested_bytes.large_pool.peak": 4992349256,
+         "requested_bytes.small_pool.allocated": 4662350674611,
+         "requested_bytes.small_pool.current": 17104724,
+         "requested_bytes.small_pool.freed": 4662333569887,
+         "requested_bytes.small_pool.peak": 137155084,
+         "reserved_bytes.all.allocated": 17515413504,
+         "reserved_bytes.all.current": 5360320512,
+         "reserved_bytes.all.freed": 12155092992,
+         "reserved_bytes.all.peak": 15634268160,
+         "reserved_bytes.large_pool.allocated": 17211326464,
+         "reserved_bytes.large_pool.current": 5221908480,
+         "reserved_bytes.large_pool.freed": 11989417984,
+         "reserved_bytes.large_pool.peak": 15487467520,
+         "reserved_bytes.small_pool.allocated": 304087040,
+         "reserved_bytes.small_pool.current": 138412032,
+         "reserved_bytes.small_pool.freed": 165675008,
+         "reserved_bytes.small_pool.peak": 146800640,
+         "segment.all.allocated": 471,
+         "segment.all.current": 169,
+         "segment.all.freed": 302,
+         "segment.all.peak": 366,
+         "segment.large_pool.allocated": 326,
+         "segment.large_pool.current": 103,
+         "segment.large_pool.freed": 223,
+         "segment.large_pool.peak": 296,
+         "segment.small_pool.allocated": 145,
+         "segment.small_pool.current": 66,
+         "segment.small_pool.freed": 79,
+         "segment.small_pool.peak": 70
+     }
+ }
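Every key in gpu_stats_la.json maps directly onto PyTorch's CUDA allocator reporting, so a file like the one above can be produced with a few standard calls (the filename matches this commit; the snippet itself is a sketch, not the repository's code):

    import json
    import torch

    stats = {
        "memory_allocated": torch.cuda.memory_allocated(),
        "max_memory_allocated": torch.cuda.max_memory_allocated(),
        "memory_reserved": torch.cuda.memory_reserved(),
        "max_memory_reserved": torch.cuda.max_memory_reserved(),
        "memory_stats": torch.cuda.memory_stats(),  # the nested counter dict above
    }
    with open("gpu_stats_la.json", "w") as fh:
        json.dump(stats, fh, indent=4)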
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.9243119266055045}
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_1999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
+ {
+     "memory_allocated": 1031100416,
+     "max_memory_allocated": 5067507712,
+     "memory_reserved": 5360320512,
+     "max_memory_reserved": 15634268160,
+     "memory_stats": {
+         "active.all.allocated": 29369628,
+         "active.all.current": 1189,
+         "active.all.freed": 29368439,
+         "active.all.peak": 1441,
+         "active.large_pool.allocated": 5547502,
+         "active.large_pool.current": 284,
+         "active.large_pool.freed": 5547218,
+         "active.large_pool.peak": 457,
+         "active.small_pool.allocated": 23822126,
+         "active.small_pool.current": 905,
+         "active.small_pool.freed": 23821221,
+         "active.small_pool.peak": 1107,
+         "active_bytes.all.allocated": 24378448427008,
+         "active_bytes.all.current": 1031100416,
+         "active_bytes.all.freed": 24377417326592,
+         "active_bytes.all.peak": 5067507712,
+         "active_bytes.large_pool.allocated": 15052540465152,
+         "active_bytes.large_pool.current": 1013856256,
+         "active_bytes.large_pool.freed": 15051526608896,
+         "active_bytes.large_pool.peak": 5043186688,
+         "active_bytes.small_pool.allocated": 9325907961856,
+         "active_bytes.small_pool.current": 17244160,
+         "active_bytes.small_pool.freed": 9325890717696,
+         "active_bytes.small_pool.peak": 137228800,
+         "allocated_bytes.all.allocated": 24378448427008,
+         "allocated_bytes.all.current": 1031100416,
+         "allocated_bytes.all.freed": 24377417326592,
+         "allocated_bytes.all.peak": 5067507712,
+         "allocated_bytes.large_pool.allocated": 15052540465152,
+         "allocated_bytes.large_pool.current": 1013856256,
+         "allocated_bytes.large_pool.freed": 15051526608896,
+         "allocated_bytes.large_pool.peak": 5043186688,
+         "allocated_bytes.small_pool.allocated": 9325907961856,
+         "allocated_bytes.small_pool.current": 17244160,
+         "allocated_bytes.small_pool.freed": 9325890717696,
+         "allocated_bytes.small_pool.peak": 137228800,
+         "allocation.all.allocated": 29369628,
+         "allocation.all.current": 1189,
+         "allocation.all.freed": 29368439,
+         "allocation.all.peak": 1441,
+         "allocation.large_pool.allocated": 5547502,
+         "allocation.large_pool.current": 284,
+         "allocation.large_pool.freed": 5547218,
+         "allocation.large_pool.peak": 457,
+         "allocation.small_pool.allocated": 23822126,
+         "allocation.small_pool.current": 905,
+         "allocation.small_pool.freed": 23821221,
+         "allocation.small_pool.peak": 1107,
+         "inactive_split.all.allocated": 14471885,
+         "inactive_split.all.current": 156,
+         "inactive_split.all.freed": 14471729,
+         "inactive_split.all.peak": 244,
+         "inactive_split.large_pool.allocated": 2772493,
+         "inactive_split.large_pool.current": 54,
+         "inactive_split.large_pool.freed": 2772439,
+         "inactive_split.large_pool.peak": 105,
+         "inactive_split.small_pool.allocated": 11699392,
+         "inactive_split.small_pool.current": 102,
+         "inactive_split.small_pool.freed": 11699290,
+         "inactive_split.small_pool.peak": 161,
+         "inactive_split_bytes.all.allocated": 29045068343808,
+         "inactive_split_bytes.all.current": 202024960,
+         "inactive_split_bytes.all.freed": 29044866318848,
+         "inactive_split_bytes.all.peak": 1976334336,
+         "inactive_split_bytes.large_pool.allocated": 19373720484864,
+         "inactive_split_bytes.large_pool.current": 147965952,
+         "inactive_split_bytes.large_pool.freed": 19373572518912,
+         "inactive_split_bytes.large_pool.peak": 1943168000,
+         "inactive_split_bytes.small_pool.allocated": 9671347858944,
+         "inactive_split_bytes.small_pool.current": 54059008,
+         "inactive_split_bytes.small_pool.freed": 9671293799936,
+         "inactive_split_bytes.small_pool.peak": 110444544,
+         "max_split_size": -1,
+         "num_alloc_retries": 2,
+         "num_device_alloc": 882,
+         "num_device_free": 713,
+         "num_ooms": 0,
+         "num_sync_all_streams": 5,
+         "oversize_allocations.allocated": 0,
+         "oversize_allocations.current": 0,
+         "oversize_allocations.freed": 0,
+         "oversize_allocations.peak": 0,
+         "oversize_segments.allocated": 0,
+         "oversize_segments.current": 0,
+         "oversize_segments.freed": 0,
+         "oversize_segments.peak": 0,
+         "requested_bytes.all.allocated": 23868080933314,
+         "requested_bytes.all.current": 1027774812,
+         "requested_bytes.all.freed": 23867053158502,
+         "requested_bytes.all.peak": 5016531548,
+         "requested_bytes.large_pool.allocated": 14543379581056,
+         "requested_bytes.large_pool.current": 1010670088,
+         "requested_bytes.large_pool.freed": 14542368910968,
+         "requested_bytes.large_pool.peak": 4992349256,
+         "requested_bytes.small_pool.allocated": 9324701352258,
+         "requested_bytes.small_pool.current": 17104724,
+         "requested_bytes.small_pool.freed": 9324684247534,
+         "requested_bytes.small_pool.peak": 137155084,
+         "reserved_bytes.all.allocated": 34204549120,
+         "reserved_bytes.all.current": 5360320512,
+         "reserved_bytes.all.freed": 28844228608,
+         "reserved_bytes.all.peak": 15634268160,
+         "reserved_bytes.large_pool.allocated": 33659289600,
+         "reserved_bytes.large_pool.current": 5221908480,
+         "reserved_bytes.large_pool.freed": 28437381120,
+         "reserved_bytes.large_pool.peak": 15487467520,
+         "reserved_bytes.small_pool.allocated": 545259520,
+         "reserved_bytes.small_pool.current": 138412032,
+         "reserved_bytes.small_pool.freed": 406847488,
+         "reserved_bytes.small_pool.peak": 146800640,
+         "segment.all.allocated": 882,
+         "segment.all.current": 169,
+         "segment.all.freed": 713,
+         "segment.all.peak": 366,
+         "segment.large_pool.allocated": 622,
+         "segment.large_pool.current": 103,
+         "segment.large_pool.freed": 519,
+         "segment.large_pool.peak": 296,
+         "segment.small_pool.allocated": 260,
+         "segment.small_pool.current": 66,
+         "segment.small_pool.freed": 194,
+         "segment.small_pool.peak": 70
+     }
+ }
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.9311926605504587}
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_3999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 1031133184,
3
+ "max_memory_allocated": 5067620352,
4
+ "memory_reserved": 5360320512,
5
+ "max_memory_reserved": 15634268160,
6
+ "memory_stats": {
7
+ "active.all.allocated": 44054516,
8
+ "active.all.current": 1189,
9
+ "active.all.freed": 44053327,
10
+ "active.all.peak": 1441,
11
+ "active.large_pool.allocated": 8321252,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 8320968,
14
+ "active.large_pool.peak": 482,
15
+ "active.small_pool.allocated": 35733264,
16
+ "active.small_pool.current": 905,
17
+ "active.small_pool.freed": 35732359,
18
+ "active.small_pool.peak": 1107,
19
+ "active_bytes.all.allocated": 36577830414848,
20
+ "active_bytes.all.current": 1031133184,
21
+ "active_bytes.all.freed": 36576799281664,
22
+ "active_bytes.all.peak": 5067620352,
23
+ "active_bytes.large_pool.allocated": 22588968432128,
24
+ "active_bytes.large_pool.current": 1013889024,
25
+ "active_bytes.large_pool.freed": 22587954543104,
26
+ "active_bytes.large_pool.peak": 5043299328,
27
+ "active_bytes.small_pool.allocated": 13988861982720,
28
+ "active_bytes.small_pool.current": 17244160,
29
+ "active_bytes.small_pool.freed": 13988844738560,
30
+ "active_bytes.small_pool.peak": 137228800,
31
+ "allocated_bytes.all.allocated": 36577830414848,
32
+ "allocated_bytes.all.current": 1031133184,
33
+ "allocated_bytes.all.freed": 36576799281664,
34
+ "allocated_bytes.all.peak": 5067620352,
35
+ "allocated_bytes.large_pool.allocated": 22588968432128,
36
+ "allocated_bytes.large_pool.current": 1013889024,
37
+ "allocated_bytes.large_pool.freed": 22587954543104,
38
+ "allocated_bytes.large_pool.peak": 5043299328,
39
+ "allocated_bytes.small_pool.allocated": 13988861982720,
40
+ "allocated_bytes.small_pool.current": 17244160,
41
+ "allocated_bytes.small_pool.freed": 13988844738560,
42
+ "allocated_bytes.small_pool.peak": 137228800,
43
+ "allocation.all.allocated": 44054516,
44
+ "allocation.all.current": 1189,
45
+ "allocation.all.freed": 44053327,
46
+ "allocation.all.peak": 1441,
47
+ "allocation.large_pool.allocated": 8321252,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 8320968,
50
+ "allocation.large_pool.peak": 482,
51
+ "allocation.small_pool.allocated": 35733264,
52
+ "allocation.small_pool.current": 905,
53
+ "allocation.small_pool.freed": 35732359,
54
+ "allocation.small_pool.peak": 1107,
55
+ "inactive_split.all.allocated": 21715349,
56
+ "inactive_split.all.current": 183,
57
+ "inactive_split.all.freed": 21715166,
58
+ "inactive_split.all.peak": 244,
59
+ "inactive_split.large_pool.allocated": 4140876,
60
+ "inactive_split.large_pool.current": 52,
61
+ "inactive_split.large_pool.freed": 4140824,
62
+ "inactive_split.large_pool.peak": 105,
63
+ "inactive_split.small_pool.allocated": 17574473,
64
+ "inactive_split.small_pool.current": 131,
65
+ "inactive_split.small_pool.freed": 17574342,
66
+ "inactive_split.small_pool.peak": 161,
67
+ "inactive_split_bytes.all.allocated": 43064789464064,
68
+ "inactive_split_bytes.all.current": 189409280,
69
+ "inactive_split_bytes.all.freed": 43064600054784,
70
+ "inactive_split_bytes.all.peak": 1976334336,
71
+ "inactive_split_bytes.large_pool.allocated": 28631634917888,
72
+ "inactive_split_bytes.large_pool.current": 126961664,
73
+ "inactive_split_bytes.large_pool.freed": 28631507956224,
74
+ "inactive_split_bytes.large_pool.peak": 1943168000,
75
+ "inactive_split_bytes.small_pool.allocated": 14433154546176,
76
+ "inactive_split_bytes.small_pool.current": 62447616,
77
+ "inactive_split_bytes.small_pool.freed": 14433092098560,
78
+ "inactive_split_bytes.small_pool.peak": 110444544,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 3,
81
+ "num_device_alloc": 1324,
82
+ "num_device_free": 1155,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 8,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 35802112884845,
94
+ "requested_bytes.all.current": 1027774812,
95
+ "requested_bytes.all.freed": 35801085110033,
96
+ "requested_bytes.all.peak": 5016531548,
97
+ "requested_bytes.large_pool.allocated": 21815060851904,
98
+ "requested_bytes.large_pool.current": 1010670088,
99
+ "requested_bytes.large_pool.freed": 21814050181816,
100
+ "requested_bytes.large_pool.peak": 4992349256,
101
+ "requested_bytes.small_pool.allocated": 13987052032941,
102
+ "requested_bytes.small_pool.current": 17104724,
103
+ "requested_bytes.small_pool.freed": 13987034928217,
104
+ "requested_bytes.small_pool.peak": 137155084,
105
+ "reserved_bytes.all.allocated": 51564773376,
106
+ "reserved_bytes.all.current": 5360320512,
107
+ "reserved_bytes.all.freed": 46204452864,
108
+ "reserved_bytes.all.peak": 15634268160,
109
+ "reserved_bytes.large_pool.allocated": 50765758464,
110
+ "reserved_bytes.large_pool.current": 5221908480,
111
+ "reserved_bytes.large_pool.freed": 45543849984,
112
+ "reserved_bytes.large_pool.peak": 15487467520,
113
+ "reserved_bytes.small_pool.allocated": 799014912,
114
+ "reserved_bytes.small_pool.current": 138412032,
115
+ "reserved_bytes.small_pool.freed": 660602880,
116
+ "reserved_bytes.small_pool.peak": 146800640,
117
+ "segment.all.allocated": 1324,
118
+ "segment.all.current": 169,
119
+ "segment.all.freed": 1155,
120
+ "segment.all.peak": 366,
121
+ "segment.large_pool.allocated": 943,
122
+ "segment.large_pool.current": 103,
123
+ "segment.large_pool.freed": 840,
124
+ "segment.large_pool.peak": 296,
125
+ "segment.small_pool.allocated": 381,
126
+ "segment.small_pool.current": 66,
127
+ "segment.small_pool.freed": 315,
128
+ "segment.small_pool.peak": 70
129
+ }
130
+ }
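The `gpu_stats_la.json` files in this commit are snapshots of PyTorch's CUDA caching-allocator counters. The sketch below is a hypothetical reconstruction of how such a file can be produced, not the repository's actual logging code (which is not part of this diff); `torch.cuda.memory_stats()` returns exactly the flat keys seen above ("active.all.allocated", "segment.small_pool.peak", and so on).

```python
# Hypothetical sketch of producing a gpu_stats_la.json-style snapshot.
import json
import torch

def dump_gpu_stats(path, device=None):
    stats = {
        "memory_allocated": torch.cuda.memory_allocated(device),
        "max_memory_allocated": torch.cuda.max_memory_allocated(device),
        "memory_reserved": torch.cuda.memory_reserved(device),
        "max_memory_reserved": torch.cuda.max_memory_reserved(device),
        # Flat allocator counters: "active.all.allocated", "num_ooms", ...
        "memory_stats": dict(torch.cuda.memory_stats(device)),
    }
    with open(path, "w") as f:
        json.dump(stats, f, indent=4)
```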
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.9323394495412844}
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_5999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
+ {
+ "memory_allocated": 1031204864,
+ "max_memory_allocated": 5067620352,
+ "memory_reserved": 5360320512,
+ "max_memory_reserved": 15634268160,
+ "memory_stats": {
+ "active.all.allocated": 58739454,
+ "active.all.current": 1189,
+ "active.all.freed": 58738265,
+ "active.all.peak": 1441,
+ "active.large_pool.allocated": 11095002,
+ "active.large_pool.current": 284,
+ "active.large_pool.freed": 11094718,
+ "active.large_pool.peak": 482,
+ "active.small_pool.allocated": 47644452,
+ "active.small_pool.current": 905,
+ "active.small_pool.freed": 47643547,
+ "active.small_pool.peak": 1107,
+ "active_bytes.all.allocated": 48769671589888,
+ "active_bytes.all.current": 1031204864,
+ "active_bytes.all.freed": 48768640385024,
+ "active_bytes.all.peak": 5067620352,
+ "active_bytes.large_pool.allocated": 30117855559680,
+ "active_bytes.large_pool.current": 1013960704,
+ "active_bytes.large_pool.freed": 30116841598976,
+ "active_bytes.large_pool.peak": 5043299328,
+ "active_bytes.small_pool.allocated": 18651816030208,
+ "active_bytes.small_pool.current": 17244160,
+ "active_bytes.small_pool.freed": 18651798786048,
+ "active_bytes.small_pool.peak": 137228800,
+ "allocated_bytes.all.allocated": 48769671589888,
+ "allocated_bytes.all.current": 1031204864,
+ "allocated_bytes.all.freed": 48768640385024,
+ "allocated_bytes.all.peak": 5067620352,
+ "allocated_bytes.large_pool.allocated": 30117855559680,
+ "allocated_bytes.large_pool.current": 1013960704,
+ "allocated_bytes.large_pool.freed": 30116841598976,
+ "allocated_bytes.large_pool.peak": 5043299328,
+ "allocated_bytes.small_pool.allocated": 18651816030208,
+ "allocated_bytes.small_pool.current": 17244160,
+ "allocated_bytes.small_pool.freed": 18651798786048,
+ "allocated_bytes.small_pool.peak": 137228800,
+ "allocation.all.allocated": 58739454,
+ "allocation.all.current": 1189,
+ "allocation.all.freed": 58738265,
+ "allocation.all.peak": 1441,
+ "allocation.large_pool.allocated": 11095002,
+ "allocation.large_pool.current": 284,
+ "allocation.large_pool.freed": 11094718,
+ "allocation.large_pool.peak": 482,
+ "allocation.small_pool.allocated": 47644452,
+ "allocation.small_pool.current": 905,
+ "allocation.small_pool.freed": 47643547,
+ "allocation.small_pool.peak": 1107,
+ "inactive_split.all.allocated": 28758130,
+ "inactive_split.all.current": 163,
+ "inactive_split.all.freed": 28757967,
+ "inactive_split.all.peak": 248,
+ "inactive_split.large_pool.allocated": 5497315,
+ "inactive_split.large_pool.current": 50,
+ "inactive_split.large_pool.freed": 5497265,
+ "inactive_split.large_pool.peak": 105,
+ "inactive_split.small_pool.allocated": 23260815,
+ "inactive_split.small_pool.current": 113,
+ "inactive_split.small_pool.freed": 23260702,
+ "inactive_split.small_pool.peak": 213,
+ "inactive_split_bytes.all.allocated": 56505825048576,
+ "inactive_split_bytes.all.current": 174657536,
+ "inactive_split_bytes.all.freed": 56505650391040,
+ "inactive_split_bytes.all.peak": 1976334336,
+ "inactive_split_bytes.large_pool.allocated": 37367989028352,
+ "inactive_split_bytes.large_pool.current": 105918464,
+ "inactive_split_bytes.large_pool.freed": 37367883109888,
+ "inactive_split_bytes.large_pool.peak": 1943168000,
+ "inactive_split_bytes.small_pool.allocated": 19137836020224,
+ "inactive_split_bytes.small_pool.current": 68739072,
+ "inactive_split_bytes.small_pool.freed": 19137767281152,
+ "inactive_split_bytes.small_pool.peak": 110444544,
+ "max_split_size": -1,
+ "num_alloc_retries": 4,
+ "num_device_alloc": 1745,
+ "num_device_free": 1576,
+ "num_ooms": 0,
+ "num_sync_all_streams": 11,
+ "oversize_allocations.allocated": 0,
+ "oversize_allocations.current": 0,
+ "oversize_allocations.freed": 0,
+ "oversize_allocations.peak": 0,
+ "oversize_segments.allocated": 0,
+ "oversize_segments.current": 0,
+ "oversize_segments.freed": 0,
+ "oversize_segments.peak": 0,
+ "requested_bytes.all.allocated": 47736144839412,
+ "requested_bytes.all.current": 1027774812,
+ "requested_bytes.all.freed": 47735117064600,
+ "requested_bytes.all.peak": 5016531548,
+ "requested_bytes.large_pool.allocated": 29086742122752,
+ "requested_bytes.large_pool.current": 1010670088,
+ "requested_bytes.large_pool.freed": 29085731452664,
+ "requested_bytes.large_pool.peak": 4992349256,
+ "requested_bytes.small_pool.allocated": 18649402716660,
+ "requested_bytes.small_pool.current": 17104724,
+ "requested_bytes.small_pool.freed": 18649385611936,
+ "requested_bytes.small_pool.peak": 137155084,
+ "reserved_bytes.all.allocated": 68937580544,
+ "reserved_bytes.all.current": 5360320512,
+ "reserved_bytes.all.freed": 63577260032,
+ "reserved_bytes.all.peak": 15634268160,
+ "reserved_bytes.large_pool.allocated": 67935141888,
+ "reserved_bytes.large_pool.current": 5221908480,
+ "reserved_bytes.large_pool.freed": 62713233408,
+ "reserved_bytes.large_pool.peak": 15487467520,
+ "reserved_bytes.small_pool.allocated": 1002438656,
+ "reserved_bytes.small_pool.current": 138412032,
+ "reserved_bytes.small_pool.freed": 864026624,
+ "reserved_bytes.small_pool.peak": 146800640,
+ "segment.all.allocated": 1745,
+ "segment.all.current": 169,
+ "segment.all.freed": 1576,
+ "segment.all.peak": 366,
+ "segment.large_pool.allocated": 1267,
+ "segment.large_pool.current": 103,
+ "segment.large_pool.freed": 1164,
+ "segment.large_pool.peak": 296,
+ "segment.small_pool.allocated": 478,
+ "segment.small_pool.current": 66,
+ "segment.small_pool.freed": 412,
+ "segment.small_pool.peak": 70
+ }
+ }
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.9357798165137615}
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_7999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
+ {
+ "memory_allocated": 1031221248,
+ "max_memory_allocated": 5067620352,
+ "memory_reserved": 5385486336,
+ "max_memory_reserved": 15655239680,
+ "memory_stats": {
+ "active.all.allocated": 73424442,
+ "active.all.current": 1189,
+ "active.all.freed": 73423253,
+ "active.all.peak": 1441,
+ "active.large_pool.allocated": 13868752,
+ "active.large_pool.current": 284,
+ "active.large_pool.freed": 13868468,
+ "active.large_pool.peak": 482,
+ "active.small_pool.allocated": 59555690,
+ "active.small_pool.current": 905,
+ "active.small_pool.freed": 59554785,
+ "active.small_pool.peak": 1107,
+ "active_bytes.all.allocated": 60947820335104,
+ "active_bytes.all.current": 1031221248,
+ "active_bytes.all.freed": 60946789113856,
+ "active_bytes.all.peak": 5067620352,
+ "active_bytes.large_pool.allocated": 37633050230784,
+ "active_bytes.large_pool.current": 1013977088,
+ "active_bytes.large_pool.freed": 37632036253696,
+ "active_bytes.large_pool.peak": 5043299328,
+ "active_bytes.small_pool.allocated": 23314770104320,
+ "active_bytes.small_pool.current": 17244160,
+ "active_bytes.small_pool.freed": 23314752860160,
+ "active_bytes.small_pool.peak": 137228800,
+ "allocated_bytes.all.allocated": 60947820335104,
+ "allocated_bytes.all.current": 1031221248,
+ "allocated_bytes.all.freed": 60946789113856,
+ "allocated_bytes.all.peak": 5067620352,
+ "allocated_bytes.large_pool.allocated": 37633050230784,
+ "allocated_bytes.large_pool.current": 1013977088,
+ "allocated_bytes.large_pool.freed": 37632036253696,
+ "allocated_bytes.large_pool.peak": 5043299328,
+ "allocated_bytes.small_pool.allocated": 23314770104320,
+ "allocated_bytes.small_pool.current": 17244160,
+ "allocated_bytes.small_pool.freed": 23314752860160,
+ "allocated_bytes.small_pool.peak": 137228800,
+ "allocation.all.allocated": 73424442,
+ "allocation.all.current": 1189,
+ "allocation.all.freed": 73423253,
+ "allocation.all.peak": 1441,
+ "allocation.large_pool.allocated": 13868752,
+ "allocation.large_pool.current": 284,
+ "allocation.large_pool.freed": 13868468,
+ "allocation.large_pool.peak": 482,
+ "allocation.small_pool.allocated": 59555690,
+ "allocation.small_pool.current": 905,
+ "allocation.small_pool.freed": 59554785,
+ "allocation.small_pool.peak": 1107,
+ "inactive_split.all.allocated": 35907759,
+ "inactive_split.all.current": 173,
+ "inactive_split.all.freed": 35907586,
+ "inactive_split.all.peak": 248,
+ "inactive_split.large_pool.allocated": 6841602,
+ "inactive_split.large_pool.current": 53,
+ "inactive_split.large_pool.freed": 6841549,
+ "inactive_split.large_pool.peak": 105,
+ "inactive_split.small_pool.allocated": 29066157,
+ "inactive_split.small_pool.current": 120,
+ "inactive_split.small_pool.freed": 29066037,
+ "inactive_split.small_pool.peak": 213,
+ "inactive_split_bytes.all.allocated": 70103403785728,
+ "inactive_split_bytes.all.current": 195612672,
+ "inactive_split_bytes.all.freed": 70103208173056,
+ "inactive_split_bytes.all.peak": 1976334336,
+ "inactive_split_bytes.large_pool.allocated": 46254990833152,
+ "inactive_split_bytes.large_pool.current": 126873600,
+ "inactive_split_bytes.large_pool.freed": 46254863959552,
+ "inactive_split_bytes.large_pool.peak": 1943168000,
+ "inactive_split_bytes.small_pool.allocated": 23848412952576,
+ "inactive_split_bytes.small_pool.current": 68739072,
+ "inactive_split_bytes.small_pool.freed": 23848344213504,
+ "inactive_split_bytes.small_pool.peak": 110444544,
+ "max_split_size": -1,
+ "num_alloc_retries": 5,
+ "num_device_alloc": 2171,
+ "num_device_free": 1999,
+ "num_ooms": 0,
+ "num_sync_all_streams": 14,
+ "oversize_allocations.allocated": 0,
+ "oversize_allocations.current": 0,
+ "oversize_allocations.freed": 0,
+ "oversize_allocations.peak": 0,
+ "oversize_segments.allocated": 0,
+ "oversize_segments.current": 0,
+ "oversize_segments.freed": 0,
+ "oversize_segments.peak": 0,
+ "requested_bytes.all.allocated": 59670176797015,
+ "requested_bytes.all.current": 1027774812,
+ "requested_bytes.all.freed": 59669149022203,
+ "requested_bytes.all.peak": 5016531548,
+ "requested_bytes.large_pool.allocated": 36358423393600,
+ "requested_bytes.large_pool.current": 1010670088,
+ "requested_bytes.large_pool.freed": 36357412723512,
+ "requested_bytes.large_pool.peak": 4992349256,
+ "requested_bytes.small_pool.allocated": 23311753403415,
+ "requested_bytes.small_pool.current": 17104724,
+ "requested_bytes.small_pool.freed": 23311736298691,
+ "requested_bytes.small_pool.peak": 137155084,
+ "reserved_bytes.all.allocated": 86283124736,
+ "reserved_bytes.all.current": 5385486336,
+ "reserved_bytes.all.freed": 80897638400,
+ "reserved_bytes.all.peak": 15655239680,
+ "reserved_bytes.large_pool.allocated": 85062582272,
+ "reserved_bytes.large_pool.current": 5242880000,
+ "reserved_bytes.large_pool.freed": 79819702272,
+ "reserved_bytes.large_pool.peak": 15508439040,
+ "reserved_bytes.small_pool.allocated": 1220542464,
+ "reserved_bytes.small_pool.current": 142606336,
+ "reserved_bytes.small_pool.freed": 1077936128,
+ "reserved_bytes.small_pool.peak": 146800640,
+ "segment.all.allocated": 2171,
+ "segment.all.current": 172,
+ "segment.all.freed": 1999,
+ "segment.all.peak": 367,
+ "segment.large_pool.allocated": 1589,
+ "segment.large_pool.current": 104,
+ "segment.large_pool.freed": 1485,
+ "segment.large_pool.peak": 297,
+ "segment.small_pool.allocated": 582,
+ "segment.small_pool.current": 68,
+ "segment.small_pool.freed": 514,
+ "segment.small_pool.peak": 70
+ }
+ }
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.9346330275229358}
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/sst2/roberta-base_adapterstrain_val_0.0001_42_8_10000/step_9999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
+ {
+ "memory_allocated": 1031100416,
+ "max_memory_allocated": 5067620352,
+ "memory_reserved": 5366611968,
+ "max_memory_reserved": 15655239680,
+ "memory_stats": {
+ "active.all.allocated": 88109480,
+ "active.all.current": 1189,
+ "active.all.freed": 88108291,
+ "active.all.peak": 1441,
+ "active.large_pool.allocated": 16642502,
+ "active.large_pool.current": 284,
+ "active.large_pool.freed": 16642218,
+ "active.large_pool.peak": 482,
+ "active.small_pool.allocated": 71466978,
+ "active.small_pool.current": 905,
+ "active.small_pool.freed": 71466073,
+ "active.small_pool.peak": 1107,
+ "active_bytes.all.allocated": 73133448412672,
+ "active_bytes.all.current": 1031100416,
+ "active_bytes.all.freed": 73132417312256,
+ "active_bytes.all.peak": 5067620352,
+ "active_bytes.large_pool.allocated": 45155724207616,
+ "active_bytes.large_pool.current": 1013856256,
+ "active_bytes.large_pool.freed": 45154710351360,
+ "active_bytes.large_pool.peak": 5043299328,
+ "active_bytes.small_pool.allocated": 27977724205056,
+ "active_bytes.small_pool.current": 17244160,
+ "active_bytes.small_pool.freed": 27977706960896,
+ "active_bytes.small_pool.peak": 137228800,
+ "allocated_bytes.all.allocated": 73133448412672,
+ "allocated_bytes.all.current": 1031100416,
+ "allocated_bytes.all.freed": 73132417312256,
+ "allocated_bytes.all.peak": 5067620352,
+ "allocated_bytes.large_pool.allocated": 45155724207616,
+ "allocated_bytes.large_pool.current": 1013856256,
+ "allocated_bytes.large_pool.freed": 45154710351360,
+ "allocated_bytes.large_pool.peak": 5043299328,
+ "allocated_bytes.small_pool.allocated": 27977724205056,
+ "allocated_bytes.small_pool.current": 17244160,
+ "allocated_bytes.small_pool.freed": 27977706960896,
+ "allocated_bytes.small_pool.peak": 137228800,
+ "allocation.all.allocated": 88109480,
+ "allocation.all.current": 1189,
+ "allocation.all.freed": 88108291,
+ "allocation.all.peak": 1441,
+ "allocation.large_pool.allocated": 16642502,
+ "allocation.large_pool.current": 284,
+ "allocation.large_pool.freed": 16642218,
+ "allocation.large_pool.peak": 482,
+ "allocation.small_pool.allocated": 71466978,
+ "allocation.small_pool.current": 905,
+ "allocation.small_pool.freed": 71466073,
+ "allocation.small_pool.peak": 1107,
+ "inactive_split.all.allocated": 43175447,
+ "inactive_split.all.current": 169,
+ "inactive_split.all.freed": 43175278,
+ "inactive_split.all.peak": 252,
+ "inactive_split.large_pool.allocated": 8221432,
+ "inactive_split.large_pool.current": 54,
+ "inactive_split.large_pool.freed": 8221378,
+ "inactive_split.large_pool.peak": 105,
+ "inactive_split.small_pool.allocated": 34954015,
+ "inactive_split.small_pool.current": 115,
+ "inactive_split.small_pool.freed": 34953900,
+ "inactive_split.small_pool.peak": 213,
+ "inactive_split_bytes.all.allocated": 84620994070016,
+ "inactive_split_bytes.all.current": 199927808,
+ "inactive_split_bytes.all.freed": 84620794142208,
+ "inactive_split_bytes.all.peak": 1976334336,
+ "inactive_split_bytes.large_pool.allocated": 55933383973376,
+ "inactive_split_bytes.large_pool.current": 147965952,
+ "inactive_split_bytes.large_pool.freed": 55933236007424,
+ "inactive_split_bytes.large_pool.peak": 1943168000,
+ "inactive_split_bytes.small_pool.allocated": 28687610096640,
+ "inactive_split_bytes.small_pool.current": 51961856,
+ "inactive_split_bytes.small_pool.freed": 28687558134784,
+ "inactive_split_bytes.small_pool.peak": 110444544,
+ "max_split_size": -1,
+ "num_alloc_retries": 6,
+ "num_device_alloc": 2580,
+ "num_device_free": 2408,
+ "num_ooms": 0,
+ "num_sync_all_streams": 17,
+ "oversize_allocations.allocated": 0,
+ "oversize_allocations.current": 0,
+ "oversize_allocations.freed": 0,
+ "oversize_allocations.peak": 0,
+ "oversize_segments.allocated": 0,
+ "oversize_segments.current": 0,
+ "oversize_segments.freed": 0,
+ "oversize_segments.peak": 0,
+ "requested_bytes.all.allocated": 71604208757654,
+ "requested_bytes.all.current": 1027774812,
+ "requested_bytes.all.freed": 71603180982842,
+ "requested_bytes.all.peak": 5016531548,
+ "requested_bytes.large_pool.allocated": 43630104664448,
+ "requested_bytes.large_pool.current": 1010670088,
+ "requested_bytes.large_pool.freed": 43629093994360,
+ "requested_bytes.large_pool.peak": 4992349256,
+ "requested_bytes.small_pool.allocated": 27974104093206,
+ "requested_bytes.small_pool.current": 17104724,
+ "requested_bytes.small_pool.freed": 27974086988482,
+ "requested_bytes.small_pool.peak": 137155084,
+ "reserved_bytes.all.allocated": 102949191680,
+ "reserved_bytes.all.current": 5366611968,
+ "reserved_bytes.all.freed": 97582579712,
+ "reserved_bytes.all.peak": 15655239680,
+ "reserved_bytes.large_pool.allocated": 101489573888,
+ "reserved_bytes.large_pool.current": 5221908480,
+ "reserved_bytes.large_pool.freed": 96267665408,
+ "reserved_bytes.large_pool.peak": 15508439040,
+ "reserved_bytes.small_pool.allocated": 1459617792,
+ "reserved_bytes.small_pool.current": 144703488,
+ "reserved_bytes.small_pool.freed": 1314914304,
+ "reserved_bytes.small_pool.peak": 146800640,
+ "segment.all.allocated": 2580,
+ "segment.all.current": 172,
+ "segment.all.freed": 2408,
+ "segment.all.peak": 367,
+ "segment.large_pool.allocated": 1884,
+ "segment.large_pool.current": 103,
+ "segment.large_pool.freed": 1781,
+ "segment.large_pool.peak": 297,
+ "segment.small_pool.allocated": 696,
+ "segment.small_pool.current": 69,
+ "segment.small_pool.freed": 627,
+ "segment.small_pool.peak": 70
+ }
+ }
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_0/f_mu_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:928ffb0df5485bf0397015454ccaa43e890bcce307824866cf56a3e327d21990
+ size 8300
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_0/f_var_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c9fcc8fece67f14be217973c5782cfef45eda5a6c9a61559c4f2c402c56720fb
+ size 15281
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_0/prior_precision_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2be496cc60c8d15f1137dcfc25e5602ed0661c67e899977579d0d39e2a5706a1
+ size 1379
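The `.pt` entries in this commit are Git LFS pointers (version / oid / size), so the tensors themselves require `git lfs pull` before loading. A speculative sketch of consuming them follows: the `f_mu` / `f_var` names are assumed to hold the Laplace predictive mean and variance of the logits, the `1000` suffix is assumed to match the MC sample count in the result file names, and `f_var` is assumed to be diagonal with the same shape as `f_mu` (a full per-example covariance would need a Cholesky factor instead).

```python
# Speculative sketch under the assumptions stated above.
import torch

step_dir = "outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_0"
f_mu = torch.load(f"{step_dir}/f_mu_kron_all_homo_1000.pt")    # assumed (n_eval, n_classes)
f_var = torch.load(f"{step_dir}/f_var_kron_all_homo_1000.pt")  # assumed same shape (diagonal)

# Monte Carlo estimate of the predictive distribution from the Gaussian over logits.
n_samples = 1000
eps = torch.randn(n_samples, *f_mu.shape)
logits = f_mu + f_var.clamp_min(0.0).sqrt() * eps
probs = torch.softmax(logits, dim=-1).mean(dim=0)
preds = probs.argmax(dim=-1)
```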
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_1999/f_mu_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d1648a38498be9795dd334dda6672dd6155dfda428113004e9a0b15e7bd55085
+ size 8300
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_1999/f_var_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1409fa65646a47b069ecee0fdb551548ecdeadf4ecabb496ee7fe0cdae43d37e
+ size 15281
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_1999/prior_precision_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dac561eaf953c91cf5880f89f1c29b165cae43d951c03a936e08c86e2d32cfe4
+ size 1379
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_3999/f_mu_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:25bb1a8a6f82878c6c804368e6af4c1aa671134ae51a229fb6e05eb16dbca26c
+ size 8300
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_3999/f_var_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:91c326ac14d63e7b52ca7a4b4f195f6e685dd2ff074544850cc762dd11b1f068
+ size 15281
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_3999/prior_precision_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7dbe11e2c06704d9091be38e5a71d380e66bda6560203b7773b2335cf5e6b523
+ size 1379
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_5999/f_mu_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a7c1cc538c924dc97600766873ae5e54aaa68d09378f80a4009fb1b9c76f280c
+ size 8300
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_5999/f_var_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db486ed4cb67f64de058c510c73f9cec05792dccbfdd0200f123e1bbfcb47525
+ size 15281
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_5999/prior_precision_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7bfd12011edeccccce3c4077a976103e2b3c477194e56b9705a9885104c1da18
+ size 1379
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_7999/f_mu_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:30ded4b8bfaaaae05db1a4e6344f64491ac38bdfaeacf652641e29d2ec7e93c8
+ size 8300
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_7999/f_var_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da025088b864ff9e0672a58d9b36dc562ccb3b431657ed5a90084ea57b53a8b1
+ size 15281
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_7999/prior_precision_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a381d97d30ef73c2ca234a18a162d13d28634f02e814c0747fee4b1569b4c0c
+ size 1379
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_9999/f_mu_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7d7d3f7aa341a08c11bd742246d570de634fa94000ff9583ffb394af39117da
+ size 8300
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_9999/f_var_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3eea9596cf09b129a2fdba462e16abcb93a6ad313a0981437b831ffa023a3c41
+ size 15281
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_42/step_9999/prior_precision_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c1fe29df39294e84f1b528cc78a123824b759b9826e7ebdb34fab9f8a191351c
+ size 1379