m4lw4r3exe committed on
Commit
2f6e8d8
·
1 Parent(s): ddf00f9

Training in progress, step 4096

Browse files
Files changed (3) hide show
  1. pytorch_model.bin +1 -1
  2. training_args.bin +1 -1
  3. training_args.json +9 -9
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8611b00fb559665ca26e68dae93dfb4ebdf3717ac3a1f548409581450a0a18a4
3
  size 105666297
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4224574c9e85152ad061f5cfd88e5ade72f84f20aa6d778d16c5a0ab0ab115b4
3
  size 105666297
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:908dc25e857dbdefbfe009b9ed0b992ce31760b393bf814f450b0d3c701c2397
3
  size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdf489f059ab44f9bc26200d314d3fc5954dabfb501b51ecc19cdc4d4be8a527
3
  size 3579
training_args.json CHANGED
@@ -6,34 +6,34 @@
6
  "do_predict": false,
7
  "evaluation_strategy": "steps",
8
  "prediction_loss_only": false,
9
- "per_device_train_batch_size": 10,
10
  "per_device_eval_batch_size": 8,
11
  "per_gpu_train_batch_size": null,
12
  "per_gpu_eval_batch_size": null,
13
  "gradient_accumulation_steps": 1,
14
  "eval_accumulation_steps": null,
15
  "eval_delay": 0,
16
- "learning_rate": 0.0005,
17
- "weight_decay": 0.1,
18
  "adam_beta1": 0.9,
19
  "adam_beta2": 0.999,
20
  "adam_epsilon": 1e-08,
21
  "max_grad_norm": 1.0,
22
- "num_train_epochs": 6,
23
  "max_steps": -1,
24
- "lr_scheduler_type": "cosine",
25
  "warmup_ratio": 0.0,
26
- "warmup_steps": 200,
27
  "log_level": "passive",
28
  "log_level_replica": "passive",
29
  "log_on_each_node": true,
30
  "logging_dir": "models/elec-gmusic-familized/logs",
31
  "logging_strategy": "steps",
32
  "logging_first_step": false,
33
- "logging_steps": 4096,
34
  "logging_nan_inf_filter": true,
35
  "save_strategy": "steps",
36
- "save_steps": 16384,
37
  "save_total_limit": 5,
38
  "save_on_each_node": false,
39
  "no_cuda": false,
@@ -55,7 +55,7 @@
55
  "tpu_metrics_debug": false,
56
  "debug": [],
57
  "dataloader_drop_last": false,
58
- "eval_steps": 4096,
59
  "dataloader_num_workers": 0,
60
  "past_index": -1,
61
  "run_name": "models/elec-gmusic-familized",
 
6
  "do_predict": false,
7
  "evaluation_strategy": "steps",
8
  "prediction_loss_only": false,
9
+ "per_device_train_batch_size": 9,
10
  "per_device_eval_batch_size": 8,
11
  "per_gpu_train_batch_size": null,
12
  "per_gpu_eval_batch_size": null,
13
  "gradient_accumulation_steps": 1,
14
  "eval_accumulation_steps": null,
15
  "eval_delay": 0,
16
+ "learning_rate": 5e-05,
17
+ "weight_decay": 0.0,
18
  "adam_beta1": 0.9,
19
  "adam_beta2": 0.999,
20
  "adam_epsilon": 1e-08,
21
  "max_grad_norm": 1.0,
22
+ "num_train_epochs": 5,
23
  "max_steps": -1,
24
+ "lr_scheduler_type": "linear",
25
  "warmup_ratio": 0.0,
26
+ "warmup_steps": 0,
27
  "log_level": "passive",
28
  "log_level_replica": "passive",
29
  "log_on_each_node": true,
30
  "logging_dir": "models/elec-gmusic-familized/logs",
31
  "logging_strategy": "steps",
32
  "logging_first_step": false,
33
+ "logging_steps": 1024,
34
  "logging_nan_inf_filter": true,
35
  "save_strategy": "steps",
36
+ "save_steps": 4096,
37
  "save_total_limit": 5,
38
  "save_on_each_node": false,
39
  "no_cuda": false,
 
55
  "tpu_metrics_debug": false,
56
  "debug": [],
57
  "dataloader_drop_last": false,
58
+ "eval_steps": 1024,
59
  "dataloader_num_workers": 0,
60
  "past_index": -1,
61
  "run_name": "models/elec-gmusic-familized",