ramdhanfirdaus commited on
Commit
c21bb4f
1 Parent(s): 3195d4e

Training in progress, step 3200, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -201,18 +201,6 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
201
  ## Training procedure
202
 
203
 
204
- The following `bitsandbytes` quantization config was used during training:
205
- - quant_method: bitsandbytes
206
- - load_in_8bit: False
207
- - load_in_4bit: True
208
- - llm_int8_threshold: 6.0
209
- - llm_int8_skip_modules: None
210
- - llm_int8_enable_fp32_cpu_offload: False
211
- - llm_int8_has_fp16_weight: False
212
- - bnb_4bit_quant_type: nf4
213
- - bnb_4bit_use_double_quant: True
214
- - bnb_4bit_compute_dtype: float16
215
-
216
  ### Framework versions
217
 
218
 
 
201
  ## Training procedure
202
 
203
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  ### Framework versions
205
 
206
 
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e17afdd5ee88313e3bb7f01e153b4d1ebe56021d4123305e609e23d3cd06fd74
3
  size 75507072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51285f336292e7c2fdd6412ce56c0212934410ad3dc0c472b93adc76bc0b3f77
3
  size 75507072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76585761215f7c28b01c08237b1a8e04a0a675a97605ae7ab28746453d9762fe
3
- size 151034501
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4fddfb02ad1801dab56ea0155f7e7788d159e94e876de4ddb2af82d3fbc7917
3
+ size 151032837
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63998d98da08278c470436d7f3090e0552bc25f2e2fd93ff495fe8ccda5df6f6
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eefe083b6454775aee01bb69e64bad53187f7b97719dea614c013fe397ac511b
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43e3cf8d56a3f083d00cc85544d76ada2f884a1018c8752332d96f2799911117
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50b57a34df83b700e2c13775ff734b4569b74ce7e20da3479db76577bb4e906e
3
  size 627
last-checkpoint/special_tokens_map.json CHANGED
@@ -12,6 +12,12 @@
12
  ">>SUFFIX<<",
13
  ">>MIDDLE<<"
14
  ],
15
- "eos_token": "<|endoftext|>",
 
 
 
 
 
 
16
  "pad_token": "<|endoftext|>"
17
  }
 
12
  ">>SUFFIX<<",
13
  ">>MIDDLE<<"
14
  ],
15
+ "eos_token": {
16
+ "content": "<|endoftext|>",
17
+ "lstrip": false,
18
+ "normalized": false,
19
+ "rstrip": false,
20
+ "single_word": false
21
+ },
22
  "pad_token": "<|endoftext|>"
23
  }
last-checkpoint/tokenizer_config.json CHANGED
@@ -113,11 +113,15 @@
113
  ],
114
  "clean_up_tokenization_spaces": true,
115
  "eos_token": "<|endoftext|>",
 
116
  "model_input_names": [
117
  "input_ids",
118
  "attention_mask"
119
  ],
120
  "model_max_length": 2048,
121
  "pad_token": "<|endoftext|>",
122
- "tokenizer_class": "PreTrainedTokenizerFast"
 
 
 
123
  }
 
113
  ],
114
  "clean_up_tokenization_spaces": true,
115
  "eos_token": "<|endoftext|>",
116
+ "max_length": 512,
117
  "model_input_names": [
118
  "input_ids",
119
  "attention_mask"
120
  ],
121
  "model_max_length": 2048,
122
  "pad_token": "<|endoftext|>",
123
+ "stride": 0,
124
+ "tokenizer_class": "PreTrainedTokenizerFast",
125
+ "truncation_side": "right",
126
+ "truncation_strategy": "longest_first"
127
  }
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.1208428144454956,
3
- "best_model_checkpoint": "./outputs/checkpoint-3600",
4
- "epoch": 2.6229508196721314,
5
  "eval_steps": 100,
6
- "global_step": 3600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -445,79 +445,23 @@
445
  {
446
  "epoch": 2.33,
447
  "learning_rate": 0.0002,
448
- "loss": 1.1424,
449
  "step": 3200
450
  },
451
  {
452
  "epoch": 2.33,
453
- "eval_loss": 1.1579593420028687,
454
- "eval_runtime": 417.5279,
455
- "eval_samples_per_second": 15.027,
456
- "eval_steps_per_second": 1.88,
457
  "step": 3200
458
- },
459
- {
460
- "epoch": 2.4,
461
- "learning_rate": 0.0002,
462
- "loss": 1.1263,
463
- "step": 3300
464
- },
465
- {
466
- "epoch": 2.4,
467
- "eval_loss": 1.147441029548645,
468
- "eval_runtime": 424.7891,
469
- "eval_samples_per_second": 14.77,
470
- "eval_steps_per_second": 1.848,
471
- "step": 3300
472
- },
473
- {
474
- "epoch": 2.48,
475
- "learning_rate": 0.0002,
476
- "loss": 1.1114,
477
- "step": 3400
478
- },
479
- {
480
- "epoch": 2.48,
481
- "eval_loss": 1.1393115520477295,
482
- "eval_runtime": 423.9015,
483
- "eval_samples_per_second": 14.801,
484
- "eval_steps_per_second": 1.852,
485
- "step": 3400
486
- },
487
- {
488
- "epoch": 2.55,
489
- "learning_rate": 0.0002,
490
- "loss": 1.1116,
491
- "step": 3500
492
- },
493
- {
494
- "epoch": 2.55,
495
- "eval_loss": 1.1295558214187622,
496
- "eval_runtime": 417.5664,
497
- "eval_samples_per_second": 15.025,
498
- "eval_steps_per_second": 1.88,
499
- "step": 3500
500
- },
501
- {
502
- "epoch": 2.62,
503
- "learning_rate": 0.0002,
504
- "loss": 1.1054,
505
- "step": 3600
506
- },
507
- {
508
- "epoch": 2.62,
509
- "eval_loss": 1.1208428144454956,
510
- "eval_runtime": 437.4502,
511
- "eval_samples_per_second": 14.342,
512
- "eval_steps_per_second": 1.794,
513
- "step": 3600
514
  }
515
  ],
516
  "logging_steps": 100,
517
  "max_steps": 4116,
518
  "num_train_epochs": 3,
519
  "save_steps": 100,
520
- "total_flos": 1.1670242076660173e+18,
521
  "trial_name": null,
522
  "trial_params": null
523
  }
 
1
  {
2
+ "best_metric": 1.1677733659744263,
3
+ "best_model_checkpoint": "./outputs/checkpoint-3100",
4
+ "epoch": 2.33224043715847,
5
  "eval_steps": 100,
6
+ "global_step": 3200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
445
  {
446
  "epoch": 2.33,
447
  "learning_rate": 0.0002,
448
+ "loss": 1.1528,
449
  "step": 3200
450
  },
451
  {
452
  "epoch": 2.33,
453
+ "eval_loss": 1.1819865703582764,
454
+ "eval_runtime": 339.2738,
455
+ "eval_samples_per_second": 18.492,
456
+ "eval_steps_per_second": 2.314,
457
  "step": 3200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  }
459
  ],
460
  "logging_steps": 100,
461
  "max_steps": 4116,
462
  "num_train_epochs": 3,
463
  "save_steps": 100,
464
+ "total_flos": 1.0375834790343045e+18,
465
  "trial_name": null,
466
  "trial_params": null
467
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9be4a5c3125bd86c3b49d502b0c0839bdc6f3434e2dafd94eac7cbe0088004e2
3
  size 4219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a34ff3b82cda74a4055322bc8d99fe3f390b24562c7e145c3f2497c2ff62607
3
  size 4219