ramdhanfirdaus commited on
Commit
84d141c
1 Parent(s): 7f80a89

Training in progress, step 3600, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -201,18 +201,6 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
201
  ## Training procedure
202
 
203
 
204
- The following `bitsandbytes` quantization config was used during training:
205
- - quant_method: bitsandbytes
206
- - load_in_8bit: False
207
- - load_in_4bit: True
208
- - llm_int8_threshold: 6.0
209
- - llm_int8_skip_modules: None
210
- - llm_int8_enable_fp32_cpu_offload: False
211
- - llm_int8_has_fp16_weight: False
212
- - bnb_4bit_quant_type: nf4
213
- - bnb_4bit_use_double_quant: True
214
- - bnb_4bit_compute_dtype: float16
215
-
216
  ### Framework versions
217
 
218
 
 
201
  ## Training procedure
202
 
203
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  ### Framework versions
205
 
206
 
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:665743a09bf79dcd69946448918fb0c16aa79bd6616b8cfb311dda0ec4b4bdf5
3
  size 50338848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab272531ca4f7f21d89ada6774e8e5bc07ab11ecffda04d158c5f53998ca655d
3
  size 50338848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c545798a80cf3e21310f966375ea19a1ece615f35be28021957920f347dae719
3
- size 100693001
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c562f3b307809fe2836dff271710f225ad458098595e0784fc6a7f5379d7ae0b
3
+ size 100691721
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be9cef20fcb5836bc994b3161e3caaa39c6412b1a8cf4e470a339da81589749a
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0fe9debfc0931dca2aebaaa146c3890943a5809c00b1b2ba715c1a4eb6e57da
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ef51ed05818f33aebca9592cb64baf0df006aad2a34a7cf944f4a48b70afce3
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43e3cf8d56a3f083d00cc85544d76ada2f884a1018c8752332d96f2799911117
3
  size 627
last-checkpoint/special_tokens_map.json CHANGED
@@ -1,6 +1,24 @@
1
  {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
 
 
 
 
 
4
  "pad_token": "<|endoftext|>",
5
- "unk_token": "<|endoftext|>"
 
 
 
 
 
 
6
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
  "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
  }
last-checkpoint/tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.205187439918518,
3
- "best_model_checkpoint": "./outputs/checkpoint-4100",
4
- "epoch": 2.987249544626594,
5
  "eval_steps": 100,
6
- "global_step": 4100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -501,93 +501,23 @@
501
  {
502
  "epoch": 2.62,
503
  "learning_rate": 0.0002,
504
- "loss": 1.2425,
505
  "step": 3600
506
  },
507
  {
508
  "epoch": 2.62,
509
- "eval_loss": 1.2600913047790527,
510
- "eval_runtime": 143.6927,
511
- "eval_samples_per_second": 43.663,
512
- "eval_steps_per_second": 5.463,
513
  "step": 3600
514
- },
515
- {
516
- "epoch": 2.7,
517
- "learning_rate": 0.0002,
518
- "loss": 1.225,
519
- "step": 3700
520
- },
521
- {
522
- "epoch": 2.7,
523
- "eval_loss": 1.2477346658706665,
524
- "eval_runtime": 143.7426,
525
- "eval_samples_per_second": 43.647,
526
- "eval_steps_per_second": 5.461,
527
- "step": 3700
528
- },
529
- {
530
- "epoch": 2.77,
531
- "learning_rate": 0.0002,
532
- "loss": 1.2032,
533
- "step": 3800
534
- },
535
- {
536
- "epoch": 2.77,
537
- "eval_loss": 1.2393803596496582,
538
- "eval_runtime": 148.064,
539
- "eval_samples_per_second": 42.374,
540
- "eval_steps_per_second": 5.302,
541
- "step": 3800
542
- },
543
- {
544
- "epoch": 2.84,
545
- "learning_rate": 0.0002,
546
- "loss": 1.2152,
547
- "step": 3900
548
- },
549
- {
550
- "epoch": 2.84,
551
- "eval_loss": 1.2263625860214233,
552
- "eval_runtime": 143.4886,
553
- "eval_samples_per_second": 43.725,
554
- "eval_steps_per_second": 5.471,
555
- "step": 3900
556
- },
557
- {
558
- "epoch": 2.91,
559
- "learning_rate": 0.0002,
560
- "loss": 1.1959,
561
- "step": 4000
562
- },
563
- {
564
- "epoch": 2.91,
565
- "eval_loss": 1.2177170515060425,
566
- "eval_runtime": 143.5576,
567
- "eval_samples_per_second": 43.704,
568
- "eval_steps_per_second": 5.468,
569
- "step": 4000
570
- },
571
- {
572
- "epoch": 2.99,
573
- "learning_rate": 0.0002,
574
- "loss": 1.1936,
575
- "step": 4100
576
- },
577
- {
578
- "epoch": 2.99,
579
- "eval_loss": 1.205187439918518,
580
- "eval_runtime": 143.4816,
581
- "eval_samples_per_second": 43.727,
582
- "eval_steps_per_second": 5.471,
583
- "step": 4100
584
  }
585
  ],
586
  "logging_steps": 100,
587
  "max_steps": 4116,
588
  "num_train_epochs": 3,
589
  "save_steps": 100,
590
- "total_flos": 2.444085005899776e+17,
591
  "trial_name": null,
592
  "trial_params": null
593
  }
 
1
  {
2
+ "best_metric": 1.2722229957580566,
3
+ "best_model_checkpoint": "./outputs/checkpoint-3500",
4
+ "epoch": 2.62367941712204,
5
  "eval_steps": 100,
6
+ "global_step": 3600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
501
  {
502
  "epoch": 2.62,
503
  "learning_rate": 0.0002,
504
+ "loss": 1.3031,
505
  "step": 3600
506
  },
507
  {
508
  "epoch": 2.62,
509
+ "eval_loss": 1.3205279111862183,
510
+ "eval_runtime": 138.0704,
511
+ "eval_samples_per_second": 45.441,
512
+ "eval_steps_per_second": 5.686,
513
  "step": 3600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
514
  }
515
  ],
516
  "logging_steps": 100,
517
  "max_steps": 4116,
518
  "num_train_epochs": 3,
519
  "save_steps": 100,
520
+ "total_flos": 2.146160479353815e+17,
521
  "trial_name": null,
522
  "trial_params": null
523
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fffa2fe9c9c78adddfbb05849466ee1b7dd93baaa7a7769de151728e5912f64d
3
  size 4219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8359692bdb4a6bb3efa0d10dd8036b5f919a9ea002e4dc4eb94a27d30c9b489f
3
  size 4219