danielkty22 committed
Commit 7d9a9d1
1 Parent(s): af5633b

Training in progress, epoch 2

tmp-checkpoint-4380/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tmp-checkpoint-4380/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6aa8dfd73098f82a3dec027052255349df935c2ded8f41261ee3350c67ceb52a
+ size 995656250
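
Each binary artifact in this commit is tracked with Git LFS, so the repository stores only a three-line pointer: the spec version, the SHA-256 of the file's contents, and its byte size. (The ~996 MB optimizer state is consistent with Adam's two fp32 moment buffers for a ~124M-parameter GPT-2 — an inference, not stated in the commit.) A minimal sketch of how such a pointer is derived; the `lfs_pointer` helper is hypothetical, not part of the Git LFS tooling:

```python
import hashlib
from pathlib import Path

def lfs_pointer(path: str) -> str:
    """Build a Git LFS v1 pointer for a local file."""
    data = Path(path).read_bytes()
    oid = hashlib.sha256(data).hexdigest()  # oid = SHA-256 of the raw contents
    return (
        "version https://git-lfs.github.com/spec/v1\n"
        f"oid sha256:{oid}\n"
        f"size {len(data)}\n"
    )

print(lfs_pointer("tmp-checkpoint-4380/optimizer.pt"))
```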
tmp-checkpoint-4380/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:47a5c75c8b035d85260c13f7899ccb4ee06184934a866349e8c37fc43986d0ce
+ size 14180
tmp-checkpoint-4380/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:38672eacb6674d473013568eda9e8a1fa2eb4919d6f05bbf641c818f7740ab1e
+ size 1064
tmp-checkpoint-4380/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<|endoftext|>",
+   "pad_token": "<|endoftext|>",
+   "sep_token": "<|endoftext|>",
+   "unk_token": "<|endoftext|>"
+ }
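
GPT-2's vocabulary defines a single special token, `<|endoftext|>` (id 50256), and no native pad or sep token; mapping every slot to it, as this file does, is the usual workaround. A short sketch of how such a map is produced, assuming the stock `gpt2` tokenizer as the starting point:

```python
from transformers import AutoTokenizer

# Reuse <|endoftext|> for the pad and sep slots, matching this checkpoint's map.
tok = AutoTokenizer.from_pretrained("gpt2")
tok.pad_token = tok.eos_token
tok.sep_token = tok.eos_token
print(tok.special_tokens_map)
# {'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', ...}
```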
tmp-checkpoint-4380/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tmp-checkpoint-4380/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "model_max_length": 1024,
+   "pad_token": "<|endoftext|>",
+   "sep_token": "<|endoftext|>",
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
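
This config, together with special_tokens_map.json, vocab.json, merges.txt, and tokenizer.json, is all `AutoTokenizer` needs to rebuild the tokenizer from the checkpoint directory. A sketch, assuming the directory is available locally under this name:

```python
from transformers import AutoTokenizer

# Loading from the checkpoint directory picks up all tokenizer files at once.
tok = AutoTokenizer.from_pretrained("tmp-checkpoint-4380")
print(tok.model_max_length)  # 1024, per tokenizer_config.json
ids = tok("hello world").input_ids
print(ids, tok.decode(ids))
```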
tmp-checkpoint-4380/trainer_state.json ADDED
@@ -0,0 +1,69 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 2.0,
+   "eval_steps": 1000000000,
+   "global_step": 4380,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.23,
+       "learning_rate": 3.680365296803653e-06,
+       "loss": 39.2485,
+       "step": 500
+     },
+     {
+       "epoch": 0.46,
+       "learning_rate": 3.354207436399217e-06,
+       "loss": 6.6145,
+       "step": 1000
+     },
+     {
+       "epoch": 0.68,
+       "learning_rate": 3.0280495759947813e-06,
+       "loss": 5.3605,
+       "step": 1500
+     },
+     {
+       "epoch": 0.91,
+       "learning_rate": 2.7018917155903456e-06,
+       "loss": 4.9587,
+       "step": 2000
+     },
+     {
+       "epoch": 1.14,
+       "learning_rate": 2.37573385518591e-06,
+       "loss": 4.7091,
+       "step": 2500
+     },
+     {
+       "epoch": 1.37,
+       "learning_rate": 2.0495759947814745e-06,
+       "loss": 4.4555,
+       "step": 3000
+     },
+     {
+       "epoch": 1.6,
+       "learning_rate": 1.7234181343770385e-06,
+       "loss": 4.2992,
+       "step": 3500
+     },
+     {
+       "epoch": 1.83,
+       "learning_rate": 1.3972602739726028e-06,
+       "loss": 4.2272,
+       "step": 4000
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 6132,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 500,
+   "total_flos": 4577396584501248.0,
+   "train_batch_size": 4,
+   "trial_name": null,
+   "trial_params": null
+ }
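
The state records 6132 max steps over 3 epochs, i.e. 2044 optimizer steps per epoch; with train_batch_size 4 that implies roughly 2044 × 4 ≈ 8,176 training examples, assuming a single process and no gradient accumulation (neither is recorded here). The logged loss falls from 39.25 at step 500 to 4.23 at step 4000 under a linearly decaying learning rate. A small sketch for inspecting the saved state, assuming the checkpoint path below:

```python
import json

# Summarize the logged loss curve from the saved trainer state.
with open("tmp-checkpoint-4380/trainer_state.json") as f:
    state = json.load(f)

print(f"epoch {state['epoch']}, step {state['global_step']} of {state['max_steps']}")
for rec in state["log_history"]:
    print(f"step {rec['step']:>4}  lr {rec['learning_rate']:.3e}  loss {rec['loss']:.4f}")
```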
tmp-checkpoint-4380/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a07cf7dc1b94a74872caec21a7cfe2d03c4c6da040f651bac83c8a18e944504
+ size 4792
tmp-checkpoint-4380/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4f5e945e62bb6a518fb6b7fdb956a1396f584ee70c880836e9ba8df74dd24688
+ oid sha256:ff9eec41bbcef746a266c20ae508585043dd26b6844823aa20d53c295703c056
  size 4792
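
The changed oid (with an unchanged 4792-byte size) shows that the serialized `TrainingArguments` in training_args.bin were updated for this run. A sketch of arguments consistent with trainer_state.json; the peak learning rate is only an extrapolation from the logged values, and the output directory is a placeholder:

```python
from transformers import TrainingArguments

# Epochs, batch size, and logging/save cadence come from trainer_state.json.
args = TrainingArguments(
    output_dir="out",                 # placeholder, not recorded in the commit
    num_train_epochs=3,
    per_device_train_batch_size=4,
    learning_rate=4e-6,               # assumption: extrapolated from the logged lr decay
    lr_scheduler_type="linear",
    logging_steps=500,
    save_steps=500,
)
```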