AAA01101312 committed on
Commit
0df5363
1 Parent(s): 170967c

Training in progress, step 1500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:890f2b5c6b334c01518b708c745617f7e1f6ca198ef1e61d2b107056d3a623fe
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea13dd79b7d221b92eff6f24a9d9fe21ed9b9fa745d29bfb237fab5a2b9fdda6
3
  size 268290900
run-2/checkpoint-1500/config.json CHANGED
@@ -326,6 +326,6 @@
326
  "sinusoidal_pos_embds": false,
327
  "tie_weights_": true,
328
  "torch_dtype": "float32",
329
- "transformers_version": "4.40.1",
330
  "vocab_size": 30522
331
  }
 
326
  "sinusoidal_pos_embds": false,
327
  "tie_weights_": true,
328
  "torch_dtype": "float32",
329
+ "transformers_version": "4.39.3",
330
  "vocab_size": 30522
331
  }
run-2/checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93982c596a316534ec6049dc4326fd8bb8c68519f3bd73a807df5fbac8ae6656
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea13dd79b7d221b92eff6f24a9d9fe21ed9b9fa745d29bfb237fab5a2b9fdda6
3
  size 268290900
run-2/checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fde47893799b8f3f4f56489a4110ea1da0e51e3702f9cc3187ffeb0bf02f547
3
- size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db64de92c26b834267871c2947871dfd8ad5586a3facf3d7fa53a3c213f1beb2
3
+ size 536641018
run-2/checkpoint-1500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b83ca61bd7724275023e64cd7d92d8b465e3385fc718bbe5532f6035ab566e96
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:790f927e864dc602a6dd0fb8c8c71f1ae010ed9ea0a5c6ab40e2bf06a86ac736
3
+ size 13990
run-2/checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcd6ed9c4966f04e48f7e49950e7ebd650efdad29179dc7040c5fcd1878b86b2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2d0d96760a1cdcbc417a57dd4ff944b6ece136ebbdfecf57b1e511053d5ab0b
3
  size 1064
run-2/checkpoint-1500/tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
run-2/checkpoint-1500/trainer_state.json CHANGED
@@ -10,73 +10,73 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5770967741935484,
14
- "eval_loss": 0.21370309591293335,
15
- "eval_runtime": 16.8593,
16
- "eval_samples_per_second": 183.874,
17
- "eval_steps_per_second": 23.014,
18
  "step": 318
19
  },
20
  {
21
- "epoch": 1.5723270440251573,
22
- "grad_norm": 0.547938346862793,
23
- "learning_rate": 1.371069182389937e-05,
24
- "loss": 0.3338,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8070967741935484,
30
- "eval_loss": 0.10994400829076767,
31
- "eval_runtime": 17.5313,
32
- "eval_samples_per_second": 176.827,
33
- "eval_steps_per_second": 22.132,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8664516129032258,
39
- "eval_loss": 0.07803630828857422,
40
- "eval_runtime": 17.0312,
41
- "eval_samples_per_second": 182.019,
42
- "eval_steps_per_second": 22.782,
43
  "step": 954
44
  },
45
  {
46
- "epoch": 3.1446540880503147,
47
- "grad_norm": 0.510848343372345,
48
- "learning_rate": 7.421383647798742e-06,
49
- "loss": 0.1268,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.8819354838709678,
55
- "eval_loss": 0.06529980897903442,
56
- "eval_runtime": 17.6931,
57
- "eval_samples_per_second": 175.21,
58
- "eval_steps_per_second": 21.929,
59
  "step": 1272
60
  },
61
  {
62
- "epoch": 4.716981132075472,
63
- "grad_norm": 0.4325341582298279,
64
- "learning_rate": 1.1320754716981133e-06,
65
- "loss": 0.0905,
66
  "step": 1500
67
  }
68
  ],
69
  "logging_steps": 500,
70
- "max_steps": 1590,
71
  "num_input_tokens_seen": 0,
72
- "num_train_epochs": 5,
73
  "save_steps": 500,
74
  "total_flos": 391368939443328.0,
75
  "train_batch_size": 48,
76
  "trial_name": null,
77
  "trial_params": {
78
- "alpha": 0.9912258247523545,
79
- "num_train_epochs": 5,
80
- "temperature": 9
81
  }
82
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5670967741935484,
14
+ "eval_loss": 0.2081591933965683,
15
+ "eval_runtime": 156.9924,
16
+ "eval_samples_per_second": 19.746,
17
+ "eval_steps_per_second": 2.471,
18
  "step": 318
19
  },
20
  {
21
+ "epoch": 1.57,
22
+ "grad_norm": 0.5434728860855103,
23
+ "learning_rate": 1.4758909853249476e-05,
24
+ "loss": 0.3265,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8138709677419355,
30
+ "eval_loss": 0.10343673080205917,
31
+ "eval_runtime": 157.419,
32
+ "eval_samples_per_second": 19.693,
33
+ "eval_steps_per_second": 2.465,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8751612903225806,
39
+ "eval_loss": 0.072260282933712,
40
+ "eval_runtime": 163.8821,
41
+ "eval_samples_per_second": 18.916,
42
+ "eval_steps_per_second": 2.368,
43
  "step": 954
44
  },
45
  {
46
+ "epoch": 3.14,
47
+ "grad_norm": 0.5225204229354858,
48
+ "learning_rate": 9.517819706498952e-06,
49
+ "loss": 0.1197,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.8935483870967742,
55
+ "eval_loss": 0.057823408395051956,
56
+ "eval_runtime": 165.2908,
57
+ "eval_samples_per_second": 18.755,
58
+ "eval_steps_per_second": 2.347,
59
  "step": 1272
60
  },
61
  {
62
+ "epoch": 4.72,
63
+ "grad_norm": 0.3549090027809143,
64
+ "learning_rate": 4.276729559748428e-06,
65
+ "loss": 0.0814,
66
  "step": 1500
67
  }
68
  ],
69
  "logging_steps": 500,
70
+ "max_steps": 1908,
71
  "num_input_tokens_seen": 0,
72
+ "num_train_epochs": 6,
73
  "save_steps": 500,
74
  "total_flos": 391368939443328.0,
75
  "train_batch_size": 48,
76
  "trial_name": null,
77
  "trial_params": {
78
+ "alpha": 0.3214650528165388,
79
+ "num_train_epochs": 6,
80
+ "temperature": 10
81
  }
82
  }
run-2/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a22dbca109cab4f81784b30eb06d09e06f3687ff1178732329b8bef5dda3767
3
- size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd374451ee63716cdb38ad75eccd0c0ed5c4dd252067384d270fc60f9e7e9434
3
+ size 4984
runs/May19_17-29-07_notePC-Aya/events.out.tfevents.1716166304.notePC-Aya.19556.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1075fcdb1e50119975c97ca9cda0e5da276bd0b512b591a48aa0c83703812b79
3
- size 13524
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b32fbcb91cfb3bbf0a2247afe9c84e281a055eeb87aa8be94095e2f4015c1dad
3
+ size 14058