anaryegen committed on
Commit
780a12c
1 Parent(s): fad8ca4

update model

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.21816863666881214,
4
- "train_runtime": 391.9097,
5
  "train_samples": 17598,
6
- "train_samples_per_second": 134.71,
7
- "train_steps_per_second": 8.42
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.27864328904585406,
4
+ "train_runtime": 413.6435,
5
  "train_samples": 17598,
6
+ "train_samples_per_second": 127.632,
7
+ "train_steps_per_second": 7.978
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66f4a81fd111458f0c5c0299dab1abfe4ce42e61648d5d3fbaa00ae3cce0de7c
3
  size 709090132
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b94a2b7dc1b945a12c97f126d710acab219fcdb45dd98e053089eeb5cb23ba9f
3
  size 709090132
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 256,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.21816863666881214,
4
- "train_runtime": 391.9097,
5
  "train_samples": 17598,
6
- "train_samples_per_second": 134.71,
7
- "train_steps_per_second": 8.42
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.27864328904585406,
4
+ "train_runtime": 413.6435,
5
  "train_samples": 17598,
6
+ "train_samples_per_second": 127.632,
7
+ "train_steps_per_second": 7.978
8
  }
trainer_state.json CHANGED
@@ -10,54 +10,54 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.45,
13
- "grad_norm": 4.369685173034668,
14
  "learning_rate": 4.242424242424243e-05,
15
- "loss": 0.4661,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.91,
20
- "grad_norm": 3.9891207218170166,
21
  "learning_rate": 3.484848484848485e-05,
22
- "loss": 0.3327,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 1.36,
27
- "grad_norm": 9.819967269897461,
28
  "learning_rate": 2.7272727272727273e-05,
29
- "loss": 0.2258,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 1.82,
34
- "grad_norm": 1.7621924877166748,
35
  "learning_rate": 1.9696969696969697e-05,
36
- "loss": 0.1772,
37
  "step": 2000
38
  },
39
  {
40
  "epoch": 2.27,
41
- "grad_norm": 21.205034255981445,
42
  "learning_rate": 1.2121212121212122e-05,
43
- "loss": 0.1106,
44
  "step": 2500
45
  },
46
  {
47
  "epoch": 2.73,
48
- "grad_norm": 8.332528114318848,
49
  "learning_rate": 4.5454545454545455e-06,
50
- "loss": 0.0867,
51
  "step": 3000
52
  },
53
  {
54
  "epoch": 3.0,
55
  "step": 3300,
56
- "total_flos": 2669084030031120.0,
57
- "train_loss": 0.21816863666881214,
58
- "train_runtime": 391.9097,
59
- "train_samples_per_second": 134.71,
60
- "train_steps_per_second": 8.42
61
  }
62
  ],
63
  "logging_steps": 500,
@@ -65,7 +65,7 @@
65
  "num_input_tokens_seen": 0,
66
  "num_train_epochs": 3,
67
  "save_steps": 500,
68
- "total_flos": 2669084030031120.0,
69
  "train_batch_size": 16,
70
  "trial_name": null,
71
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.45,
13
+ "grad_norm": 3.782728910446167,
14
  "learning_rate": 4.242424242424243e-05,
15
+ "loss": 0.532,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.91,
20
+ "grad_norm": 3.925285816192627,
21
  "learning_rate": 3.484848484848485e-05,
22
+ "loss": 0.4013,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 1.36,
27
+ "grad_norm": 7.782624244689941,
28
  "learning_rate": 2.7272727272727273e-05,
29
+ "loss": 0.281,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 1.82,
34
+ "grad_norm": 3.6547534465789795,
35
  "learning_rate": 1.9696969696969697e-05,
36
+ "loss": 0.2454,
37
  "step": 2000
38
  },
39
  {
40
  "epoch": 2.27,
41
+ "grad_norm": 0.9583206176757812,
42
  "learning_rate": 1.2121212121212122e-05,
43
+ "loss": 0.1696,
44
  "step": 2500
45
  },
46
  {
47
  "epoch": 2.73,
48
+ "grad_norm": 11.225361824035645,
49
  "learning_rate": 4.5454545454545455e-06,
50
+ "loss": 0.1393,
51
  "step": 3000
52
  },
53
  {
54
  "epoch": 3.0,
55
  "step": 3300,
56
+ "total_flos": 2871828253461180.0,
57
+ "train_loss": 0.27864328904585406,
58
+ "train_runtime": 413.6435,
59
+ "train_samples_per_second": 127.632,
60
+ "train_steps_per_second": 7.978
61
  }
62
  ],
63
  "logging_steps": 500,
 
65
  "num_input_tokens_seen": 0,
66
  "num_train_epochs": 3,
67
  "save_steps": 500,
68
+ "total_flos": 2871828253461180.0,
69
  "train_batch_size": 16,
70
  "trial_name": null,
71
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ce0ec1d3c14f032ca50e426364a62c0adbd65c3b9fdc8fd4787ebbcb999f0a3
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33f26567c9db33a95d0e6f1ccfebacde8b5cba77cf3ea0f0c2923835aa7ee837
3
  size 4984