arvkevi commited on
Commit
64faf3d
1 Parent(s): c871f7e

End of training

Browse files
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.7679501651406831,
4
- "eval_loss": 0.7943122386932373,
5
- "eval_runtime": 7.2439,
6
- "eval_samples": 469,
7
- "eval_samples_per_second": 64.744,
8
- "eval_steps_per_second": 16.29,
9
- "perplexity": 2.2129185135829017,
10
- "train_loss": 0.8620750466894024,
11
- "train_runtime": 287.9872,
12
- "train_samples": 1794,
13
- "train_samples_per_second": 18.688,
14
- "train_steps_per_second": 4.677
15
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.8116554844749571,
4
+ "eval_loss": 0.6323513984680176,
5
+ "eval_runtime": 23.456,
6
+ "eval_samples": 1508,
7
+ "eval_samples_per_second": 64.291,
8
+ "eval_steps_per_second": 16.073,
9
+ "perplexity": 1.8820307846994662,
10
+ "train_loss": 0.6928272205209627,
11
+ "train_runtime": 684.2817,
12
+ "train_samples": 4226,
13
+ "train_samples_per_second": 18.527,
14
+ "train_steps_per_second": 4.634
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.7679501651406831,
4
- "eval_loss": 0.7943122386932373,
5
- "eval_runtime": 7.2439,
6
- "eval_samples": 469,
7
- "eval_samples_per_second": 64.744,
8
- "eval_steps_per_second": 16.29,
9
- "perplexity": 2.2129185135829017
10
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.8116554844749571,
4
+ "eval_loss": 0.6323513984680176,
5
+ "eval_runtime": 23.456,
6
+ "eval_samples": 1508,
7
+ "eval_samples_per_second": 64.291,
8
+ "eval_steps_per_second": 16.073,
9
+ "perplexity": 1.8820307846994662
10
  }
runs/Aug14_18-45-40_409033645ffb/events.out.tfevents.1660503921.409033645ffb.237.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dd5d5d2248ce8afdfdbd900737df8ea408e37750bb160056a92413b93b8bbeb
3
+ size 363
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.8620750466894024,
4
- "train_runtime": 287.9872,
5
- "train_samples": 1794,
6
- "train_samples_per_second": 18.688,
7
- "train_steps_per_second": 4.677
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.6928272205209627,
4
+ "train_runtime": 684.2817,
5
+ "train_samples": 4226,
6
+ "train_samples_per_second": 18.527,
7
+ "train_steps_per_second": 4.634
8
  }
trainer_state.json CHANGED
@@ -2,36 +2,60 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
- "global_step": 1347,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.11,
12
- "learning_rate": 3.144023756495917e-05,
13
- "loss": 0.988,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 2.23,
18
- "learning_rate": 1.2880475129918337e-05,
19
- "loss": 0.7995,
20
  "step": 1000
21
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  {
23
  "epoch": 3.0,
24
- "step": 1347,
25
- "total_flos": 351574777921536.0,
26
- "train_loss": 0.8620750466894024,
27
- "train_runtime": 287.9872,
28
- "train_samples_per_second": 18.688,
29
- "train_steps_per_second": 4.677
30
  }
31
  ],
32
- "max_steps": 1347,
33
  "num_train_epochs": 3,
34
- "total_flos": 351574777921536.0,
35
  "trial_name": null,
36
  "trial_params": null
37
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
+ "global_step": 3171,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.47,
12
+ "learning_rate": 4.211605171870073e-05,
13
+ "loss": 0.8922,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 0.95,
18
+ "learning_rate": 3.423210343740145e-05,
19
+ "loss": 0.7122,
20
  "step": 1000
21
  },
22
+ {
23
+ "epoch": 1.42,
24
+ "learning_rate": 2.634815515610218e-05,
25
+ "loss": 0.6706,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 1.89,
30
+ "learning_rate": 1.8464206874802904e-05,
31
+ "loss": 0.648,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 2.37,
36
+ "learning_rate": 1.0580258593503627e-05,
37
+ "loss": 0.6318,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 2.84,
42
+ "learning_rate": 2.6963103122043523e-06,
43
+ "loss": 0.6272,
44
+ "step": 3000
45
+ },
46
  {
47
  "epoch": 3.0,
48
+ "step": 3171,
49
+ "total_flos": 828180051001344.0,
50
+ "train_loss": 0.6928272205209627,
51
+ "train_runtime": 684.2817,
52
+ "train_samples_per_second": 18.527,
53
+ "train_steps_per_second": 4.634
54
  }
55
  ],
56
+ "max_steps": 3171,
57
  "num_train_epochs": 3,
58
+ "total_flos": 828180051001344.0,
59
  "trial_name": null,
60
  "trial_params": null
61
  }