duyvu8373 commited on
Commit
9bf5cf7
1 Parent(s): 65dbdff

Upload 12 files

Browse files
Files changed (5) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +3 -0
  3. rng_state.pth +3 -0
  4. scheduler.pt +3 -0
  5. trainer_state.json +82 -0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:566154a4dd8ca5ddf6cdf02c4ec1b80dd864ac6c9a268f2ce54b2aca0fa9cb95
3
  size 903834408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b45c7b495dc66a53e343991c9516e0d110bb2c061da4cec98096722e0f80f440
3
  size 903834408
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5861ce6f7a1779157da89556e25eacdf319ceee413c09009fd5a928f39524cc5
3
+ size 1807824186
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16b713008e4eea7860540db04296a11a09d44804872b072ac510cef9f0391ff3
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d61a714e2d67c35f405d9bbd18b18087fe1e54b1e2e331d14c8409a81e182f79
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.4945054945054945,
5
+ "eval_steps": 500,
6
+ "global_step": 500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_bleu": 52.2825,
14
+ "eval_gen_len": 17.3764,
15
+ "eval_loss": 0.2236185073852539,
16
+ "eval_meteor": 0.7138,
17
+ "eval_runtime": 20.1266,
18
+ "eval_samples_per_second": 26.135,
19
+ "eval_steps_per_second": 0.845,
20
+ "step": 91
21
+ },
22
+ {
23
+ "epoch": 2.0,
24
+ "eval_bleu": 58.1966,
25
+ "eval_gen_len": 17.3175,
26
+ "eval_loss": 0.1642024964094162,
27
+ "eval_meteor": 0.7742,
28
+ "eval_runtime": 15.2591,
29
+ "eval_samples_per_second": 34.471,
30
+ "eval_steps_per_second": 1.114,
31
+ "step": 182
32
+ },
33
+ {
34
+ "epoch": 3.0,
35
+ "eval_bleu": 62.8589,
36
+ "eval_gen_len": 17.6293,
37
+ "eval_loss": 0.15181176364421844,
38
+ "eval_meteor": 0.7831,
39
+ "eval_runtime": 15.3539,
40
+ "eval_samples_per_second": 34.258,
41
+ "eval_steps_per_second": 1.107,
42
+ "step": 273
43
+ },
44
+ {
45
+ "epoch": 4.0,
46
+ "eval_bleu": 64.9987,
47
+ "eval_gen_len": 17.5798,
48
+ "eval_loss": 0.14291420578956604,
49
+ "eval_meteor": 0.8085,
50
+ "eval_runtime": 15.3569,
51
+ "eval_samples_per_second": 34.252,
52
+ "eval_steps_per_second": 1.107,
53
+ "step": 364
54
+ },
55
+ {
56
+ "epoch": 5.0,
57
+ "eval_bleu": 65.7474,
58
+ "eval_gen_len": 17.5171,
59
+ "eval_loss": 0.13588279485702515,
60
+ "eval_meteor": 0.821,
61
+ "eval_runtime": 15.5893,
62
+ "eval_samples_per_second": 33.741,
63
+ "eval_steps_per_second": 1.09,
64
+ "step": 455
65
+ },
66
+ {
67
+ "epoch": 5.49,
68
+ "learning_rate": 9.010989010989011e-06,
69
+ "loss": 0.2938,
70
+ "step": 500
71
+ }
72
+ ],
73
+ "logging_steps": 500,
74
+ "max_steps": 910,
75
+ "num_input_tokens_seen": 0,
76
+ "num_train_epochs": 10,
77
+ "save_steps": 500,
78
+ "total_flos": 1951729069178880.0,
79
+ "train_batch_size": 32,
80
+ "trial_name": null,
81
+ "trial_params": null
82
+ }