sekarmulyani committed on
Commit
381c55f
1 Parent(s): af28818

Upload 7 files

Browse files
Files changed (5) hide show
  1. config.json +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +57 -3
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": ".",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
1
  {
2
+ "_name_or_path": "flax-community/gpt2-small-indonesian",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cf4ecd8a5b06187890c4066192de56ba0f136e5e1f7946399af28c3e64711db
3
  size 497807197
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e876a7b1ab0ee2bb64e9174fee3e0fc3e841c680d1aa89fa9ba2e64c6d25083
3
  size 497807197
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1784c9e20ffdc46b706882695c2108245d7626a328b6d70a37d079ad1fbbc989
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6162bb9db25c89c41e126a7a00a5d0695219447bff9b18d08731531620758440
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa302b087d13df1585059f19c40718ddcbe9f202f5ca1867898c19a359f17e46
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:139d564a4ff8b0110bedb5670b2421665ae2dae7192939285858c6b23b9de487
3
  size 627
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 837,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -21,13 +21,67 @@
21
  "eval_samples_per_second": 42.083,
22
  "eval_steps_per_second": 5.275,
23
  "step": 837
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
  ],
26
  "logging_steps": 500,
27
  "max_steps": 6696,
28
  "num_train_epochs": 8,
29
  "save_steps": 500,
30
- "total_flos": 1310836801536000.0,
31
  "trial_name": null,
32
  "trial_params": null
33
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 3348,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
21
  "eval_samples_per_second": 42.083,
22
  "eval_steps_per_second": 5.275,
23
  "step": 837
24
+ },
25
+ {
26
+ "epoch": 1.19,
27
+ "learning_rate": 1.7013142174432496e-05,
28
+ "loss": 2.4595,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 1.79,
33
+ "learning_rate": 1.5519713261648747e-05,
34
+ "loss": 2.3818,
35
+ "step": 1500
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_loss": 2.373106002807617,
40
+ "eval_runtime": 42.2281,
41
+ "eval_samples_per_second": 42.318,
42
+ "eval_steps_per_second": 5.305,
43
+ "step": 1674
44
+ },
45
+ {
46
+ "epoch": 2.39,
47
+ "learning_rate": 1.4026284348864996e-05,
48
+ "loss": 2.3449,
49
+ "step": 2000
50
+ },
51
+ {
52
+ "epoch": 2.99,
53
+ "learning_rate": 1.2532855436081244e-05,
54
+ "loss": 2.3116,
55
+ "step": 2500
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_loss": 2.333420515060425,
60
+ "eval_runtime": 42.4622,
61
+ "eval_samples_per_second": 42.084,
62
+ "eval_steps_per_second": 5.275,
63
+ "step": 2511
64
+ },
65
+ {
66
+ "epoch": 3.58,
67
+ "learning_rate": 1.1039426523297491e-05,
68
+ "loss": 2.2842,
69
+ "step": 3000
70
+ },
71
+ {
72
+ "epoch": 4.0,
73
+ "eval_loss": 2.3103713989257812,
74
+ "eval_runtime": 42.488,
75
+ "eval_samples_per_second": 42.059,
76
+ "eval_steps_per_second": 5.272,
77
+ "step": 3348
78
  }
79
  ],
80
  "logging_steps": 500,
81
  "max_steps": 6696,
82
  "num_train_epochs": 8,
83
  "save_steps": 500,
84
+ "total_flos": 5243347206144000.0,
85
  "trial_name": null,
86
  "trial_params": null
87
  }