MHGanainy committed
Commit 33166b9 · verified · 1 Parent(s): 62715cc

MHGanainy/gpt2-xl-lora-ecthr-random-imbalanced-skewed-cluster-8-id-0

Files changed (5)
  1. README.md +2 -0
  2. all_results.json +13 -0
  3. eval_results.json +8 -0
  4. train_results.json +8 -0
  5. trainer_state.json +133 -0
README.md CHANGED
@@ -15,6 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
 # gpt2-xl-lora-ecthr-random-imbalanced-skewed-cluster-8-id-0
 
 This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 2.1801
 
 ## Model description
 
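Not part of the commit itself: a minimal usage sketch for the updated card, assuming this repo hosts a standard PEFT LoRA adapter on top of gpt2-xl (the adapter layout and the example prompt are assumptions, not stated in the diff):

```python
# Sketch: load the base model and attach the LoRA adapter from this repo.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("openai-community/gpt2-xl")
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2-xl")
model = PeftModel.from_pretrained(
    base, "MHGanainy/gpt2-xl-lora-ecthr-random-imbalanced-skewed-cluster-8-id-0"
)
model.eval()

inputs = tokenizer("The applicant complained that", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```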
all_results.json ADDED
@@ -0,0 +1,13 @@
+{
+    "epoch": 1.0,
+    "eval_loss": 2.18013596534729,
+    "eval_runtime": 9.846,
+    "eval_samples_per_second": 22.141,
+    "eval_steps_per_second": 2.844,
+    "perplexity": 8.847509131596244,
+    "total_flos": 2.42426139967488e+16,
+    "train_loss": 2.3308336977473276,
+    "train_runtime": 355.7712,
+    "train_samples_per_second": 7.508,
+    "train_steps_per_second": 3.755
+}
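The reported perplexity is the exponential of the evaluation loss; a quick check (not part of the commit) reproduces the value recorded above:

```python
import math

eval_loss = 2.18013596534729
print(math.exp(eval_loss))  # ~8.847509131596244, matching the "perplexity" field
```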
eval_results.json ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 1.0,
+    "eval_loss": 2.18013596534729,
+    "eval_runtime": 9.846,
+    "eval_samples_per_second": 22.141,
+    "eval_steps_per_second": 2.844,
+    "perplexity": 8.847509131596244
+}
train_results.json ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 1.0,
+    "total_flos": 2.42426139967488e+16,
+    "train_loss": 2.3308336977473276,
+    "train_runtime": 355.7712,
+    "train_samples_per_second": 7.508,
+    "train_steps_per_second": 3.755
+}
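The throughput figures are internally consistent; a small sanity check (assuming the train_batch_size of 2 recorded in trainer_state.json below):

```python
runtime_s = 355.7712   # train_runtime
steps = 1336           # global_step / max_steps
batch_size = 2         # train_batch_size (from trainer_state.json)

print(steps / runtime_s)          # ~3.755, matches train_steps_per_second
# train_samples_per_second of 7.508 implies ~2671 training examples,
# which at batch size 2 rounds up to exactly 1336 optimizer steps.
print(round(7.508 * runtime_s))   # ~2671
```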
trainer_state.json ADDED
@@ -0,0 +1,133 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 1336,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0748502994011976,
+      "grad_norm": 0.05376848578453064,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 2.4256,
+      "step": 100
+    },
+    {
+      "epoch": 0.1497005988023952,
+      "grad_norm": 0.10420384258031845,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 2.4157,
+      "step": 200
+    },
+    {
+      "epoch": 0.2245508982035928,
+      "grad_norm": 0.10769318789243698,
+      "learning_rate": 6e-06,
+      "loss": 2.4214,
+      "step": 300
+    },
+    {
+      "epoch": 0.2994011976047904,
+      "grad_norm": 0.22069992125034332,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 2.4348,
+      "step": 400
+    },
+    {
+      "epoch": 0.37425149700598803,
+      "grad_norm": 0.22704525291919708,
+      "learning_rate": 1e-05,
+      "loss": 2.3753,
+      "step": 500
+    },
+    {
+      "epoch": 0.4491017964071856,
+      "grad_norm": 0.26333746314048767,
+      "learning_rate": 1.2e-05,
+      "loss": 2.3588,
+      "step": 600
+    },
+    {
+      "epoch": 0.5239520958083832,
+      "grad_norm": 0.2797996699810028,
+      "learning_rate": 1.4e-05,
+      "loss": 2.3308,
+      "step": 700
+    },
+    {
+      "epoch": 0.5988023952095808,
+      "grad_norm": 0.3928057551383972,
+      "learning_rate": 1.6000000000000003e-05,
+      "loss": 2.3049,
+      "step": 800
+    },
+    {
+      "epoch": 0.6736526946107785,
+      "grad_norm": 0.4333711564540863,
+      "learning_rate": 1.8e-05,
+      "loss": 2.2758,
+      "step": 900
+    },
+    {
+      "epoch": 0.7485029940119761,
+      "grad_norm": 0.7743755578994751,
+      "learning_rate": 2e-05,
+      "loss": 2.2787,
+      "step": 1000
+    },
+    {
+      "epoch": 0.8233532934131736,
+      "grad_norm": 0.5272874236106873,
+      "learning_rate": 1.5938201855735017e-05,
+      "loss": 2.2501,
+      "step": 1100
+    },
+    {
+      "epoch": 0.8982035928143712,
+      "grad_norm": 0.5362841486930847,
+      "learning_rate": 7.052448255890958e-06,
+      "loss": 2.2681,
+      "step": 1200
+    },
+    {
+      "epoch": 0.9730538922155688,
+      "grad_norm": 0.5505067110061646,
+      "learning_rate": 5.611666969163243e-07,
+      "loss": 2.1974,
+      "step": 1300
+    },
+    {
+      "epoch": 1.0,
+      "step": 1336,
+      "total_flos": 2.42426139967488e+16,
+      "train_loss": 2.3308336977473276,
+      "train_runtime": 355.7712,
+      "train_samples_per_second": 7.508,
+      "train_steps_per_second": 3.755
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 1336,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.42426139967488e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
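The learning-rate trace in log_history (linear ramp to 2e-05 at step 1000, then decay to near zero by step 1336) is consistent with a cosine schedule with 1000 warmup steps. A sketch that reproduces the logged values; the warmup and peak settings are inferred from the log, not stated anywhere in the commit:

```python
import math

peak_lr, warmup_steps, max_steps = 2e-05, 1000, 1336

def lr_at(step):
    # Linear warmup followed by cosine decay, in the style of
    # transformers' get_cosine_schedule_with_warmup.
    if step < warmup_steps:
        return peak_lr * step / warmup_steps
    progress = (step - warmup_steps) / (max_steps - warmup_steps)
    return peak_lr * 0.5 * (1.0 + math.cos(math.pi * progress))

for step in (100, 500, 1000, 1100, 1200, 1300):
    print(step, lr_at(step))
# Expected: 2e-06, 1e-05, 2e-05, ~1.594e-05, ~7.05e-06, ~5.6e-07 (matching log_history)
```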