AlekseyKorshuk committed
Commit 4dede35
1 parent: 76014b7

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
  dataset = load_dataset("huggingartists/bill-wurtz")
  ```
 
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3m596wb1/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1y01b0sy/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
  ## Training procedure
 
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Bill Wurtz's lyrics.
 
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/tn25vkjp) for full transparency and reproducibility.
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1l9hr5w2) for full transparency and reproducibility.
 
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/tn25vkjp/artifacts) is logged and versioned.
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1l9hr5w2/artifacts) is logged and versioned.
 
  ## How to use
 
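The "How to use" section mentioned at the end of this hunk is not shown in the diff. For context, a minimal sketch of generating text from this checkpoint with the Transformers `pipeline` API (the exact snippet in the README may differ):

```python
from transformers import pipeline

# Load the fine-tuned GPT-2 checkpoint from the Hub
generator = pipeline("text-generation", model="huggingartists/bill-wurtz")

# Generate a short lyric continuation from a prompt
print(generator("I am", num_return_sequences=1)[0]["generated_text"])
```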
config.json CHANGED
@@ -1,5 +1,5 @@
  {
- "_name_or_path": "gpt2",
+ "_name_or_path": "huggingartists/bill-wurtz",
  "activation_function": "gelu_new",
  "architectures": [
  "GPT2LMHeadModel"
evaluation.txt CHANGED
@@ -1 +1 @@
- {"eval_loss": 3.056016445159912, "eval_runtime": 0.7021, "eval_samples_per_second": 38.458, "eval_steps_per_second": 5.698, "epoch": 5.0}
+ {"eval_loss": 4.039820671081543, "eval_runtime": 0.5683, "eval_samples_per_second": 36.954, "eval_steps_per_second": 5.279, "epoch": 22.0}
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:94a38c311db28275ebd22bd1a96b5f1ae0705768584e0b02b91c6d3f51f16d1a
+ oid sha256:aa1422428e38213576ff235f2001350e546a4c4643f2b2f472d00667df459b33
  size 995603825
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1232337b23975f069a764f8e344e5636ceb4c40315428483d2aca49f6796a108
- size 14503
+ oid sha256:e1d50a042836fb7244e3a57318b3664fd2cb3c84d1ecf895aeecf11cd286b9d1
+ size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e9d0d89fe51412bc7eecc43912be03d94b03466976a036bba499f0a41644d0f8
+ oid sha256:b1aee7c44e1d1ea5ba4c4593abd0231fd0c216d063ddd82618d0f2d2c0e4a5dc
  size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/bill-wurtz", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
- "best_metric": 3.056016445159912,
- "best_model_checkpoint": "output/bill-wurtz/checkpoint-54",
- "epoch": 2.0,
- "global_step": 54,
+ "best_metric": 2.6812992095947266,
+ "best_model_checkpoint": "output/bill-wurtz/checkpoint-140",
+ "epoch": 5.0,
+ "global_step": 140,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -82,11 +82,151 @@
  "eval_samples_per_second": 42.746,
  "eval_steps_per_second": 6.333,
  "step": 54
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00013676865759867644,
+ "loss": 2.5878,
+ "step": 55
+ },
+ {
+ "epoch": 2.0,
+ "eval_loss": 2.762599468231201,
+ "eval_runtime": 0.5151,
+ "eval_samples_per_second": 40.769,
+ "eval_steps_per_second": 5.824,
+ "step": 56
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00013040646433810595,
+ "loss": 2.8115,
+ "step": 60
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00010509740044895205,
+ "loss": 2.5949,
+ "step": 65
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 6.860000000000001e-05,
+ "loss": 2.6643,
+ "step": 70
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 3.210259955104798e-05,
+ "loss": 2.5837,
+ "step": 75
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 6.793535661894062e-06,
+ "loss": 2.7461,
+ "step": 80
+ },
+ {
+ "epoch": 3.0,
+ "eval_loss": 2.6894850730895996,
+ "eval_runtime": 0.5306,
+ "eval_samples_per_second": 39.579,
+ "eval_steps_per_second": 5.654,
+ "step": 84
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 4.313424013235498e-07,
+ "loss": 2.4693,
+ "step": 85
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 1.496636030269314e-05,
+ "loss": 2.4734,
+ "step": 90
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 4.594285634987545e-05,
+ "loss": 2.5555,
+ "step": 95
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 8.386493606940314e-05,
+ "loss": 2.498,
+ "step": 100
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.0001171075251893971,
+ "loss": 2.3626,
+ "step": 105
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.0001354800547756731,
+ "loss": 2.4603,
+ "step": 110
+ },
+ {
+ "epoch": 4.0,
+ "eval_loss": 2.6920154094696045,
+ "eval_runtime": 0.5456,
+ "eval_samples_per_second": 38.49,
+ "eval_steps_per_second": 5.499,
+ "step": 112
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00013335039645915404,
+ "loss": 2.3237,
+ "step": 115
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00011137140040750914,
+ "loss": 2.2052,
+ "step": 120
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 7.62807630606869e-05,
+ "loss": 2.2121,
+ "step": 125
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 3.883557549653544e-05,
+ "loss": 2.2551,
+ "step": 130
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 1.0514719932939762e-05,
+ "loss": 2.2849,
+ "step": 135
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 0.0,
+ "loss": 2.2654,
+ "step": 140
+ },
+ {
+ "epoch": 5.0,
+ "eval_loss": 2.6812992095947266,
+ "eval_runtime": 0.5264,
+ "eval_samples_per_second": 39.891,
+ "eval_steps_per_second": 5.699,
+ "step": 140
  }
  ],
- "max_steps": 135,
- "num_train_epochs": 5,
- "total_flos": 56439078912000.0,
+ "max_steps": 616,
+ "num_train_epochs": 22,
+ "total_flos": 145278369792000.0,
  "trial_name": null,
  "trial_params": null
  }
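This file is the Trainer's running state: the updated header records the new best checkpoint, and the appended entries are the per-step training and evaluation logs through epoch 5. A small sketch of pulling the loss curves back out of the file (assuming these entries live under the Trainer's usual `log_history` key):

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 2.6812992095947266
print(state["best_model_checkpoint"])  # output/bill-wurtz/checkpoint-140

# Training entries carry a "loss" key; evaluation entries carry "eval_loss"
train_curve = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_curve = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]
print(eval_curve[-1])  # (140, 2.6812992095947266)
```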
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:74110011f543625430a4ba42009eb3a204c303837c8ad074906c1a42d538d532
+ oid sha256:41eee99e83d6fdcd4ae4efff4b1f5e17120c0670785282724daf91004a53de18
  size 2671