AbinayaM02 commited on
Commit
ea3104c
1 Parent(s): a8c2f96

Updated model

Browse files
Files changed (27) hide show
  1. gpt-2-tamil/flax_model.msgpack +1 -1
  2. src/wandb/latest-run +1 -0
  3. src/wandb/run-20210712_164633-1ddv4131/run-1ddv4131.wandb +3 -0
  4. src/wandb/run-20210715_080856-2mpx5n1j/files/config.yaml +305 -0
  5. src/wandb/run-20210715_080856-2mpx5n1j/files/events.out.tfevents.1626336540.t1v-n-ebe36c53-w-0.751183.3.v2 +1 -0
  6. src/wandb/run-20210715_080856-2mpx5n1j/files/requirements.txt +123 -0
  7. src/wandb/run-20210715_080856-2mpx5n1j/files/wandb-metadata.json +49 -0
  8. src/wandb/run-20210715_080856-2mpx5n1j/files/wandb-summary.json +1 -0
  9. src/wandb/run-20210715_080856-2mpx5n1j/run-2mpx5n1j.wandb +3 -0
  10. src/wandb/run-20210715_085943-1ize2alk/files/config.yaml +301 -0
  11. src/wandb/run-20210715_085943-1ize2alk/files/events.out.tfevents.1626339585.t1v-n-ebe36c53-w-0.759145.3.v2 +1 -0
  12. src/wandb/run-20210715_085943-1ize2alk/files/requirements.txt +123 -0
  13. src/wandb/run-20210715_085943-1ize2alk/files/wandb-metadata.json +49 -0
  14. src/wandb/run-20210715_085943-1ize2alk/files/wandb-summary.json +1 -0
  15. src/wandb/run-20210715_085943-1ize2alk/run-1ize2alk.wandb +3 -0
  16. src/wandb/run-20210715_091856-2v0tf7h4/files/config.yaml +305 -0
  17. src/wandb/run-20210715_091856-2v0tf7h4/files/events.out.tfevents.1626340740.t1v-n-ebe36c53-w-0.765413.3.v2 +1 -0
  18. src/wandb/run-20210715_091856-2v0tf7h4/files/requirements.txt +123 -0
  19. src/wandb/run-20210715_091856-2v0tf7h4/files/wandb-metadata.json +49 -0
  20. src/wandb/run-20210715_091856-2v0tf7h4/files/wandb-summary.json +1 -0
  21. src/wandb/run-20210715_091856-2v0tf7h4/run-2v0tf7h4.wandb +3 -0
  22. src/wandb/run-20210715_092837-watdq7ib/files/config.yaml +301 -0
  23. src/wandb/run-20210715_092837-watdq7ib/files/events.out.tfevents.1626341319.t1v-n-ebe36c53-w-0.768105.3.v2 +1 -0
  24. src/wandb/run-20210715_092837-watdq7ib/files/requirements.txt +123 -0
  25. src/wandb/run-20210715_092837-watdq7ib/files/wandb-metadata.json +49 -0
  26. src/wandb/run-20210715_092837-watdq7ib/files/wandb-summary.json +1 -0
  27. src/wandb/run-20210715_092837-watdq7ib/run-watdq7ib.wandb +3 -0
gpt-2-tamil/flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2d3c0a2be67a4eaacda46f9a3390f57fa5997222765b6bea6d3bd8c1dcc9b87
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89396995064d16071519a20c2771d661400da8c3d644966f0a586d299d1b2fa3
3
  size 497764120
src/wandb/latest-run ADDED
@@ -0,0 +1 @@
 
 
1
+ run-20210715_092837-watdq7ib
src/wandb/run-20210712_164633-1ddv4131/run-1ddv4131.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8211487b4d0a0489ae4728120abad1be7ee4190520afc47fdae166087ae6068
3
+ size 60817322
src/wandb/run-20210715_080856-2mpx5n1j/files/config.yaml ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ __cached__setup_devices:
4
+ desc: null
5
+ value: cpu
6
+ _n_gpu:
7
+ desc: null
8
+ value: 0
9
+ _wandb:
10
+ desc: null
11
+ value:
12
+ cli_version: 0.10.33
13
+ framework: huggingface
14
+ huggingface_version: 4.9.0.dev0
15
+ is_jupyter_run: false
16
+ is_kaggle_kernel: false
17
+ python_version: 3.8.10
18
+ t:
19
+ 1:
20
+ - 1
21
+ - 3
22
+ - 11
23
+ 2:
24
+ - 1
25
+ - 3
26
+ - 11
27
+ 4: 3.8.10
28
+ 5: 0.10.33
29
+ 6: 4.9.0.dev0
30
+ 8:
31
+ - 5
32
+ adafactor:
33
+ desc: null
34
+ value: false
35
+ adam_beta1:
36
+ desc: null
37
+ value: 0.9
38
+ adam_beta2:
39
+ desc: null
40
+ value: 0.98
41
+ adam_epsilon:
42
+ desc: null
43
+ value: 1.0e-08
44
+ block_size:
45
+ desc: null
46
+ value: 512
47
+ cache_dir:
48
+ desc: null
49
+ value: null
50
+ config_name:
51
+ desc: null
52
+ value: ../gpt-2-tamil
53
+ dataloader_drop_last:
54
+ desc: null
55
+ value: false
56
+ dataloader_num_workers:
57
+ desc: null
58
+ value: 0
59
+ dataloader_pin_memory:
60
+ desc: null
61
+ value: true
62
+ dataset_config_name:
63
+ desc: null
64
+ value: unshuffled_deduplicated_ta
65
+ dataset_name:
66
+ desc: null
67
+ value: oscar
68
+ ddp_find_unused_parameters:
69
+ desc: null
70
+ value: null
71
+ debug:
72
+ desc: null
73
+ value: []
74
+ deepspeed:
75
+ desc: null
76
+ value: null
77
+ disable_tqdm:
78
+ desc: null
79
+ value: false
80
+ do_eval:
81
+ desc: null
82
+ value: true
83
+ do_predict:
84
+ desc: null
85
+ value: false
86
+ do_train:
87
+ desc: null
88
+ value: true
89
+ dtype:
90
+ desc: null
91
+ value: float32
92
+ eval_accumulation_steps:
93
+ desc: null
94
+ value: null
95
+ eval_steps:
96
+ desc: null
97
+ value: 2500
98
+ evaluation_strategy:
99
+ desc: null
100
+ value: IntervalStrategy.NO
101
+ fp16:
102
+ desc: null
103
+ value: false
104
+ fp16_backend:
105
+ desc: null
106
+ value: auto
107
+ fp16_full_eval:
108
+ desc: null
109
+ value: false
110
+ fp16_opt_level:
111
+ desc: null
112
+ value: O1
113
+ gradient_accumulation_steps:
114
+ desc: null
115
+ value: 1
116
+ greater_is_better:
117
+ desc: null
118
+ value: null
119
+ group_by_length:
120
+ desc: null
121
+ value: false
122
+ ignore_data_skip:
123
+ desc: null
124
+ value: false
125
+ label_names:
126
+ desc: null
127
+ value: null
128
+ label_smoothing_factor:
129
+ desc: null
130
+ value: 0.0
131
+ learning_rate:
132
+ desc: null
133
+ value: 3.0e-05
134
+ length_column_name:
135
+ desc: null
136
+ value: length
137
+ load_best_model_at_end:
138
+ desc: null
139
+ value: false
140
+ local_rank:
141
+ desc: null
142
+ value: -1
143
+ log_level:
144
+ desc: null
145
+ value: -1
146
+ log_level_replica:
147
+ desc: null
148
+ value: -1
149
+ log_on_each_node:
150
+ desc: null
151
+ value: true
152
+ logging_dir:
153
+ desc: null
154
+ value: ../gpt-2-tamil/runs/Jul15_06-31-48_t1v-n-ebe36c53-w-0
155
+ logging_first_step:
156
+ desc: null
157
+ value: false
158
+ logging_steps:
159
+ desc: null
160
+ value: 500
161
+ logging_strategy:
162
+ desc: null
163
+ value: IntervalStrategy.STEPS
164
+ lr_scheduler_type:
165
+ desc: null
166
+ value: SchedulerType.LINEAR
167
+ max_eval_samples:
168
+ desc: null
169
+ value: null
170
+ max_grad_norm:
171
+ desc: null
172
+ value: 1.0
173
+ max_steps:
174
+ desc: null
175
+ value: -1
176
+ max_train_samples:
177
+ desc: null
178
+ value: null
179
+ metric_for_best_model:
180
+ desc: null
181
+ value: null
182
+ model_name_or_path:
183
+ desc: null
184
+ value: null
185
+ model_type:
186
+ desc: null
187
+ value: gpt2
188
+ mp_parameters:
189
+ desc: null
190
+ value: ''
191
+ no_cuda:
192
+ desc: null
193
+ value: false
194
+ num_train_epochs:
195
+ desc: null
196
+ value: 10.0
197
+ output_dir:
198
+ desc: null
199
+ value: ../gpt-2-tamil
200
+ overwrite_cache:
201
+ desc: null
202
+ value: false
203
+ overwrite_output_dir:
204
+ desc: null
205
+ value: true
206
+ past_index:
207
+ desc: null
208
+ value: -1
209
+ per_device_eval_batch_size:
210
+ desc: null
211
+ value: 128
212
+ per_device_train_batch_size:
213
+ desc: null
214
+ value: 128
215
+ per_gpu_eval_batch_size:
216
+ desc: null
217
+ value: null
218
+ per_gpu_train_batch_size:
219
+ desc: null
220
+ value: null
221
+ prediction_loss_only:
222
+ desc: null
223
+ value: false
224
+ preprocessing_num_workers:
225
+ desc: null
226
+ value: 90
227
+ push_to_hub:
228
+ desc: null
229
+ value: false
230
+ push_to_hub_model_id:
231
+ desc: null
232
+ value: gpt-2-tamil
233
+ push_to_hub_organization:
234
+ desc: null
235
+ value: null
236
+ push_to_hub_token:
237
+ desc: null
238
+ value: null
239
+ remove_unused_columns:
240
+ desc: null
241
+ value: true
242
+ report_to:
243
+ desc: null
244
+ value:
245
+ - wandb
246
+ resume_from_checkpoint:
247
+ desc: null
248
+ value: null
249
+ run_name:
250
+ desc: null
251
+ value: trial
252
+ save_on_each_node:
253
+ desc: null
254
+ value: false
255
+ save_steps:
256
+ desc: null
257
+ value: 2500
258
+ save_strategy:
259
+ desc: null
260
+ value: IntervalStrategy.STEPS
261
+ save_total_limit:
262
+ desc: null
263
+ value: null
264
+ seed:
265
+ desc: null
266
+ value: 42
267
+ sharded_ddp:
268
+ desc: null
269
+ value: []
270
+ skip_memory_metrics:
271
+ desc: null
272
+ value: true
273
+ tokenizer_name:
274
+ desc: null
275
+ value: ../gpt-2-tamil
276
+ tpu_metrics_debug:
277
+ desc: null
278
+ value: false
279
+ tpu_num_cores:
280
+ desc: null
281
+ value: null
282
+ train_file:
283
+ desc: null
284
+ value: null
285
+ use_fast_tokenizer:
286
+ desc: null
287
+ value: true
288
+ use_legacy_prediction_loop:
289
+ desc: null
290
+ value: false
291
+ validation_file:
292
+ desc: null
293
+ value: null
294
+ validation_split_percentage:
295
+ desc: null
296
+ value: 5
297
+ warmup_ratio:
298
+ desc: null
299
+ value: 0.0
300
+ warmup_steps:
301
+ desc: null
302
+ value: 1000
303
+ weight_decay:
304
+ desc: null
305
+ value: 0.01
src/wandb/run-20210715_080856-2mpx5n1j/files/events.out.tfevents.1626336540.t1v-n-ebe36c53-w-0.751183.3.v2 ADDED
@@ -0,0 +1 @@
 
 
1
+ /home/tweety_abi/GPT2-Tamil/gpt-2-tamil/events.out.tfevents.1626336540.t1v-n-ebe36c53-w-0.751183.3.v2
src/wandb/run-20210715_080856-2mpx5n1j/files/requirements.txt ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ appdirs==1.4.4
4
+ astunparse==1.6.3
5
+ async-timeout==3.0.1
6
+ attrs==21.2.0
7
+ backcall==0.2.0
8
+ black==21.6b0
9
+ cachetools==4.2.2
10
+ certifi==2021.5.30
11
+ cfgv==3.3.0
12
+ chardet==4.0.0
13
+ chex==0.0.7
14
+ click==8.0.1
15
+ configparser==5.0.2
16
+ cycler==0.10.0
17
+ datasets==1.8.1.dev0
18
+ decorator==5.0.9
19
+ dill==0.3.4
20
+ distlib==0.3.2
21
+ dm-tree==0.1.6
22
+ docker-pycreds==0.4.0
23
+ filelock==3.0.12
24
+ flake8==3.9.2
25
+ flatbuffers==1.12
26
+ flax==0.3.4
27
+ fsspec==2021.6.1
28
+ gast==0.4.0
29
+ gitdb==4.0.7
30
+ gitpython==3.1.18
31
+ google-auth-oauthlib==0.4.4
32
+ google-auth==1.32.1
33
+ google-pasta==0.2.0
34
+ grpcio==1.34.1
35
+ h5py==3.1.0
36
+ huggingface-hub==0.0.12
37
+ identify==2.2.10
38
+ idna==2.10
39
+ ipython-genutils==0.2.0
40
+ ipython==7.25.0
41
+ isort==5.9.1
42
+ jax==0.2.16
43
+ jaxlib==0.1.68
44
+ jedi==0.18.0
45
+ joblib==1.0.1
46
+ keras-nightly==2.5.0.dev2021032900
47
+ keras-preprocessing==1.1.2
48
+ kiwisolver==1.3.1
49
+ libtpu-nightly==0.1.dev20210615
50
+ markdown==3.3.4
51
+ matplotlib-inline==0.1.2
52
+ matplotlib==3.4.2
53
+ mccabe==0.6.1
54
+ msgpack==1.0.2
55
+ multidict==5.1.0
56
+ multiprocess==0.70.12.2
57
+ mypy-extensions==0.4.3
58
+ nodeenv==1.6.0
59
+ numpy==1.19.5
60
+ oauthlib==3.1.1
61
+ opt-einsum==3.3.0
62
+ optax==0.0.8
63
+ packaging==20.9
64
+ pandas==1.2.5
65
+ parso==0.8.2
66
+ pathspec==0.8.1
67
+ pathtools==0.1.2
68
+ pexpect==4.8.0
69
+ pickleshare==0.7.5
70
+ pillow==8.3.0
71
+ pip==20.0.2
72
+ pkg-resources==0.0.0
73
+ pre-commit==2.13.0
74
+ promise==2.3
75
+ prompt-toolkit==3.0.19
76
+ protobuf==3.17.3
77
+ psutil==5.8.0
78
+ ptyprocess==0.7.0
79
+ pyarrow==4.0.1
80
+ pyasn1-modules==0.2.8
81
+ pyasn1==0.4.8
82
+ pycodestyle==2.7.0
83
+ pyflakes==2.3.1
84
+ pygments==2.9.0
85
+ pyparsing==2.4.7
86
+ python-dateutil==2.8.1
87
+ pytz==2021.1
88
+ pyyaml==5.4.1
89
+ regex==2021.7.1
90
+ requests-oauthlib==1.3.0
91
+ requests==2.25.1
92
+ rsa==4.7.2
93
+ sacremoses==0.0.45
94
+ scipy==1.7.0
95
+ sentry-sdk==1.3.0
96
+ setuptools==44.0.0
97
+ shortuuid==1.0.1
98
+ six==1.15.0
99
+ smmap==4.0.0
100
+ subprocess32==3.5.4
101
+ tensorboard-data-server==0.6.1
102
+ tensorboard-plugin-wit==1.8.0
103
+ tensorboard==2.5.0
104
+ tensorflow-estimator==2.5.0
105
+ tensorflow==2.5.0
106
+ termcolor==1.1.0
107
+ tokenizers==0.10.3
108
+ toml==0.10.2
109
+ toolz==0.11.1
110
+ torch==1.9.0
111
+ tqdm==4.61.1
112
+ traitlets==5.0.5
113
+ transformers==4.9.0.dev0
114
+ typing-extensions==3.7.4.3
115
+ urllib3==1.26.6
116
+ virtualenv==20.4.7
117
+ wandb==0.10.33
118
+ wcwidth==0.2.5
119
+ werkzeug==2.0.1
120
+ wheel==0.36.2
121
+ wrapt==1.12.1
122
+ xxhash==2.0.2
123
+ yarl==1.6.3
src/wandb/run-20210715_080856-2mpx5n1j/files/wandb-metadata.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T08:09:00.134255",
5
+ "startedAt": "2021-07-15T08:08:56.269238",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--output_dir=../gpt-2-tamil",
11
+ "--model_type=gpt2",
12
+ "--config_name=../gpt-2-tamil",
13
+ "--tokenizer_name=../gpt-2-tamil",
14
+ "--dataset_name=oscar",
15
+ "--dataset_config_name=unshuffled_deduplicated_ta",
16
+ "--do_train",
17
+ "--do_eval",
18
+ "--block_size=512",
19
+ "--per_device_train_batch_size=128",
20
+ "--per_device_eval_batch_size=128",
21
+ "--learning_rate=3e-5",
22
+ "--warmup_steps=1000",
23
+ "--adam_beta1=0.9",
24
+ "--adam_beta2=0.98",
25
+ "--weight_decay=0.01",
26
+ "--overwrite_output_dir",
27
+ "--num_train_epochs=10",
28
+ "--report_to",
29
+ "wandb",
30
+ "--run_name",
31
+ "trial",
32
+ "--logging_steps=500",
33
+ "--save_steps=2500",
34
+ "--eval_steps=2500",
35
+ "--preprocessing_num_workers=90"
36
+ ],
37
+ "state": "running",
38
+ "program": "../src/run_clm_flax.py",
39
+ "codePath": "src/run_clm_flax.py",
40
+ "git": {
41
+ "remote": "https://github.com/AbinayaM02/GPT2-Tamil.git",
42
+ "commit": "69c9b7bf75b708a8f62cf5833d1b89acf5d1760b"
43
+ },
44
+ "email": "abinaya.m02@mphasis.com",
45
+ "root": "/home/tweety_abi/GPT2-Tamil",
46
+ "host": "t1v-n-ebe36c53-w-0",
47
+ "username": "tweety_abi",
48
+ "executable": "/home/tweety_abi/gpt2_env/bin/python"
49
+ }
src/wandb/run-20210715_080856-2mpx5n1j/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
src/wandb/run-20210715_080856-2mpx5n1j/run-2mpx5n1j.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad2816f7f07dec6835ab15fdfb6fa81ca124f1b3f1dfbaccb9b2f3658286d158
3
+ size 38211
src/wandb/run-20210715_085943-1ize2alk/files/config.yaml ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ __cached__setup_devices:
4
+ desc: null
5
+ value: cpu
6
+ _n_gpu:
7
+ desc: null
8
+ value: 0
9
+ _wandb:
10
+ desc: null
11
+ value:
12
+ cli_version: 0.10.33
13
+ framework: huggingface
14
+ huggingface_version: 4.9.0.dev0
15
+ is_jupyter_run: false
16
+ is_kaggle_kernel: false
17
+ python_version: 3.8.10
18
+ t:
19
+ 1:
20
+ - 1
21
+ - 3
22
+ - 11
23
+ 4: 3.8.10
24
+ 5: 0.10.33
25
+ 6: 4.9.0.dev0
26
+ 8:
27
+ - 5
28
+ adafactor:
29
+ desc: null
30
+ value: false
31
+ adam_beta1:
32
+ desc: null
33
+ value: 0.9
34
+ adam_beta2:
35
+ desc: null
36
+ value: 0.98
37
+ adam_epsilon:
38
+ desc: null
39
+ value: 1.0e-08
40
+ block_size:
41
+ desc: null
42
+ value: 512
43
+ cache_dir:
44
+ desc: null
45
+ value: null
46
+ config_name:
47
+ desc: null
48
+ value: ../gpt-2-tamil
49
+ dataloader_drop_last:
50
+ desc: null
51
+ value: false
52
+ dataloader_num_workers:
53
+ desc: null
54
+ value: 0
55
+ dataloader_pin_memory:
56
+ desc: null
57
+ value: true
58
+ dataset_config_name:
59
+ desc: null
60
+ value: unshuffled_deduplicated_ta
61
+ dataset_name:
62
+ desc: null
63
+ value: oscar
64
+ ddp_find_unused_parameters:
65
+ desc: null
66
+ value: null
67
+ debug:
68
+ desc: null
69
+ value: []
70
+ deepspeed:
71
+ desc: null
72
+ value: null
73
+ disable_tqdm:
74
+ desc: null
75
+ value: false
76
+ do_eval:
77
+ desc: null
78
+ value: true
79
+ do_predict:
80
+ desc: null
81
+ value: false
82
+ do_train:
83
+ desc: null
84
+ value: true
85
+ dtype:
86
+ desc: null
87
+ value: float32
88
+ eval_accumulation_steps:
89
+ desc: null
90
+ value: null
91
+ eval_steps:
92
+ desc: null
93
+ value: 2500
94
+ evaluation_strategy:
95
+ desc: null
96
+ value: IntervalStrategy.NO
97
+ fp16:
98
+ desc: null
99
+ value: false
100
+ fp16_backend:
101
+ desc: null
102
+ value: auto
103
+ fp16_full_eval:
104
+ desc: null
105
+ value: false
106
+ fp16_opt_level:
107
+ desc: null
108
+ value: O1
109
+ gradient_accumulation_steps:
110
+ desc: null
111
+ value: 1
112
+ greater_is_better:
113
+ desc: null
114
+ value: null
115
+ group_by_length:
116
+ desc: null
117
+ value: false
118
+ ignore_data_skip:
119
+ desc: null
120
+ value: false
121
+ label_names:
122
+ desc: null
123
+ value: null
124
+ label_smoothing_factor:
125
+ desc: null
126
+ value: 0.0
127
+ learning_rate:
128
+ desc: null
129
+ value: 3.0e-05
130
+ length_column_name:
131
+ desc: null
132
+ value: length
133
+ load_best_model_at_end:
134
+ desc: null
135
+ value: false
136
+ local_rank:
137
+ desc: null
138
+ value: -1
139
+ log_level:
140
+ desc: null
141
+ value: -1
142
+ log_level_replica:
143
+ desc: null
144
+ value: -1
145
+ log_on_each_node:
146
+ desc: null
147
+ value: true
148
+ logging_dir:
149
+ desc: null
150
+ value: ../gpt-2-tamil/runs/Jul15_07-55-49_t1v-n-ebe36c53-w-0
151
+ logging_first_step:
152
+ desc: null
153
+ value: false
154
+ logging_steps:
155
+ desc: null
156
+ value: 500
157
+ logging_strategy:
158
+ desc: null
159
+ value: IntervalStrategy.STEPS
160
+ lr_scheduler_type:
161
+ desc: null
162
+ value: SchedulerType.LINEAR
163
+ max_eval_samples:
164
+ desc: null
165
+ value: null
166
+ max_grad_norm:
167
+ desc: null
168
+ value: 1.0
169
+ max_steps:
170
+ desc: null
171
+ value: -1
172
+ max_train_samples:
173
+ desc: null
174
+ value: null
175
+ metric_for_best_model:
176
+ desc: null
177
+ value: null
178
+ model_name_or_path:
179
+ desc: null
180
+ value: null
181
+ model_type:
182
+ desc: null
183
+ value: gpt2
184
+ mp_parameters:
185
+ desc: null
186
+ value: ''
187
+ no_cuda:
188
+ desc: null
189
+ value: false
190
+ num_train_epochs:
191
+ desc: null
192
+ value: 10.0
193
+ output_dir:
194
+ desc: null
195
+ value: ../gpt-2-tamil
196
+ overwrite_cache:
197
+ desc: null
198
+ value: false
199
+ overwrite_output_dir:
200
+ desc: null
201
+ value: true
202
+ past_index:
203
+ desc: null
204
+ value: -1
205
+ per_device_eval_batch_size:
206
+ desc: null
207
+ value: 128
208
+ per_device_train_batch_size:
209
+ desc: null
210
+ value: 128
211
+ per_gpu_eval_batch_size:
212
+ desc: null
213
+ value: null
214
+ per_gpu_train_batch_size:
215
+ desc: null
216
+ value: null
217
+ prediction_loss_only:
218
+ desc: null
219
+ value: false
220
+ preprocessing_num_workers:
221
+ desc: null
222
+ value: 90
223
+ push_to_hub:
224
+ desc: null
225
+ value: false
226
+ push_to_hub_model_id:
227
+ desc: null
228
+ value: gpt-2-tamil
229
+ push_to_hub_organization:
230
+ desc: null
231
+ value: null
232
+ push_to_hub_token:
233
+ desc: null
234
+ value: null
235
+ remove_unused_columns:
236
+ desc: null
237
+ value: true
238
+ report_to:
239
+ desc: null
240
+ value:
241
+ - wandb
242
+ resume_from_checkpoint:
243
+ desc: null
244
+ value: null
245
+ run_name:
246
+ desc: null
247
+ value: trial
248
+ save_on_each_node:
249
+ desc: null
250
+ value: false
251
+ save_steps:
252
+ desc: null
253
+ value: 2500
254
+ save_strategy:
255
+ desc: null
256
+ value: IntervalStrategy.STEPS
257
+ save_total_limit:
258
+ desc: null
259
+ value: null
260
+ seed:
261
+ desc: null
262
+ value: 42
263
+ sharded_ddp:
264
+ desc: null
265
+ value: []
266
+ skip_memory_metrics:
267
+ desc: null
268
+ value: true
269
+ tokenizer_name:
270
+ desc: null
271
+ value: ../gpt-2-tamil
272
+ tpu_metrics_debug:
273
+ desc: null
274
+ value: false
275
+ tpu_num_cores:
276
+ desc: null
277
+ value: null
278
+ train_file:
279
+ desc: null
280
+ value: null
281
+ use_fast_tokenizer:
282
+ desc: null
283
+ value: true
284
+ use_legacy_prediction_loop:
285
+ desc: null
286
+ value: false
287
+ validation_file:
288
+ desc: null
289
+ value: null
290
+ validation_split_percentage:
291
+ desc: null
292
+ value: 5
293
+ warmup_ratio:
294
+ desc: null
295
+ value: 0.0
296
+ warmup_steps:
297
+ desc: null
298
+ value: 1000
299
+ weight_decay:
300
+ desc: null
301
+ value: 0.01
src/wandb/run-20210715_085943-1ize2alk/files/events.out.tfevents.1626339585.t1v-n-ebe36c53-w-0.759145.3.v2 ADDED
@@ -0,0 +1 @@
 
 
1
+ /home/tweety_abi/GPT2-Tamil/gpt-2-tamil/events.out.tfevents.1626339585.t1v-n-ebe36c53-w-0.759145.3.v2
src/wandb/run-20210715_085943-1ize2alk/files/requirements.txt ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ appdirs==1.4.4
4
+ astunparse==1.6.3
5
+ async-timeout==3.0.1
6
+ attrs==21.2.0
7
+ backcall==0.2.0
8
+ black==21.6b0
9
+ cachetools==4.2.2
10
+ certifi==2021.5.30
11
+ cfgv==3.3.0
12
+ chardet==4.0.0
13
+ chex==0.0.7
14
+ click==8.0.1
15
+ configparser==5.0.2
16
+ cycler==0.10.0
17
+ datasets==1.8.1.dev0
18
+ decorator==5.0.9
19
+ dill==0.3.4
20
+ distlib==0.3.2
21
+ dm-tree==0.1.6
22
+ docker-pycreds==0.4.0
23
+ filelock==3.0.12
24
+ flake8==3.9.2
25
+ flatbuffers==1.12
26
+ flax==0.3.4
27
+ fsspec==2021.6.1
28
+ gast==0.4.0
29
+ gitdb==4.0.7
30
+ gitpython==3.1.18
31
+ google-auth-oauthlib==0.4.4
32
+ google-auth==1.32.1
33
+ google-pasta==0.2.0
34
+ grpcio==1.34.1
35
+ h5py==3.1.0
36
+ huggingface-hub==0.0.12
37
+ identify==2.2.10
38
+ idna==2.10
39
+ ipython-genutils==0.2.0
40
+ ipython==7.25.0
41
+ isort==5.9.1
42
+ jax==0.2.16
43
+ jaxlib==0.1.68
44
+ jedi==0.18.0
45
+ joblib==1.0.1
46
+ keras-nightly==2.5.0.dev2021032900
47
+ keras-preprocessing==1.1.2
48
+ kiwisolver==1.3.1
49
+ libtpu-nightly==0.1.dev20210615
50
+ markdown==3.3.4
51
+ matplotlib-inline==0.1.2
52
+ matplotlib==3.4.2
53
+ mccabe==0.6.1
54
+ msgpack==1.0.2
55
+ multidict==5.1.0
56
+ multiprocess==0.70.12.2
57
+ mypy-extensions==0.4.3
58
+ nodeenv==1.6.0
59
+ numpy==1.19.5
60
+ oauthlib==3.1.1
61
+ opt-einsum==3.3.0
62
+ optax==0.0.8
63
+ packaging==20.9
64
+ pandas==1.2.5
65
+ parso==0.8.2
66
+ pathspec==0.8.1
67
+ pathtools==0.1.2
68
+ pexpect==4.8.0
69
+ pickleshare==0.7.5
70
+ pillow==8.3.0
71
+ pip==20.0.2
72
+ pkg-resources==0.0.0
73
+ pre-commit==2.13.0
74
+ promise==2.3
75
+ prompt-toolkit==3.0.19
76
+ protobuf==3.17.3
77
+ psutil==5.8.0
78
+ ptyprocess==0.7.0
79
+ pyarrow==4.0.1
80
+ pyasn1-modules==0.2.8
81
+ pyasn1==0.4.8
82
+ pycodestyle==2.7.0
83
+ pyflakes==2.3.1
84
+ pygments==2.9.0
85
+ pyparsing==2.4.7
86
+ python-dateutil==2.8.1
87
+ pytz==2021.1
88
+ pyyaml==5.4.1
89
+ regex==2021.7.1
90
+ requests-oauthlib==1.3.0
91
+ requests==2.25.1
92
+ rsa==4.7.2
93
+ sacremoses==0.0.45
94
+ scipy==1.7.0
95
+ sentry-sdk==1.3.0
96
+ setuptools==44.0.0
97
+ shortuuid==1.0.1
98
+ six==1.15.0
99
+ smmap==4.0.0
100
+ subprocess32==3.5.4
101
+ tensorboard-data-server==0.6.1
102
+ tensorboard-plugin-wit==1.8.0
103
+ tensorboard==2.5.0
104
+ tensorflow-estimator==2.5.0
105
+ tensorflow==2.5.0
106
+ termcolor==1.1.0
107
+ tokenizers==0.10.3
108
+ toml==0.10.2
109
+ toolz==0.11.1
110
+ torch==1.9.0
111
+ tqdm==4.61.1
112
+ traitlets==5.0.5
113
+ transformers==4.9.0.dev0
114
+ typing-extensions==3.7.4.3
115
+ urllib3==1.26.6
116
+ virtualenv==20.4.7
117
+ wandb==0.10.33
118
+ wcwidth==0.2.5
119
+ werkzeug==2.0.1
120
+ wheel==0.36.2
121
+ wrapt==1.12.1
122
+ xxhash==2.0.2
123
+ yarl==1.6.3
src/wandb/run-20210715_085943-1ize2alk/files/wandb-metadata.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T08:59:45.122600",
5
+ "startedAt": "2021-07-15T08:59:43.232731",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--output_dir=../gpt-2-tamil",
11
+ "--model_type=gpt2",
12
+ "--config_name=../gpt-2-tamil",
13
+ "--tokenizer_name=../gpt-2-tamil",
14
+ "--dataset_name=oscar",
15
+ "--dataset_config_name=unshuffled_deduplicated_ta",
16
+ "--do_train",
17
+ "--do_eval",
18
+ "--block_size=512",
19
+ "--per_device_train_batch_size=128",
20
+ "--per_device_eval_batch_size=128",
21
+ "--learning_rate=3e-5",
22
+ "--warmup_steps=1000",
23
+ "--adam_beta1=0.9",
24
+ "--adam_beta2=0.98",
25
+ "--weight_decay=0.01",
26
+ "--overwrite_output_dir",
27
+ "--num_train_epochs=10",
28
+ "--report_to",
29
+ "wandb",
30
+ "--run_name",
31
+ "trial",
32
+ "--logging_steps=500",
33
+ "--save_steps=2500",
34
+ "--eval_steps=2500",
35
+ "--preprocessing_num_workers=90"
36
+ ],
37
+ "state": "running",
38
+ "program": "../src/run_clm_flax.py",
39
+ "codePath": "src/run_clm_flax.py",
40
+ "git": {
41
+ "remote": "https://github.com/AbinayaM02/GPT2-Tamil.git",
42
+ "commit": "69c9b7bf75b708a8f62cf5833d1b89acf5d1760b"
43
+ },
44
+ "email": "abinaya.m02@mphasis.com",
45
+ "root": "/home/tweety_abi/GPT2-Tamil",
46
+ "host": "t1v-n-ebe36c53-w-0",
47
+ "username": "tweety_abi",
48
+ "executable": "/home/tweety_abi/gpt2_env/bin/python"
49
+ }
src/wandb/run-20210715_085943-1ize2alk/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
src/wandb/run-20210715_085943-1ize2alk/run-1ize2alk.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ddd483c4184ad35f642b4c9ddd01c8f4915a2cd4d811fb5e6395adec23ec07e
3
+ size 11149
src/wandb/run-20210715_091856-2v0tf7h4/files/config.yaml ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ __cached__setup_devices:
4
+ desc: null
5
+ value: cpu
6
+ _n_gpu:
7
+ desc: null
8
+ value: 0
9
+ _wandb:
10
+ desc: null
11
+ value:
12
+ cli_version: 0.10.33
13
+ framework: huggingface
14
+ huggingface_version: 4.9.0.dev0
15
+ is_jupyter_run: false
16
+ is_kaggle_kernel: false
17
+ python_version: 3.8.10
18
+ t:
19
+ 1:
20
+ - 1
21
+ - 3
22
+ - 11
23
+ 2:
24
+ - 1
25
+ - 3
26
+ - 11
27
+ 4: 3.8.10
28
+ 5: 0.10.33
29
+ 6: 4.9.0.dev0
30
+ 8:
31
+ - 5
32
+ adafactor:
33
+ desc: null
34
+ value: false
35
+ adam_beta1:
36
+ desc: null
37
+ value: 0.9
38
+ adam_beta2:
39
+ desc: null
40
+ value: 0.98
41
+ adam_epsilon:
42
+ desc: null
43
+ value: 1.0e-08
44
+ block_size:
45
+ desc: null
46
+ value: 512
47
+ cache_dir:
48
+ desc: null
49
+ value: null
50
+ config_name:
51
+ desc: null
52
+ value: ../gpt-2-tamil
53
+ dataloader_drop_last:
54
+ desc: null
55
+ value: false
56
+ dataloader_num_workers:
57
+ desc: null
58
+ value: 0
59
+ dataloader_pin_memory:
60
+ desc: null
61
+ value: true
62
+ dataset_config_name:
63
+ desc: null
64
+ value: unshuffled_deduplicated_ta
65
+ dataset_name:
66
+ desc: null
67
+ value: oscar
68
+ ddp_find_unused_parameters:
69
+ desc: null
70
+ value: null
71
+ debug:
72
+ desc: null
73
+ value: []
74
+ deepspeed:
75
+ desc: null
76
+ value: null
77
+ disable_tqdm:
78
+ desc: null
79
+ value: false
80
+ do_eval:
81
+ desc: null
82
+ value: true
83
+ do_predict:
84
+ desc: null
85
+ value: false
86
+ do_train:
87
+ desc: null
88
+ value: true
89
+ dtype:
90
+ desc: null
91
+ value: float32
92
+ eval_accumulation_steps:
93
+ desc: null
94
+ value: null
95
+ eval_steps:
96
+ desc: null
97
+ value: 2500
98
+ evaluation_strategy:
99
+ desc: null
100
+ value: IntervalStrategy.NO
101
+ fp16:
102
+ desc: null
103
+ value: false
104
+ fp16_backend:
105
+ desc: null
106
+ value: auto
107
+ fp16_full_eval:
108
+ desc: null
109
+ value: false
110
+ fp16_opt_level:
111
+ desc: null
112
+ value: O1
113
+ gradient_accumulation_steps:
114
+ desc: null
115
+ value: 1
116
+ greater_is_better:
117
+ desc: null
118
+ value: null
119
+ group_by_length:
120
+ desc: null
121
+ value: false
122
+ ignore_data_skip:
123
+ desc: null
124
+ value: false
125
+ label_names:
126
+ desc: null
127
+ value: null
128
+ label_smoothing_factor:
129
+ desc: null
130
+ value: 0.0
131
+ learning_rate:
132
+ desc: null
133
+ value: 3.0e-05
134
+ length_column_name:
135
+ desc: null
136
+ value: length
137
+ load_best_model_at_end:
138
+ desc: null
139
+ value: false
140
+ local_rank:
141
+ desc: null
142
+ value: -1
143
+ log_level:
144
+ desc: null
145
+ value: -1
146
+ log_level_replica:
147
+ desc: null
148
+ value: -1
149
+ log_on_each_node:
150
+ desc: null
151
+ value: true
152
+ logging_dir:
153
+ desc: null
154
+ value: ../gpt-2-tamil/runs/Jul15_09-16-14_t1v-n-ebe36c53-w-0
155
+ logging_first_step:
156
+ desc: null
157
+ value: false
158
+ logging_steps:
159
+ desc: null
160
+ value: 500
161
+ logging_strategy:
162
+ desc: null
163
+ value: IntervalStrategy.STEPS
164
+ lr_scheduler_type:
165
+ desc: null
166
+ value: SchedulerType.LINEAR
167
+ max_eval_samples:
168
+ desc: null
169
+ value: null
170
+ max_grad_norm:
171
+ desc: null
172
+ value: 1.0
173
+ max_steps:
174
+ desc: null
175
+ value: -1
176
+ max_train_samples:
177
+ desc: null
178
+ value: null
179
+ metric_for_best_model:
180
+ desc: null
181
+ value: null
182
+ model_name_or_path:
183
+ desc: null
184
+ value: null
185
+ model_type:
186
+ desc: null
187
+ value: gpt2
188
+ mp_parameters:
189
+ desc: null
190
+ value: ''
191
+ no_cuda:
192
+ desc: null
193
+ value: false
194
+ num_train_epochs:
195
+ desc: null
196
+ value: 10.0
197
+ output_dir:
198
+ desc: null
199
+ value: ../gpt-2-tamil
200
+ overwrite_cache:
201
+ desc: null
202
+ value: false
203
+ overwrite_output_dir:
204
+ desc: null
205
+ value: true
206
+ past_index:
207
+ desc: null
208
+ value: -1
209
+ per_device_eval_batch_size:
210
+ desc: null
211
+ value: 128
212
+ per_device_train_batch_size:
213
+ desc: null
214
+ value: 128
215
+ per_gpu_eval_batch_size:
216
+ desc: null
217
+ value: null
218
+ per_gpu_train_batch_size:
219
+ desc: null
220
+ value: null
221
+ prediction_loss_only:
222
+ desc: null
223
+ value: false
224
+ preprocessing_num_workers:
225
+ desc: null
226
+ value: 90
227
+ push_to_hub:
228
+ desc: null
229
+ value: false
230
+ push_to_hub_model_id:
231
+ desc: null
232
+ value: gpt-2-tamil
233
+ push_to_hub_organization:
234
+ desc: null
235
+ value: null
236
+ push_to_hub_token:
237
+ desc: null
238
+ value: null
239
+ remove_unused_columns:
240
+ desc: null
241
+ value: true
242
+ report_to:
243
+ desc: null
244
+ value:
245
+ - wandb
246
+ resume_from_checkpoint:
247
+ desc: null
248
+ value: null
249
+ run_name:
250
+ desc: null
251
+ value: trial
252
+ save_on_each_node:
253
+ desc: null
254
+ value: false
255
+ save_steps:
256
+ desc: null
257
+ value: 2500
258
+ save_strategy:
259
+ desc: null
260
+ value: IntervalStrategy.STEPS
261
+ save_total_limit:
262
+ desc: null
263
+ value: null
264
+ seed:
265
+ desc: null
266
+ value: 42
267
+ sharded_ddp:
268
+ desc: null
269
+ value: []
270
+ skip_memory_metrics:
271
+ desc: null
272
+ value: true
273
+ tokenizer_name:
274
+ desc: null
275
+ value: ../gpt-2-tamil
276
+ tpu_metrics_debug:
277
+ desc: null
278
+ value: false
279
+ tpu_num_cores:
280
+ desc: null
281
+ value: null
282
+ train_file:
283
+ desc: null
284
+ value: null
285
+ use_fast_tokenizer:
286
+ desc: null
287
+ value: true
288
+ use_legacy_prediction_loop:
289
+ desc: null
290
+ value: false
291
+ validation_file:
292
+ desc: null
293
+ value: null
294
+ validation_split_percentage:
295
+ desc: null
296
+ value: 5
297
+ warmup_ratio:
298
+ desc: null
299
+ value: 0.0
300
+ warmup_steps:
301
+ desc: null
302
+ value: 1000
303
+ weight_decay:
304
+ desc: null
305
+ value: 0.01
src/wandb/run-20210715_091856-2v0tf7h4/files/events.out.tfevents.1626340740.t1v-n-ebe36c53-w-0.765413.3.v2 ADDED
@@ -0,0 +1 @@
 
 
1
+ /home/tweety_abi/GPT2-Tamil/gpt-2-tamil/events.out.tfevents.1626340740.t1v-n-ebe36c53-w-0.765413.3.v2
src/wandb/run-20210715_091856-2v0tf7h4/files/requirements.txt ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ appdirs==1.4.4
4
+ astunparse==1.6.3
5
+ async-timeout==3.0.1
6
+ attrs==21.2.0
7
+ backcall==0.2.0
8
+ black==21.6b0
9
+ cachetools==4.2.2
10
+ certifi==2021.5.30
11
+ cfgv==3.3.0
12
+ chardet==4.0.0
13
+ chex==0.0.7
14
+ click==8.0.1
15
+ configparser==5.0.2
16
+ cycler==0.10.0
17
+ datasets==1.8.1.dev0
18
+ decorator==5.0.9
19
+ dill==0.3.4
20
+ distlib==0.3.2
21
+ dm-tree==0.1.6
22
+ docker-pycreds==0.4.0
23
+ filelock==3.0.12
24
+ flake8==3.9.2
25
+ flatbuffers==1.12
26
+ flax==0.3.4
27
+ fsspec==2021.6.1
28
+ gast==0.4.0
29
+ gitdb==4.0.7
30
+ gitpython==3.1.18
31
+ google-auth-oauthlib==0.4.4
32
+ google-auth==1.32.1
33
+ google-pasta==0.2.0
34
+ grpcio==1.34.1
35
+ h5py==3.1.0
36
+ huggingface-hub==0.0.12
37
+ identify==2.2.10
38
+ idna==2.10
39
+ ipython-genutils==0.2.0
40
+ ipython==7.25.0
41
+ isort==5.9.1
42
+ jax==0.2.16
43
+ jaxlib==0.1.68
44
+ jedi==0.18.0
45
+ joblib==1.0.1
46
+ keras-nightly==2.5.0.dev2021032900
47
+ keras-preprocessing==1.1.2
48
+ kiwisolver==1.3.1
49
+ libtpu-nightly==0.1.dev20210615
50
+ markdown==3.3.4
51
+ matplotlib-inline==0.1.2
52
+ matplotlib==3.4.2
53
+ mccabe==0.6.1
54
+ msgpack==1.0.2
55
+ multidict==5.1.0
56
+ multiprocess==0.70.12.2
57
+ mypy-extensions==0.4.3
58
+ nodeenv==1.6.0
59
+ numpy==1.19.5
60
+ oauthlib==3.1.1
61
+ opt-einsum==3.3.0
62
+ optax==0.0.8
63
+ packaging==20.9
64
+ pandas==1.2.5
65
+ parso==0.8.2
66
+ pathspec==0.8.1
67
+ pathtools==0.1.2
68
+ pexpect==4.8.0
69
+ pickleshare==0.7.5
70
+ pillow==8.3.0
71
+ pip==20.0.2
72
+ pkg-resources==0.0.0
73
+ pre-commit==2.13.0
74
+ promise==2.3
75
+ prompt-toolkit==3.0.19
76
+ protobuf==3.17.3
77
+ psutil==5.8.0
78
+ ptyprocess==0.7.0
79
+ pyarrow==4.0.1
80
+ pyasn1-modules==0.2.8
81
+ pyasn1==0.4.8
82
+ pycodestyle==2.7.0
83
+ pyflakes==2.3.1
84
+ pygments==2.9.0
85
+ pyparsing==2.4.7
86
+ python-dateutil==2.8.1
87
+ pytz==2021.1
88
+ pyyaml==5.4.1
89
+ regex==2021.7.1
90
+ requests-oauthlib==1.3.0
91
+ requests==2.25.1
92
+ rsa==4.7.2
93
+ sacremoses==0.0.45
94
+ scipy==1.7.0
95
+ sentry-sdk==1.3.0
96
+ setuptools==44.0.0
97
+ shortuuid==1.0.1
98
+ six==1.15.0
99
+ smmap==4.0.0
100
+ subprocess32==3.5.4
101
+ tensorboard-data-server==0.6.1
102
+ tensorboard-plugin-wit==1.8.0
103
+ tensorboard==2.5.0
104
+ tensorflow-estimator==2.5.0
105
+ tensorflow==2.5.0
106
+ termcolor==1.1.0
107
+ tokenizers==0.10.3
108
+ toml==0.10.2
109
+ toolz==0.11.1
110
+ torch==1.9.0
111
+ tqdm==4.61.1
112
+ traitlets==5.0.5
113
+ transformers==4.9.0.dev0
114
+ typing-extensions==3.7.4.3
115
+ urllib3==1.26.6
116
+ virtualenv==20.4.7
117
+ wandb==0.10.33
118
+ wcwidth==0.2.5
119
+ werkzeug==2.0.1
120
+ wheel==0.36.2
121
+ wrapt==1.12.1
122
+ xxhash==2.0.2
123
+ yarl==1.6.3
src/wandb/run-20210715_091856-2v0tf7h4/files/wandb-metadata.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T09:19:00.102585",
5
+ "startedAt": "2021-07-15T09:18:56.277815",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--output_dir=../gpt-2-tamil",
11
+ "--model_type=gpt2",
12
+ "--config_name=../gpt-2-tamil",
13
+ "--tokenizer_name=../gpt-2-tamil",
14
+ "--dataset_name=oscar",
15
+ "--dataset_config_name=unshuffled_deduplicated_ta",
16
+ "--do_train",
17
+ "--do_eval",
18
+ "--block_size=512",
19
+ "--per_device_train_batch_size=128",
20
+ "--per_device_eval_batch_size=128",
21
+ "--learning_rate=3e-5",
22
+ "--warmup_steps=1000",
23
+ "--adam_beta1=0.9",
24
+ "--adam_beta2=0.98",
25
+ "--weight_decay=0.01",
26
+ "--overwrite_output_dir",
27
+ "--num_train_epochs=10",
28
+ "--report_to",
29
+ "wandb",
30
+ "--run_name",
31
+ "trial",
32
+ "--logging_steps=500",
33
+ "--save_steps=2500",
34
+ "--eval_steps=2500",
35
+ "--preprocessing_num_workers=90"
36
+ ],
37
+ "state": "running",
38
+ "program": "../src/run_clm_flax.py",
39
+ "codePath": "src/run_clm_flax.py",
40
+ "git": {
41
+ "remote": "https://github.com/AbinayaM02/GPT2-Tamil.git",
42
+ "commit": "69c9b7bf75b708a8f62cf5833d1b89acf5d1760b"
43
+ },
44
+ "email": "abinaya.m02@mphasis.com",
45
+ "root": "/home/tweety_abi/GPT2-Tamil",
46
+ "host": "t1v-n-ebe36c53-w-0",
47
+ "username": "tweety_abi",
48
+ "executable": "/home/tweety_abi/gpt2_env/bin/python"
49
+ }
src/wandb/run-20210715_091856-2v0tf7h4/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
src/wandb/run-20210715_091856-2v0tf7h4/run-2v0tf7h4.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74c24264810cc8a5625c9a6fd0093d95ea89e0980f556fce2e873e00ba0254c5
3
+ size 38212
src/wandb/run-20210715_092837-watdq7ib/files/config.yaml ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ __cached__setup_devices:
4
+ desc: null
5
+ value: cpu
6
+ _n_gpu:
7
+ desc: null
8
+ value: 0
9
+ _wandb:
10
+ desc: null
11
+ value:
12
+ cli_version: 0.10.33
13
+ framework: huggingface
14
+ huggingface_version: 4.9.0.dev0
15
+ is_jupyter_run: false
16
+ is_kaggle_kernel: false
17
+ python_version: 3.8.10
18
+ t:
19
+ 1:
20
+ - 1
21
+ - 3
22
+ - 11
23
+ 4: 3.8.10
24
+ 5: 0.10.33
25
+ 6: 4.9.0.dev0
26
+ 8:
27
+ - 5
28
+ adafactor:
29
+ desc: null
30
+ value: false
31
+ adam_beta1:
32
+ desc: null
33
+ value: 0.9
34
+ adam_beta2:
35
+ desc: null
36
+ value: 0.98
37
+ adam_epsilon:
38
+ desc: null
39
+ value: 1.0e-08
40
+ block_size:
41
+ desc: null
42
+ value: 512
43
+ cache_dir:
44
+ desc: null
45
+ value: null
46
+ config_name:
47
+ desc: null
48
+ value: ../gpt-2-tamil
49
+ dataloader_drop_last:
50
+ desc: null
51
+ value: false
52
+ dataloader_num_workers:
53
+ desc: null
54
+ value: 0
55
+ dataloader_pin_memory:
56
+ desc: null
57
+ value: true
58
+ dataset_config_name:
59
+ desc: null
60
+ value: unshuffled_deduplicated_ta
61
+ dataset_name:
62
+ desc: null
63
+ value: oscar
64
+ ddp_find_unused_parameters:
65
+ desc: null
66
+ value: null
67
+ debug:
68
+ desc: null
69
+ value: []
70
+ deepspeed:
71
+ desc: null
72
+ value: null
73
+ disable_tqdm:
74
+ desc: null
75
+ value: false
76
+ do_eval:
77
+ desc: null
78
+ value: true
79
+ do_predict:
80
+ desc: null
81
+ value: false
82
+ do_train:
83
+ desc: null
84
+ value: true
85
+ dtype:
86
+ desc: null
87
+ value: float32
88
+ eval_accumulation_steps:
89
+ desc: null
90
+ value: null
91
+ eval_steps:
92
+ desc: null
93
+ value: 2500
94
+ evaluation_strategy:
95
+ desc: null
96
+ value: IntervalStrategy.NO
97
+ fp16:
98
+ desc: null
99
+ value: false
100
+ fp16_backend:
101
+ desc: null
102
+ value: auto
103
+ fp16_full_eval:
104
+ desc: null
105
+ value: false
106
+ fp16_opt_level:
107
+ desc: null
108
+ value: O1
109
+ gradient_accumulation_steps:
110
+ desc: null
111
+ value: 1
112
+ greater_is_better:
113
+ desc: null
114
+ value: null
115
+ group_by_length:
116
+ desc: null
117
+ value: false
118
+ ignore_data_skip:
119
+ desc: null
120
+ value: false
121
+ label_names:
122
+ desc: null
123
+ value: null
124
+ label_smoothing_factor:
125
+ desc: null
126
+ value: 0.0
127
+ learning_rate:
128
+ desc: null
129
+ value: 3.0e-05
130
+ length_column_name:
131
+ desc: null
132
+ value: length
133
+ load_best_model_at_end:
134
+ desc: null
135
+ value: false
136
+ local_rank:
137
+ desc: null
138
+ value: -1
139
+ log_level:
140
+ desc: null
141
+ value: -1
142
+ log_level_replica:
143
+ desc: null
144
+ value: -1
145
+ log_on_each_node:
146
+ desc: null
147
+ value: true
148
+ logging_dir:
149
+ desc: null
150
+ value: ../gpt-2-tamil/runs/Jul15_09-27-21_t1v-n-ebe36c53-w-0
151
+ logging_first_step:
152
+ desc: null
153
+ value: false
154
+ logging_steps:
155
+ desc: null
156
+ value: 500
157
+ logging_strategy:
158
+ desc: null
159
+ value: IntervalStrategy.STEPS
160
+ lr_scheduler_type:
161
+ desc: null
162
+ value: SchedulerType.LINEAR
163
+ max_eval_samples:
164
+ desc: null
165
+ value: null
166
+ max_grad_norm:
167
+ desc: null
168
+ value: 1.0
169
+ max_steps:
170
+ desc: null
171
+ value: -1
172
+ max_train_samples:
173
+ desc: null
174
+ value: null
175
+ metric_for_best_model:
176
+ desc: null
177
+ value: null
178
+ model_name_or_path:
179
+ desc: null
180
+ value: null
181
+ model_type:
182
+ desc: null
183
+ value: gpt2
184
+ mp_parameters:
185
+ desc: null
186
+ value: ''
187
+ no_cuda:
188
+ desc: null
189
+ value: false
190
+ num_train_epochs:
191
+ desc: null
192
+ value: 10.0
193
+ output_dir:
194
+ desc: null
195
+ value: ../gpt-2-tamil
196
+ overwrite_cache:
197
+ desc: null
198
+ value: false
199
+ overwrite_output_dir:
200
+ desc: null
201
+ value: true
202
+ past_index:
203
+ desc: null
204
+ value: -1
205
+ per_device_eval_batch_size:
206
+ desc: null
207
+ value: 64
208
+ per_device_train_batch_size:
209
+ desc: null
210
+ value: 64
211
+ per_gpu_eval_batch_size:
212
+ desc: null
213
+ value: null
214
+ per_gpu_train_batch_size:
215
+ desc: null
216
+ value: null
217
+ prediction_loss_only:
218
+ desc: null
219
+ value: false
220
+ preprocessing_num_workers:
221
+ desc: null
222
+ value: 90
223
+ push_to_hub:
224
+ desc: null
225
+ value: false
226
+ push_to_hub_model_id:
227
+ desc: null
228
+ value: gpt-2-tamil
229
+ push_to_hub_organization:
230
+ desc: null
231
+ value: null
232
+ push_to_hub_token:
233
+ desc: null
234
+ value: null
235
+ remove_unused_columns:
236
+ desc: null
237
+ value: true
238
+ report_to:
239
+ desc: null
240
+ value:
241
+ - wandb
242
+ resume_from_checkpoint:
243
+ desc: null
244
+ value: null
245
+ run_name:
246
+ desc: null
247
+ value: trial
248
+ save_on_each_node:
249
+ desc: null
250
+ value: false
251
+ save_steps:
252
+ desc: null
253
+ value: 2500
254
+ save_strategy:
255
+ desc: null
256
+ value: IntervalStrategy.STEPS
257
+ save_total_limit:
258
+ desc: null
259
+ value: null
260
+ seed:
261
+ desc: null
262
+ value: 42
263
+ sharded_ddp:
264
+ desc: null
265
+ value: []
266
+ skip_memory_metrics:
267
+ desc: null
268
+ value: true
269
+ tokenizer_name:
270
+ desc: null
271
+ value: ../gpt-2-tamil
272
+ tpu_metrics_debug:
273
+ desc: null
274
+ value: false
275
+ tpu_num_cores:
276
+ desc: null
277
+ value: null
278
+ train_file:
279
+ desc: null
280
+ value: null
281
+ use_fast_tokenizer:
282
+ desc: null
283
+ value: true
284
+ use_legacy_prediction_loop:
285
+ desc: null
286
+ value: false
287
+ validation_file:
288
+ desc: null
289
+ value: null
290
+ validation_split_percentage:
291
+ desc: null
292
+ value: 5
293
+ warmup_ratio:
294
+ desc: null
295
+ value: 0.0
296
+ warmup_steps:
297
+ desc: null
298
+ value: 1000
299
+ weight_decay:
300
+ desc: null
301
+ value: 0.01
src/wandb/run-20210715_092837-watdq7ib/files/events.out.tfevents.1626341319.t1v-n-ebe36c53-w-0.768105.3.v2 ADDED
@@ -0,0 +1 @@
 
 
1
+ /home/tweety_abi/GPT2-Tamil/gpt-2-tamil/events.out.tfevents.1626341319.t1v-n-ebe36c53-w-0.768105.3.v2
src/wandb/run-20210715_092837-watdq7ib/files/requirements.txt ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ appdirs==1.4.4
4
+ astunparse==1.6.3
5
+ async-timeout==3.0.1
6
+ attrs==21.2.0
7
+ backcall==0.2.0
8
+ black==21.6b0
9
+ cachetools==4.2.2
10
+ certifi==2021.5.30
11
+ cfgv==3.3.0
12
+ chardet==4.0.0
13
+ chex==0.0.7
14
+ click==8.0.1
15
+ configparser==5.0.2
16
+ cycler==0.10.0
17
+ datasets==1.8.1.dev0
18
+ decorator==5.0.9
19
+ dill==0.3.4
20
+ distlib==0.3.2
21
+ dm-tree==0.1.6
22
+ docker-pycreds==0.4.0
23
+ filelock==3.0.12
24
+ flake8==3.9.2
25
+ flatbuffers==1.12
26
+ flax==0.3.4
27
+ fsspec==2021.6.1
28
+ gast==0.4.0
29
+ gitdb==4.0.7
30
+ gitpython==3.1.18
31
+ google-auth-oauthlib==0.4.4
32
+ google-auth==1.32.1
33
+ google-pasta==0.2.0
34
+ grpcio==1.34.1
35
+ h5py==3.1.0
36
+ huggingface-hub==0.0.12
37
+ identify==2.2.10
38
+ idna==2.10
39
+ ipython-genutils==0.2.0
40
+ ipython==7.25.0
41
+ isort==5.9.1
42
+ jax==0.2.16
43
+ jaxlib==0.1.68
44
+ jedi==0.18.0
45
+ joblib==1.0.1
46
+ keras-nightly==2.5.0.dev2021032900
47
+ keras-preprocessing==1.1.2
48
+ kiwisolver==1.3.1
49
+ libtpu-nightly==0.1.dev20210615
50
+ markdown==3.3.4
51
+ matplotlib-inline==0.1.2
52
+ matplotlib==3.4.2
53
+ mccabe==0.6.1
54
+ msgpack==1.0.2
55
+ multidict==5.1.0
56
+ multiprocess==0.70.12.2
57
+ mypy-extensions==0.4.3
58
+ nodeenv==1.6.0
59
+ numpy==1.19.5
60
+ oauthlib==3.1.1
61
+ opt-einsum==3.3.0
62
+ optax==0.0.8
63
+ packaging==20.9
64
+ pandas==1.2.5
65
+ parso==0.8.2
66
+ pathspec==0.8.1
67
+ pathtools==0.1.2
68
+ pexpect==4.8.0
69
+ pickleshare==0.7.5
70
+ pillow==8.3.0
71
+ pip==20.0.2
72
+ pkg-resources==0.0.0
73
+ pre-commit==2.13.0
74
+ promise==2.3
75
+ prompt-toolkit==3.0.19
76
+ protobuf==3.17.3
77
+ psutil==5.8.0
78
+ ptyprocess==0.7.0
79
+ pyarrow==4.0.1
80
+ pyasn1-modules==0.2.8
81
+ pyasn1==0.4.8
82
+ pycodestyle==2.7.0
83
+ pyflakes==2.3.1
84
+ pygments==2.9.0
85
+ pyparsing==2.4.7
86
+ python-dateutil==2.8.1
87
+ pytz==2021.1
88
+ pyyaml==5.4.1
89
+ regex==2021.7.1
90
+ requests-oauthlib==1.3.0
91
+ requests==2.25.1
92
+ rsa==4.7.2
93
+ sacremoses==0.0.45
94
+ scipy==1.7.0
95
+ sentry-sdk==1.3.0
96
+ setuptools==44.0.0
97
+ shortuuid==1.0.1
98
+ six==1.15.0
99
+ smmap==4.0.0
100
+ subprocess32==3.5.4
101
+ tensorboard-data-server==0.6.1
102
+ tensorboard-plugin-wit==1.8.0
103
+ tensorboard==2.5.0
104
+ tensorflow-estimator==2.5.0
105
+ tensorflow==2.5.0
106
+ termcolor==1.1.0
107
+ tokenizers==0.10.3
108
+ toml==0.10.2
109
+ toolz==0.11.1
110
+ torch==1.9.0
111
+ tqdm==4.61.1
112
+ traitlets==5.0.5
113
+ transformers==4.9.0.dev0
114
+ typing-extensions==3.7.4.3
115
+ urllib3==1.26.6
116
+ virtualenv==20.4.7
117
+ wandb==0.10.33
118
+ wcwidth==0.2.5
119
+ werkzeug==2.0.1
120
+ wheel==0.36.2
121
+ wrapt==1.12.1
122
+ xxhash==2.0.2
123
+ yarl==1.6.3
src/wandb/run-20210715_092837-watdq7ib/files/wandb-metadata.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T09:28:39.248463",
5
+ "startedAt": "2021-07-15T09:28:37.215410",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--output_dir=../gpt-2-tamil",
11
+ "--model_type=gpt2",
12
+ "--config_name=../gpt-2-tamil",
13
+ "--tokenizer_name=../gpt-2-tamil",
14
+ "--dataset_name=oscar",
15
+ "--dataset_config_name=unshuffled_deduplicated_ta",
16
+ "--do_train",
17
+ "--do_eval",
18
+ "--block_size=512",
19
+ "--per_device_train_batch_size=64",
20
+ "--per_device_eval_batch_size=64",
21
+ "--learning_rate=3e-5",
22
+ "--warmup_steps=1000",
23
+ "--adam_beta1=0.9",
24
+ "--adam_beta2=0.98",
25
+ "--weight_decay=0.01",
26
+ "--overwrite_output_dir",
27
+ "--num_train_epochs=10",
28
+ "--report_to",
29
+ "wandb",
30
+ "--run_name",
31
+ "trial",
32
+ "--logging_steps=500",
33
+ "--save_steps=2500",
34
+ "--eval_steps=2500",
35
+ "--preprocessing_num_workers=90"
36
+ ],
37
+ "state": "running",
38
+ "program": "../src/run_clm_flax.py",
39
+ "codePath": "src/run_clm_flax.py",
40
+ "git": {
41
+ "remote": "https://github.com/AbinayaM02/GPT2-Tamil.git",
42
+ "commit": "69c9b7bf75b708a8f62cf5833d1b89acf5d1760b"
43
+ },
44
+ "email": "abinaya.m02@mphasis.com",
45
+ "root": "/home/tweety_abi/GPT2-Tamil",
46
+ "host": "t1v-n-ebe36c53-w-0",
47
+ "username": "tweety_abi",
48
+ "executable": "/home/tweety_abi/gpt2_env/bin/python"
49
+ }
src/wandb/run-20210715_092837-watdq7ib/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"global_step": 162500, "_timestamp": 1626515598.82067, "train_time": 3039175.75, "train_learning_rate": 2.0749264422192937e-06, "_step": 324025, "train_loss": 1.1235102415084839, "eval_loss": 1.1323037147521973, "eval_perplexity": 3.1027963161468506}
src/wandb/run-20210715_092837-watdq7ib/run-watdq7ib.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17ccbfb69a2e91865a50d34837db9291fa2687143f65c6f6c712e23f40a46343
3
+ size 71362583