quim-motger committed on
Commit
f9dd32d
1 Parent(s): 18e674d

Upload 10 files

Browse files
config.json CHANGED
@@ -45,7 +45,7 @@
45
  }
46
  },
47
  "torch_dtype": "float32",
48
- "transformers_version": "4.30.2",
49
  "untie_r": true,
50
  "use_mems_eval": true,
51
  "use_mems_train": false,
 
45
  }
46
  },
47
  "torch_dtype": "float32",
48
+ "transformers_version": "4.39.1",
49
  "untie_r": true,
50
  "use_mems_eval": true,
51
  "use_mems_train": false,
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc00d31ec6c55096a1dd4ad366316500a413155e9e504fb4b318e790659034d5
3
+ size 466905100
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b1818057b2517e29ef7bc3bc638f8e7988199a15f9e90f6d498ea36115b848d
3
- size 933881285
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a773951628e019398980299eeb7e360951a6c7180203d6f494e0418d5659e8f1
3
+ size 933939962
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e142ede7b5258f1993bc097bb1982682b3be6b8db767d1f28a07f4da04918c4
3
- size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8556b9ce063e9a3174297b1b4804ca0ce88037a08f18c1d859b216d1a9407b13
3
+ size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a81b9e64a6d36ab9d27ca33efaa732434676daf622cfdf89ee1179762e733965
3
- size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91aa10bda283c4c6d7bb1d45f481e3baef71e77946a9d038f500bc8e00072662
3
+ size 1064
tokenizer.json CHANGED
@@ -123,7 +123,8 @@
123
  {
124
  "type": "Metaspace",
125
  "replacement": "▁",
126
- "add_prefix_space": true
 
127
  }
128
  ]
129
  },
@@ -205,7 +206,8 @@
205
  "decoder": {
206
  "type": "Metaspace",
207
  "replacement": "▁",
208
- "add_prefix_space": true
 
209
  },
210
  "model": {
211
  "type": "Unigram",
@@ -128211,6 +128213,7 @@
128211
  "•",
128212
  -14.79216480255127
128213
  ]
128214
- ]
 
128215
  }
128216
  }
 
123
  {
124
  "type": "Metaspace",
125
  "replacement": "▁",
126
+ "add_prefix_space": true,
127
+ "prepend_scheme": "always"
128
  }
129
  ]
130
  },
 
206
  "decoder": {
207
  "type": "Metaspace",
208
  "replacement": "▁",
209
+ "add_prefix_space": true,
210
+ "prepend_scheme": "always"
211
  },
212
  "model": {
213
  "type": "Unigram",
 
128213
  "•",
128214
  -14.79216480255127
128215
  ]
128216
+ ],
128217
+ "byte_fallback": false
128218
  }
128219
  }
tokenizer_config.json CHANGED
@@ -1,4 +1,78 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "additional_special_tokens": [
3
  "<eop>",
4
  "<eod>"
@@ -9,14 +83,7 @@
9
  "do_lower_case": false,
10
  "eos_token": "</s>",
11
  "keep_accents": false,
12
- "mask_token": {
13
- "__type": "AddedToken",
14
- "content": "<mask>",
15
- "lstrip": true,
16
- "normalized": true,
17
- "rstrip": false,
18
- "single_word": false
19
- },
20
  "model_max_length": 1000000000000000019884624838656,
21
  "pad_token": "<pad>",
22
  "remove_space": true,
 
1
  {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<cls>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "<sep>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "<pad>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "6": {
52
+ "content": "<mask>",
53
+ "lstrip": true,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "7": {
60
+ "content": "<eod>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "8": {
68
+ "content": "<eop>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ }
75
+ },
76
  "additional_special_tokens": [
77
  "<eop>",
78
  "<eod>"
 
83
  "do_lower_case": false,
84
  "eos_token": "</s>",
85
  "keep_accents": false,
86
+ "mask_token": "<mask>",
 
 
 
 
 
 
 
87
  "model_max_length": 1000000000000000019884624838656,
88
  "pad_token": "<pad>",
89
  "remove_space": true,
trainer_state.json CHANGED
@@ -1,64 +1,52 @@
1
  {
2
- "best_metric": 0.0058293770998716354,
3
- "best_model_checkpoint": "data/train-test///model/checkpoint-3351",
4
- "epoch": 1.0,
5
- "global_step": 3351,
 
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.15,
12
- "learning_rate": 1.850790808713817e-05,
13
- "loss": 0.076,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.3,
18
- "learning_rate": 1.7015816174276335e-05,
19
- "loss": 0.0181,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.45,
24
- "learning_rate": 1.5523724261414504e-05,
25
- "loss": 0.0105,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 0.6,
30
- "learning_rate": 1.4031632348552673e-05,
31
- "loss": 0.0068,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 0.75,
36
- "learning_rate": 1.253954043569084e-05,
37
- "loss": 0.0074,
38
- "step": 2500
39
  },
40
  {
41
- "epoch": 0.9,
42
- "learning_rate": 1.1047448522829008e-05,
43
- "loss": 0.0067,
44
- "step": 3000
 
45
  },
46
  {
47
- "epoch": 1.0,
48
  "eval_accuracy": null,
49
- "eval_f1": 0.9892673821745217,
50
- "eval_loss": 0.0058293770998716354,
51
- "eval_precision": 0.9878844361602982,
52
- "eval_recall": 0.9906542056074766,
53
- "eval_runtime": 0.6721,
54
- "eval_samples_per_second": 1443.166,
55
- "eval_steps_per_second": 181.512,
56
- "step": 3351
57
  }
58
  ],
59
- "max_steps": 6702,
 
 
60
  "num_train_epochs": 2,
61
- "total_flos": 730895731218864.0,
 
 
62
  "trial_name": null,
63
  "trial_params": null
64
  }
 
1
  {
2
+ "best_metric": 0.9816007359705612,
3
+ "best_model_checkpoint": "data/train-test/xlnet-base-cased//model/checkpoint-874",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 874,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": null,
14
+ "eval_f1": 0.9355716878402904,
15
+ "eval_loss": 0.025213107466697693,
16
+ "eval_precision": 0.9230080572963295,
17
+ "eval_recall": 0.9484820607175714,
18
+ "eval_runtime": 1.8189,
19
+ "eval_samples_per_second": 533.284,
20
+ "eval_steps_per_second": 17.043,
21
+ "step": 437
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  },
23
  {
24
+ "epoch": 1.14,
25
+ "grad_norm": 1.2998504638671875,
26
+ "learning_rate": 8.558352402745997e-06,
27
+ "loss": 0.0831,
28
+ "step": 500
29
  },
30
  {
31
+ "epoch": 2.0,
32
  "eval_accuracy": null,
33
+ "eval_f1": 0.9775538250114522,
34
+ "eval_loss": 0.015987424179911613,
35
+ "eval_precision": 0.9735401459854015,
36
+ "eval_recall": 0.9816007359705612,
37
+ "eval_runtime": 1.8215,
38
+ "eval_samples_per_second": 532.541,
39
+ "eval_steps_per_second": 17.019,
40
+ "step": 874
41
  }
42
  ],
43
+ "logging_steps": 500,
44
+ "max_steps": 874,
45
+ "num_input_tokens_seen": 0,
46
  "num_train_epochs": 2,
47
+ "save_steps": 500,
48
+ "total_flos": 841186234475568.0,
49
+ "train_batch_size": 32,
50
  "trial_name": null,
51
  "trial_params": null
52
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95a948392a188468b6c62767fe55d4271d576399b3f70178aedb695206df8396
3
- size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb2471f1d44fc04999f3013cd24731f3aaf98458fbb4696f19379c1fe6cd52fe
3
+ size 4984