crescendonow commited on
Commit
b9575a6
1 Parent(s): 3fb54dc

Upload 11 files

Browse files
config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "airesearch/wangchanberta-base-att-spm-uncased",
3
+ "architectures": [
4
+ "CamembertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "LABEL_0",
16
+ "1": "LABEL_1",
17
+ "2": "LABEL_2",
18
+ "3": "LABEL_3",
19
+ "4": "LABEL_4",
20
+ "5": "LABEL_5",
21
+ "6": "LABEL_6",
22
+ "7": "LABEL_7",
23
+ "8": "LABEL_8",
24
+ "9": "LABEL_9",
25
+ "10": "LABEL_10",
26
+ "11": "LABEL_11",
27
+ "12": "LABEL_12",
28
+ "13": "LABEL_13",
29
+ "14": "LABEL_14",
30
+ "15": "LABEL_15",
31
+ "16": "LABEL_16",
32
+ "17": "LABEL_17",
33
+ "18": "LABEL_18",
34
+ "19": "LABEL_19",
35
+ "20": "LABEL_20",
36
+ "21": "LABEL_21",
37
+ "22": "LABEL_22",
38
+ "23": "LABEL_23",
39
+ "24": "LABEL_24",
40
+ "25": "LABEL_25",
41
+ "26": "LABEL_26"
42
+ },
43
+ "initializer_range": 0.02,
44
+ "intermediate_size": 3072,
45
+ "label2id": {
46
+ "LABEL_0": 0,
47
+ "LABEL_1": 1,
48
+ "LABEL_10": 10,
49
+ "LABEL_11": 11,
50
+ "LABEL_12": 12,
51
+ "LABEL_13": 13,
52
+ "LABEL_14": 14,
53
+ "LABEL_15": 15,
54
+ "LABEL_16": 16,
55
+ "LABEL_17": 17,
56
+ "LABEL_18": 18,
57
+ "LABEL_19": 19,
58
+ "LABEL_2": 2,
59
+ "LABEL_20": 20,
60
+ "LABEL_21": 21,
61
+ "LABEL_22": 22,
62
+ "LABEL_23": 23,
63
+ "LABEL_24": 24,
64
+ "LABEL_25": 25,
65
+ "LABEL_26": 26,
66
+ "LABEL_3": 3,
67
+ "LABEL_4": 4,
68
+ "LABEL_5": 5,
69
+ "LABEL_6": 6,
70
+ "LABEL_7": 7,
71
+ "LABEL_8": 8,
72
+ "LABEL_9": 9
73
+ },
74
+ "layer_norm_eps": 1e-12,
75
+ "max_position_embeddings": 512,
76
+ "model_type": "camembert",
77
+ "num_attention_head": 12,
78
+ "num_attention_heads": 12,
79
+ "num_hidden_layers": 12,
80
+ "pad_token_id": 1,
81
+ "position_embedding_type": "absolute",
82
+ "torch_dtype": "float32",
83
+ "transformers_version": "4.23.1",
84
+ "type_vocab_size": 1,
85
+ "use_cache": true,
86
+ "vocab_size": 25005
87
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de780104feb5c4cff0884c25e5ac39c758cc54685a0d7c139a966289fd26253d
3
+ size 837512037
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e74fbfd0c761a7b29ac116b26812101746d38c88555e2adef851e392b156ca1
3
+ size 418769329
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b0bb276ee8399a1641b4b7ad757366561db477e8297e9c145856a0d3b2ceaf4
3
+ size 14503
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e19606327df4c1d089d104041faf8c722c43bd36f141a71d6f1c9544e9fb5e7d
3
+ size 623
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49c4ba4e495ddf31eb2fdba7fc6aef3c233091d25d35bc9d24694ccf48ae114c
3
+ size 904693
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s>NOTUSED",
4
+ "</s>NOTUSED",
5
+ "<_>"
6
+ ],
7
+ "bos_token": "<s>",
8
+ "cls_token": "<s>",
9
+ "eos_token": "</s>",
10
+ "mask_token": {
11
+ "content": "<mask>",
12
+ "lstrip": true,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ "pad_token": "<pad>",
18
+ "sep_token": "</s>",
19
+ "unk_token": "<unk>"
20
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s>NOTUSED",
4
+ "</s>NOTUSED",
5
+ "<_>"
6
+ ],
7
+ "bos_token": "<s>",
8
+ "cls_token": "<s>",
9
+ "eos_token": "</s>",
10
+ "mask_token": {
11
+ "__type": "AddedToken",
12
+ "content": "<mask>",
13
+ "lstrip": true,
14
+ "normalized": true,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "model_max_length": 416,
19
+ "name_or_path": "airesearch/wangchanberta-base-att-spm-uncased",
20
+ "pad_token": "<pad>",
21
+ "sep_token": "</s>",
22
+ "sp_model_kwargs": {},
23
+ "special_tokens_map_file": null,
24
+ "tokenizer_class": "CamembertTokenizer",
25
+ "unk_token": "<unk>"
26
+ }
trainer_state.json ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.49367088607595,
5
+ "global_step": 1500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.9254942164969132,
13
+ "eval_f1": 0.5899065420560747,
14
+ "eval_loss": 0.2620357871055603,
15
+ "eval_precision": 0.6196335078534031,
16
+ "eval_recall": 0.5629013079667063,
17
+ "eval_runtime": 9.86,
18
+ "eval_samples_per_second": 63.894,
19
+ "eval_steps_per_second": 4.057,
20
+ "step": 158
21
+ },
22
+ {
23
+ "epoch": 2.0,
24
+ "eval_accuracy": 0.9576990795426686,
25
+ "eval_f1": 0.8084220268394261,
26
+ "eval_loss": 0.13847729563713074,
27
+ "eval_precision": 0.7871142149132687,
28
+ "eval_recall": 0.8309155766944114,
29
+ "eval_runtime": 9.7783,
30
+ "eval_samples_per_second": 64.428,
31
+ "eval_steps_per_second": 4.091,
32
+ "step": 316
33
+ },
34
+ {
35
+ "epoch": 3.0,
36
+ "eval_accuracy": 0.9653435556843255,
37
+ "eval_f1": 0.8456001857441375,
38
+ "eval_loss": 0.11949469894170761,
39
+ "eval_precision": 0.8260376502608301,
40
+ "eval_recall": 0.8661117717003567,
41
+ "eval_runtime": 9.7534,
42
+ "eval_samples_per_second": 64.593,
43
+ "eval_steps_per_second": 4.101,
44
+ "step": 474
45
+ },
46
+ {
47
+ "epoch": 3.16,
48
+ "learning_rate": 1.5189873417721521e-05,
49
+ "loss": 0.5092,
50
+ "step": 500
51
+ },
52
+ {
53
+ "epoch": 4.0,
54
+ "eval_accuracy": 0.9678842853640598,
55
+ "eval_f1": 0.8628684609111034,
56
+ "eval_loss": 0.11460543423891068,
57
+ "eval_precision": 0.845820009136592,
58
+ "eval_recall": 0.8806183115338883,
59
+ "eval_runtime": 9.7942,
60
+ "eval_samples_per_second": 64.324,
61
+ "eval_steps_per_second": 4.084,
62
+ "step": 632
63
+ },
64
+ {
65
+ "epoch": 5.0,
66
+ "eval_accuracy": 0.9680625821836903,
67
+ "eval_f1": 0.8695449784708483,
68
+ "eval_loss": 0.11910449713468552,
69
+ "eval_precision": 0.8514129443938013,
70
+ "eval_recall": 0.8884661117717003,
71
+ "eval_runtime": 9.7653,
72
+ "eval_samples_per_second": 64.514,
73
+ "eval_steps_per_second": 4.096,
74
+ "step": 790
75
+ },
76
+ {
77
+ "epoch": 6.0,
78
+ "eval_accuracy": 0.9688426307695737,
79
+ "eval_f1": 0.8747533372025537,
80
+ "eval_loss": 0.11907244473695755,
81
+ "eval_precision": 0.854421768707483,
82
+ "eval_recall": 0.8960760998810939,
83
+ "eval_runtime": 9.72,
84
+ "eval_samples_per_second": 64.815,
85
+ "eval_steps_per_second": 4.115,
86
+ "step": 948
87
+ },
88
+ {
89
+ "epoch": 6.33,
90
+ "learning_rate": 8.157524613220817e-06,
91
+ "loss": 0.0551,
92
+ "step": 1000
93
+ },
94
+ {
95
+ "epoch": 7.0,
96
+ "eval_accuracy": 0.9694666696382803,
97
+ "eval_f1": 0.879330388281795,
98
+ "eval_loss": 0.12043958157300949,
99
+ "eval_precision": 0.8601319081191722,
100
+ "eval_recall": 0.8994054696789536,
101
+ "eval_runtime": 9.8003,
102
+ "eval_samples_per_second": 64.284,
103
+ "eval_steps_per_second": 4.082,
104
+ "step": 1106
105
+ },
106
+ {
107
+ "epoch": 8.0,
108
+ "eval_accuracy": 0.9703581537364327,
109
+ "eval_f1": 0.8830837106699176,
110
+ "eval_loss": 0.12078419327735901,
111
+ "eval_precision": 0.8627495462794919,
112
+ "eval_recall": 0.9043995243757431,
113
+ "eval_runtime": 9.7367,
114
+ "eval_samples_per_second": 64.704,
115
+ "eval_steps_per_second": 4.108,
116
+ "step": 1264
117
+ },
118
+ {
119
+ "epoch": 9.0,
120
+ "eval_accuracy": 0.970335866633979,
121
+ "eval_f1": 0.8844769534424707,
122
+ "eval_loss": 0.12255486100912094,
123
+ "eval_precision": 0.8641107078039928,
124
+ "eval_recall": 0.9058263971462545,
125
+ "eval_runtime": 9.7627,
126
+ "eval_samples_per_second": 64.531,
127
+ "eval_steps_per_second": 4.097,
128
+ "step": 1422
129
+ },
130
+ {
131
+ "epoch": 9.49,
132
+ "learning_rate": 1.1251758087201126e-06,
133
+ "loss": 0.0342,
134
+ "step": 1500
135
+ }
136
+ ],
137
+ "max_steps": 1580,
138
+ "num_train_epochs": 10,
139
+ "total_flos": 5349608480859192.0,
140
+ "trial_name": null,
141
+ "trial_params": null
142
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802a606423ff22d7728a2e00ba42adb8367add81cc37f7ce56fae47db6663635
3
+ size 3439