djagatiya committed on
Commit
5c41f6b
1 Parent(s): 2daacf9

Initial checkin

config.json ADDED
@@ -0,0 +1,110 @@
+ {
+   "_name_or_path": "microsoft/deberta-base",
+   "architectures": [
+     "DebertaForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "O",
+     "1": "B-PERSON",
+     "2": "I-PERSON",
+     "3": "B-NORP",
+     "4": "I-NORP",
+     "5": "B-FAC",
+     "6": "I-FAC",
+     "7": "B-ORG",
+     "8": "I-ORG",
+     "9": "B-GPE",
+     "10": "I-GPE",
+     "11": "B-LOC",
+     "12": "I-LOC",
+     "13": "B-PRODUCT",
+     "14": "I-PRODUCT",
+     "15": "B-DATE",
+     "16": "I-DATE",
+     "17": "B-TIME",
+     "18": "I-TIME",
+     "19": "B-PERCENT",
+     "20": "I-PERCENT",
+     "21": "B-MONEY",
+     "22": "I-MONEY",
+     "23": "B-QUANTITY",
+     "24": "I-QUANTITY",
+     "25": "B-ORDINAL",
+     "26": "I-ORDINAL",
+     "27": "B-CARDINAL",
+     "28": "I-CARDINAL",
+     "29": "B-EVENT",
+     "30": "I-EVENT",
+     "31": "B-WORK_OF_ART",
+     "32": "I-WORK_OF_ART",
+     "33": "B-LAW",
+     "34": "I-LAW",
+     "35": "B-LANGUAGE",
+     "36": "I-LANGUAGE"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-CARDINAL": 27,
+     "B-DATE": 15,
+     "B-EVENT": 29,
+     "B-FAC": 5,
+     "B-GPE": 9,
+     "B-LANGUAGE": 35,
+     "B-LAW": 33,
+     "B-LOC": 11,
+     "B-MONEY": 21,
+     "B-NORP": 3,
+     "B-ORDINAL": 25,
+     "B-ORG": 7,
+     "B-PERCENT": 19,
+     "B-PERSON": 1,
+     "B-PRODUCT": 13,
+     "B-QUANTITY": 23,
+     "B-TIME": 17,
+     "B-WORK_OF_ART": 31,
+     "I-CARDINAL": 28,
+     "I-DATE": 16,
+     "I-EVENT": 30,
+     "I-FAC": 6,
+     "I-GPE": 10,
+     "I-LANGUAGE": 36,
+     "I-LAW": 34,
+     "I-LOC": 12,
+     "I-MONEY": 22,
+     "I-NORP": 4,
+     "I-ORDINAL": 26,
+     "I-ORG": 8,
+     "I-PERCENT": 20,
+     "I-PERSON": 2,
+     "I-PRODUCT": 14,
+     "I-QUANTITY": 24,
+     "I-TIME": 18,
+     "I-WORK_OF_ART": 32,
+     "O": 0
+   },
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 768,
+   "pos_att_type": [
+     "c2p",
+     "p2c"
+   ],
+   "position_biased_input": false,
+   "relative_attention": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.23.0",
+   "type_vocab_size": 0,
+   "vocab_size": 50265
+ }
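The `id2label`/`label2id` maps above bind the classifier's 37 outputs to OntoNotes 5.0 entity tags in the BIO scheme. For orientation, a minimal sketch of loading this checkpoint for inference with the `transformers` API; the local directory name `./checkpoint` is a placeholder for wherever the files in this commit live:

```python
# Minimal sketch: run the committed checkpoint as an NER pipeline.
# "./checkpoint" is a hypothetical local path holding this commit's files.
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

model = AutoModelForTokenClassification.from_pretrained("./checkpoint")
tokenizer = AutoTokenizer.from_pretrained("./checkpoint")

# Because id2label/label2id are stored in config.json, the pipeline
# emits OntoNotes tags (B-ORG, I-DATE, ...) directly.
ner = pipeline("token-classification", model=model, tokenizer=tokenizer,
               aggregation_strategy="simple")
print(ner("We respectfully invite you to watch a special edition of Across China."))
```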
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3ab235be6d875536f6ea6ae372fba48ce781e67a30607363bd1501128f17dbb8
+ size 554591539
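The three lines above are a Git LFS pointer, not the weights themselves; the ~530 MB blob is stored out of band and resolved on checkout. A hedged sketch of verifying a downloaded `pytorch_model.bin` against the pointer's `oid` and `size` using only the standard library (the local filename is an assumption):

```python
# Check a downloaded pytorch_model.bin against the LFS pointer above.
# The local path "pytorch_model.bin" is an assumption.
import hashlib
import os

EXPECTED_OID = "3ab235be6d875536f6ea6ae372fba48ce781e67a30607363bd1501128f17dbb8"
EXPECTED_SIZE = 554591539  # bytes, from the pointer's "size" field

sha = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert os.path.getsize("pytorch_model.bin") == EXPECTED_SIZE, "size mismatch"
assert sha.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("pytorch_model.bin matches its LFS pointer")
```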
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6de13a5bb5071790a9af50795884d2fba4140bf5f6c5b1bcb0dc7e0b2c0347f4
+ size 14503
run.log ADDED
@@ -0,0 +1,512 @@
+ 2022-11-16 17:06:48,577 - __main__ - INFO - Label List:['O', 'B-PERSON', 'I-PERSON', 'B-NORP', 'I-NORP', 'B-FAC', 'I-FAC', 'B-ORG', 'I-ORG', 'B-GPE', 'I-GPE', 'B-LOC', 'I-LOC', 'B-PRODUCT', 'I-PRODUCT', 'B-DATE', 'I-DATE', 'B-TIME', 'I-TIME', 'B-PERCENT', 'I-PERCENT', 'B-MONEY', 'I-MONEY', 'B-QUANTITY', 'I-QUANTITY', 'B-ORDINAL', 'I-ORDINAL', 'B-CARDINAL', 'I-CARDINAL', 'B-EVENT', 'I-EVENT', 'B-WORK_OF_ART', 'I-WORK_OF_ART', 'B-LAW', 'I-LAW', 'B-LANGUAGE', 'I-LANGUAGE']
+ 2022-11-16 17:06:55,367 - __main__ - INFO - Dataset({
+     features: ['id', 'words', 'ner_tags'],
+     num_rows: 75187
+ })
+ 2022-11-16 17:06:56,175 - __main__ - INFO - Dataset({
+     features: ['id', 'words', 'ner_tags'],
+     num_rows: 9479
+ })
+ 2022-11-16 17:06:56,185 - transformers.tokenization_utils_base - INFO - loading file vocab.json
+ 2022-11-16 17:06:56,185 - transformers.tokenization_utils_base - INFO - loading file merges.txt
+ 2022-11-16 17:06:56,185 - transformers.tokenization_utils_base - INFO - loading file tokenizer.json
+ 2022-11-16 17:06:56,185 - transformers.tokenization_utils_base - INFO - loading file added_tokens.json
+ 2022-11-16 17:06:56,185 - transformers.tokenization_utils_base - INFO - loading file special_tokens_map.json
+ 2022-11-16 17:06:56,185 - transformers.tokenization_utils_base - INFO - loading file tokenizer_config.json
+ 2022-11-16 17:06:56,250 - __main__ - INFO - {'input_ids': [[1, 653, 761, 9, 3783, 17487, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 166, 32928, 9603, 47, 7, 1183, 10, 780, 5403, 9, 15581, 436, 479, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 15584, 3082, 3192, 23959, 15, 5, 2860, 3875, 9, 436, 4832, 41876, 38628, 9, 15643, 24610, 4743, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 15125, 6764, 15, 15643, 24610, 4743, 16, 5, 23001, 7, 5, 41184, 6304, 25132, 23909, 479, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 85, 16, 14092, 9, 10, 2270, 11235, 459, 2156, 5929, 1690, 523, 293, 2156, 10, 1307, 1062, 18185, 8, 30943, 9368, 2156, 8, 5, 2860, 2298, 2156, 566, 97, 383, 479, 2]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]}
+ 2022-11-16 17:06:56,251 - __main__ - INFO - ['[CLS]', 'ĠWhat', 'Ġkind', 'Ġof', 'Ġmemory', 'Ġ?', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']
+ 2022-11-16 17:06:56,251 - __main__ - INFO - ['[CLS]', 'ĠWe', 'Ġrespectfully', 'Ġinvite', 'Ġyou', 'Ġto', 'Ġwatch', 'Ġa', 'Ġspecial', 'Ġedition', 'Ġof', 'ĠAcross', 'ĠChina', 'Ġ.', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']
+ 2022-11-16 17:06:56,251 - __main__ - INFO - ['[CLS]', 'ĠWW', 'ĠII', 'ĠLand', 'marks', 'Ġon', 'Ġthe', 'ĠGreat', 'ĠEarth', 'Ġof', 'ĠChina', 'Ġ:', 'ĠEternal', 'ĠMemories', 'Ġof', 'ĠTai', 'hang', 'ĠMountain', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']
+ 2022-11-16 17:06:56,251 - __main__ - INFO - ['[CLS]', 'ĠStanding', 'Ġtall', 'Ġon', 'ĠTai', 'hang', 'ĠMountain', 'Ġis', 'Ġthe', 'ĠMonument', 'Ġto', 'Ġthe', 'ĠHundred', 'ĠReg', 'iments', 'ĠOffensive', 'Ġ.', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']
+ 2022-11-16 17:06:56,252 - __main__ - INFO - ['[CLS]', 'ĠIt', 'Ġis', 'Ġcomposed', 'Ġof', 'Ġa', 'Ġprimary', 'Ġste', 'le', 'Ġ,', 'Ġsecondary', 'Ġst', 'el', 'es', 'Ġ,', 'Ġa', 'Ġhuge', 'Ġround', 'Ġsculpture', 'Ġand', 'Ġbeacon', 'Ġtower', 'Ġ,', 'Ġand', 'Ġthe', 'ĠGreat', 'ĠWall', 'Ġ,', 'Ġamong', 'Ġother', 'Ġthings', 'Ġ.', '[SEP]']
+ 2022-11-16 17:06:56,252 - __main__ - INFO - -------------
+ 2022-11-16 17:06:56,252 - __main__ - INFO - ['[CLS]', 'ĠWe', 'Ġrespectfully', 'Ġinvite', 'Ġyou', 'Ġto', 'Ġwatch', 'Ġa', 'Ġspecial', 'Ġedition', 'Ġof', 'ĠAcross', 'ĠChina', 'Ġ.', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']
+ 2022-11-16 17:06:56,252 - __main__ - INFO - [None, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
+ 2022-11-16 17:07:05,682 - __main__ - INFO - {'id': [0, 1, 2, 3, 4], 'words': [['What', 'kind', 'of', 'memory', '?'], ['We', 'respectfully', 'invite', 'you', 'to', 'watch', 'a', 'special', 'edition', 'of', 'Across', 'China', '.'], ['WW', 'II', 'Landmarks', 'on', 'the', 'Great', 'Earth', 'of', 'China', ':', 'Eternal', 'Memories', 'of', 'Taihang', 'Mountain'], ['Standing', 'tall', 'on', 'Taihang', 'Mountain', 'is', 'the', 'Monument', 'to', 'the', 'Hundred', 'Regiments', 'Offensive', '.'], ['It', 'is', 'composed', 'of', 'a', 'primary', 'stele', ',', 'secondary', 'steles', ',', 'a', 'huge', 'round', 'sculpture', 'and', 'beacon', 'tower', ',', 'and', 'the', 'Great', 'Wall', ',', 'among', 'other', 'things', '.']], 'ner_tags': [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0], [31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32], [0, 0, 0, 11, 12, 0, 31, 32, 32, 32, 32, 32, 32, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 32, 32, 0, 0, 0, 0, 0]], 'input_ids': [[1, 653, 761, 9, 3783, 17487, 2], [1, 166, 32928, 9603, 47, 7, 1183, 10, 780, 5403, 9, 15581, 436, 479, 2], [1, 15584, 3082, 3192, 23959, 15, 5, 2860, 3875, 9, 436, 4832, 41876, 38628, 9, 15643, 24610, 4743, 2], [1, 15125, 6764, 15, 15643, 24610, 4743, 16, 5, 23001, 7, 5, 41184, 6304, 25132, 23909, 479, 2], [1, 85, 16, 14092, 9, 10, 2270, 11235, 459, 2156, 5929, 1690, 523, 293, 2156, 10, 1307, 1062, 18185, 8, 30943, 9368, 2156, 8, 5, 2860, 2298, 2156, 566, 97, 383, 479, 2]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], 'labels': [[-100, 0, 0, 0, 0, 0, -100], [-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0, -100], [-100, 31, 32, 32, -100, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, -100, 32, -100], [-100, 0, 0, 0, 11, -100, 12, 0, 31, 32, 32, 32, 32, 32, -100, 32, 0, -100], [-100, 0, 0, 0, 0, 0, 0, 0, -100, 0, 0, 0, -100, -100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 32, 32, 0, 0, 0, 0, 0, -100]]}
+ 2022-11-16 17:07:06,902 - transformers.configuration_utils - INFO - loading configuration file /content/NER-System/models/microsoft/deberta-base_1668615764.565312/checkpoint-14100/config.json
+ 2022-11-16 17:07:06,904 - transformers.configuration_utils - INFO - Model config DebertaConfig {
+   "_name_or_path": "/content/NER-System/models/microsoft/deberta-base_1668615764.565312/checkpoint-14100",
+   "architectures": [
+     "DebertaForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "O",
+     "1": "B-PERSON",
+     "2": "I-PERSON",
+     "3": "B-NORP",
+     "4": "I-NORP",
+     "5": "B-FAC",
+     "6": "I-FAC",
+     "7": "B-ORG",
+     "8": "I-ORG",
+     "9": "B-GPE",
+     "10": "I-GPE",
+     "11": "B-LOC",
+     "12": "I-LOC",
+     "13": "B-PRODUCT",
+     "14": "I-PRODUCT",
+     "15": "B-DATE",
+     "16": "I-DATE",
+     "17": "B-TIME",
+     "18": "I-TIME",
+     "19": "B-PERCENT",
+     "20": "I-PERCENT",
+     "21": "B-MONEY",
+     "22": "I-MONEY",
+     "23": "B-QUANTITY",
+     "24": "I-QUANTITY",
+     "25": "B-ORDINAL",
+     "26": "I-ORDINAL",
+     "27": "B-CARDINAL",
+     "28": "I-CARDINAL",
+     "29": "B-EVENT",
+     "30": "I-EVENT",
+     "31": "B-WORK_OF_ART",
+     "32": "I-WORK_OF_ART",
+     "33": "B-LAW",
+     "34": "I-LAW",
+     "35": "B-LANGUAGE",
+     "36": "I-LANGUAGE"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-CARDINAL": 27,
+     "B-DATE": 15,
+     "B-EVENT": 29,
+     "B-FAC": 5,
+     "B-GPE": 9,
+     "B-LANGUAGE": 35,
+     "B-LAW": 33,
+     "B-LOC": 11,
+     "B-MONEY": 21,
+     "B-NORP": 3,
+     "B-ORDINAL": 25,
+     "B-ORG": 7,
+     "B-PERCENT": 19,
+     "B-PERSON": 1,
+     "B-PRODUCT": 13,
+     "B-QUANTITY": 23,
+     "B-TIME": 17,
+     "B-WORK_OF_ART": 31,
+     "I-CARDINAL": 28,
+     "I-DATE": 16,
+     "I-EVENT": 30,
+     "I-FAC": 6,
+     "I-GPE": 10,
+     "I-LANGUAGE": 36,
+     "I-LAW": 34,
+     "I-LOC": 12,
+     "I-MONEY": 22,
+     "I-NORP": 4,
+     "I-ORDINAL": 26,
+     "I-ORG": 8,
+     "I-PERCENT": 20,
+     "I-PERSON": 2,
+     "I-PRODUCT": 14,
+     "I-QUANTITY": 24,
+     "I-TIME": 18,
+     "I-WORK_OF_ART": 32,
+     "O": 0
+   },
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 768,
+   "pos_att_type": [
+     "c2p",
+     "p2c"
+   ],
+   "position_biased_input": false,
+   "relative_attention": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.23.0",
+   "type_vocab_size": 0,
+   "vocab_size": 50265
+ }
+
+ 2022-11-16 17:07:06,933 - transformers.modeling_utils - INFO - loading weights file /content/NER-System/models/microsoft/deberta-base_1668615764.565312/checkpoint-14100/pytorch_model.bin
+ 2022-11-16 17:07:08,422 - transformers.modeling_utils - INFO - All model checkpoint weights were used when initializing DebertaForTokenClassification.
+
+ 2022-11-16 17:07:08,422 - transformers.modeling_utils - INFO - All the weights of DebertaForTokenClassification were initialized from the model checkpoint at /content/NER-System/models/microsoft/deberta-base_1668615764.565312/checkpoint-14100.
+ If your task is similar to the task the model of the checkpoint was trained on, you can already use DebertaForTokenClassification for predictions without further training.
+ 2022-11-16 17:07:08,500 - __main__ - INFO - DebertaForTokenClassification(
+   (deberta): DebertaModel(
+     (embeddings): DebertaEmbeddings(
+       (word_embeddings): Embedding(50265, 768, padding_idx=0)
+       (LayerNorm): DebertaLayerNorm()
+       (dropout): StableDropout()
+     )
+     (encoder): DebertaEncoder(
+       (layer): ModuleList(
+         (0): DebertaLayer(
+           (attention): DebertaAttention(
+             (self): DisentangledSelfAttention(
+               (in_proj): Linear(in_features=768, out_features=2304, bias=False)
+               (pos_dropout): StableDropout()
+               (pos_proj): Linear(in_features=768, out_features=768, bias=False)
+               (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
+               (dropout): StableDropout()
+             )
+             (output): DebertaSelfOutput(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): DebertaLayerNorm()
+               (dropout): StableDropout()
+             )
+           )
+           (intermediate): DebertaIntermediate(
+             (dense): Linear(in_features=768, out_features=3072, bias=True)
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): DebertaOutput(
+             (dense): Linear(in_features=3072, out_features=768, bias=True)
+             (LayerNorm): DebertaLayerNorm()
+             (dropout): StableDropout()
+           )
+         )
+         (1): DebertaLayer(
+           (attention): DebertaAttention(
+             (self): DisentangledSelfAttention(
+               (in_proj): Linear(in_features=768, out_features=2304, bias=False)
+               (pos_dropout): StableDropout()
+               (pos_proj): Linear(in_features=768, out_features=768, bias=False)
+               (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
+               (dropout): StableDropout()
+             )
+             (output): DebertaSelfOutput(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): DebertaLayerNorm()
+               (dropout): StableDropout()
+             )
+           )
+           (intermediate): DebertaIntermediate(
+             (dense): Linear(in_features=768, out_features=3072, bias=True)
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): DebertaOutput(
+             (dense): Linear(in_features=3072, out_features=768, bias=True)
+             (LayerNorm): DebertaLayerNorm()
+             (dropout): StableDropout()
+           )
+         )
+         (2): DebertaLayer(
+           (attention): DebertaAttention(
+             (self): DisentangledSelfAttention(
+               (in_proj): Linear(in_features=768, out_features=2304, bias=False)
+               (pos_dropout): StableDropout()
+               (pos_proj): Linear(in_features=768, out_features=768, bias=False)
+               (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
+               (dropout): StableDropout()
+             )
+             (output): DebertaSelfOutput(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): DebertaLayerNorm()
+               (dropout): StableDropout()
+             )
+           )
+           (intermediate): DebertaIntermediate(
+             (dense): Linear(in_features=768, out_features=3072, bias=True)
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): DebertaOutput(
+             (dense): Linear(in_features=3072, out_features=768, bias=True)
+             (LayerNorm): DebertaLayerNorm()
+             (dropout): StableDropout()
+           )
+         )
+         (3): DebertaLayer(
+           (attention): DebertaAttention(
+             (self): DisentangledSelfAttention(
+               (in_proj): Linear(in_features=768, out_features=2304, bias=False)
+               (pos_dropout): StableDropout()
+               (pos_proj): Linear(in_features=768, out_features=768, bias=False)
+               (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
+               (dropout): StableDropout()
+             )
+             (output): DebertaSelfOutput(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): DebertaLayerNorm()
+               (dropout): StableDropout()
+             )
+           )
+           (intermediate): DebertaIntermediate(
+             (dense): Linear(in_features=768, out_features=3072, bias=True)
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): DebertaOutput(
+             (dense): Linear(in_features=3072, out_features=768, bias=True)
+             (LayerNorm): DebertaLayerNorm()
+             (dropout): StableDropout()
+           )
+         )
+         (4): DebertaLayer(
+           (attention): DebertaAttention(
+             (self): DisentangledSelfAttention(
+               (in_proj): Linear(in_features=768, out_features=2304, bias=False)
+               (pos_dropout): StableDropout()
+               (pos_proj): Linear(in_features=768, out_features=768, bias=False)
+               (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
+               (dropout): StableDropout()
+             )
+             (output): DebertaSelfOutput(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): DebertaLayerNorm()
+               (dropout): StableDropout()
+             )
+           )
+           (intermediate): DebertaIntermediate(
+             (dense): Linear(in_features=768, out_features=3072, bias=True)
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): DebertaOutput(
+             (dense): Linear(in_features=3072, out_features=768, bias=True)
+             (LayerNorm): DebertaLayerNorm()
+             (dropout): StableDropout()
+           )
+         )
+         (5): DebertaLayer(
+           (attention): DebertaAttention(
+             (self): DisentangledSelfAttention(
+               (in_proj): Linear(in_features=768, out_features=2304, bias=False)
+               (pos_dropout): StableDropout()
+               (pos_proj): Linear(in_features=768, out_features=768, bias=False)
+               (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
+               (dropout): StableDropout()
+             )
+             (output): DebertaSelfOutput(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): DebertaLayerNorm()
+               (dropout): StableDropout()
+             )
+           )
+           (intermediate): DebertaIntermediate(
+             (dense): Linear(in_features=768, out_features=3072, bias=True)
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): DebertaOutput(
+             (dense): Linear(in_features=3072, out_features=768, bias=True)
+             (LayerNorm): DebertaLayerNorm()
+             (dropout): StableDropout()
+           )
+         )
+         (6): DebertaLayer(
+           (attention): DebertaAttention(
+             (self): DisentangledSelfAttention(
+               (in_proj): Linear(in_features=768, out_features=2304, bias=False)
+               (pos_dropout): StableDropout()
+               (pos_proj): Linear(in_features=768, out_features=768, bias=False)
+               (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
+               (dropout): StableDropout()
+             )
+             (output): DebertaSelfOutput(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): DebertaLayerNorm()
+               (dropout): StableDropout()
+             )
+           )
+           (intermediate): DebertaIntermediate(
+             (dense): Linear(in_features=768, out_features=3072, bias=True)
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): DebertaOutput(
+             (dense): Linear(in_features=3072, out_features=768, bias=True)
+             (LayerNorm): DebertaLayerNorm()
+             (dropout): StableDropout()
+           )
+         )
+         (7): DebertaLayer(
+           (attention): DebertaAttention(
+             (self): DisentangledSelfAttention(
+               (in_proj): Linear(in_features=768, out_features=2304, bias=False)
+               (pos_dropout): StableDropout()
+               (pos_proj): Linear(in_features=768, out_features=768, bias=False)
+               (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
+               (dropout): StableDropout()
+             )
+             (output): DebertaSelfOutput(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): DebertaLayerNorm()
+               (dropout): StableDropout()
+             )
+           )
+           (intermediate): DebertaIntermediate(
+             (dense): Linear(in_features=768, out_features=3072, bias=True)
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): DebertaOutput(
+             (dense): Linear(in_features=3072, out_features=768, bias=True)
+             (LayerNorm): DebertaLayerNorm()
+             (dropout): StableDropout()
+           )
+         )
+         (8): DebertaLayer(
+           (attention): DebertaAttention(
+             (self): DisentangledSelfAttention(
+               (in_proj): Linear(in_features=768, out_features=2304, bias=False)
+               (pos_dropout): StableDropout()
+               (pos_proj): Linear(in_features=768, out_features=768, bias=False)
+               (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
+               (dropout): StableDropout()
+             )
+             (output): DebertaSelfOutput(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): DebertaLayerNorm()
+               (dropout): StableDropout()
+             )
+           )
+           (intermediate): DebertaIntermediate(
+             (dense): Linear(in_features=768, out_features=3072, bias=True)
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): DebertaOutput(
+             (dense): Linear(in_features=3072, out_features=768, bias=True)
+             (LayerNorm): DebertaLayerNorm()
+             (dropout): StableDropout()
+           )
+         )
+         (9): DebertaLayer(
+           (attention): DebertaAttention(
+             (self): DisentangledSelfAttention(
+               (in_proj): Linear(in_features=768, out_features=2304, bias=False)
+               (pos_dropout): StableDropout()
+               (pos_proj): Linear(in_features=768, out_features=768, bias=False)
+               (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
+               (dropout): StableDropout()
+             )
+             (output): DebertaSelfOutput(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): DebertaLayerNorm()
+               (dropout): StableDropout()
+             )
+           )
+           (intermediate): DebertaIntermediate(
+             (dense): Linear(in_features=768, out_features=3072, bias=True)
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): DebertaOutput(
+             (dense): Linear(in_features=3072, out_features=768, bias=True)
+             (LayerNorm): DebertaLayerNorm()
+             (dropout): StableDropout()
+           )
+         )
+         (10): DebertaLayer(
+           (attention): DebertaAttention(
+             (self): DisentangledSelfAttention(
+               (in_proj): Linear(in_features=768, out_features=2304, bias=False)
+               (pos_dropout): StableDropout()
+               (pos_proj): Linear(in_features=768, out_features=768, bias=False)
+               (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
+               (dropout): StableDropout()
+             )
+             (output): DebertaSelfOutput(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): DebertaLayerNorm()
+               (dropout): StableDropout()
+             )
+           )
+           (intermediate): DebertaIntermediate(
+             (dense): Linear(in_features=768, out_features=3072, bias=True)
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): DebertaOutput(
+             (dense): Linear(in_features=3072, out_features=768, bias=True)
+             (LayerNorm): DebertaLayerNorm()
+             (dropout): StableDropout()
+           )
+         )
+         (11): DebertaLayer(
+           (attention): DebertaAttention(
+             (self): DisentangledSelfAttention(
+               (in_proj): Linear(in_features=768, out_features=2304, bias=False)
+               (pos_dropout): StableDropout()
+               (pos_proj): Linear(in_features=768, out_features=768, bias=False)
+               (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
+               (dropout): StableDropout()
+             )
+             (output): DebertaSelfOutput(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): DebertaLayerNorm()
+               (dropout): StableDropout()
+             )
+           )
+           (intermediate): DebertaIntermediate(
+             (dense): Linear(in_features=768, out_features=3072, bias=True)
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): DebertaOutput(
+             (dense): Linear(in_features=3072, out_features=768, bias=True)
+             (LayerNorm): DebertaLayerNorm()
+             (dropout): StableDropout()
+           )
+         )
+       )
+       (rel_embeddings): Embedding(1024, 768)
+     )
+   )
+   (dropout): Dropout(p=0.1, inplace=False)
+   (classifier): Linear(in_features=768, out_features=37, bias=True)
+ )
+ 2022-11-16 17:07:08,521 - __main__ - INFO - CONFIGS:{
+   "output_dir": "./eval_test1",
+   "per_device_train_batch_size": 16,
+   "per_device_eval_batch_size": 16,
+   "save_total_limit": 2,
+   "num_train_epochs": 3,
+   "seed": 1,
+   "load_best_model_at_end": true,
+   "evaluation_strategy": "epoch",
+   "save_strategy": "epoch",
+   "learning_rate": 2e-05,
+   "weight_decay": 0.01,
+   "fp16": true,
+   "logging_steps": 469.0
+ }
+ 2022-11-16 17:07:08,522 - transformers.training_args - INFO - PyTorch: setting up devices
+ 2022-11-16 17:07:08,557 - transformers.training_args - INFO - The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
+ 2022-11-16 17:07:11,000 - transformers.trainer - INFO - Using cuda_amp half precision backend
+ 2022-11-16 17:07:11,001 - __main__ - INFO - [[ MODEL EVALUATION ]]
+ 2022-11-16 17:07:11,001 - transformers.trainer - INFO - The following columns in the evaluation set don't have a corresponding argument in `DebertaForTokenClassification.forward` and have been ignored: words, ner_tags, id. If words, ner_tags, id are not expected by `DebertaForTokenClassification.forward`, you can safely ignore this message.
+ 2022-11-16 17:07:11,004 - transformers.trainer - INFO - ***** Running Evaluation *****
+ 2022-11-16 17:07:11,004 - transformers.trainer - INFO - Num examples = 9479
+ 2022-11-16 17:07:11,004 - transformers.trainer - INFO - Batch size = 16
+ 2022-11-16 17:07:11,007 - transformers.tokenization_utils_base - WARNING - You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
+ 2022-11-16 17:07:33,916 - __main__ - INFO - {'eval_loss': 0.06115942820906639, 'eval_precision': 0.8953762782973517, 'eval_recall': 0.9100115483699032, 'eval_f1': 0.902634593356243, 'eval_accuracy': 0.9848035428915137, 'eval_runtime': 22.906, 'eval_samples_per_second': 413.822, 'eval_steps_per_second': 25.888, 'step': 0}
+ 2022-11-16 17:07:33,916 - transformers.trainer - INFO - The following columns in the test set don't have a corresponding argument in `DebertaForTokenClassification.forward` and have been ignored: words, ner_tags, id. If words, ner_tags, id are not expected by `DebertaForTokenClassification.forward`, you can safely ignore this message.
+ 2022-11-16 17:07:33,918 - transformers.trainer - INFO - ***** Running Prediction *****
+ 2022-11-16 17:07:33,918 - transformers.trainer - INFO - Num examples = 9479
+ 2022-11-16 17:07:33,918 - transformers.trainer - INFO - Batch size = 16
+ 2022-11-16 17:07:59,630 - __main__ - INFO -               precision    recall  f1-score   support
+
+      CARDINAL       0.86      0.87      0.86       935
+          DATE       0.85      0.89      0.87      1602
+         EVENT       0.65      0.78      0.71        63
+           FAC       0.74      0.80      0.77       135
+           GPE       0.97      0.96      0.96      2240
+      LANGUAGE       0.83      0.68      0.75        22
+           LAW       0.71      0.68      0.69        40
+           LOC       0.74      0.77      0.76       179
+         MONEY       0.88      0.90      0.89       314
+          NORP       0.94      0.97      0.95       841
+       ORDINAL       0.79      0.87      0.83       195
+           ORG       0.92      0.92      0.92      1795
+       PERCENT       0.92      0.92      0.92       349
+        PERSON       0.95      0.95      0.95      1988
+       PRODUCT       0.65      0.76      0.70        76
+      QUANTITY       0.77      0.82      0.80       105
+          TIME       0.62      0.65      0.63       212
+   WORK_OF_ART       0.64      0.69      0.66       166
+
+     micro avg       0.90      0.91      0.90     11257
+     macro avg       0.80      0.83      0.81     11257
+  weighted avg       0.90      0.91      0.90     11257
+
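The tokenized samples logged above show the labeling convention: special tokens and subword continuations get `-100`, which PyTorch's cross-entropy loss ignores, while each word's tag lands on its first subword (compare the `word_ids` line `[None, 0, 1, ...]` with the `labels` field). A minimal sketch of that alignment, assuming a fast tokenizer with `add_prefix_space=True` as in the committed tokenizer_config; the helper name `align_labels` is mine:

```python
# Sketch of the -100 label alignment visible in the log above.
# Requires a fast tokenizer, since word_ids() only exists there.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-base",
                                          add_prefix_space=True)

def align_labels(words, ner_tags):
    enc = tokenizer(words, is_split_into_words=True)
    labels, prev = [], None
    for wid in enc.word_ids():
        if wid is None:            # [CLS]/[SEP]/[PAD] -> ignored by the loss
            labels.append(-100)
        elif wid != prev:          # first subword carries the word's tag
            labels.append(ner_tags[wid])
        else:                      # subword continuation -> ignored
            labels.append(-100)
        prev = wid
    return labels

# Second sentence from the log; "Across China" is B-ORG (7), I-ORG (8).
words = ["We", "respectfully", "invite", "you", "to", "watch", "a",
         "special", "edition", "of", "Across", "China", "."]
print(align_labels(words, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0]))
# [-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0, -100], matching the log
```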
scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1de1151bb371c7a9fcf0cdf747ea19c1e78511e69dbbc399a1e42e08562f77d2
+ size 559
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:31f8c1b6927b129b6987f9b4f5bfb276f770a427eb2d042836118c5cfe4e18a9
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "[PAD]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,67 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": true,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "__type": "AddedToken",
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "do_lower_case": false,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "mask_token": {
+     "__type": "AddedToken",
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "model_max_length": 512,
+   "name_or_path": "microsoft/deberta-base",
+   "pad_token": {
+     "__type": "AddedToken",
+     "content": "[PAD]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "__type": "AddedToken",
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "special_tokens_map_file": null,
+   "tokenizer_class": "DebertaTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "vocab_type": "gpt2"
+ }
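Note the combination this file encodes: a GPT-2 style byte-level BPE (`"vocab_type": "gpt2"`, `"add_prefix_space": true`, hence the `Ġ` markers in the run log) with BERT-style `[CLS]`/`[SEP]` special tokens. A small sketch reproducing the first tokenized example from the log; the base-model id stands in for this repo's tokenizer files:

```python
# Sketch: DeBERTa pairs a byte-level BPE with BERT-style special tokens.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("microsoft/deberta-base",
                                    add_prefix_space=True)
enc = tok("What kind of memory ?".split(), is_split_into_words=True)
print(enc["input_ids"])
# [1, 653, 761, 9, 3783, 17487, 2] -- as in the run log
print(tok.convert_ids_to_tokens(enc["input_ids"]))
# ['[CLS]', 'ĠWhat', 'Ġkind', 'Ġof', 'Ġmemory', 'Ġ?', '[SEP]']
```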
trainer_state.json ADDED
@@ -0,0 +1,232 @@
+ {
+   "best_metric": 0.05693262815475464,
+   "best_model_checkpoint": "./models/microsoft/deberta-base_1668615764.565312/checkpoint-9400",
+   "epoch": 3.0,
+   "global_step": 14100,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.1,
+       "learning_rate": 1.9341843971631205e-05,
+       "loss": 0.2396,
+       "step": 469
+     },
+     {
+       "epoch": 0.2,
+       "learning_rate": 1.8676595744680853e-05,
+       "loss": 0.0934,
+       "step": 938
+     },
+     {
+       "epoch": 0.3,
+       "learning_rate": 1.8011347517730498e-05,
+       "loss": 0.076,
+       "step": 1407
+     },
+     {
+       "epoch": 0.4,
+       "learning_rate": 1.7346099290780143e-05,
+       "loss": 0.0742,
+       "step": 1876
+     },
+     {
+       "epoch": 0.5,
+       "learning_rate": 1.6680851063829788e-05,
+       "loss": 0.0697,
+       "step": 2345
+     },
+     {
+       "epoch": 0.6,
+       "learning_rate": 1.6015602836879433e-05,
+       "loss": 0.0612,
+       "step": 2814
+     },
+     {
+       "epoch": 0.7,
+       "learning_rate": 1.5350354609929078e-05,
+       "loss": 0.0645,
+       "step": 3283
+     },
+     {
+       "epoch": 0.8,
+       "learning_rate": 1.4685106382978724e-05,
+       "loss": 0.0558,
+       "step": 3752
+     },
+     {
+       "epoch": 0.9,
+       "learning_rate": 1.401985815602837e-05,
+       "loss": 0.0559,
+       "step": 4221
+     },
+     {
+       "epoch": 1.0,
+       "learning_rate": 1.3354609929078016e-05,
+       "loss": 0.0533,
+       "step": 4690
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.9835180063569192,
+       "eval_f1": 0.8845294916144851,
+       "eval_loss": 0.05832871422171593,
+       "eval_precision": 0.8721934369602763,
+       "eval_recall": 0.8972195078617748,
+       "eval_runtime": 22.9422,
+       "eval_samples_per_second": 413.168,
+       "eval_steps_per_second": 25.848,
+       "step": 4700
+     },
+     {
+       "epoch": 1.1,
+       "learning_rate": 1.268936170212766e-05,
+       "loss": 0.0432,
+       "step": 5159
+     },
+     {
+       "epoch": 1.2,
+       "learning_rate": 1.2024113475177306e-05,
+       "loss": 0.0426,
+       "step": 5628
+     },
+     {
+       "epoch": 1.3,
+       "learning_rate": 1.135886524822695e-05,
+       "loss": 0.0416,
+       "step": 6097
+     },
+     {
+       "epoch": 1.4,
+       "learning_rate": 1.0693617021276597e-05,
+       "loss": 0.042,
+       "step": 6566
+     },
+     {
+       "epoch": 1.5,
+       "learning_rate": 1.0028368794326242e-05,
+       "loss": 0.0413,
+       "step": 7035
+     },
+     {
+       "epoch": 1.6,
+       "learning_rate": 9.363120567375887e-06,
+       "loss": 0.0388,
+       "step": 7504
+     },
+     {
+       "epoch": 1.7,
+       "learning_rate": 8.697872340425533e-06,
+       "loss": 0.0369,
+       "step": 7973
+     },
+     {
+       "epoch": 1.8,
+       "learning_rate": 8.032624113475178e-06,
+       "loss": 0.039,
+       "step": 8442
+     },
+     {
+       "epoch": 1.9,
+       "learning_rate": 7.367375886524823e-06,
+       "loss": 0.036,
+       "step": 8911
+     },
+     {
+       "epoch": 2.0,
+       "learning_rate": 6.702127659574469e-06,
+       "loss": 0.0397,
+       "step": 9380
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.9839366902741495,
+       "eval_f1": 0.8953622417309568,
+       "eval_loss": 0.05693262815475464,
+       "eval_precision": 0.8937771089669824,
+       "eval_recall": 0.8969530070178555,
+       "eval_runtime": 22.8104,
+       "eval_samples_per_second": 415.555,
+       "eval_steps_per_second": 25.997,
+       "step": 9400
+     },
+     {
+       "epoch": 2.1,
+       "learning_rate": 6.036879432624114e-06,
+       "loss": 0.0263,
+       "step": 9849
+     },
+     {
+       "epoch": 2.2,
+       "learning_rate": 5.37163120567376e-06,
+       "loss": 0.0252,
+       "step": 10318
+     },
+     {
+       "epoch": 2.3,
+       "learning_rate": 4.707801418439717e-06,
+       "loss": 0.0254,
+       "step": 10787
+     },
+     {
+       "epoch": 2.39,
+       "learning_rate": 4.042553191489362e-06,
+       "loss": 0.028,
+       "step": 11256
+     },
+     {
+       "epoch": 2.49,
+       "learning_rate": 3.377304964539007e-06,
+       "loss": 0.0256,
+       "step": 11725
+     },
+     {
+       "epoch": 2.59,
+       "learning_rate": 2.7120567375886527e-06,
+       "loss": 0.0242,
+       "step": 12194
+     },
+     {
+       "epoch": 2.69,
+       "learning_rate": 2.046808510638298e-06,
+       "loss": 0.0287,
+       "step": 12663
+     },
+     {
+       "epoch": 2.79,
+       "learning_rate": 1.3815602836879433e-06,
+       "loss": 0.0242,
+       "step": 13132
+     },
+     {
+       "epoch": 2.89,
+       "learning_rate": 7.177304964539007e-07,
+       "loss": 0.025,
+       "step": 13601
+     },
+     {
+       "epoch": 2.99,
+       "learning_rate": 5.248226950354611e-08,
+       "loss": 0.0266,
+       "step": 14070
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.9848035428915137,
+       "eval_f1": 0.902634593356243,
+       "eval_loss": 0.06115942820906639,
+       "eval_precision": 0.8953762782973517,
+       "eval_recall": 0.9100115483699032,
+       "eval_runtime": 22.4809,
+       "eval_samples_per_second": 421.647,
+       "eval_steps_per_second": 26.378,
+       "step": 14100
+     }
+   ],
+   "max_steps": 14100,
+   "num_train_epochs": 3,
+   "total_flos": 7109576555415114.0,
+   "trial_name": null,
+   "trial_params": null
+ }
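`trainer_state.json` keeps the full training trace: a loss/learning-rate entry every 469 steps and an eval entry per epoch, with the best checkpoint by eval_loss (checkpoint-9400, 0.0569) recorded at the top. A minimal sketch for pulling the per-epoch eval metrics back out, assuming the file sits in the current directory:

```python
# Sketch: list per-epoch eval metrics from trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_f1" in entry:  # eval entries; the rest carry training loss/lr
        print(f"epoch {entry['epoch']:.0f}  step {entry['step']:5d}  "
              f"eval_loss {entry['eval_loss']:.4f}  eval_f1 {entry['eval_f1']:.4f}")
# epoch 1 -> f1 0.8845, epoch 2 -> f1 0.8954, epoch 3 -> f1 0.9026
```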
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d368503b0706bb556b9151f13917aa8e5a85e45f4a65d7150cd257a0d68c42a
+ size 3439
vocab.json ADDED
The diff for this file is too large to render. See raw diff