deberta-v3-large-btc / config.json
{
"_name_or_path": "tner_ckpt/btc_deberta_v3_large/model_lzavco/epoch_5",
"architectures": [
"DebertaV2ForTokenClassification"
],
"attention_probs_dropout_prob": 0.1,
"crf_state_dict": {
"_constraint_mask": [
[
1.0,
1.0,
1.0,
1.0,
0.0,
0.0,
1.0,
0.0,
1.0
],
[
1.0,
1.0,
1.0,
0.0,
1.0,
0.0,
1.0,
0.0,
1.0
],
[
1.0,
1.0,
1.0,
0.0,
0.0,
1.0,
1.0,
0.0,
1.0
],
[
1.0,
1.0,
1.0,
1.0,
0.0,
0.0,
1.0,
0.0,
1.0
],
[
1.0,
1.0,
1.0,
0.0,
1.0,
0.0,
1.0,
0.0,
1.0
],
[
1.0,
1.0,
1.0,
0.0,
0.0,
1.0,
1.0,
0.0,
1.0
],
[
1.0,
1.0,
1.0,
0.0,
0.0,
0.0,
1.0,
0.0,
1.0
],
[
1.0,
1.0,
1.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0
],
[
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
]
],
"end_transitions": [
-1.0148895978927612,
1.0671864748001099,
-1.6164472103118896,
0.466050386428833,
-0.012239390052855015,
-1.1088250875473022,
0.40975475311279297
],
"start_transitions": [
0.47314950823783875,
0.201548233628273,
1.5476996898651123,
-1.4331055879592896,
-1.549608826637268,
0.6565876603126526,
0.8080530166625977
],
"transitions": [
[
-0.41058361530303955,
0.1702241599559784,
-0.39300161600112915,
-0.2474871426820755,
0.8343972563743591,
0.3032238185405731,
0.2561141848564148
],
[
-0.11347199231386185,
-0.24199432134628296,
0.031227607280015945,
0.3765334486961365,
0.019392093643546104,
0.7591969966888428,
0.5199909210205078
],
[
0.3379369378089905,
-0.743718147277832,
-0.1221601814031601,
-0.31503763794898987,
0.7697550058364868,
0.4699808657169342,
-0.07055890560150146
],
[
0.46931782364845276,
0.39986705780029297,
0.5056431889533997,
0.2596401870250702,
-0.05581130459904671,
-0.18991011381149292,
0.4412626326084137
],
[
-0.41374099254608154,
0.19209060072898865,
-0.4061834514141083,
-0.0016257184324786067,
-0.28092607855796814,
-0.21716837584972382,
0.23422983288764954
],
[
0.16249980032444,
-0.8426260352134705,
0.12414605170488358,
0.7118894457817078,
0.1571020483970642,
0.21087680757045746,
0.5860539674758911
],
[
-0.42638248205184937,
-0.16581180691719055,
0.51979660987854,
0.4090186059474945,
0.026761069893836975,
0.04246059060096741,
0.6112445592880249
]
]
},
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 1024,
"id2label": {
"0": "B-LOC",
"1": "B-ORG",
"2": "B-PER",
"3": "I-LOC",
"4": "I-ORG",
"5": "I-PER",
"6": "O"
},
"initializer_range": 0.02,
"intermediate_size": 4096,
"label2id": {
"B-LOC": 0,
"B-ORG": 1,
"B-PER": 2,
"I-LOC": 3,
"I-ORG": 4,
"I-PER": 5,
"O": 6
},
"layer_norm_eps": 1e-07,
"max_position_embeddings": 512,
"max_relative_positions": -1,
"model_type": "deberta-v2",
"norm_rel_ebd": "layer_norm",
"num_attention_heads": 16,
"num_hidden_layers": 24,
"pad_token_id": 0,
"pooler_dropout": 0,
"pooler_hidden_act": "gelu",
"pooler_hidden_size": 1024,
"pos_att_type": [
"p2c",
"c2p"
],
"position_biased_input": false,
"position_buckets": 256,
"relative_attention": true,
"share_att_key": true,
"torch_dtype": "float32",
"transformers_version": "4.20.1",
"type_vocab_size": 0,
"vocab_size": 128100
}
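
The 9x9 `_constraint_mask` above is larger than the 7x7 `transitions` matrix because it appears to follow the AllenNLP-style CRF convention of two extra virtual states, START (index 7) and END (index 8); that layout is an assumption inferred from the mask's shape, not stated in the config. Under BIO tagging, an `I-X` tag may only follow `B-X` or `I-X`. The following minimal sketch reconstructs the mask from `id2label` under those rules:

```python
# Sketch: rebuild the BIO constraint mask from id2label.
# Indices 0..6 are the tags in id2label; index 7 is a virtual START
# and index 8 a virtual END state (assumed AllenNLP-style convention).

id2label = {
    0: "B-LOC", 1: "B-ORG", 2: "B-PER",
    3: "I-LOC", 4: "I-ORG", 5: "I-PER", 6: "O",
}
n = len(id2label)
START, END = n, n + 1  # virtual states 7 and 8

def allowed(frm: str, to: str) -> bool:
    """BIO rule: I-X may only follow B-X or I-X; all else is free."""
    if to.startswith("I-"):
        return frm in (f"B-{to[2:]}", f"I-{to[2:]}")
    return True

mask = [[0.0] * (n + 2) for _ in range(n + 2)]
for i in range(n + 2):
    for j in range(n + 2):
        frm = id2label.get(i, "START" if i == START else "END")
        to = id2label.get(j, "START" if j == START else "END")
        if frm == "END" or to == "START":
            ok = False  # nothing leaves END; nothing enters START
        elif frm == "START":
            # a sequence must open with B-* or O, and cannot be empty
            ok = to != "END" and not to.startswith("I-")
        elif to == "END":
            ok = True   # any real tag may close the sequence
        else:
            ok = allowed(frm, to)
        mask[i][j] = float(ok)

# Under the stated assumptions, `mask` should reproduce the
# _constraint_mask stored in crf_state_dict above.
```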
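
For inference, the config loads with plain `transformers`; a sketch follows, assuming the hub id `tner/deberta-v3-large-btc` (inferred from the file path, not confirmed by the config). Note that `transformers` itself does not consume `crf_state_dict`: Viterbi decoding over the CRF is handled by the tner library, so the argmax below uses only the emission scores.

```python
# Sketch: token-classification inference from this config,
# assuming the hub id "tner/deberta-v3-large-btc".
from transformers import AutoConfig, AutoModelForTokenClassification, AutoTokenizer

model_id = "tner/deberta-v3-large-btc"  # assumed hub id
config = AutoConfig.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForTokenClassification.from_pretrained(model_id)

text = "Jacob Collier is a Grammy awarded artist from London."
inputs = tokenizer(text, return_tensors="pt")
logits = model(**inputs).logits          # (1, seq_len, 7) emission scores
pred_ids = logits.argmax(-1)[0].tolist() # greedy per-token labels, no CRF
print([config.id2label[i] for i in pred_ids])
```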