add model
Browse files
- 0_Transformer/config.json +23 -0
- 0_Transformer/pytorch_model.bin +3 -0
- 0_Transformer/sentence_bert_config.json +3 -0
- 0_Transformer/sentencepiece.bpe.model +3 -0
- 0_Transformer/special_tokens_map.json +1 -0
- 0_Transformer/tokenizer_config.json +1 -0
- 1_Pooling/config.json +7 -0
- config.json +3 -0
- modules.json +14 -0
- mse_evaluation_Dev-MSE-evaluator_results.csv +127 -0
0_Transformer/config.json
ADDED
@@ -0,0 +1,23 @@
+{
+  "architectures": [
+    "XLMRobertaModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "xlm-roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 1,
+  "type_vocab_size": 1,
+  "vocab_size": 250002
+}
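
This is the standard XLM-RoBERTa base configuration (12 layers, 12 heads, hidden size 768, 250k-token vocabulary). A minimal sketch of loading the encoder directly with Hugging Face transformers; the local path "0_Transformer" assumes this repository has been cloned (with git-lfs) into the working directory:

# Sketch: load the distilled XLM-R encoder with Hugging Face transformers.
# Path "0_Transformer" assumes a local clone of this repo.
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("0_Transformer")
model = AutoModel.from_pretrained("0_Transformer")

inputs = tokenizer("Hello world", return_tensors="pt")
outputs = model(**inputs)
print(outputs.last_hidden_state.shape)  # (1, seq_len, 768), per hidden_size above
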
0_Transformer/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7ed7f322684e11c07b88e96f50a49a6cb92b8b5339923d89d2d7eb683dcc5e2
+size 1112259151
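
The repository stores only this three-line Git LFS pointer; the actual ~1.1 GB checkpoint is fetched at clone time. A minimal sketch for checking that a fetched file matches the pointer's oid and size (the path assumes the cloned layout above):

# Sketch: verify pytorch_model.bin against the LFS pointer's sha256 and size.
# Assumes the file was fully resolved by git-lfs, not left as the 3-line pointer.
import hashlib

EXPECTED_OID = "f7ed7f322684e11c07b88e96f50a49a6cb92b8b5339923d89d2d7eb683dcc5e2"
EXPECTED_SIZE = 1112259151

h = hashlib.sha256()
size = 0
with open("0_Transformer/pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, "size mismatch: LFS pointer not resolved?"
assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch"
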
0_Transformer/sentence_bert_config.json
ADDED
@@ -0,0 +1,3 @@
+{
+  "max_seq_length": 128
+}
0_Transformer/sentencepiece.bpe.model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
0_Transformer/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
+{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": "<mask>"}
0_Transformer/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+{"model_max_length": 512, "special_tokens_map_file": "/content/gdrive/My Drive/en2fin-sbert/output/knowledge-distillation/XLM-R-distilled-once/0_Transformer/special_tokens_map.json", "full_tokenizer_file": null}
1_Pooling/config.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "word_embedding_dimension": 768,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": true,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false
+}
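
Only mean-token pooling is enabled here, so each sentence embedding is the attention-masked average of the 768-dimensional token embeddings. A minimal sketch of that computation (illustrative, not the library's internal code):

# Sketch of masked mean pooling, as selected by "pooling_mode_mean_tokens".
# token_embeddings: (batch, seq_len, 768); attention_mask: (batch, seq_len).
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    mask = attention_mask.unsqueeze(-1).float()    # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)  # sum over non-padding tokens
    counts = mask.sum(dim=1).clamp(min=1e-9)       # token counts, guard against /0
    return summed / counts                         # (batch, 768)
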
config.json
ADDED
@@ -0,0 +1,3 @@
+{
+  "__version__": "0.3.9"
+}
modules.json
ADDED
@@ -0,0 +1,14 @@
+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "0_Transformer",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  }
+]
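
modules.json chains the Transformer and Pooling modules above, so the repository root loads directly as a sentence-transformers pipeline (inputs truncated at the max_seq_length of 128 from sentence_bert_config.json). A sketch, assuming a hypothetical local clone at ./en2fin-sbert:

# Sketch: load the two-module pipeline defined in modules.json.
# The local path "./en2fin-sbert" is a placeholder for a clone of this repo.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("./en2fin-sbert")
embeddings = model.encode(["This is an English sentence.",
                           "Tämä on suomenkielinen lause."])
print(embeddings.shape)  # (2, 768)
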
mse_evaluation_Dev-MSE-evaluator_results.csv
ADDED
@@ -0,0 +1,127 @@
+epoch,steps,MSE
+0,500,8.570442348718643
+0,1000,8.574279397726059
+0,1500,8.557721227407455
+0,2000,8.466233313083649
+0,2500,8.497065305709839
+0,3000,8.495021611452103
+0,3500,8.528045564889908
+0,4000,8.526290953159332
+0,4500,8.51152092218399
+0,5000,8.509846776723862
+0,5500,8.489316701889038
+0,6000,8.47911462187767
+0,6500,8.477255702018738
+0,7000,8.49146693944931
+0,7500,8.450501412153244
+0,8000,8.498767763376236
+0,8500,8.525879681110382
+0,9000,8.5193932056427
+0,9500,8.524371683597565
+0,10000,8.49359780550003
+0,10500,8.501896262168884
+0,11000,8.521968871355057
+0,11500,8.512929081916809
+0,12000,8.464228361845016
+0,-1,8.464228361845016
+1,500,8.439267426729202
+1,1000,8.435093611478806
+1,1500,8.464305847883224
+1,2000,8.429614454507828
+1,2500,8.444567769765854
+1,3000,8.424865454435349
+1,3500,8.420820534229279
+1,4000,8.424185961484909
+1,4500,8.416373282670975
+1,5000,8.425454050302505
+1,5500,8.41192752122879
+1,6000,8.403629809617996
+1,6500,8.41481164097786
+1,7000,8.419384062290192
+1,7500,8.419118076562881
+1,8000,8.427265286445618
+1,8500,8.4264375269413
+1,9000,8.400160074234009
+1,9500,8.40001180768013
+1,10000,8.39078426361084
+1,10500,8.387839049100876
+1,11000,8.400828391313553
+1,11500,8.38041678071022
+1,12000,8.375781774520874
+1,-1,8.375781774520874
+2,500,8.397626876831055
+2,1000,8.38230550289154
+2,1500,8.391416817903519
+2,2000,8.396975696086884
+2,2500,8.38283821940422
+2,3000,8.376853168010712
+2,3500,8.388946205377579
+2,4000,8.390841633081436
+2,4500,8.380627632141113
+2,5000,8.38095024228096
+2,5500,8.381462842226028
+2,6000,8.403825014829636
+2,6500,8.39417278766632
+2,7000,8.38434174656868
+2,7500,8.365185558795929
+2,8000,8.37533250451088
+2,8500,8.382804691791534
+2,9000,8.384855091571808
+2,9500,8.382391184568405
+2,10000,8.367933332920074
+2,10500,8.37000235915184
+2,11000,8.375443518161774
+2,11500,8.379511535167694
+2,12000,8.351945132017136
+2,-1,8.351945132017136
+3,500,8.353012055158615
+3,1000,8.358874171972275
+3,1500,8.356638997793198
+3,2000,8.383019268512726
+3,2500,8.342177420854568
+3,3000,8.356159180402756
+3,3500,8.349952101707458
+3,4000,8.343320339918137
+3,4500,8.349838852882385
+3,5000,8.35258811712265
+3,5500,8.359123021364212
+3,6000,8.366020023822784
+3,6500,8.366061002016068
+3,7000,8.35094228386879
+3,7500,8.335867524147034
+3,8000,8.344996720552444
+3,8500,8.35791900753975
+3,9000,8.349382132291794
+3,9500,8.332888036966324
+3,10000,8.34961012005806
+3,10500,8.342666923999786
+3,11000,8.32587331533432
+3,11500,8.321724086999893
+3,12000,8.316271752119064
+3,-1,8.316271752119064
+4,500,8.327758312225342
+4,1000,8.344313502311707
+4,1500,8.35057869553566
+4,2000,8.335026353597641
+4,2500,8.343841135501862
+4,3000,8.330678194761276
+4,3500,8.342430740594864
+4,4000,8.35268348455429
+4,4500,8.346807211637497
+4,5000,8.33790972828865
+4,5500,8.340930938720703
+4,6000,8.313610404729843
+4,6500,8.340232074260712
+4,7000,8.326788246631622
+4,7500,8.33984762430191
+4,8000,8.319726586341858
+4,8500,8.330386132001877
+4,9000,8.329372107982635
+4,9500,8.320283889770508
+4,10000,8.302289992570877
+4,10500,8.327770233154297
+4,11000,8.320759981870651
+4,11500,8.321399986743927
+4,12000,8.308279514312744
+4,-1,8.308279514312744
+5,500,8.304926007986069
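
The evaluator logs the dev-set MSE between teacher and student embeddings every 500 steps; rows with steps == -1 repeat the end-of-epoch value, and the magnitudes are consistent with the x100 scaling that sentence-transformers' MSEEvaluator applies. A sketch for plotting the distillation curve from this CSV:

# Sketch: plot the dev MSE curve from the evaluator results.
# Drops the steps == -1 end-of-epoch summary rows to avoid duplicate points.
import csv
import matplotlib.pyplot as plt

global_steps, mse = [], []
with open("mse_evaluation_Dev-MSE-evaluator_results.csv") as f:
    for row in csv.DictReader(f):
        if int(row["steps"]) == -1:
            continue
        # Each epoch logs up to step 12000 here, so flatten to a global axis.
        global_steps.append(int(row["epoch"]) * 12000 + int(row["steps"]))
        mse.append(float(row["MSE"]))

plt.plot(global_steps, mse)
plt.xlabel("training step")
plt.ylabel("dev MSE")
plt.show()
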