model update

- analogy.json +1 -0
- classification.json +1 -0
- config.json +31 -0
- merges.txt +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- trainer_config.json +1 -0
- validation_loss.json +1 -0
- vocab.json +0 -0
analogy.json
ADDED
@@ -0,0 +1 @@
+{"distance_function": "cosine_similarity", "sat/test": 0.7151335311572701, "sat/valid": 0.7297297297297297, "u2/test": 0.7192982456140351, "u2/valid": 0.5416666666666666, "u4/test": 0.6921296296296297, "u4/valid": 0.625, "google/test": 0.964, "google/valid": 1.0, "bats/test": 0.8126737076153419, "bats/valid": 0.8241206030150754, "sat_full": 0.7165775401069518}
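analogy.json reports accuracy on word-analogy benchmarks (SAT, U2, U4, Google, BATS), scored with the distance function named in the file. A minimal sketch of cosine-similarity analogy scoring, assuming the relation embeddings for the query pair and each candidate pair have already been computed (the function names and inputs here are illustrative, not part of this repository):

```python
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity between two relation-embedding vectors."""
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def solve_analogy(query_emb: np.ndarray, candidate_embs: list[np.ndarray]) -> int:
    """Return the index of the candidate pair closest to the query pair."""
    scores = [cosine_similarity(query_emb, c) for c in candidate_embs]
    return int(np.argmax(scores))
```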
classification.json
ADDED
@@ -0,0 +1 @@
+{"lexical_relation_classification/BLESS": {"classifier_config": {"activation": "relu", "alpha": 0.0001, "batch_size": "auto", "beta_1": 0.9, "beta_2": 0.999, "early_stopping": false, "epsilon": 1e-08, "hidden_layer_sizes": [100], "learning_rate": "constant", "learning_rate_init": 0.001, "max_fun": 15000, "max_iter": 200, "momentum": 0.9, "n_iter_no_change": 10, "nesterovs_momentum": true, "power_t": 0.5, "random_state": 0, "shuffle": true, "solver": "adam", "tol": 0.0001, "validation_fraction": 0.1, "verbose": false, "warm_start": false}, "test/accuracy": 0.9251167696248305, "test/f1_macro": 0.9196346516032302, "test/f1_micro": 0.9251167696248305, "test/p_macro": 0.9228914915504977, "test/p_micro": 0.9251167696248305, "test/r_macro": 0.9167390763548414, "test/r_micro": 0.9251167696248305}, "lexical_relation_classification/CogALexV": {"classifier_config": {"activation": "relu", "alpha": 0.0001, "batch_size": "auto", "beta_1": 0.9, "beta_2": 0.999, "early_stopping": false, "epsilon": 1e-08, "hidden_layer_sizes": [100], "learning_rate": "constant", "learning_rate_init": 0.001, "max_fun": 15000, "max_iter": 200, "momentum": 0.9, "n_iter_no_change": 10, "nesterovs_momentum": true, "power_t": 0.5, "random_state": 0, "shuffle": true, "solver": "adam", "tol": 0.0001, "validation_fraction": 0.1, "verbose": false, "warm_start": false}, "test/accuracy": 0.8607981220657277, "test/f1_macro": 0.7029050751301971, "test/f1_micro": 0.8607981220657277, "test/p_macro": 0.7313949055416683, "test/p_micro": 0.8607981220657277, "test/r_macro": 0.6804383005626553, "test/r_micro": 0.8607981220657277}, "lexical_relation_classification/EVALution": {"classifier_config": {"activation": "relu", "alpha": 0.0001, "batch_size": "auto", "beta_1": 0.9, "beta_2": 0.999, "early_stopping": false, "epsilon": 1e-08, "hidden_layer_sizes": [100], "learning_rate": "constant", "learning_rate_init": 0.001, "max_fun": 15000, "max_iter": 200, "momentum": 0.9, "n_iter_no_change": 10, "nesterovs_momentum": true, "power_t": 0.5, "random_state": 0, "shuffle": true, "solver": "adam", "tol": 0.0001, "validation_fraction": 0.1, "verbose": false, "warm_start": false}, "test/accuracy": 0.6841820151679306, "test/f1_macro": 0.6749077706444823, "test/f1_micro": 0.6841820151679306, "test/p_macro": 0.6822611969186971, "test/p_micro": 0.6841820151679306, "test/r_macro": 0.6691999606355179, "test/r_micro": 0.6841820151679306}, "lexical_relation_classification/K&H+N": {"classifier_config": {"activation": "relu", "alpha": 0.0001, "batch_size": "auto", "beta_1": 0.9, "beta_2": 0.999, "early_stopping": false, "epsilon": 1e-08, "hidden_layer_sizes": [100], "learning_rate": "constant", "learning_rate_init": 0.001, "max_fun": 15000, "max_iter": 200, "momentum": 0.9, "n_iter_no_change": 10, "nesterovs_momentum": true, "power_t": 0.5, "random_state": 0, "shuffle": true, "solver": "adam", "tol": 0.0001, "validation_fraction": 0.1, "verbose": false, "warm_start": false}, "test/accuracy": 0.9595882312026153, "test/f1_macro": 0.8789234569786473, "test/f1_micro": 0.9595882312026153, "test/p_macro": 0.8849555927607803, "test/p_micro": 0.9595882312026153, "test/r_macro": 0.8732016591900109, "test/r_micro": 0.9595882312026153}, "lexical_relation_classification/ROOT09": {"classifier_config": {"activation": "relu", "alpha": 0.0001, "batch_size": "auto", "beta_1": 0.9, "beta_2": 0.999, "early_stopping": false, "epsilon": 1e-08, "hidden_layer_sizes": [100], "learning_rate": "constant", "learning_rate_init": 0.001, "max_fun": 15000, "max_iter": 200, "momentum": 0.9, "n_iter_no_change": 10, "nesterovs_momentum": true, "power_t": 0.5, "random_state": 0, "shuffle": true, "solver": "adam", "tol": 0.0001, "validation_fraction": 0.1, "verbose": false, "warm_start": false}, "test/accuracy": 0.9103729238483234, "test/f1_macro": 0.9076447549069412, "test/f1_micro": 0.9103729238483234, "test/p_macro": 0.9086898349896138, "test/p_micro": 0.9103729238483234, "test/r_macro": 0.9069026663387998, "test/r_micro": 0.9103729238483234}}
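The classifier_config keys match scikit-learn's MLPClassifier hyperparameters (everything except the first few is the library default), so the lexical-relation-classification results can plausibly be reproduced with a setup like the sketch below. The feature matrices are assumed, not part of this file:

```python
from sklearn.neural_network import MLPClassifier

# Hyperparameters copied from classifier_config above; all other arguments
# are scikit-learn defaults for MLPClassifier.
clf = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation="relu",
    solver="adam",
    learning_rate_init=0.001,
    max_iter=200,
    random_state=0,
)

# X_train / y_train would be relation embeddings and lexical-relation labels
# (hypothetical names; how features are built is not recorded in this file).
# clf.fit(X_train, y_train)
# accuracy = clf.score(X_test, y_test)
```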
config.json
ADDED
@@ -0,0 +1,31 @@
+{
+  "_name_or_path": "roberta-large",
+  "architectures": [
+    "RobertaModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "relbert_config": {
+    "mode": "average",
+    "template": "I wasn\u2019t aware of this relationship, but I just read in the encyclopedia that <subj> is the <mask> of <obj>",
+    "template_mode": "manual"
+  },
+  "transformers_version": "4.6.1",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}
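config.json is a stock roberta-large configuration plus a relbert_config block holding the manual prompt template, so the checkpoint loads through the ordinary transformers API. A minimal sketch, with the repository id left as a placeholder since it is not named in this diff:

```python
from transformers import AutoModel, AutoTokenizer

repo_id = "path/to/this-repository"  # placeholder, not a confirmed model id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModel.from_pretrained(repo_id)

# Fill the manual template from relbert_config with a word pair; "<mask>" is
# RoBERTa's real mask token, so the tokenizer handles it directly.
template = ("I wasn\u2019t aware of this relationship, but I just read in the "
            "encyclopedia that <subj> is the <mask> of <obj>")
text = template.replace("<subj>", "Paris").replace("<obj>", "France")
inputs = tokenizer(text, return_tensors="pt")
outputs = model(**inputs)
# outputs.last_hidden_state holds the token embeddings; "mode": "average"
# suggests the relation embedding is an average pool over these states.
```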
merges.txt
ADDED
The diff for this file is too large to render. See raw diff.
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfe324fc457c22a64a75a79aa71b8a559ef77deb2afa0bd10c37ffad4791ac2f
+size 1421595889
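pytorch_model.bin is committed as a Git LFS pointer: the three lines give the spec version, the SHA-256 of the real weights, and their size in bytes (about 1.42 GB). After the actual file has been fetched (e.g. via git-lfs), the pointer can be verified with a short check like this (file path assumed):

```python
import hashlib
from pathlib import Path

path = Path("pytorch_model.bin")  # the real weights file, after `git lfs pull`
expected_oid = "dfe324fc457c22a64a75a79aa71b8a559ef77deb2afa0bd10c37ffad4791ac2f"
expected_size = 1421595889

assert path.stat().st_size == expected_size, "size mismatch"

# Hash in 1 MiB chunks to avoid loading ~1.4 GB into memory at once.
h = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
assert h.hexdigest() == expected_oid, "sha256 mismatch"
```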
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
+{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "roberta-large"}
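special_tokens_map.json and tokenizer_config.json together describe a standard roberta-large byte-level BPE tokenizer (note "lstrip": true on <mask>, so the mask token absorbs the preceding space). A quick sanity check that the special tokens round-trip, again with the repository id as a placeholder:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path/to/this-repository")  # placeholder
print(tokenizer.mask_token, tokenizer.mask_token_id)  # "<mask>" and its id
print(tokenizer.model_max_length)                     # 512, from tokenizer_config.json

# Locate the mask position in a filled template.
enc = tokenizer("Paris is the <mask> of France")
mask_pos = enc["input_ids"].index(tokenizer.mask_token_id)
```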
trainer_config.json
ADDED
@@ -0,0 +1 @@
+{"model": "roberta-large", "max_length": 64, "mode": "average", "data": "relbert/semeval2012_relational_similarity", "template_mode": "manual", "template": "I wasn\u2019t aware of this relationship, but I just read in the encyclopedia that <subj> is the <mask> of <obj>", "loss_function": "nce_logout", "temperature_nce_constant": 0.05, "temperature_nce_rank": {"min": 0.01, "max": 0.05, "type": "linear"}, "epoch": 29, "batch": 128, "lr": 5e-06, "lr_decay": false, "lr_warmup": 1, "weight_decay": 0, "random_seed": 0, "exclude_relation": null, "n_sample": 640, "gradient_accumulation": 8}
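trainer_config.json records the NCE training setup: max_length 64, batch 128 with gradient_accumulation 8 (an effective batch of 1024), lr 5e-06 at epoch 29, and a rank-dependent temperature swept linearly between 0.01 and 0.05. A sketch of what such a linear rank-temperature schedule might look like; the exact rank-to-temperature mapping used by the training code is an assumption, not documented in this file:

```python
def rank_temperature(rank: int, n_ranks: int,
                     t_min: float = 0.01, t_max: float = 0.05) -> float:
    """Linearly interpolate a temperature for a sample's rank.

    Assumed reading of temperature_nce_rank = {"min": 0.01, "max": 0.05,
    "type": "linear"}; the real mapping may differ.
    """
    if n_ranks <= 1:
        return t_min
    return t_min + (t_max - t_min) * rank / (n_ranks - 1)
```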
validation_loss.json
ADDED
@@ -0,0 +1 @@
+{"validation_loss": 4.891365619187944, "validation_data": "relbert/semeval2012_relational_similarity", "validation_data/exclude_relation": null}
vocab.json
ADDED
The diff for this file is too large to render. See raw diff.