Spaces:
Sleeping
Sleeping
Gosse Minnema
committed on
Commit
·
05922fb
1
Parent(s):
7717281
Initial commit
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +1 -0
- Dockerfile +5 -0
- README.md +5 -6
- config/ace/ace.jsonnet +131 -0
- config/ace/ft.jsonnet +51 -0
- config/ace/pt.jsonnet +69 -0
- config/ace/rt.jsonnet +89 -0
- config/basic/basic.jsonnet +132 -0
- config/basic/ft.jsonnet +51 -0
- config/basic/pt.jsonnet +67 -0
- config/basic/rt.jsonnet +87 -0
- config/env.jsonnet +4 -0
- config/fn-evalita/evalita.framenet_xlmr.jsonnet +141 -0
- config/fn-evalita/evalita.it_mono.jsonnet +141 -0
- config/fn-evalita/evalita.vanilla_xlmr.jsonnet +141 -0
- config/fn-evalita/evalita_plus_fn.vanilla_xlmr.freeze.jsonnet +142 -0
- config/fn-evalita/evalita_plus_fn.vanilla_xlmr.jsonnet +141 -0
- config/fn-kicktionary/kicktionary.concat_clipped.vanilla_xlmr.jsonnet +141 -0
- config/fn-kicktionary/kicktionary.football_xlmr.jsonnet +141 -0
- config/fn-kicktionary/kicktionary.framenet_xlmr.jsonnet +141 -0
- config/fn-kicktionary/kicktionary.vanilla_xlmr.jsonnet +141 -0
- config/fn-sonar/sonar-a1.framenet_xlmr.jsonnet +141 -0
- config/fn-sonar/sonar-a1.sonar_plus_fn.vanilla_xlmr.jsonnet +142 -0
- config/fn-sonar/sonar-a1.vanilla_xlmr.jsonnet +141 -0
- config/fn-sonar/sonar-a2.framenet_xlmr.jsonnet +141 -0
- config/fn-sonar/sonar-a2.sonar_plus_fn.vanilla_xlmr.jsonnet +141 -0
- config/fn-sonar/sonar-a2.vanilla_xlmr.jsonnet +141 -0
- config/fn/fn.orig.jsonnet +139 -0
- config/fn/fn.train-football.jsonnet +142 -0
- config/fn/fn.train3.jsonnet +141 -0
- docs/data.md +68 -0
- docs/mapping.md +17 -0
- docs/training.md +65 -0
- evalita_scores.txt +0 -0
- model.mod.tar.gz +3 -0
- requirements.txt +15 -0
- scripts/__pycache__/predict_concrete.cpython-37.pyc +0 -0
- scripts/__pycache__/predict_concrete.cpython-38.pyc +0 -0
- scripts/__pycache__/predict_concrete.cpython-39.pyc +0 -0
- scripts/__pycache__/predict_force.cpython-39.pyc +0 -0
- scripts/__pycache__/repl.cpython-39.pyc +0 -0
- scripts/aida_experiment/predict_aida.py +42 -0
- scripts/aida_experiment/read_aida.py +107 -0
- scripts/aida_experiment/test_mapping.py +59 -0
- scripts/archive/eval_tie.py +50 -0
- scripts/archive/frame_similarity.py +143 -0
- scripts/archive/kairos_mapping.py +43 -0
- scripts/archive/onto_test.py +34 -0
- scripts/archive/predict_better.py +47 -0
- scripts/archive/predict_kairos.py +98 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.venv/
|
Dockerfile
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9
|
2 |
+
WORKDIR /app
|
3 |
+
ADD . /app
|
4 |
+
RUN pip install -r requirements.txt
|
5 |
+
CMD ["python", "-m", "sociolome.lome_webserver", "0.0.0.0"]
|
README.md
CHANGED
@@ -1,10 +1,9 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: yellow
|
5 |
-
colorTo:
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
-
|
9 |
-
|
10 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Sociofillmore Public
|
3 |
+
emoji: 💻
|
4 |
colorFrom: yellow
|
5 |
+
colorTo: red
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
+
app_port: 5000
|
9 |
+
---
|
|
config/ace/ace.jsonnet
ADDED
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
local dataset_path = env.str("DATA_PATH", "data/ace/events");
|
4 |
+
local ontology_path = "data/ace/ontology.tsv";
|
5 |
+
|
6 |
+
local debug = false;
|
7 |
+
|
8 |
+
# embedding
|
9 |
+
local label_dim = 64;
|
10 |
+
local pretrained_model = env.str("ENCODER", "roberta-large");
|
11 |
+
|
12 |
+
# module
|
13 |
+
local dropout = 0.2;
|
14 |
+
local bio_dim = 512;
|
15 |
+
local bio_layers = 2;
|
16 |
+
local span_typing_dims = [256, 256];
|
17 |
+
local event_smoothing_factor = env.json("SMOOTHING", "0.0");
|
18 |
+
local arg_smoothing_factor = env.json("SMOOTHING", "0.0");
|
19 |
+
local layer_fix = 0;
|
20 |
+
|
21 |
+
# training
|
22 |
+
local typing_loss_factor = 8.0;
|
23 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
24 |
+
local max_training_tokens = 512;
|
25 |
+
local max_inference_tokens = 1024;
|
26 |
+
local lr = env.json("LR", "1e-3");
|
27 |
+
local cuda_devices = env.json("CUDA_DEVICES", "[0]");
|
28 |
+
|
29 |
+
{
|
30 |
+
dataset_reader: {
|
31 |
+
type: "concrete",
|
32 |
+
debug: debug,
|
33 |
+
pretrained_model: pretrained_model,
|
34 |
+
ignore_label: false,
|
35 |
+
[ if debug then "max_instances" ]: 128,
|
36 |
+
event_smoothing_factor: event_smoothing_factor,
|
37 |
+
# Argument-label smoothing: use the dedicated `arg_smoothing_factor` local rather than the event factor.
arg_smoothing_factor: arg_smoothing_factor,
|
38 |
+
},
|
39 |
+
train_data_path: dataset_path + "/train.tar.gz",
|
40 |
+
validation_data_path: dataset_path + "/dev.tar.gz",
|
41 |
+
test_data_path: dataset_path + "/test.tar.gz",
|
42 |
+
|
43 |
+
datasets_for_vocab_creation: ["train"],
|
44 |
+
|
45 |
+
data_loader: {
|
46 |
+
batch_sampler: {
|
47 |
+
type: "max_tokens_sampler",
|
48 |
+
max_tokens: max_training_tokens,
|
49 |
+
sorting_keys: ['tokens']
|
50 |
+
}
|
51 |
+
},
|
52 |
+
|
53 |
+
validation_data_loader: {
|
54 |
+
batch_sampler: {
|
55 |
+
type: "max_tokens_sampler",
|
56 |
+
max_tokens: max_inference_tokens,
|
57 |
+
sorting_keys: ['tokens']
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
model: {
|
62 |
+
type: "span",
|
63 |
+
word_embedding: {
|
64 |
+
token_embedders: {
|
65 |
+
"pieces": {
|
66 |
+
type: "pretrained_transformer",
|
67 |
+
model_name: pretrained_model,
|
68 |
+
}
|
69 |
+
},
|
70 |
+
},
|
71 |
+
span_extractor: {
|
72 |
+
type: 'combo',
|
73 |
+
sub_extractors: [
|
74 |
+
{
|
75 |
+
type: 'self_attentive',
|
76 |
+
},
|
77 |
+
{
|
78 |
+
type: 'bidirectional_endpoint',
|
79 |
+
}
|
80 |
+
]
|
81 |
+
},
|
82 |
+
span_finder: {
|
83 |
+
type: "bio",
|
84 |
+
bio_encoder: {
|
85 |
+
type: "lstm",
|
86 |
+
hidden_size: bio_dim,
|
87 |
+
num_layers: bio_layers,
|
88 |
+
bidirectional: true,
|
89 |
+
dropout: dropout,
|
90 |
+
},
|
91 |
+
no_label: false,
|
92 |
+
},
|
93 |
+
span_typing: {
|
94 |
+
type: 'mlp',
|
95 |
+
hidden_dims: span_typing_dims,
|
96 |
+
},
|
97 |
+
metrics: [{type: "srl"}],
|
98 |
+
|
99 |
+
ontology_path: ontology_path,
|
100 |
+
typing_loss_factor: typing_loss_factor,
|
101 |
+
label_dim: label_dim,
|
102 |
+
max_decoding_spans: 128,
|
103 |
+
max_recursion_depth: 2,
|
104 |
+
debug: debug,
|
105 |
+
},
|
106 |
+
|
107 |
+
trainer: {
|
108 |
+
num_epochs: 128,
|
109 |
+
patience: null,
|
110 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
111 |
+
validation_metric: "+arg-c_f",
|
112 |
+
num_gradient_accumulation_steps: grad_acc,
|
113 |
+
optimizer: {
|
114 |
+
type: "transformer",
|
115 |
+
base: {
|
116 |
+
type: "adam",
|
117 |
+
lr: lr,
|
118 |
+
},
|
119 |
+
embeddings_lr: 0.0,
|
120 |
+
encoder_lr: 1e-5,
|
121 |
+
pooler_lr: 1e-5,
|
122 |
+
layer_fix: layer_fix,
|
123 |
+
}
|
124 |
+
},
|
125 |
+
|
126 |
+
cuda_devices:: cuda_devices,
|
127 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
128 |
+
"cuda_devices": cuda_devices
|
129 |
+
},
|
130 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true,
|
131 |
+
}
|
config/ace/ft.jsonnet
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
local base = import "ace.jsonnet";
|
3 |
+
|
4 |
+
local pretrained_path = env.str("PRETRAINED_PATH", "cache/ace/best");
|
5 |
+
# Fine-tuning learning rate. The default is given as a JSON string because env.json passes it to std.parseJson.
local lr = env.json("FT_LR", "5e-5");
|
6 |
+
|
7 |
+
# training
|
8 |
+
local cuda_devices = base.cuda_devices;
|
9 |
+
|
10 |
+
{
|
11 |
+
dataset_reader: base.dataset_reader,
|
12 |
+
train_data_path: base.train_data_path,
|
13 |
+
validation_data_path: base.validation_data_path,
|
14 |
+
test_data_path: base.test_data_path,
|
15 |
+
datasets_for_vocab_creation: ["train"],
|
16 |
+
data_loader: base.data_loader,
|
17 |
+
validation_data_loader: base.validation_data_loader,
|
18 |
+
|
19 |
+
model: {
|
20 |
+
type: "from_archive",
|
21 |
+
archive_file: pretrained_path
|
22 |
+
},
|
23 |
+
vocabulary: {
|
24 |
+
type: "from_files",
|
25 |
+
directory: pretrained_path + "/vocabulary"
|
26 |
+
},
|
27 |
+
|
28 |
+
trainer: {
|
29 |
+
num_epochs: base.trainer.num_epochs,
|
30 |
+
patience: base.trainer.patience,
|
31 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
32 |
+
validation_metric: "+arg-c_f",
|
33 |
+
num_gradient_accumulation_steps: base.trainer.num_gradient_accumulation_steps,
|
34 |
+
optimizer: {
|
35 |
+
type: "transformer",
|
36 |
+
base: {
|
37 |
+
type: "adam",
|
38 |
+
lr: lr,
|
39 |
+
},
|
40 |
+
embeddings_lr: 0.0,
|
41 |
+
encoder_lr: 1e-5,
|
42 |
+
pooler_lr: 1e-5,
|
43 |
+
layer_fix: base.trainer.optimizer.layer_fix,
|
44 |
+
}
|
45 |
+
},
|
46 |
+
|
47 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
48 |
+
"cuda_devices": cuda_devices
|
49 |
+
},
|
50 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
51 |
+
}
|
config/ace/pt.jsonnet
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
local base = import "ace.jsonnet";
|
3 |
+
|
4 |
+
local fn_path = "data/framenet/full/full.jsonl";
|
5 |
+
local mapping_path = "data/ace/framenet2ace/";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# training
|
10 |
+
local lr = env.json("PT_LR", "5e-5");
|
11 |
+
local cuda_devices = base.cuda_devices;
|
12 |
+
|
13 |
+
# mapping
|
14 |
+
local min_weight = env.json("MIN_WEIGHT", '0.0');
|
15 |
+
local max_weight = env.json("MAX_WEIGHT", '5.0');
|
16 |
+
|
17 |
+
{
|
18 |
+
dataset_reader: {
|
19 |
+
type: "semantic_role_labeling",
|
20 |
+
debug: debug,
|
21 |
+
pretrained_model: base.dataset_reader.pretrained_model,
|
22 |
+
ignore_label: false,
|
23 |
+
[ if debug then "max_instances" ]: 128,
|
24 |
+
event_smoothing_factor: base.dataset_reader.event_smoothing_factor,
|
25 |
+
arg_smoothing_factor: base.dataset_reader.arg_smoothing_factor,
|
26 |
+
ontology_mapping_path: mapping_path + '/ontology_mapping.json',
|
27 |
+
min_weight: min_weight,
|
28 |
+
max_weight: max_weight,
|
29 |
+
},
|
30 |
+
validation_dataset_reader: base.dataset_reader,
|
31 |
+
train_data_path: fn_path,
|
32 |
+
validation_data_path: base.validation_data_path,
|
33 |
+
test_data_path: base.test_data_path,
|
34 |
+
vocabulary: {
|
35 |
+
type: "extend",
|
36 |
+
directory: mapping_path + "/vocabulary"
|
37 |
+
},
|
38 |
+
|
39 |
+
datasets_for_vocab_creation: ["train"],
|
40 |
+
|
41 |
+
data_loader: base.data_loader,
|
42 |
+
validation_data_loader: base.validation_data_loader,
|
43 |
+
|
44 |
+
model: base.model,
|
45 |
+
|
46 |
+
trainer: {
|
47 |
+
num_epochs: base.trainer.num_epochs,
|
48 |
+
patience: base.trainer.patience,
|
49 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
50 |
+
validation_metric: "+arg-c_f",
|
51 |
+
num_gradient_accumulation_steps: base.trainer.num_gradient_accumulation_steps,
|
52 |
+
optimizer: {
|
53 |
+
type: "transformer",
|
54 |
+
base: {
|
55 |
+
type: "adam",
|
56 |
+
lr: lr,
|
57 |
+
},
|
58 |
+
embeddings_lr: 0.0,
|
59 |
+
encoder_lr: 1e-5,
|
60 |
+
pooler_lr: 1e-5,
|
61 |
+
layer_fix: base.trainer.optimizer.layer_fix,
|
62 |
+
}
|
63 |
+
},
|
64 |
+
|
65 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
66 |
+
"cuda_devices": cuda_devices
|
67 |
+
},
|
68 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
69 |
+
}
|
config/ace/rt.jsonnet
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
local base = import "ace.jsonnet";
|
3 |
+
|
4 |
+
local dataset_path = env.str("DATA_PATH", "data/ace/events");
|
5 |
+
|
6 |
+
local debug = false;
|
7 |
+
|
8 |
+
# re-train
|
9 |
+
local pretrained_path = env.str("PRETRAINED_PATH", "cache/fn/best");
|
10 |
+
# Learning rate for the re-trained span modules. The default is given as a JSON string because env.json passes it to std.parseJson.
local rt_lr = env.json("RT_LR", "5e-5");
|
11 |
+
|
12 |
+
# module
|
13 |
+
local cuda_devices = base.cuda_devices;
|
14 |
+
|
15 |
+
{
|
16 |
+
dataset_reader: base.dataset_reader,
|
17 |
+
train_data_path: base.train_data_path,
|
18 |
+
validation_data_path: base.validation_data_path,
|
19 |
+
test_data_path: base.test_data_path,
|
20 |
+
|
21 |
+
datasets_for_vocab_creation: ["train"],
|
22 |
+
|
23 |
+
data_loader: base.data_loader,
|
24 |
+
validation_data_loader: base.validation_data_loader,
|
25 |
+
|
26 |
+
model: {
|
27 |
+
type: "span",
|
28 |
+
word_embedding: {
|
29 |
+
"_pretrained": {
|
30 |
+
"archive_file": pretrained_path,
|
31 |
+
"module_path": "word_embedding",
|
32 |
+
"freeze": false,
|
33 |
+
}
|
34 |
+
},
|
35 |
+
span_extractor: {
|
36 |
+
"_pretrained": {
|
37 |
+
"archive_file": pretrained_path,
|
38 |
+
"module_path": "_span_extractor",
|
39 |
+
"freeze": false,
|
40 |
+
}
|
41 |
+
},
|
42 |
+
span_finder: {
|
43 |
+
"_pretrained": {
|
44 |
+
"archive_file": pretrained_path,
|
45 |
+
"module_path": "_span_finder",
|
46 |
+
"freeze": false,
|
47 |
+
}
|
48 |
+
},
|
49 |
+
span_typing: {
|
50 |
+
type: 'mlp',
|
51 |
+
hidden_dims: base.model.span_typing.hidden_dims,
|
52 |
+
},
|
53 |
+
metrics: [{type: "srl"}],
|
54 |
+
|
55 |
+
typing_loss_factor: base.model.typing_loss_factor,
|
56 |
+
label_dim: base.model.label_dim,
|
57 |
+
max_decoding_spans: 128,
|
58 |
+
max_recursion_depth: 2,
|
59 |
+
debug: debug,
|
60 |
+
},
|
61 |
+
|
62 |
+
trainer: {
|
63 |
+
num_epochs: base.trainer.num_epochs,
|
64 |
+
patience: base.trainer.patience,
|
65 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
66 |
+
validation_metric: "+arg-c_f",
|
67 |
+
num_gradient_accumulation_steps: base.trainer.num_gradient_accumulation_steps,
|
68 |
+
optimizer: {
|
69 |
+
type: "transformer",
|
70 |
+
base: {
|
71 |
+
type: "adam",
|
72 |
+
lr: base.trainer.optimizer.base.lr,
|
73 |
+
},
|
74 |
+
embeddings_lr: 0.0,
|
75 |
+
encoder_lr: 1e-5,
|
76 |
+
pooler_lr: 1e-5,
|
77 |
+
layer_fix: base.trainer.optimizer.layer_fix,
|
78 |
+
parameter_groups: [
|
79 |
+
[['_span_finder.*'], {'lr': rt_lr}],
|
80 |
+
[['_span_extractor.*'], {'lr': rt_lr}],
|
81 |
+
]
|
82 |
+
}
|
83 |
+
},
|
84 |
+
|
85 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
86 |
+
"cuda_devices": cuda_devices
|
87 |
+
},
|
88 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
89 |
+
}
|
config/basic/basic.jsonnet
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
local dataset_path = "data/better/basic/sent/";
|
4 |
+
local ontology_path = "data/better/ontology.tsv";
|
5 |
+
|
6 |
+
local debug = false;
|
7 |
+
|
8 |
+
# reader
|
9 |
+
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
10 |
+
|
11 |
+
# model
|
12 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
13 |
+
local dropout = env.json("DROPOUT", "0.2");
|
14 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
15 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
16 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
17 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
18 |
+
|
19 |
+
# loader
|
20 |
+
local max_training_tokens = 512;
|
21 |
+
local max_inference_tokens = 1024;
|
22 |
+
|
23 |
+
# training
|
24 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
25 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
26 |
+
local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
27 |
+
local patience = env.json("PATIENCE", "null");
|
28 |
+
|
29 |
+
{
|
30 |
+
dataset_reader: {
|
31 |
+
type: "better",
|
32 |
+
eval_type: "basic",
|
33 |
+
debug: debug,
|
34 |
+
pretrained_model: pretrained_model,
|
35 |
+
ignore_label: false,
|
36 |
+
[ if debug then "max_instances" ]: 128,
|
37 |
+
},
|
38 |
+
train_data_path: dataset_path + "/basic.eng-provided-72.0pct.train-70.0pct.d.bp.json",
|
39 |
+
validation_data_path: dataset_path + "/basic.eng-provided-72.0pct.analysis-15.0pct.ref.d.bp.json",
|
40 |
+
test_data_path: dataset_path + "/basic.eng-provided-72.0pct.devtest-15.0pct.ref.d.bp.json",
|
41 |
+
|
42 |
+
datasets_for_vocab_creation: ["train"],
|
43 |
+
|
44 |
+
data_loader: {
|
45 |
+
batch_sampler: {
|
46 |
+
type: "max_tokens_sampler",
|
47 |
+
max_tokens: max_training_tokens,
|
48 |
+
sorting_keys: ['tokens']
|
49 |
+
}
|
50 |
+
},
|
51 |
+
|
52 |
+
validation_data_loader: {
|
53 |
+
batch_sampler: {
|
54 |
+
type: "max_tokens_sampler",
|
55 |
+
max_tokens: max_inference_tokens,
|
56 |
+
sorting_keys: ['tokens']
|
57 |
+
}
|
58 |
+
},
|
59 |
+
|
60 |
+
model: {
|
61 |
+
type: "span",
|
62 |
+
word_embedding: {
|
63 |
+
token_embedders: {
|
64 |
+
"pieces": {
|
65 |
+
type: "pretrained_transformer",
|
66 |
+
model_name: pretrained_model,
|
67 |
+
}
|
68 |
+
},
|
69 |
+
},
|
70 |
+
span_extractor: {
|
71 |
+
type: 'combo',
|
72 |
+
sub_extractors: [
|
73 |
+
{
|
74 |
+
type: 'self_attentive',
|
75 |
+
},
|
76 |
+
{
|
77 |
+
type: 'bidirectional_endpoint',
|
78 |
+
}
|
79 |
+
]
|
80 |
+
},
|
81 |
+
span_finder: {
|
82 |
+
type: "bio",
|
83 |
+
bio_encoder: {
|
84 |
+
type: "lstm",
|
85 |
+
hidden_size: bio_dim,
|
86 |
+
num_layers: bio_layers,
|
87 |
+
bidirectional: true,
|
88 |
+
dropout: dropout,
|
89 |
+
},
|
90 |
+
no_label: false,
|
91 |
+
},
|
92 |
+
span_typing: {
|
93 |
+
type: 'mlp',
|
94 |
+
hidden_dims: span_typing_dims,
|
95 |
+
},
|
96 |
+
metrics: [{type: "srl"}],
|
97 |
+
|
98 |
+
typing_loss_factor: typing_loss_factor,
|
99 |
+
ontology_path: ontology_path,
|
100 |
+
label_dim: label_dim,
|
101 |
+
max_decoding_spans: 128,
|
102 |
+
max_recursion_depth: 2,
|
103 |
+
debug: debug,
|
104 |
+
},
|
105 |
+
|
106 |
+
trainer: {
|
107 |
+
num_epochs: 128,
|
108 |
+
patience: patience,
|
109 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
110 |
+
validation_metric: "+em_f",
|
111 |
+
grad_norm: 10,
|
112 |
+
grad_clipping: 10,
|
113 |
+
num_gradient_accumulation_steps: grad_acc,
|
114 |
+
optimizer: {
|
115 |
+
type: "transformer",
|
116 |
+
base: {
|
117 |
+
type: "adam",
|
118 |
+
lr: 1e-3,
|
119 |
+
},
|
120 |
+
embeddings_lr: 0.0,
|
121 |
+
encoder_lr: 1e-5,
|
122 |
+
pooler_lr: 1e-5,
|
123 |
+
layer_fix: layer_fix,
|
124 |
+
}
|
125 |
+
},
|
126 |
+
|
127 |
+
cuda_devices:: cuda_devices,
|
128 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
129 |
+
"cuda_devices": cuda_devices
|
130 |
+
},
|
131 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
132 |
+
}
|
config/basic/ft.jsonnet
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
local base = import "basic.jsonnet";
|
3 |
+
|
4 |
+
local pretrained_path = env.str("PRETRAINED_PATH", "cache/basic/best");
|
5 |
+
# Fine-tuning learning rate. The default is given as a JSON string because env.json passes it to std.parseJson.
local lr = env.json("FT_LR", "5e-5");
|
6 |
+
|
7 |
+
# training
|
8 |
+
local cuda_devices = base.cuda_devices;
|
9 |
+
|
10 |
+
{
|
11 |
+
dataset_reader: base.dataset_reader,
|
12 |
+
train_data_path: base.train_data_path,
|
13 |
+
validation_data_path: base.validation_data_path,
|
14 |
+
test_data_path: base.test_data_path,
|
15 |
+
datasets_for_vocab_creation: ["train"],
|
16 |
+
data_loader: base.data_loader,
|
17 |
+
validation_data_loader: base.validation_data_loader,
|
18 |
+
|
19 |
+
model: {
|
20 |
+
type: "from_archive",
|
21 |
+
archive_file: pretrained_path
|
22 |
+
},
|
23 |
+
vocabulary: {
|
24 |
+
type: "from_files",
|
25 |
+
directory: pretrained_path + "/vocabulary"
|
26 |
+
},
|
27 |
+
|
28 |
+
trainer: {
|
29 |
+
num_epochs: base.trainer.num_epochs,
|
30 |
+
patience: base.trainer.patience,
|
31 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
32 |
+
validation_metric: "+arg-c_f",
|
33 |
+
num_gradient_accumulation_steps: base.trainer.num_gradient_accumulation_steps,
|
34 |
+
optimizer: {
|
35 |
+
type: "transformer",
|
36 |
+
base: {
|
37 |
+
type: "adam",
|
38 |
+
lr: lr,
|
39 |
+
},
|
40 |
+
embeddings_lr: 0.0,
|
41 |
+
encoder_lr: 1e-5,
|
42 |
+
pooler_lr: 1e-5,
|
43 |
+
layer_fix: base.trainer.optimizer.layer_fix,
|
44 |
+
}
|
45 |
+
},
|
46 |
+
|
47 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
48 |
+
"cuda_devices": cuda_devices
|
49 |
+
},
|
50 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
51 |
+
}
|
config/basic/pt.jsonnet
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
local base = import "basic.jsonnet";
|
3 |
+
|
4 |
+
local fn_path = "data/framenet/full/full.jsonl";
|
5 |
+
local mapping_path = "data/basic/framenet2better/";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# training
|
10 |
+
local lr = env.json("PT_LR", "5e-5");
|
11 |
+
local cuda_devices = base.cuda_devices;
|
12 |
+
|
13 |
+
# mapping
|
14 |
+
local min_weight = env.json("MIN_WEIGHT", '0.0');
|
15 |
+
local max_weight = env.json("MAX_WEIGHT", '5.0');
|
16 |
+
|
17 |
+
{
|
18 |
+
dataset_reader: {
|
19 |
+
type: "semantic_role_labeling",
|
20 |
+
debug: debug,
|
21 |
+
pretrained_model: base.dataset_reader.pretrained_model,
|
22 |
+
ignore_label: false,
|
23 |
+
[ if debug then "max_instances" ]: 128,
|
24 |
+
ontology_mapping_path: mapping_path + '/ontology_mapping.json',
|
25 |
+
min_weight: min_weight,
|
26 |
+
max_weight: max_weight,
|
27 |
+
},
|
28 |
+
validation_dataset_reader: base.dataset_reader,
|
29 |
+
train_data_path: fn_path,
|
30 |
+
validation_data_path: base.validation_data_path,
|
31 |
+
test_data_path: base.test_data_path,
|
32 |
+
vocabulary: {
|
33 |
+
type: "extend",
|
34 |
+
directory: mapping_path + "/vocabulary"
|
35 |
+
},
|
36 |
+
|
37 |
+
datasets_for_vocab_creation: ["train"],
|
38 |
+
|
39 |
+
data_loader: base.data_loader,
|
40 |
+
validation_data_loader: base.validation_data_loader,
|
41 |
+
|
42 |
+
model: base.model,
|
43 |
+
|
44 |
+
trainer: {
|
45 |
+
num_epochs: base.trainer.num_epochs,
|
46 |
+
patience: base.trainer.patience,
|
47 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
48 |
+
validation_metric: "+arg-c_f",
|
49 |
+
num_gradient_accumulation_steps: base.trainer.num_gradient_accumulation_steps,
|
50 |
+
optimizer: {
|
51 |
+
type: "transformer",
|
52 |
+
base: {
|
53 |
+
type: "adam",
|
54 |
+
lr: lr,
|
55 |
+
},
|
56 |
+
embeddings_lr: 0.0,
|
57 |
+
encoder_lr: 1e-5,
|
58 |
+
pooler_lr: 1e-5,
|
59 |
+
layer_fix: base.trainer.optimizer.layer_fix,
|
60 |
+
}
|
61 |
+
},
|
62 |
+
|
63 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
64 |
+
"cuda_devices": cuda_devices
|
65 |
+
},
|
66 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
67 |
+
}
|
config/basic/rt.jsonnet
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
local base = import "basic.jsonnet";
|
3 |
+
|
4 |
+
local debug = false;
|
5 |
+
|
6 |
+
# re-train
|
7 |
+
local pretrained_path = env.str("PRETRAINED_PATH", "cache/fn/best");
|
8 |
+
# Learning rate for the re-trained span modules. The default is given as a JSON string because env.json passes it to std.parseJson.
local rt_lr = env.json("RT_LR", "5e-5");
|
9 |
+
|
10 |
+
# module
|
11 |
+
local cuda_devices = base.cuda_devices;
|
12 |
+
|
13 |
+
{
|
14 |
+
dataset_reader: base.dataset_reader,
|
15 |
+
train_data_path: base.train_data_path,
|
16 |
+
validation_data_path: base.validation_data_path,
|
17 |
+
test_data_path: base.test_data_path,
|
18 |
+
|
19 |
+
datasets_for_vocab_creation: ["train"],
|
20 |
+
|
21 |
+
data_loader: base.data_loader,
|
22 |
+
validation_data_loader: base.validation_data_loader,
|
23 |
+
|
24 |
+
model: {
|
25 |
+
type: "span",
|
26 |
+
word_embedding: {
|
27 |
+
"_pretrained": {
|
28 |
+
"archive_file": pretrained_path,
|
29 |
+
"module_path": "word_embedding",
|
30 |
+
"freeze": false,
|
31 |
+
}
|
32 |
+
},
|
33 |
+
span_extractor: {
|
34 |
+
"_pretrained": {
|
35 |
+
"archive_file": pretrained_path,
|
36 |
+
"module_path": "_span_extractor",
|
37 |
+
"freeze": false,
|
38 |
+
}
|
39 |
+
},
|
40 |
+
span_finder: {
|
41 |
+
"_pretrained": {
|
42 |
+
"archive_file": pretrained_path,
|
43 |
+
"module_path": "_span_finder",
|
44 |
+
"freeze": false,
|
45 |
+
}
|
46 |
+
},
|
47 |
+
span_typing: {
|
48 |
+
type: 'mlp',
|
49 |
+
hidden_dims: base.model.span_typing.hidden_dims,
|
50 |
+
},
|
51 |
+
metrics: [{type: "srl"}],
|
52 |
+
|
53 |
+
typing_loss_factor: base.model.typing_loss_factor,
|
54 |
+
label_dim: base.model.label_dim,
|
55 |
+
max_decoding_spans: 128,
|
56 |
+
max_recursion_depth: 2,
|
57 |
+
debug: debug,
|
58 |
+
},
|
59 |
+
|
60 |
+
trainer: {
|
61 |
+
num_epochs: base.trainer.num_epochs,
|
62 |
+
patience: base.trainer.patience,
|
63 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
64 |
+
validation_metric: "+arg-c_f",
|
65 |
+
num_gradient_accumulation_steps: base.trainer.num_gradient_accumulation_steps,
|
66 |
+
optimizer: {
|
67 |
+
type: "transformer",
|
68 |
+
base: {
|
69 |
+
type: "adam",
|
70 |
+
lr: base.trainer.optimizer.base.lr,
|
71 |
+
},
|
72 |
+
embeddings_lr: 0.0,
|
73 |
+
encoder_lr: 1e-5,
|
74 |
+
pooler_lr: 1e-5,
|
75 |
+
layer_fix: base.trainer.optimizer.layer_fix,
|
76 |
+
parameter_groups: [
|
77 |
+
[['_span_finder.*'], {'lr': rt_lr}],
|
78 |
+
[['_span_extractor.*'], {'lr': rt_lr}],
|
79 |
+
]
|
80 |
+
}
|
81 |
+
},
|
82 |
+
|
83 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
84 |
+
"cuda_devices": cuda_devices
|
85 |
+
},
|
86 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
87 |
+
}
|
config/env.jsonnet
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
# Reads the setting `name` as a JSON value.
# When the external variable LOGNAME equals "tuning", the value is parsed from the external variable `name`.
# Otherwise `default` is used: string defaults are parsed as JSON, while non-string defaults
# (e.g. a bare number such as 5e-5) are returned unchanged, since std.parseJson only accepts strings.
json: function(name, default) if std.extVar("LOGNAME")=="tuning" then std.parseJson(std.extVar(name)) else if std.isString(default) then std.parseJson(default) else default,
|
3 |
+
# Reads the setting `name` as a raw string: returns the external variable `name` when the
# external variable LOGNAME equals "tuning", otherwise returns `default` unchanged.
# NOTE(review): std.extVar("LOGNAME") is evaluated on every call, so LOGNAME presumably must always be supplied as an external variable — confirm.
str: function(name, default) if std.extVar("LOGNAME")=="tuning" then std.extVar(name) else default
|
4 |
+
}
|
config/fn-evalita/evalita.framenet_xlmr.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
local dataset_path = "/home/p289731/cloned/lome/preproc/evalita_jsonl";
|
5 |
+
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = "/data/p289731/cloned/lome-models/models/xlm-roberta-framenet/";
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/evalita_train.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/evalita_dev.jsonl",
|
45 |
+
test_data_path: dataset_path + "/evalita_test.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn-evalita/evalita.it_mono.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
local dataset_path = "/home/p289731/cloned/lome/preproc/evalita_jsonl";
|
5 |
+
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = env.str("ENCODER", "Musixmatch/umberto-commoncrawl-cased-v1");
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/evalita_train.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/evalita_dev.jsonl",
|
45 |
+
test_data_path: dataset_path + "/evalita_test.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn-evalita/evalita.vanilla_xlmr.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
local dataset_path = "/home/p289731/cloned/lome/preproc/evalita_jsonl";
|
5 |
+
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/evalita_train.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/evalita_dev.jsonl",
|
45 |
+
test_data_path: dataset_path + "/evalita_test.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn-evalita/evalita_plus_fn.vanilla_xlmr.freeze.jsonnet
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
local dataset_path = "/home/p289731/cloned/lome/preproc/evalita_jsonl";
|
5 |
+
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/evalita_plus_fn_train.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/evalita_dev.jsonl",
|
45 |
+
test_data_path: dataset_path + "/evalita_test.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
train_parameters: false
|
77 |
+
}
|
78 |
+
},
|
79 |
+
},
|
80 |
+
span_extractor: {
|
81 |
+
type: 'combo',
|
82 |
+
sub_extractors: [
|
83 |
+
{
|
84 |
+
type: 'self_attentive',
|
85 |
+
},
|
86 |
+
{
|
87 |
+
type: 'bidirectional_endpoint',
|
88 |
+
}
|
89 |
+
]
|
90 |
+
},
|
91 |
+
span_finder: {
|
92 |
+
type: "bio",
|
93 |
+
bio_encoder: {
|
94 |
+
type: "lstm",
|
95 |
+
hidden_size: bio_dim,
|
96 |
+
num_layers: bio_layers,
|
97 |
+
bidirectional: true,
|
98 |
+
dropout: dropout,
|
99 |
+
},
|
100 |
+
no_label: false,
|
101 |
+
},
|
102 |
+
span_typing: {
|
103 |
+
type: 'mlp',
|
104 |
+
hidden_dims: span_typing_dims,
|
105 |
+
},
|
106 |
+
metrics: [{type: "srl"}],
|
107 |
+
|
108 |
+
typing_loss_factor: typing_loss_factor,
|
109 |
+
ontology_path: null,
|
110 |
+
label_dim: label_dim,
|
111 |
+
max_decoding_spans: 128,
|
112 |
+
max_recursion_depth: 2,
|
113 |
+
debug: debug,
|
114 |
+
},
|
115 |
+
|
116 |
+
trainer: {
|
117 |
+
num_epochs: 128,
|
118 |
+
patience: patience,
|
119 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
120 |
+
validation_metric: "+em_f",
|
121 |
+
grad_norm: 10,
|
122 |
+
grad_clipping: 10,
|
123 |
+
num_gradient_accumulation_steps: grad_acc,
|
124 |
+
optimizer: {
|
125 |
+
type: "transformer",
|
126 |
+
base: {
|
127 |
+
type: "adam",
|
128 |
+
lr: 1e-3,
|
129 |
+
},
|
130 |
+
embeddings_lr: 0.0,
|
131 |
+
encoder_lr: 1e-5,
|
132 |
+
pooler_lr: 1e-5,
|
133 |
+
layer_fix: layer_fix,
|
134 |
+
}
|
135 |
+
},
|
136 |
+
|
137 |
+
cuda_devices:: cuda_devices,
|
138 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
139 |
+
"cuda_devices": cuda_devices
|
140 |
+
},
|
141 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
142 |
+
}
|
config/fn-evalita/evalita_plus_fn.vanilla_xlmr.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
local dataset_path = "/home/p289731/cloned/lome/preproc/evalita_jsonl";
|
5 |
+
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/evalita_plus_fn_train.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/evalita_dev.jsonl",
|
45 |
+
test_data_path: dataset_path + "/evalita_test.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn-kicktionary/kicktionary.concat_clipped.vanilla_xlmr.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
local dataset_path = "/home/p289731/cloned/lome/preproc/kicktionary_jsonl";
|
5 |
+
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/kicktionary_exemplars_train.concat_clipped.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/kicktionary_exemplars_dev.jsonl",
|
45 |
+
test_data_path: dataset_path + "/kicktionary_exemplars_test.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn-kicktionary/kicktionary.football_xlmr.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
local dataset_path = "/home/p289731/cloned/lome/preproc/kicktionary_jsonl";
|
5 |
+
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = env.str("ENCODER", "/data/p289731/cloned/lome-models/models/xlm-roberta-football/");
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/kicktionary_exemplars_train.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/kicktionary_exemplars_dev.jsonl",
|
45 |
+
test_data_path: dataset_path + "/kicktionary_exemplars_test.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn-kicktionary/kicktionary.framenet_xlmr.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
local dataset_path = "/home/p289731/cloned/lome/preproc/kicktionary_jsonl";
|
5 |
+
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = "/data/p289731/cloned/lome-models/models/xlm-roberta-framenet/";
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/kicktionary_exemplars_train.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/kicktionary_exemplars_dev.jsonl",
|
45 |
+
test_data_path: dataset_path + "/kicktionary_exemplars_test.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn-kicktionary/kicktionary.vanilla_xlmr.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
# Hard-coded absolute path under a user home directory; the env-driven
# `DATA_PATH` variant is the commented-out line above. Adjust before running elsewhere.
local dataset_path = "/home/p289731/cloned/lome/preproc/kicktionary_jsonl";
|
5 |
+
# NOTE(review): this local is not referenced anywhere in this config; the model
# section sets `ontology_path: null`, so the value below has no effect.
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
# NOTE(review): not referenced in this config; the `mix_sampler` sampling_ratios
# below are fixed at 1.0 ('exemplar') and 0.0 ('full text'), so this value is unused.
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/kicktionary_exemplars_train.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/kicktionary_exemplars_dev.jsonl",
|
45 |
+
test_data_path: dataset_path + "/kicktionary_exemplars_test.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn-sonar/sonar-a1.framenet_xlmr.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
# Hard-coded absolute path under a user home directory; the env-driven
# `DATA_PATH` variant is the commented-out line above. Adjust before running elsewhere.
local dataset_path = "/home/p289731/cloned/lome/preproc/sonar_jsonl";
|
5 |
+
# NOTE(review): this local is not referenced anywhere in this config; the model
# section sets `ontology_path: null`, so the value below has no effect.
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = "/data/p289731/cloned/lome-models/models/xlm-roberta-framenet/";
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
# NOTE(review): not referenced in this config; the `mix_sampler` sampling_ratios
# below are fixed at 1.0 ('exemplar') and 0.0 ('full text'), so this value is unused.
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/dutch-sonar-train-A1.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/dutch-sonar-dev-A1.jsonl",
|
45 |
+
test_data_path: dataset_path + "/dutch-sonar-test-A1.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn-sonar/sonar-a1.sonar_plus_fn.vanilla_xlmr.jsonnet
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
# Hard-coded absolute path under a user home directory; the env-driven
# `DATA_PATH` variant is the commented-out line above. Adjust before running elsewhere.
local dataset_path = "/home/p289731/cloned/lome/preproc/sonar_jsonl";
|
5 |
+
# NOTE(review): this local is not referenced anywhere in this config; the model
# section sets `ontology_path: null`, so the value below has no effect.
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
# NOTE(review): not referenced in this config; the `mix_sampler` sampling_ratios
# below are fixed at 1.0 ('exemplar') and 0.0 ('full text'), so this value is unused.
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
|
44 |
+
    # NOTE(review): despite the `sonar_plus_fn` file name, the train/dev/test files here
    # are the same SoNaR A1 files used by sonar-a1.vanilla_xlmr.jsonnet; confirm whether
    # the FrameNet data is expected to be merged into these files upstream.
    train_data_path: dataset_path + "/dutch-sonar-train-A1.jsonl",
|
45 |
+
validation_data_path: dataset_path + "/dutch-sonar-dev-A1.jsonl",
|
46 |
+
test_data_path: dataset_path + "/dutch-sonar-test-A1.jsonl",
|
47 |
+
|
48 |
+
datasets_for_vocab_creation: ["train"],
|
49 |
+
|
50 |
+
data_loader: {
|
51 |
+
batch_sampler: {
|
52 |
+
type: "mix_sampler",
|
53 |
+
max_tokens: max_training_tokens,
|
54 |
+
sorting_keys: ['tokens'],
|
55 |
+
sampling_ratios: {
|
56 |
+
'exemplar': 1.0,
|
57 |
+
'full text': 0.0,
|
58 |
+
}
|
59 |
+
}
|
60 |
+
},
|
61 |
+
|
62 |
+
validation_data_loader: {
|
63 |
+
batch_sampler: {
|
64 |
+
type: "max_tokens_sampler",
|
65 |
+
max_tokens: max_inference_tokens,
|
66 |
+
sorting_keys: ['tokens']
|
67 |
+
}
|
68 |
+
},
|
69 |
+
|
70 |
+
model: {
|
71 |
+
type: "span",
|
72 |
+
word_embedding: {
|
73 |
+
token_embedders: {
|
74 |
+
"pieces": {
|
75 |
+
type: "pretrained_transformer",
|
76 |
+
model_name: pretrained_model,
|
77 |
+
}
|
78 |
+
},
|
79 |
+
},
|
80 |
+
span_extractor: {
|
81 |
+
type: 'combo',
|
82 |
+
sub_extractors: [
|
83 |
+
{
|
84 |
+
type: 'self_attentive',
|
85 |
+
},
|
86 |
+
{
|
87 |
+
type: 'bidirectional_endpoint',
|
88 |
+
}
|
89 |
+
]
|
90 |
+
},
|
91 |
+
span_finder: {
|
92 |
+
type: "bio",
|
93 |
+
bio_encoder: {
|
94 |
+
type: "lstm",
|
95 |
+
hidden_size: bio_dim,
|
96 |
+
num_layers: bio_layers,
|
97 |
+
bidirectional: true,
|
98 |
+
dropout: dropout,
|
99 |
+
},
|
100 |
+
no_label: false,
|
101 |
+
},
|
102 |
+
span_typing: {
|
103 |
+
type: 'mlp',
|
104 |
+
hidden_dims: span_typing_dims,
|
105 |
+
},
|
106 |
+
metrics: [{type: "srl"}],
|
107 |
+
|
108 |
+
typing_loss_factor: typing_loss_factor,
|
109 |
+
ontology_path: null,
|
110 |
+
label_dim: label_dim,
|
111 |
+
max_decoding_spans: 128,
|
112 |
+
max_recursion_depth: 2,
|
113 |
+
debug: debug,
|
114 |
+
},
|
115 |
+
|
116 |
+
trainer: {
|
117 |
+
num_epochs: 128,
|
118 |
+
patience: patience,
|
119 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
120 |
+
validation_metric: "+em_f",
|
121 |
+
grad_norm: 10,
|
122 |
+
grad_clipping: 10,
|
123 |
+
num_gradient_accumulation_steps: grad_acc,
|
124 |
+
optimizer: {
|
125 |
+
type: "transformer",
|
126 |
+
base: {
|
127 |
+
type: "adam",
|
128 |
+
lr: 1e-3,
|
129 |
+
},
|
130 |
+
embeddings_lr: 0.0,
|
131 |
+
encoder_lr: 1e-5,
|
132 |
+
pooler_lr: 1e-5,
|
133 |
+
layer_fix: layer_fix,
|
134 |
+
}
|
135 |
+
},
|
136 |
+
|
137 |
+
cuda_devices:: cuda_devices,
|
138 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
139 |
+
"cuda_devices": cuda_devices
|
140 |
+
},
|
141 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
142 |
+
}
|
config/fn-sonar/sonar-a1.vanilla_xlmr.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
# Hard-coded absolute path under a user home directory; the env-driven
# `DATA_PATH` variant is the commented-out line above. Adjust before running elsewhere.
local dataset_path = "/home/p289731/cloned/lome/preproc/sonar_jsonl";
|
5 |
+
# NOTE(review): this local is not referenced anywhere in this config; the model
# section sets `ontology_path: null`, so the value below has no effect.
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
# NOTE(review): not referenced in this config; the `mix_sampler` sampling_ratios
# below are fixed at 1.0 ('exemplar') and 0.0 ('full text'), so this value is unused.
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/dutch-sonar-train-A1.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/dutch-sonar-dev-A1.jsonl",
|
45 |
+
test_data_path: dataset_path + "/dutch-sonar-test-A1.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn-sonar/sonar-a2.framenet_xlmr.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
# Hard-coded absolute path under a user home directory; the env-driven
# `DATA_PATH` variant is the commented-out line above. Adjust before running elsewhere.
local dataset_path = "/home/p289731/cloned/lome/preproc/sonar_jsonl";
|
5 |
+
# NOTE(review): this local is not referenced anywhere in this config; the model
# section sets `ontology_path: null`, so the value below has no effect.
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = "/data/p289731/cloned/lome-models/models/xlm-roberta-framenet/";
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
# NOTE(review): not referenced in this config; the `mix_sampler` sampling_ratios
# below are fixed at 1.0 ('exemplar') and 0.0 ('full text'), so this value is unused.
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/dutch-sonar-train-A2.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/dutch-sonar-dev-A2.jsonl",
|
45 |
+
test_data_path: dataset_path + "/dutch-sonar-test-A2.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn-sonar/sonar-a2.sonar_plus_fn.vanilla_xlmr.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
# Hard-coded absolute path under a user home directory; the env-driven
# `DATA_PATH` variant is the commented-out line above. Adjust before running elsewhere.
local dataset_path = "/home/p289731/cloned/lome/preproc/sonar_jsonl";
|
5 |
+
# NOTE(review): this local is not referenced anywhere in this config; the model
# section sets `ontology_path: null`, so the value below has no effect.
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
# NOTE(review): not referenced in this config; the `mix_sampler` sampling_ratios
# below are fixed at 1.0 ('exemplar') and 0.0 ('full text'), so this value is unused.
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
    # NOTE(review): despite the `sonar_plus_fn` file name, the train/dev/test files here
    # are the same SoNaR A2 files used by sonar-a2.framenet_xlmr.jsonnet; confirm whether
    # the FrameNet data is expected to be merged into these files upstream.
    train_data_path: dataset_path + "/dutch-sonar-train-A2.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/dutch-sonar-dev-A2.jsonl",
|
45 |
+
test_data_path: dataset_path + "/dutch-sonar-test-A2.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn-sonar/sonar-a2.vanilla_xlmr.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
local dataset_path = "/home/p289731/cloned/lome/preproc/sonar_jsonl";
|
5 |
+
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/dutch-sonar-train-A2.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/dutch-sonar-dev-A2.jsonl",
|
45 |
+
test_data_path: dataset_path + "/dutch-sonar-test-A2.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': 1.0,
|
56 |
+
'full text': 0.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
config/fn/fn.orig.jsonnet
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
local ontology_path = "data/framenet/ontology.tsv";
|
5 |
+
|
6 |
+
local debug = false;
|
7 |
+
|
8 |
+
# reader
|
9 |
+
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
10 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
11 |
+
|
12 |
+
# model
|
13 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
14 |
+
local dropout = env.json("DROPOUT", "0.2");
|
15 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
16 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
17 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
18 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
19 |
+
|
20 |
+
# loader
|
21 |
+
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
22 |
+
local max_training_tokens = 512;
|
23 |
+
local max_inference_tokens = 1024;
|
24 |
+
|
25 |
+
# training
|
26 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
27 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
28 |
+
local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
29 |
+
local patience = env.json("PATIENCE", "null");
|
30 |
+
|
31 |
+
{
|
32 |
+
dataset_reader: {
|
33 |
+
type: "semantic_role_labeling",
|
34 |
+
debug: debug,
|
35 |
+
pretrained_model: pretrained_model,
|
36 |
+
ignore_label: false,
|
37 |
+
[ if debug then "max_instances" ]: 128,
|
38 |
+
event_smoothing_factor: smoothing_factor,
|
39 |
+
arg_smoothing_factor: smoothing_factor,
|
40 |
+
},
|
41 |
+
train_data_path: dataset_path + "/train.jsonl",
|
42 |
+
validation_data_path: dataset_path + "/dev.jsonl",
|
43 |
+
test_data_path: dataset_path + "/test.jsonl",
|
44 |
+
|
45 |
+
datasets_for_vocab_creation: ["train"],
|
46 |
+
|
47 |
+
data_loader: {
|
48 |
+
batch_sampler: {
|
49 |
+
type: "mix_sampler",
|
50 |
+
max_tokens: max_training_tokens,
|
51 |
+
sorting_keys: ['tokens'],
|
52 |
+
sampling_ratios: {
|
53 |
+
'exemplar': exemplar_ratio,
|
54 |
+
'full text': 1.0,
|
55 |
+
}
|
56 |
+
}
|
57 |
+
},
|
58 |
+
|
59 |
+
validation_data_loader: {
|
60 |
+
batch_sampler: {
|
61 |
+
type: "max_tokens_sampler",
|
62 |
+
max_tokens: max_inference_tokens,
|
63 |
+
sorting_keys: ['tokens']
|
64 |
+
}
|
65 |
+
},
|
66 |
+
|
67 |
+
model: {
|
68 |
+
type: "span",
|
69 |
+
word_embedding: {
|
70 |
+
token_embedders: {
|
71 |
+
"pieces": {
|
72 |
+
type: "pretrained_transformer",
|
73 |
+
model_name: pretrained_model,
|
74 |
+
}
|
75 |
+
},
|
76 |
+
},
|
77 |
+
span_extractor: {
|
78 |
+
type: 'combo',
|
79 |
+
sub_extractors: [
|
80 |
+
{
|
81 |
+
type: 'self_attentive',
|
82 |
+
},
|
83 |
+
{
|
84 |
+
type: 'bidirectional_endpoint',
|
85 |
+
}
|
86 |
+
]
|
87 |
+
},
|
88 |
+
span_finder: {
|
89 |
+
type: "bio",
|
90 |
+
bio_encoder: {
|
91 |
+
type: "lstm",
|
92 |
+
hidden_size: bio_dim,
|
93 |
+
num_layers: bio_layers,
|
94 |
+
bidirectional: true,
|
95 |
+
dropout: dropout,
|
96 |
+
},
|
97 |
+
no_label: false,
|
98 |
+
},
|
99 |
+
span_typing: {
|
100 |
+
type: 'mlp',
|
101 |
+
hidden_dims: span_typing_dims,
|
102 |
+
},
|
103 |
+
metrics: [{type: "srl"}],
|
104 |
+
|
105 |
+
typing_loss_factor: typing_loss_factor,
|
106 |
+
ontology_path: ontology_path,
|
107 |
+
label_dim: label_dim,
|
108 |
+
max_decoding_spans: 128,
|
109 |
+
max_recursion_depth: 2,
|
110 |
+
debug: debug,
|
111 |
+
},
|
112 |
+
|
113 |
+
trainer: {
|
114 |
+
num_epochs: 128,
|
115 |
+
patience: patience,
|
116 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
117 |
+
validation_metric: "+em_f",
|
118 |
+
grad_norm: 10,
|
119 |
+
grad_clipping: 10,
|
120 |
+
num_gradient_accumulation_steps: grad_acc,
|
121 |
+
optimizer: {
|
122 |
+
type: "transformer",
|
123 |
+
base: {
|
124 |
+
type: "adam",
|
125 |
+
lr: 1e-3,
|
126 |
+
},
|
127 |
+
embeddings_lr: 0.0,
|
128 |
+
encoder_lr: 1e-5,
|
129 |
+
pooler_lr: 1e-5,
|
130 |
+
layer_fix: layer_fix,
|
131 |
+
}
|
132 |
+
},
|
133 |
+
|
134 |
+
cuda_devices:: cuda_devices,
|
135 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
136 |
+
"cuda_devices": cuda_devices
|
137 |
+
},
|
138 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
139 |
+
}
|
config/fn/fn.train-football.jsonnet
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
local dataset_path = "/home/p289731/cloned/lome/preproc/framenet_jsonl/full";
|
5 |
+
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
#local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
11 |
+
local pretrained_model = env.str("ENCODER", "/data/p289731/cloned/lome-models/models/xlm-roberta-football/");
|
12 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
13 |
+
|
14 |
+
# model
|
15 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
16 |
+
local dropout = env.json("DROPOUT", "0.2");
|
17 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
18 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
19 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
20 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
21 |
+
|
22 |
+
# loader
|
23 |
+
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
24 |
+
local max_training_tokens = 512;
|
25 |
+
local max_inference_tokens = 1024;
|
26 |
+
|
27 |
+
# training
|
28 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
29 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
30 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
31 |
+
local cuda_devices = [0];
|
32 |
+
local patience = 32;
|
33 |
+
|
34 |
+
{
|
35 |
+
dataset_reader: {
|
36 |
+
type: "semantic_role_labeling",
|
37 |
+
debug: debug,
|
38 |
+
pretrained_model: "xlm-roberta-large",
|
39 |
+
ignore_label: false,
|
40 |
+
[ if debug then "max_instances" ]: 128,
|
41 |
+
event_smoothing_factor: smoothing_factor,
|
42 |
+
arg_smoothing_factor: smoothing_factor,
|
43 |
+
},
|
44 |
+
train_data_path: dataset_path + "/train.jsonl",
|
45 |
+
validation_data_path: dataset_path + "/dev.jsonl",
|
46 |
+
test_data_path: dataset_path + "/test.jsonl",
|
47 |
+
|
48 |
+
datasets_for_vocab_creation: ["train"],
|
49 |
+
|
50 |
+
data_loader: {
|
51 |
+
batch_sampler: {
|
52 |
+
type: "mix_sampler",
|
53 |
+
max_tokens: max_training_tokens,
|
54 |
+
sorting_keys: ['tokens'],
|
55 |
+
sampling_ratios: {
|
56 |
+
'exemplar': exemplar_ratio,
|
57 |
+
'full text': 1.0,
|
58 |
+
}
|
59 |
+
}
|
60 |
+
},
|
61 |
+
|
62 |
+
validation_data_loader: {
|
63 |
+
batch_sampler: {
|
64 |
+
type: "max_tokens_sampler",
|
65 |
+
max_tokens: max_inference_tokens,
|
66 |
+
sorting_keys: ['tokens']
|
67 |
+
}
|
68 |
+
},
|
69 |
+
|
70 |
+
model: {
|
71 |
+
type: "span",
|
72 |
+
word_embedding: {
|
73 |
+
token_embedders: {
|
74 |
+
"pieces": {
|
75 |
+
type: "pretrained_transformer",
|
76 |
+
model_name: pretrained_model,
|
77 |
+
}
|
78 |
+
},
|
79 |
+
},
|
80 |
+
span_extractor: {
|
81 |
+
type: 'combo',
|
82 |
+
sub_extractors: [
|
83 |
+
{
|
84 |
+
type: 'self_attentive',
|
85 |
+
},
|
86 |
+
{
|
87 |
+
type: 'bidirectional_endpoint',
|
88 |
+
}
|
89 |
+
]
|
90 |
+
},
|
91 |
+
span_finder: {
|
92 |
+
type: "bio",
|
93 |
+
bio_encoder: {
|
94 |
+
type: "lstm",
|
95 |
+
hidden_size: bio_dim,
|
96 |
+
num_layers: bio_layers,
|
97 |
+
bidirectional: true,
|
98 |
+
dropout: dropout,
|
99 |
+
},
|
100 |
+
no_label: false,
|
101 |
+
},
|
102 |
+
span_typing: {
|
103 |
+
type: 'mlp',
|
104 |
+
hidden_dims: span_typing_dims,
|
105 |
+
},
|
106 |
+
metrics: [{type: "srl"}],
|
107 |
+
|
108 |
+
typing_loss_factor: typing_loss_factor,
|
109 |
+
ontology_path: null,
|
110 |
+
label_dim: label_dim,
|
111 |
+
max_decoding_spans: 128,
|
112 |
+
max_recursion_depth: 2,
|
113 |
+
debug: debug,
|
114 |
+
},
|
115 |
+
|
116 |
+
trainer: {
|
117 |
+
num_epochs: 128,
|
118 |
+
patience: patience,
|
119 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
120 |
+
validation_metric: "+em_f",
|
121 |
+
grad_norm: 10,
|
122 |
+
grad_clipping: 10,
|
123 |
+
num_gradient_accumulation_steps: grad_acc,
|
124 |
+
optimizer: {
|
125 |
+
type: "transformer",
|
126 |
+
base: {
|
127 |
+
type: "adam",
|
128 |
+
lr: 1e-3,
|
129 |
+
},
|
130 |
+
embeddings_lr: 0.0,
|
131 |
+
encoder_lr: 1e-5,
|
132 |
+
pooler_lr: 1e-5,
|
133 |
+
layer_fix: layer_fix,
|
134 |
+
}
|
135 |
+
},
|
136 |
+
|
137 |
+
cuda_devices:: cuda_devices,
|
138 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
139 |
+
"cuda_devices": cuda_devices
|
140 |
+
},
|
141 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
142 |
+
}
|
config/fn/fn.train3.jsonnet
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
local env = import "../env.jsonnet";
|
2 |
+
|
3 |
+
#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
|
4 |
+
local dataset_path = "/home/p289731/cloned/lome/preproc/framenet_jsonl/full";
|
5 |
+
local ontology_path = "data/framenet/ontology.tsv";
|
6 |
+
|
7 |
+
local debug = false;
|
8 |
+
|
9 |
+
# reader
|
10 |
+
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
|
11 |
+
local smoothing_factor = env.json("SMOOTHING", "0.1");
|
12 |
+
|
13 |
+
# model
|
14 |
+
local label_dim = env.json("LABEL_DIM", "64");
|
15 |
+
local dropout = env.json("DROPOUT", "0.2");
|
16 |
+
local bio_dim = env.json("BIO_DIM", "512");
|
17 |
+
local bio_layers = env.json("BIO_LAYER", "2");
|
18 |
+
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
|
19 |
+
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");
|
20 |
+
|
21 |
+
# loader
|
22 |
+
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
|
23 |
+
local max_training_tokens = 512;
|
24 |
+
local max_inference_tokens = 1024;
|
25 |
+
|
26 |
+
# training
|
27 |
+
local layer_fix = env.json("LAYER_FIX", "0");
|
28 |
+
local grad_acc = env.json("GRAD_ACC", "1");
|
29 |
+
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
|
30 |
+
local cuda_devices = [0];
|
31 |
+
local patience = 32;
|
32 |
+
|
33 |
+
{
|
34 |
+
dataset_reader: {
|
35 |
+
type: "semantic_role_labeling",
|
36 |
+
debug: debug,
|
37 |
+
pretrained_model: pretrained_model,
|
38 |
+
ignore_label: false,
|
39 |
+
[ if debug then "max_instances" ]: 128,
|
40 |
+
event_smoothing_factor: smoothing_factor,
|
41 |
+
arg_smoothing_factor: smoothing_factor,
|
42 |
+
},
|
43 |
+
train_data_path: dataset_path + "/train.jsonl",
|
44 |
+
validation_data_path: dataset_path + "/dev.jsonl",
|
45 |
+
test_data_path: dataset_path + "/test.jsonl",
|
46 |
+
|
47 |
+
datasets_for_vocab_creation: ["train"],
|
48 |
+
|
49 |
+
data_loader: {
|
50 |
+
batch_sampler: {
|
51 |
+
type: "mix_sampler",
|
52 |
+
max_tokens: max_training_tokens,
|
53 |
+
sorting_keys: ['tokens'],
|
54 |
+
sampling_ratios: {
|
55 |
+
'exemplar': exemplar_ratio,
|
56 |
+
'full text': 1.0,
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
|
61 |
+
validation_data_loader: {
|
62 |
+
batch_sampler: {
|
63 |
+
type: "max_tokens_sampler",
|
64 |
+
max_tokens: max_inference_tokens,
|
65 |
+
sorting_keys: ['tokens']
|
66 |
+
}
|
67 |
+
},
|
68 |
+
|
69 |
+
model: {
|
70 |
+
type: "span",
|
71 |
+
word_embedding: {
|
72 |
+
token_embedders: {
|
73 |
+
"pieces": {
|
74 |
+
type: "pretrained_transformer",
|
75 |
+
model_name: pretrained_model,
|
76 |
+
}
|
77 |
+
},
|
78 |
+
},
|
79 |
+
span_extractor: {
|
80 |
+
type: 'combo',
|
81 |
+
sub_extractors: [
|
82 |
+
{
|
83 |
+
type: 'self_attentive',
|
84 |
+
},
|
85 |
+
{
|
86 |
+
type: 'bidirectional_endpoint',
|
87 |
+
}
|
88 |
+
]
|
89 |
+
},
|
90 |
+
span_finder: {
|
91 |
+
type: "bio",
|
92 |
+
bio_encoder: {
|
93 |
+
type: "lstm",
|
94 |
+
hidden_size: bio_dim,
|
95 |
+
num_layers: bio_layers,
|
96 |
+
bidirectional: true,
|
97 |
+
dropout: dropout,
|
98 |
+
},
|
99 |
+
no_label: false,
|
100 |
+
},
|
101 |
+
span_typing: {
|
102 |
+
type: 'mlp',
|
103 |
+
hidden_dims: span_typing_dims,
|
104 |
+
},
|
105 |
+
metrics: [{type: "srl"}],
|
106 |
+
|
107 |
+
typing_loss_factor: typing_loss_factor,
|
108 |
+
ontology_path: null,
|
109 |
+
label_dim: label_dim,
|
110 |
+
max_decoding_spans: 128,
|
111 |
+
max_recursion_depth: 2,
|
112 |
+
debug: debug,
|
113 |
+
},
|
114 |
+
|
115 |
+
trainer: {
|
116 |
+
num_epochs: 128,
|
117 |
+
patience: patience,
|
118 |
+
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
|
119 |
+
validation_metric: "+em_f",
|
120 |
+
grad_norm: 10,
|
121 |
+
grad_clipping: 10,
|
122 |
+
num_gradient_accumulation_steps: grad_acc,
|
123 |
+
optimizer: {
|
124 |
+
type: "transformer",
|
125 |
+
base: {
|
126 |
+
type: "adam",
|
127 |
+
lr: 1e-3,
|
128 |
+
},
|
129 |
+
embeddings_lr: 0.0,
|
130 |
+
encoder_lr: 1e-5,
|
131 |
+
pooler_lr: 1e-5,
|
132 |
+
layer_fix: layer_fix,
|
133 |
+
}
|
134 |
+
},
|
135 |
+
|
136 |
+
cuda_devices:: cuda_devices,
|
137 |
+
[if std.length(cuda_devices) > 1 then "distributed"]: {
|
138 |
+
"cuda_devices": cuda_devices
|
139 |
+
},
|
140 |
+
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
|
141 |
+
}
|
docs/data.md
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Data Format
|
2 |
+
|
3 |
+
You can pass SpanFinder data in any format, as long as you implement a dataset reader that inherits from SpanReader. We also provide a Concrete dataset reader. In addition, SpanFinder comes with its own JSON data format, which enables richer features for training and modeling.
|
4 |
+
|
5 |
+
The minimal example of the JSON is
|
6 |
+
|
7 |
+
```JSON
|
8 |
+
{
|
9 |
+
"meta": {
|
10 |
+
"fully_annotated": true
|
11 |
+
},
|
12 |
+
"tokens": ["Bob", "attacks", "the", "building", "."],
|
13 |
+
"annotations": [
|
14 |
+
{
|
15 |
+
"span": [1, 1],
|
16 |
+
"label": "Attack",
|
17 |
+
"children": [
|
18 |
+
{
|
19 |
+
"span": [0, 0],
|
20 |
+
"label": "Assailant",
|
21 |
+
"children": []
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"span": [2, 3],
|
25 |
+
"label": "Victim",
|
26 |
+
"children": []
|
27 |
+
}
|
28 |
+
]
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"span": [3, 3],
|
32 |
+
"label": "Buildings",
|
33 |
+
"children": [
|
34 |
+
{
|
35 |
+
"span": [3, 3],
|
36 |
+
"label": "Building",
|
37 |
+
"children": []
|
38 |
+
}
|
39 |
+
]
|
40 |
+
}
|
41 |
+
]
|
42 |
+
}
|
43 |
+
```
|
44 |
+
|
45 |
+
You can have nested spans with unlimited depth.
|
46 |
+
|
47 |
+
## Meta-info for Semantic Role Labeling (SRL)
|
48 |
+
|
49 |
+
```JSON
|
50 |
+
{
|
51 |
+
"ontology": {
|
52 |
+
"event": ["Violence-Attack"],
|
53 |
+
"argument": ["Agent", "Patient"],
|
54 |
+
"link": [[0, 0], [0, 1]]
|
55 |
+
},
|
56 |
+
"ontology_mapping": {
|
57 |
+
"event": {
|
58 |
+
"Attack": ["Violence-Attack", 0.8]
|
59 |
+
},
|
60 |
+
"argument": {
|
61 |
+
"Assailant": ["Agent", 0.95],
|
62 |
+
"Victim": ["Patient", 0.9]
|
63 |
+
}
|
64 |
+
}
|
65 |
+
}
|
66 |
+
```
|
67 |
+
|
68 |
+
TODO: Guanghui needs to doc this.
|
docs/mapping.md
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Mapping
|
2 |
+
|
3 |
+
If a file is passed to the predictor,
|
4 |
+
the predicted spans will be converted into a new ontology.
|
5 |
+
The file format should be
|
6 |
+
|
7 |
+
`<original parent label>\t<original label>\t<new label>`
|
8 |
+
|
9 |
+
If the predicted span is labeled as `<original label>`,
|
10 |
+
and its parent is labeled as `<original parent label>`,
|
11 |
+
it will be re-labeled as `<new label>`.
|
12 |
+
If no rule matches, the span and all of its descendants will be ignored.
|
13 |
+
|
14 |
+
The `<original parent label>` is optional.
|
15 |
+
If the parent label is `@@VIRTUAL_ROOT@@`, then this rule matches the first layer of spans.
|
16 |
+
In semantic parsing, it matches events.
|
17 |
+
If the parent label is `*`, it means it can match anything.
|
docs/training.md
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Training Span Finder
|
2 |
+
|
3 |
+
## Metrics explanation
|
4 |
+
|
5 |
+
By default, the following metrics will be used
|
6 |
+
|
7 |
+
- em: (includes emp, emr, emf) Exact matching metric. A span is exactly matched iff its parent, boundaries, and label are all correctly predicted. Note that if a parent is not correctly predicted, all its children will be treated as false negatives. In other words, errors are propagated.
|
8 |
+
- sm: (includes smp, smr, smf) Span matching metric. Similar to EM but does not check the labels. If you observe high SM but low EM, then the typing system is not working properly.
|
9 |
+
- finder: (includes finder-p, finder-r, finder-f) A metric to measure how well the model can find spans. Different from SM, in this metric, gold parent will be provided, so the errors will not be propagated.
|
10 |
+
- typing_acc: Span typing accuracy with gold parent and gold span boundaries.
|
11 |
+
|
12 |
+
|
13 |
+
Optional metrics that might be useful for SRL-style tasks. Put the following line
|
14 |
+
|
15 |
+
`metrics: [{type: "srl", check_type: true}],`
|
16 |
+
|
17 |
+
to the span model in the config file to turn on this feature. You will see the following two metrics:
|
18 |
+
|
19 |
+
- trigger: (include trigger-p, trigger-r, trigger-f) It measures how well the system can find the event triggers (or frames in FrameNet). If `check_type` is True, it also checks the event label.
|
20 |
+
- role: (include role-p, role-r, role-f) It measures how well the system can find roles. Note if the event/trigger is not found, all its children will be treated as false negative. If `check_type` is True, it also checks the role label.
|
21 |
+
|
22 |
+
## Ontology Constraint
|
23 |
+
|
24 |
+
In some cases, certain spans can only be attached to specific spans.
|
25 |
+
E.g., in SRL tasks, event can only be attached to the VirtualRoot, and arguments can only be attached to the events.
|
26 |
+
The constraints of FrameNet are stricter: each frame has its own specific frame elements.
|
27 |
+
|
28 |
+
These constraints can be abstracted as a boolean square matrix whose columns and rows are span labels including VIRTUAL_ROOT.
|
29 |
+
Say it's `M`; label2 can be label1's child iff `M[label1, label2]` is True.
|
30 |
+
|
31 |
+
You can specify ontology constraint for SpanFinder with the `ontology_path` argument in the SpanModel class.
|
32 |
+
The format of this file is simple. Each line is one row of the `M` matrix:
|
33 |
+
|
34 |
+
```parent_label child_label_1 child_label_2```
|
35 |
+
|
36 |
+
which means child1 and child2 can be attached to the parent.
|
37 |
+
Both `parent_label` and `child_label` are strings, and the space between them should be `\t` not ` `.
|
38 |
+
If a parent_label is missing from the file, by default all children will be attachable.
|
39 |
+
If this file is not provided, all labels can be attached to all labels.
|
40 |
+
|
41 |
+
An example of this file can be found at CLSP grid:
|
42 |
+
|
43 |
+
```/home/gqin2/data/framenet/ontology.tsv```
|
44 |
+
|
45 |
+
## Typing loss factor
|
46 |
+
|
47 |
+
(This section might be updated soon -- Guanghui)
|
48 |
+
|
49 |
+
The loss comes from two sources: SpanFinding and SpanTyping modules.
|
50 |
+
SpanFinding uses a CRF and uses probability as its loss, whereas SpanTyping uses cross entropy.
|
51 |
+
They're of different scale so we have to re-scale them.
|
52 |
+
The formula is:
|
53 |
+
|
54 |
+
`loss = finding_loss + typing_loss_factor * typing_loss`
|
55 |
+
|
56 |
+
Empirically Guanghui finds the optimal `typing_loss_factor` for FrameNet system is 750.
|
57 |
+
|
58 |
+
In theory, we should put the two losses to the same space. Guanghui is looking into this, and this might be solved in SpanFinder 0.0.2.
|
59 |
+
|
60 |
+
## Optimizer
|
61 |
+
|
62 |
+
A custom optimizer `transformer` is used for span finder.
|
63 |
+
It allows you to specify special learning rate for transformer encoder and fix the parameters of certain modules.
|
64 |
+
Empirically, fixing the embeddings (so that only the encoder and pooler are fine-tuned) and training with lr=1e-5 yields the best results for FrameNet.
|
65 |
+
For usage and more details, see its class doc.
|
evalita_scores.txt
ADDED
File without changes
|
model.mod.tar.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f5be5aeef50b2f4840317b8196c51186f9f138a853dc1eb2da980b1947ceb23
|
3 |
+
size 1795605184
|
requirements.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
allennlp>=2.0.0
|
2 |
+
allennlp-models>=2.0.0
|
3 |
+
transformers>=4.0.0 # Why is huggingface so unstable?
|
4 |
+
numpy
|
5 |
+
torch>=1.7.0,<1.8.0
|
6 |
+
tqdm
|
7 |
+
nltk
|
8 |
+
overrides
|
9 |
+
concrete
|
10 |
+
flask
|
11 |
+
scipy
|
12 |
+
https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.0.0/it_core_news_md-3.0.0-py3-none-any.whl
|
13 |
+
https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.0.0/en_core_web_md-3.0.0-py3-none-any.whl
|
14 |
+
https://github.com/explosion/spacy-models/releases/download/nl_core_news_md-3.0.0/nl_core_news_md-3.0.0-py3-none-any.whl
|
15 |
+
https://github.com/explosion/spacy-models/releases/download/xx_sent_ud_sm-3.0.0/xx_sent_ud_sm-3.0.0-py3-none-any.whl
|
scripts/__pycache__/predict_concrete.cpython-37.pyc
ADDED
Binary file (1.35 kB). View file
|
|
scripts/__pycache__/predict_concrete.cpython-38.pyc
ADDED
Binary file (1.35 kB). View file
|
|
scripts/__pycache__/predict_concrete.cpython-39.pyc
ADDED
Binary file (1.35 kB). View file
|
|
scripts/__pycache__/predict_force.cpython-39.pyc
ADDED
Binary file (1.15 kB). View file
|
|
scripts/__pycache__/repl.cpython-39.pyc
ADDED
Binary file (440 Bytes). View file
|
|
scripts/aida_experiment/predict_aida.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import copy
|
4 |
+
from collections import defaultdict
|
5 |
+
from argparse import ArgumentParser
|
6 |
+
from tqdm import tqdm
|
7 |
+
import random
|
8 |
+
from tqdm import tqdm
|
9 |
+
from scripts.predict_concrete import read_kairos
|
10 |
+
|
11 |
+
from sftp import SpanPredictor
|
12 |
+
|
13 |
+
|
14 |
+
# Command-line driver: for every annotated AIDA span, force-decode the span
# with a trained SpanPredictor and append the top-k predicted labels (with
# their scores) to a JSON-lines file, next to the gold KAIROS label.
parser = ArgumentParser()
parser.add_argument('aida', type=str)    # path to the JSON corpus to label
parser.add_argument('model', type=str)   # path handed to SpanPredictor.from_path
parser.add_argument('dst', type=str)     # output JSON-lines file (opened in append mode)
parser.add_argument('--topk', type=int, default=10)
parser.add_argument('--device', type=int, default=0)
args = parser.parse_args()

k = args.topk
# Read the corpus through a context manager so the handle is closed right
# after parsing instead of being left to the garbage collector.
with open(args.aida) as corpus_fp:
    corpus = json.load(corpus_fp)
predictor = SpanPredictor.from_path(args.model, cuda_device=args.device)
# Index -> label string for the 'span_label' vocabulary namespace.
idx2fn = predictor._model.vocab.get_index_to_token_vocabulary('span_label')
# Fixed seed keeps the shuffled processing order reproducible across runs.
random.seed(42)
random.shuffle(corpus)


# Append mode is kept so existing results in `dst` are preserved; the context
# manager guarantees buffered records are flushed and the file is closed even
# if prediction fails part-way through the corpus.
with open(args.dst, 'a') as output_fp:
    for line in tqdm(corpus):
        tokens, ann = line['tokens'], line['annotation']
        start, end, kairos_label = ann['start_idx'], ann['end_idx'], ann['label']
        # NOTE(review): presumably force_decode returns one score vector over
        # span labels per requested span -- confirm against SpanPredictor.
        prob_dist = predictor.force_decode(tokens, [(start, end)])[0]
        topk_indices = prob_dist.argsort(descending=True)[:k]
        prob = prob_dist[topk_indices].tolist()
        frames = [(idx2fn[int(idx)], p) for idx, p in zip(topk_indices, prob)]
        output_fp.write(json.dumps({
            'tokens': tokens,
            'frames': frames,
            'kairos': kairos_label
        }) + '\n')
|
scripts/aida_experiment/read_aida.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import copy
|
4 |
+
from collections import defaultdict
|
5 |
+
from argparse import ArgumentParser
|
6 |
+
from tqdm import tqdm
|
7 |
+
|
8 |
+
|
9 |
+
def extract_sentences(raw_doc):
    """Group the tokens of a document by the sentence that contains them.

    Reads the 'Sentence' and 'Token' lists under
    raw_doc['_views']['_InitialView']; each entry supplies an 'end' offset and
    an optional 'begin' offset (taken as 0 when absent).

    Returns a 3-tuple:
      * sentence_tokens: one [(start, end), token_spans, events] list per
        sentence; events is created empty here.
      * begin2sentence: token begin offset -> (sentence index, token index).
      * end2sentence: token end offset -> (sentence index, token index).

    Asserts that every token falls inside exactly one sentence.
    """
    view = raw_doc['_views']['_InitialView']

    # One record per sentence: [(start, end), list_tokens, event_list]
    sentence_tokens = [
        [(sent.get('begin', 0), sent.get('end')), [], []]
        for sent in view['Sentence']
    ]

    begin2sentence = {}
    end2sentence = {}
    for tok in view['Token']:
        tok_start = tok.get('begin', 0)
        tok_end = tok.get('end')
        owner = None
        for idx, record in enumerate(sentence_tokens):
            (lo, hi), members = record[0], record[1]
            span = range(lo, hi)
            # The token belongs here only if its first and last characters
            # both lie inside the sentence boundaries.
            if tok_start not in span or (tok_end - 1) not in span:
                continue
            assert owner is None
            position = (idx, len(members))
            begin2sentence[tok_start] = position
            end2sentence[tok_end] = position
            members.append((tok_start, tok_end))
            owner = idx
        assert owner is not None
    return sentence_tokens, begin2sentence, end2sentence
|
27 |
+
|
28 |
+
|
29 |
+
def read_aida2kairos(mapping_path):
    """Read a tab-separated KAIROS -> AIDA table and invert it.

    Each non-blank line has the form ``<kairos label>\\t<aida labels>``, where
    the AIDA labels are separated by whitespace and/or commas.

    Returns a dict mapping every AIDA label to its KAIROS label. When an AIDA
    label occurs more than once a warning is printed and the last occurrence
    wins.

    Raises ``ValueError`` for a non-blank line that does not contain exactly
    one tab.
    """
    # The original test was ``aida in 'x?'``, a substring check. For the
    # non-empty tokens produced by split() it skipped exactly these three.
    placeholders = ('x', '?', 'x?')

    mapping = dict()
    with open(mapping_path) as mapping_fp:
        for line in mapping_fp:
            if not line.strip():
                # Tolerate blank lines instead of failing on the unpack below.
                continue
            kairos, aida_list = line.replace('\n', '').replace(',', '').split('\t')
            for aida in aida_list.split():
                if aida in placeholders:
                    continue
                if aida in mapping:
                    print('warning:', aida, 'already in the mapping, repeated.')
                mapping[aida] = kairos
    return mapping
|
40 |
+
|
41 |
+
|
42 |
+
def read_aida(corpus_path, mapping_path):
    """Collect single-sentence event annotations from an AIDA corpus folder.

    ``corpus_path`` contains one sub-folder per event; the part of the folder
    name before the first ``'-'`` is looked up in the table read from
    ``mapping_path`` (see ``read_aida2kairos``) to obtain the output label.
    Folders whose name is not in the table are skipped with a warning.

    Every ``*json`` document in an event folder is split into sentences with
    ``extract_sentences``. Each entry of ``_referenced_fss`` other than ``'1'``
    is treated as an annotation span; entry ``'1'`` supplies the document text
    (``sofaString``). An annotation is dropped when it is flagged as a negative
    example, when its offsets do not coincide with token boundaries, or when
    it starts and ends in different sentences. Counts of each are printed.

    Returns a list of dicts ``{'tokens': [...], 'annotation': {'start_idx',
    'end_idx', 'label'}}`` where the indices are token positions inside the
    sentence.
    """
    print('reading aida data')
    n_negative, n_span_mismatch, n_diff = 0, 0, 0
    outputs = list()
    mapping = read_aida2kairos(mapping_path)
    for event_fn in tqdm(os.listdir(corpus_path)):
        event_name = event_fn.split('-')[0]
        if event_name not in mapping:
            print('warning:', event_name, 'not in the mapping.')
            continue
        event_name = mapping[event_name]

        event_dir = os.path.join(corpus_path, event_fn)
        for doc_name in os.listdir(event_dir):
            if not doc_name.endswith('json'):
                continue
            with open(os.path.join(event_dir, doc_name)) as doc_fp:
                raw_doc = json.load(doc_fp)
            sentences, begin2sentence, end2sentence = extract_sentences(raw_doc)
            for fss_no, fss in raw_doc['_referenced_fss'].items():
                if fss_no == '1':
                    # Entry '1' holds the document text, not an annotation.
                    continue
                # Tokens and sentences are read with .get('begin', 0); do the
                # same here so a span starting at offset 0 is not a KeyError.
                begin, end, is_negative = fss.get('begin', 0), fss['end'], fss['negative_example']
                if is_negative:
                    n_negative += 1
                    continue
                if begin not in begin2sentence or end not in end2sentence:
                    n_span_mismatch += 1
                    continue
                (b_idx_sent, b_idx_token), (e_idx_sent, e_idx_token) = begin2sentence[begin], end2sentence[end]
                if b_idx_sent != e_idx_sent:
                    n_diff += 1
                    continue
                sentences[b_idx_sent][2].append([b_idx_token, e_idx_token])

            text = raw_doc['_referenced_fss']['1']['sofaString']

            for _, tokens, events in sentences:
                # Turn character spans into the token strings themselves.
                tokens = [text[start:end] for start, end in tokens]
                for (start, end) in events:
                    outputs.append({
                        'tokens': copy.deepcopy(tokens),
                        'annotation': {
                            'start_idx': start,
                            'end_idx': end,
                            'label': event_name,
                        }
                    })

    print(f'Loaded {len(outputs)} annotations.')
    print(f'{n_negative} negative annotations are ignored.')
    print(f'{n_span_mismatch} mismatched annotations are ignored.')
    print(f'{n_diff} annotations across sentences are ignored.')

    return outputs
|
95 |
+
|
96 |
+
|
97 |
+
if __name__ == '__main__':
    # Usage: read_aida.py <aida corpus folder> <aida2kairos tsv> <output json>
    parser = ArgumentParser()
    parser.add_argument('aida', type=str)
    parser.add_argument('aida2kairos', type=str)
    parser.add_argument('dst', type=str)
    args = parser.parse_args()

    aida = read_aida(args.aida, args.aida2kairos)

    # Close the output explicitly so the JSON is fully flushed to disk.
    with open(args.dst, 'w') as dst_fp:
        json.dump(aida, dst_fp)
|
107 |
+
|
scripts/aida_experiment/test_mapping.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import copy
|
4 |
+
from collections import defaultdict
|
5 |
+
from argparse import ArgumentParser
|
6 |
+
from tqdm import tqdm
|
7 |
+
import random
|
8 |
+
from tqdm import tqdm
|
9 |
+
from scripts.predict_concrete import read_kairos
|
10 |
+
|
11 |
+
from sftp import SpanPredictor
|
12 |
+
|
13 |
+
|
14 |
+
# Score a span predictor on the annotations produced by read_aida.py: every
# corpus item carries one gold annotation, and a prediction counts as a span
# match when its token span equals the gold span, and as an exact match when
# its label agrees as well.
parser = ArgumentParser()
parser.add_argument('aida', type=str)
parser.add_argument('model', type=str)
parser.add_argument('fn2kairos', type=str, default=None)
parser.add_argument('--device', type=int, default=3)
args = parser.parse_args()

with open(args.aida) as corpus_fp:
    corpus = json.load(corpus_fp)
mapping = read_kairos(args.fn2kairos)
predictor = SpanPredictor.from_path(args.model, cuda_device=args.device)
# Fixed seed so the shuffled order (and hence the batches) is reproducible.
random.seed(42)
random.shuffle(corpus)
batch_size = 128


def batchify(a_list):
    """Yield consecutive chunks of ``a_list`` holding at most ``batch_size`` items."""
    cur = list()
    for item in a_list:
        cur.append(item)
        if len(cur) == batch_size:
            yield cur
            cur = list()
    if len(cur) > 0:
        yield cur


batches = list(batchify(corpus))


n_total = n_pos = n_span_match = 0
for lines in tqdm(batches):
    # Count the items actually present: the last batch is usually smaller
    # than batch_size, so adding batch_size would inflate the denominator.
    n_total += len(lines)
    prediction_lines = predictor.predict_batch_sentences(
        [line['tokens'] for line in lines], max_tokens=1024, ontology_mapping=mapping
    )
    for preds, ann in zip(prediction_lines, lines):
        ann = ann['annotation']
        preds = preds['prediction']
        for pred in preds:
            if pred['start_idx'] == ann['start_idx'] and pred['end_idx'] == ann['end_idx']:
                n_span_match += 1
                if pred['label'] == ann['label']:
                    n_pos += 1

# Avoid dividing by zero when the corpus is empty (both counters are 0 then).
denominator = max(n_total, 1)
print(f'exact match precision: {n_pos * 100 / denominator:.3f}')
print(f'span only precision: {n_span_match * 100 / denominator:.3f}')
|
scripts/archive/eval_tie.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import json
|
4 |
+
from pprint import pprint
|
5 |
+
from collections import defaultdict
|
6 |
+
|
7 |
+
from sftp.metrics.exact_match import ExactMatch
|
8 |
+
|
9 |
+
|
10 |
+
def evaluate():
    """Score predicted frames and frame elements against a gold JSONL file.

    Takes exactly two command-line arguments: the gold file and the prediction
    file, both JSON lines keyed by ``meta['sentence ID']``. Gold lines hold a
    ``'frame'`` list (``target``, ``name``, ``fe``); each prediction line is one
    frame with ``start_idx``, ``end_idx``, ``label`` and ``children``.

    For every gold sentence the frames (parent 0) and their elements (parent =
    1-based frame position) are flattened into one list and fed to two
    ``ExactMatch`` metrics, constructed with ``True`` and ``False``, whose
    results are printed under the headings ``EM`` and ``SM``.
    """
    em = ExactMatch(True)
    sm = ExactMatch(False)
    gold_file, pred_file = sys.argv[1:]

    # Parse each gold line once; a repeated sentence ID keeps the last line.
    test_sentences = dict()
    with open(gold_file) as gold_fp:
        for line in gold_fp:
            gold_entry = json.loads(line)
            test_sentences[gold_entry['meta']['sentence ID']] = gold_entry

    pred_sentences = defaultdict(list)
    with open(pred_file) as pred_fp:
        for line in pred_fp:
            one_pred = json.loads(line)
            pred_sentences[one_pred['meta']['sentence ID']].append(one_pred)

    span_keys = ["start_idx", "end_idx", "label"]
    for sent_id, gold_sent in test_sentences.items():
        # Sentences without any prediction are scored against an empty list.
        pred_sent = pred_sentences.get(sent_id, [])
        pred_frames, pred_fes = [], []
        for fr_idx, fr in enumerate(pred_sent):
            pred_frames.append({key: fr[key] for key in span_keys})
            pred_frames[-1]['parent'] = 0
            for fe in fr['children']:
                pred_fes.append({key: fe[key] for key in span_keys})
                pred_fes[-1]['parent'] = fr_idx+1
        pred_to_eval = pred_frames + pred_fes

        gold_frames, gold_fes = [], []
        for fr_idx, fr in enumerate(gold_sent['frame']):
            gold_frames.append({
                'start_idx': fr['target'][0], 'end_idx': fr['target'][-1], "label": fr['name'], 'parent': 0
            })
            for start_idx, end_idx, fe_name in fr['fe']:
                gold_fes.append({
                    "start_idx": start_idx, "end_idx": end_idx, "label": fe_name, "parent": fr_idx+1
                })
        gold_to_eval = gold_frames + gold_fes
        em(pred_to_eval, gold_to_eval)
        sm(pred_to_eval, gold_to_eval)

    print('EM')
    pprint(em.get_metric(True))
    print('SM')
    pprint(sm.get_metric(True))


if __name__ == '__main__':
    evaluate()
|
scripts/archive/frame_similarity.py
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from argparse import ArgumentParser
|
2 |
+
from collections import defaultdict
|
3 |
+
|
4 |
+
from torch import nn
|
5 |
+
from copy import deepcopy
|
6 |
+
import torch
|
7 |
+
import os
|
8 |
+
import json
|
9 |
+
|
10 |
+
from sftp import SpanPredictor
|
11 |
+
import nltk
|
12 |
+
|
13 |
+
|
14 |
+
def shift_grid_cos_sim(mat: torch.Tensor):
    """Pairwise cosine similarity between the rows of ``mat``, rescaled to [0, 1].

    ``mat`` must be a 2-D tensor of shape ``(n, d)``. The result has shape
    ``(n, n)``; entry ``[i, j]`` is ``(cos(mat[i], mat[j]) + 1) / 2``.

    Raises ``ValueError`` if ``mat`` is not 2-D.
    """
    if mat.dim() != 2:
        raise ValueError(f'expected a 2-D tensor, got {mat.dim()} dimension(s)')
    n_rows = mat.shape[0]
    # Broadcast the rows against each other: both views have shape (n, n, d).
    mat1 = mat.unsqueeze(0).expand(n_rows, -1, -1)
    mat2 = mat.unsqueeze(1).expand(-1, n_rows, -1)
    cos = nn.CosineSimilarity(2)
    # Cosine lies in [-1, 1]; shift and halve to obtain a score in [0, 1].
    sim = (cos(mat1, mat2) + 1) / 2
    return sim
|
20 |
+
|
21 |
+
|
22 |
+
def all_frames():
    """Return every frame of the NLTK ``framenet_v17`` corpus.

    Calls ``nltk.download('framenet_v17')`` on every invocation before
    reading the corpus, so callers that need the frames more than once should
    keep the returned value.
    """
    nltk.download('framenet_v17')
    fn = nltk.corpus.framenet
    return fn.frames()
|
26 |
+
|
27 |
+
|
28 |
+
def extract_relations(fr):
    """List the frames related to ``fr`` together with their side of the relation.

    For every relation in ``fr.frameRelations`` both ``'subFrameName'`` and
    ``'superFrameName'`` are inspected; each name that differs from
    ``fr.name`` is returned as ``(frame_name, role)`` where ``role`` is the key
    without its trailing ``'Name'`` (``'subFrame'`` or ``'superFrame'``).

    The result keeps relation order and is not de-duplicated: a frame that
    takes part in several relations appears once per relation.
    """
    ret = list()
    for rel in fr.frameRelations:
        for key in ['subFrameName', 'superFrameName']:
            rel_fr_name = rel[key]
            # Only the frame's own name is filtered out (the other end of
            # the relation is what we are after).
            if rel_fr_name == fr.name:
                continue
            ret.append((rel_fr_name, key[:-4]))
    return ret
|
38 |
+
|
39 |
+
|
40 |
+
def run():
    """Rank FrameNet frames by similarity of their learned label parameters.

    Command line: ``ARCHIVE_PATH DESTINATION KAIROS [--topk N]``.

    Two similarity grids are computed with ``shift_grid_cos_sim``: one over
    the span-typing label embedding of the loaded model and one over the
    weight of its last MLP layer. For each grid the function writes, under
    ``DESTINATION/emb`` and ``DESTINATION/mlp`` respectively:

    * ``raw.json``: the full ranking plus ontology relations, URL and
      definition of every frame;
    * ``similarity.tsv``: frame, definition, URL and its ``topk`` neighbours;
    * ``ontology.tsv``: frame, definition, URL and its related frames;
    * ``kairos_sheet.tsv``: for every gold frame of the KAIROS mapping JSON, a
      ``GOLD`` row followed by rows for its ``topk`` neighbours.
    """
    parser = ArgumentParser()
    parser.add_argument('archive', metavar='ARCHIVE_PATH', type=str)
    parser.add_argument('dst', metavar='DESTINATION', type=str)
    parser.add_argument('kairos', metavar='KAIROS', type=str)
    parser.add_argument('--topk', metavar='TOPK', type=int, default=10)
    args = parser.parse_args()

    predictor = SpanPredictor.from_path(args.archive, cuda_device=-1)
    with open(args.kairos) as kairos_fp:
        kairos_gold_mapping = json.load(kairos_fp)

    label_emb = predictor._model._span_typing.label_emb.weight.clone().detach()
    idx2label = predictor._model.vocab.get_index_to_token_vocabulary('span_label')

    emb_sim = shift_grid_cos_sim(label_emb)
    # Fetch the frame list once; all_frames() re-runs the NLTK download
    # check on every call.
    frames = all_frames()
    fr2definition = {fr.name: (fr.URL, fr.definition) for fr in frames}

    last_mlp = predictor._model._span_typing.MLPs[-1].weight.detach().clone()
    mlp_sim = shift_grid_cos_sim(last_mlp)

    def rank_frame(sim):
        """Build ``frame -> {similarity, ontology, URL, definition}`` from a grid."""
        # For every row, indices sorted by descending score and the scores
        # in that same order.
        rank = sim.argsort(1, True)
        scores = sim.gather(1, rank)
        mapping = {
            fr.name: {
                'similarity': list(),
                'ontology': extract_relations(fr),
                'URL': fr.URL,
                'definition': fr.definition
            } for fr in frames
        }
        for left_idx, (right_indices, match_scores) in enumerate(zip(rank, scores)):
            left_label = idx2label[left_idx]
            if left_label not in mapping:
                # Label of the model vocabulary that is not a FrameNet frame.
                continue
            for right_idx, s in zip(right_indices, match_scores):
                right_idx = int(right_idx)
                right_label = idx2label[right_idx]
                if right_label not in mapping or right_idx == left_idx:
                    continue
                mapping[left_label]['similarity'].append((right_label, float(s)))
        return mapping

    emb_map = rank_frame(emb_sim)
    mlp_map = rank_frame(mlp_sim)

    def dump(mapping, folder_path):
        """Write raw.json, similarity.tsv, ontology.tsv and kairos_sheet.tsv."""
        os.makedirs(folder_path, exist_ok=True)
        with open(os.path.join(folder_path, 'raw.json'), 'w') as fp:
            json.dump(mapping, fp)
        sim_lines, onto_lines = list(), list()

        for fr, values in mapping.items():
            sim_line = [
                fr,
                values['definition'],
                values['URL'],
            ]
            onto_line = deepcopy(sim_line)
            for rel_fr_name, rel_type in values['ontology']:
                onto_line.append(f'{rel_fr_name} ({rel_type})')
            onto_lines.append('\t'.join(onto_line))
            # Frames without any ranked neighbour get no similarity row.
            if len(values['similarity']) > 0:
                for sim_fr_name, score in values['similarity'][:args.topk]:
                    sim_line.append(f'{sim_fr_name} ({score:.3f})')
                sim_lines.append('\t'.join(sim_line))

        with open(os.path.join(folder_path, 'similarity.tsv'), 'w') as fp:
            fp.write('\n'.join(sim_lines))
        with open(os.path.join(folder_path, 'ontology.tsv'), 'w') as fp:
            fp.write('\n'.join(onto_lines))

        kairos_dump = list()
        for kairos_event, kairos_content in kairos_gold_mapping.items():
            for gold_fr in kairos_content['framenet']:
                gold_fr = gold_fr['label']
                if gold_fr not in fr2definition:
                    continue
                kairos_dump.append([
                    'GOLD',
                    gold_fr,
                    kairos_event,
                    fr2definition[gold_fr][0],
                    fr2definition[gold_fr][1],
                    str(kairos_content['description']),
                    '1.00'
                ])
                for ass_fr, sim_score in mapping[gold_fr]['similarity'][:args.topk]:
                    kairos_dump.append([
                        '',
                        ass_fr,
                        kairos_event,
                        fr2definition[ass_fr][0],
                        fr2definition[ass_fr][1],
                        str(kairos_content['description']),
                        f'{sim_score:.2f}'
                    ])
        kairos_dump = list(map(lambda line: '\t'.join(line), kairos_dump))
        with open(os.path.join(folder_path, 'kairos_sheet.tsv'), 'w') as fp:
            fp.write('\n'.join(kairos_dump))

    dump(mlp_map, os.path.join(args.dst, 'mlp'))
    dump(emb_map, os.path.join(args.dst, 'emb'))


if __name__ == '__main__':
    run()
|
scripts/archive/kairos_mapping.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
|
5 |
+
|
6 |
+
def main():
    """Relabel span-finder predictions with KAIROS event types.

    Command line: ``MappingFile SourceFile Destination``.

    The mapping JSON maps each KAIROS event to a dict whose ``'framenet'``
    list holds ``{'label': <frame>}`` entries; it is inverted into a
    frame -> event table (a frame listed twice is reported and the last event
    wins). Every line of the source JSONL has a ``'prediction'`` list; frames
    whose label is in the table are relabelled, all others are dropped. The
    result is written as JSONL to the destination and a summary is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('map', metavar='MappingFile', type=str, help="Mapping JSON file.")
    parser.add_argument('src', metavar='SourceFile', type=str, help="Results of span finder.")
    parser.add_argument('dst', metavar='Destination', type=str, help="Output path.")
    args = parser.parse_args()
    assert os.path.exists(args.map), "Mapping file doesn't exist."
    assert os.path.exists(args.src), "Source file not found."

    with open(args.map) as map_fp:
        k_raw = json.load(map_fp)
    k_map = dict()
    for kairos_event, content in k_raw.items():
        for fr in content['framenet']:
            if fr['label'] in k_map:
                print("Duplicate frame: " + fr['label'])
            k_map[fr['label']] = kairos_event

    with open(args.src) as src_fp:
        # Skip blank lines (e.g. a trailing newline) rather than failing
        # inside json.loads.
        inputs = [json.loads(raw) for raw in src_fp if raw.strip()]

    n_total = n_mapped = 0

    for line in inputs:
        new_frames = list()
        n_total += len(line['prediction'])
        for fr in line['prediction']:
            if fr['label'] in k_map:
                fr['label'] = k_map[fr['label']]
                new_frames.append(fr)
                n_mapped += 1
        line['prediction'] = new_frames

    with open(args.dst, 'w') as fp:
        fp.write('\n'.join(map(json.dumps, inputs)))

    print(f'Done. Among {n_total} frames, {n_mapped} are mapped to KAIROS ontology, others are omitted.')


if __name__ == '__main__':
    main()
|
scripts/archive/onto_test.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from tools.framenet.naive_identifier import FrameIdentifier
|
3 |
+
|
4 |
+
# Measure how well the naive FrameIdentifier recovers the gold frame from the
# target words alone, on the FrameNet test split.
test_file_path = '/home/gqin2/data/framenet/full/test.jsonl'
with open(test_file_path) as test_fp:
    test_sentences = [
        json.loads(line) for line in test_fp
    ]
# One (gold frame, target words, lexical unit) triple per annotated frame.
test_set = []
for ann in test_sentences:
    for fr in ann['frame']:
        test_set.append((fr['name'], ann['text'][fr['target'][0]: fr['target'][-1]+1], fr['lu']))

fi = FrameIdentifier()


tp = fp = fn = 0
fails = []
for frame, target_words, lu in test_set:
    pred = fi(target_words)
    if frame in pred:
        # The gold frame is a hit; every other candidate is a false positive.
        tp += 1
        fp += len(pred) - 1
    else:
        fp += len(pred)
        fn += 1
        fails.append((frame, target_words, pred, lu))

fails.sort(key=lambda x: x[0])
for frame, target_words, pred, lu in fails:
    print(frame, ' '.join(target_words), ' '.join(pred), lu, sep='\t')

print(f'tp={tp}, fp={fp}, fn={fn}')
# Report 0.0 rather than raising ZeroDivisionError when there is nothing to
# divide by (no predictions at all, or an empty test set).
precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
print(f'precision={precision}')
print(f'recall={recall}')
|
scripts/archive/predict_better.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import *
|
2 |
+
import torch
|
3 |
+
import json
|
4 |
+
import argparse
|
5 |
+
import os
|
6 |
+
from tqdm import tqdm
|
7 |
+
|
8 |
+
from sftp.predictor import SpanPredictor
|
9 |
+
from sftp.models import SpanModel
|
10 |
+
from sftp.data_reader import BetterDatasetReader
|
11 |
+
|
12 |
+
|
13 |
+
def predict_doc(predictor, json_path: str):
    """Run ``predictor`` on every entry of a JSON file and attach the spans found.

    The file must contain an ``'entries'`` dict. Each entry is passed to
    ``predictor.predict_json``; from the returned ``'prediction'`` list the
    function stores, under the entry's ``'trigger span'`` key, one dict per
    trigger: ``{'span': [start_idx, end_idx], 'argument': [[start_idx,
    end_idx], ...]}`` (one pair per child of the trigger).

    Returns the loaded JSON object with the entries updated in place.
    """
    with open(json_path) as src_fp:
        src = json.load(src_fp)
    for doc_name, entry in tqdm(list(src['entries'].items())):
        pred = predictor.predict_json(entry)
        triggers = list()
        for trigger in pred['prediction']:
            children = [
                [child['start_idx'], child['end_idx']] for child in trigger['children']
            ]
            triggers.append({
                "span": [trigger['start_idx'], trigger['end_idx']],
                "argument": children
            })
        entry['trigger span'] = triggers
    return src
|
28 |
+
|
29 |
+
|
30 |
+
if __name__ == '__main__':
    # Predict trigger/argument spans for every *json / *valid file found under
    # the source folder and write the results to <destination>/<model name>/.
    parser = argparse.ArgumentParser()
    # The three paths are used unconditionally below, so require them up front
    # instead of failing later on a None path.
    parser.add_argument('-a', type=str, required=True, help='archive path')
    parser.add_argument('-s', type=str, required=True, help='source path')
    parser.add_argument('-d', type=str, required=True, help='destination path')
    parser.add_argument('-c', type=int, default=0, help='cuda device')
    args = parser.parse_args()
    predictor_ = SpanPredictor.from_path(os.path.join(args.a, 'model.tar.gz'), 'span', cuda_device=args.c)
    # normpath drops a trailing separator; without it basename('models/x/')
    # is '' and the output would land directly in the destination folder.
    model_name = os.path.basename(os.path.normpath(args.a))
    tgt_path = os.path.join(args.d, model_name)
    os.makedirs(tgt_path, exist_ok=True)
    for root, _, files in os.walk(args.s):
        for fn in files:
            if not fn.endswith('json') and not fn.endswith('valid'):
                continue
            processed_json = predict_doc(predictor_, os.path.join(root, fn))
            with open(os.path.join(tgt_path, fn), 'w') as fp:
                json.dump(processed_json, fp)
|
scripts/archive/predict_kairos.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import argparse
|
3 |
+
from xml.etree import ElementTree
|
4 |
+
import copy
|
5 |
+
from operator import attrgetter
|
6 |
+
import json
|
7 |
+
import logging
|
8 |
+
|
9 |
+
from sftp import SpanPredictor
|
10 |
+
|
11 |
+
|
12 |
+
def predict_kairos(model_archive, source_folder, onto_map):
    """Predict frames for every segment of the XML files under a folder.

    Parameters:
        model_archive: path handed to ``SpanPredictor.from_path``.
        source_folder: folder searched recursively for ``*.xml`` files.
        onto_map: path to a JSON file mapping each KAIROS event to a dict
            whose ``'framenet'`` list holds ``{'label': <frame>}`` entries.

    Each XML root is iterated as documents; the first child of a document is
    iterated as segments; the ``TOKEN`` children of a segment, ordered by
    their ``start_char`` attribute, form the sentence given to the predictor.
    Predicted frames whose label is in the ontology table are relabelled with
    the KAIROS event, all others are dropped.

    Returns a list with one prediction dict per segment; its ``'meta'`` entry
    holds the document attributes plus the segment attributes under ``'seg'``.
    """
    xml_files = list()
    for root, _, files in os.walk(source_folder):
        for f in files:
            if f.endswith('.xml'):
                xml_files.append(os.path.join(root, f))
    logging.info(f'{len(xml_files)} files are found:')
    for fn in xml_files:
        logging.info(' - ' + fn)

    logging.info('Loading ontology from ' + onto_map)
    k_map = dict()
    with open(onto_map) as onto_fp:
        onto_raw = json.load(onto_fp)
    for kairos_event, content in onto_raw.items():
        for fr in content['framenet']:
            if fr['label'] in k_map:
                logging.info("Duplicate frame: " + fr['label'])
            k_map[fr['label']] = kairos_event

    logging.info('Loading model from ' + model_archive + ' ...')
    predictor = SpanPredictor.from_path(model_archive)

    predictions = list()

    for fn in xml_files:
        logging.info('Now processing ' + os.path.basename(fn))
        tree = ElementTree.parse(fn).getroot()
        for doc in tree:
            doc_meta = copy.deepcopy(doc.attrib)
            text = list(doc)[0]
            for seg in text:
                seg_meta = copy.deepcopy(doc_meta)
                seg_meta['seg'] = copy.deepcopy(seg.attrib)
                tokens = [child for child in seg if child.tag == 'TOKEN']
                # XML attribute values are strings; compare offsets as
                # integers, otherwise "10" would sort before "9".
                tokens.sort(key=lambda t: int(t.attrib['start_char']))
                words = list(map(attrgetter('text'), tokens))
                one_pred = predictor.predict_sentence(words)
                one_pred['meta'] = seg_meta

                new_frames = list()
                for fr in one_pred['prediction']:
                    if fr['label'] in k_map:
                        fr['label'] = k_map[fr['label']]
                        new_frames.append(fr)
                one_pred['prediction'] = new_frames

                predictions.append(one_pred)

    logging.info('Finished Prediction.')

    return predictions
|
62 |
+
|
63 |
+
|
64 |
+
def do_task(input_dir, model_archive, onto_map):
    """
    This function is called by the KAIROS infrastructure code for each
    TASK1 input.

    It forwards to ``predict_kairos`` with ``input_dir`` as the source folder
    and returns its list of predictions unchanged.
    """

    return predict_kairos(model_archive=model_archive,
                          source_folder=input_dir,
                          onto_map=onto_map)
|
73 |
+
|
74 |
+
|
75 |
+
def run():
    """Command-line entry point: predict on a folder of XMLs and save JSONL.

    Arguments: ``MODEL_ARCHIVE SOURCE_FOLDER ONTO_MAP DESTINATION``. The
    predictions returned by ``predict_kairos`` are written to ``DESTINATION``
    as one JSON object per line; missing parent folders are created.
    """
    parser = argparse.ArgumentParser(description='Span Finder for KAIROS Quizlet4\n')
    parser.add_argument('model_archive', metavar='MODEL_ARCHIVE', type=str, help='Path to model archive file.')
    parser.add_argument('source_folder', metavar='SOURCE_FOLDER', type=str, help='Path to the folder that contains the XMLs.')
    parser.add_argument('onto_map', metavar='ONTO_MAP', type=str, help='Path to the ontology JSON.')
    parser.add_argument('destination', metavar='DESTINATION', type=str, help='Output path. (jsonl file path)')
    args = parser.parse_args()

    logging.basicConfig(level='INFO', format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s")

    predictions = predict_kairos(model_archive=args.model_archive,
                                 source_folder=args.source_folder,
                                 onto_map=args.onto_map)

    logging.info('Saving to ' + args.destination + ' ...')
    # A bare file name has an empty dirname, and os.makedirs('') raises;
    # only create the parent when there is one.
    destination_dir = os.path.dirname(args.destination)
    if destination_dir:
        os.makedirs(destination_dir, exist_ok=True)
    with open(args.destination, 'w') as fp:
        fp.write('\n'.join(map(json.dumps, predictions)))

    logging.info('Done.')


if __name__ == '__main__':
    run()
|