Update repository
Browse files
- README.md +6 -0
- config.json +5 -0
- log/train/events.out.tfevents.1639101937.allennlp-server4 +3 -0
- log/validation/events.out.tfevents.1639101937.allennlp-server4 +3 -0
- metrics.json +21 -21
- vocabulary/non_padded_namespaces.txt +1 -1
- weights.th +3 -0
README.md
CHANGED
@@ -3,6 +3,12 @@ tags:
|
|
3 |
- allennlp
|
4 |
---
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
This is an implementation of the BiDAF model with ELMo embeddings. The basic layout is pretty simple: encode words as a combination of word embeddings and a character-level encoder, pass the word representations through a bi-LSTM/GRU, use a matrix of attentions to put question information into the passage word representations (this is the only part that is at all non-standard), pass this through another few layers of bi-LSTMs/GRUs, and do a softmax over span start and span end.
|
7 |
|
8 |
CAVEATS:
|
|
|
3 |
- allennlp
|
4 |
---
|
5 |
|
6 |
+
# TODO: Fill this model card
|
7 |
+
---
|
8 |
+
tags:
|
9 |
+
- allennlp
|
10 |
+
---
|
11 |
+
|
12 |
This is an implementation of the BiDAF model with ELMo embeddings. The basic layout is pretty simple: encode words as a combination of word embeddings and a character-level encoder, pass the word representations through a bi-LSTM/GRU, use a matrix of attentions to put question information into the passage word representations (this is the only part that is at all non-standard), pass this through another few layers of bi-LSTMs/GRUs, and do a softmax over span start and span end.
|
13 |
|
14 |
CAVEATS:
|
config.json
CHANGED
@@ -93,6 +93,11 @@
|
|
93 |
"train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json",
|
94 |
"validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json",
|
95 |
"trainer": {
|
|
|
|
|
|
|
|
|
|
|
96 |
"grad_norm": 5,
|
97 |
"learning_rate_scheduler": {
|
98 |
"type": "reduce_on_plateau",
|
|
|
93 |
"train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json",
|
94 |
"validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json",
|
95 |
"trainer": {
|
96 |
+
"callbacks": [
|
97 |
+
{
|
98 |
+
"type": "tensorboard"
|
99 |
+
}
|
100 |
+
],
|
101 |
"grad_norm": 5,
|
102 |
"learning_rate_scheduler": {
|
103 |
"type": "reduce_on_plateau",
|
log/train/events.out.tfevents.1639101937.allennlp-server4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c24f83844d792d74541109c0f326ec681ccbc23ee00ee84f195c110210489cc
|
3 |
+
size 137156
|
log/validation/events.out.tfevents.1639101937.allennlp-server4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:240605c0f609df48c9b6a32f87d56e5a00944db4b1b5a78055befceea5d4ec23
|
3 |
+
size 4930
|
metrics.json
CHANGED
@@ -1,27 +1,27 @@
|
|
1 |
{
|
2 |
"best_epoch": 4,
|
3 |
-
"peak_worker_0_memory_MB":
|
4 |
"peak_gpu_0_memory_MB": 7284.326171875,
|
5 |
-
"training_duration": "4:
|
6 |
"epoch": 14,
|
7 |
-
"training_start_acc": 0.
|
8 |
-
"training_end_acc": 0.
|
9 |
-
"training_span_acc": 0.
|
10 |
-
"training_em": 0.
|
11 |
-
"training_f1": 0.
|
12 |
-
"training_loss": 0.
|
13 |
-
"training_worker_0_memory_MB":
|
14 |
"training_gpu_0_memory_MB": 7283.4765625,
|
15 |
-
"validation_start_acc": 0.
|
16 |
-
"validation_end_acc": 0.
|
17 |
-
"validation_span_acc": 0.
|
18 |
-
"validation_em": 0.
|
19 |
-
"validation_f1": 0.
|
20 |
-
"validation_loss": 4.
|
21 |
-
"best_validation_start_acc": 0.
|
22 |
-
"best_validation_end_acc": 0.
|
23 |
-
"best_validation_span_acc": 0.
|
24 |
-
"best_validation_em": 0.
|
25 |
-
"best_validation_f1": 0.
|
26 |
-
"best_validation_loss": 2.
|
27 |
}
|
|
|
1 |
{
|
2 |
"best_epoch": 4,
|
3 |
+
"peak_worker_0_memory_MB": 39687.31640625,
|
4 |
"peak_gpu_0_memory_MB": 7284.326171875,
|
5 |
+
"training_duration": "4:19:37.259793",
|
6 |
"epoch": 14,
|
7 |
+
"training_start_acc": 0.9661183346841858,
|
8 |
+
"training_end_acc": 0.9663238164819233,
|
9 |
+
"training_span_acc": 0.9416203381317139,
|
10 |
+
"training_em": 0.9519857532620235,
|
11 |
+
"training_f1": 0.9757483852872945,
|
12 |
+
"training_loss": 0.20176630357267616,
|
13 |
+
"training_worker_0_memory_MB": 39687.31640625,
|
14 |
"training_gpu_0_memory_MB": 7283.4765625,
|
15 |
+
"validation_start_acc": 0.6244087038789026,
|
16 |
+
"validation_end_acc": 0.671523178807947,
|
17 |
+
"validation_span_acc": 0.5334910122989593,
|
18 |
+
"validation_em": 0.6758751182592242,
|
19 |
+
"validation_f1": 0.7833561202573218,
|
20 |
+
"validation_loss": 4.825442760395554,
|
21 |
+
"best_validation_start_acc": 0.6578997161778619,
|
22 |
+
"best_validation_end_acc": 0.6980132450331126,
|
23 |
+
"best_validation_span_acc": 0.5754966887417219,
|
24 |
+
"best_validation_em": 0.7126773888363293,
|
25 |
+
"best_validation_f1": 0.8017721561069574,
|
26 |
+
"best_validation_loss": 2.247398374665458
|
27 |
}
|
vocabulary/non_padded_namespaces.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
*labels
|
2 |
*tags
|
|
|
|
|
|
1 |
*tags
|
2 |
+
*labels
|
weights.th
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18e347b7ff0a9e0dd64d355473388175b9dc1aff66e0d5b08def9e1de7da154c
|
3 |
+
size 450055351
|