LysandreJik committed
Commit 6a01ac0
1 Parent(s): 076ba0e

Initial commit

.gitattributes CHANGED
@@ -14,3 +14,4 @@
  *.pb filter=lfs diff=lfs merge=lfs -text
  *.pt filter=lfs diff=lfs merge=lfs -text
  *.pth filter=lfs diff=lfs merge=lfs -text
+ weights.th filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,95 @@
+ {
+     "dataset_reader": {
+         "type": "conll2003",
+         "coding_scheme": "BIOUL",
+         "tag_label": "ner",
+         "token_indexers": {
+             "elmo": {
+                 "type": "elmo_characters"
+             },
+             "token_characters": {
+                 "type": "characters",
+                 "min_padding_length": 3
+             },
+             "tokens": {
+                 "type": "single_id",
+                 "lowercase_tokens": true
+             }
+         }
+     },
+     "model": {
+         "type": "crf_tagger",
+         "dropout": 0.5,
+         "encoder": {
+             "type": "lstm",
+             "bidirectional": true,
+             "dropout": 0.5,
+             "hidden_size": 200,
+             "input_size": 1202,
+             "num_layers": 2
+         },
+         "include_start_end_transitions": false,
+         "label_encoding": "BIOUL",
+         "regularizer": {
+             "regexes": [
+                 [
+                     "scalar_parameters",
+                     {
+                         "alpha": 0.1,
+                         "type": "l2"
+                     }
+                 ]
+             ]
+         },
+         "text_field_embedder": {
+             "token_embedders": {
+                 "elmo": {
+                     "type": "elmo_token_embedder",
+                     "do_layer_norm": false,
+                     "dropout": 0,
+                     "options_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json",
+                     "weight_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
+                 },
+                 "token_characters": {
+                     "type": "character_encoding",
+                     "embedding": {
+                         "embedding_dim": 16
+                     },
+                     "encoder": {
+                         "type": "cnn",
+                         "conv_layer_activation": "relu",
+                         "embedding_dim": 16,
+                         "ngram_filter_sizes": [
+                             3
+                         ],
+                         "num_filters": 128
+                     }
+                 },
+                 "tokens": {
+                     "type": "embedding",
+                     "embedding_dim": 50,
+                     "pretrained_file": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.6B.50d.txt.gz",
+                     "trainable": true
+                 }
+             }
+         }
+     },
+     "train_data_path": "/mnt/tank/dirkg/data/conll2003/train.txt",
+     "validation_data_path": "/mnt/tank/dirkg/data/conll2003/valid.txt",
+     "trainer": {
+         "checkpointer": {
+             "num_serialized_models_to_keep": 3
+         },
+         "grad_norm": 5,
+         "num_epochs": 75,
+         "optimizer": {
+             "type": "adam",
+             "lr": 0.001
+         },
+         "patience": 25,
+         "validation_metric": "+f1-measure-overall"
+     },
+     "data_loader": {
+         "batch_size": 64
+     }
+ }
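
This configuration defines an AllenNLP crf_tagger for CoNLL-2003 NER: a 2-layer bidirectional LSTM with a CRF layer over BIOUL tags, fed by the concatenation of ELMo (1024 dims), a character-level CNN (128 filters) and 50-dimensional GloVe embeddings, which is where the encoder's input_size of 1202 comes from (1024 + 128 + 50). The train/validation paths point at the original author's local machine and would need replacing to retrain. As a minimal sketch (not part of this commit), the unpacked serialization directory added here could be loaded roughly like this, assuming an AllenNLP version in which crf_tagger and conll2003 are registered (in newer releases via the allennlp-models package):

    # Sketch only: load config.json, vocabulary/ and weights.th from this repo
    # as an AllenNLP serialization directory. File names are the ones added in
    # this commit; the working directory is assumed to be the repo root.
    import allennlp_models.tagging  # noqa: F401  registers crf_tagger / conll2003
    from allennlp.common.params import Params
    from allennlp.models.model import Model

    params = Params.from_file("config.json")
    model = Model.load(params, serialization_dir=".", weights_file="weights.th")
    model.eval()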
vocabulary/.lock ADDED
File without changes
vocabulary/labels.txt ADDED
@@ -0,0 +1,17 @@
+ O
+ U-LOC
+ B-PER
+ L-PER
+ U-ORG
+ U-MISC
+ B-ORG
+ L-ORG
+ U-PER
+ I-ORG
+ B-LOC
+ L-LOC
+ B-MISC
+ L-MISC
+ I-MISC
+ I-PER
+ I-LOC
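
These are the 17 tag strings produced by the BIOUL encoding declared in config.json: B-, I- and L- mark the beginning, inside and last token of a multi-token entity, U- marks a single-token entity, and O marks tokens outside any entity, across the PER, ORG, LOC and MISC classes. A small illustrative helper (not from this repo) shows how such a tag sequence decodes into entity spans:

    # Decode a BIOUL tag sequence into (label, start, end_inclusive) spans.
    def bioul_to_spans(tags):
        spans, start = [], None
        for i, tag in enumerate(tags):
            prefix, _, label = tag.partition("-")
            if prefix == "U":                          # single-token entity
                spans.append((label, i, i))
            elif prefix == "B":                        # entity starts
                start = i
            elif prefix == "L" and start is not None:  # entity ends
                spans.append((label, start, i))
                start = None
        return spans

    print(bioul_to_spans(["B-PER", "L-PER", "O", "U-LOC"]))
    # [('PER', 0, 1), ('LOC', 3, 3)]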
vocabulary/non_padded_namespaces.txt ADDED
@@ -0,0 +1,2 @@
+ *labels
+ *tags
vocabulary/token_characters.txt ADDED
@@ -0,0 +1,85 @@
+ @@UNKNOWN@@
+ e
+ a
+ t
+ n
+ i
+ o
+ r
+ s
+ l
+ d
+ h
+ c
+ u
+ m
+ f
+ p
+ g
+ .
+ y
+ w
+ b
+ -
+ 1
+ ,
+ v
+ 0
+ S
+ A
+ k
+ 2
+ T
+ 9
+ I
+ C
+ 6
+ E
+ N
+ 3
+ R
+ M
+ 5
+ 4
+ O
+ B
+ L
+ 8
+ (
+ )
+ 7
+ P
+ D
+ G
+ "
+ H
+ F
+ '
+ W
+ U
+ x
+ K
+ J
+ j
+ z
+ :
+ q
+ Y
+ V
+ /
+ $
+ Z
+ +
+ Q
+ X
+ ;
+ *
+ &
+ =
+ %
+ ?
+ [
+ ]
+ !
+ @
+ `
vocabulary/tokens.txt ADDED
The diff for this file is too large to render. See raw diff
weights.th ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e17080f0f5e046def78f1acca3623ec01daa842ca30f3e21fee3867a3bd91503
+ size 393624561
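
weights.th is tracked with Git LFS, so the repository stores only this pointer: the LFS spec version, the SHA-256 of the real file, and its size in bytes (393,624,561, roughly 394 MB). The actual tensors are fetched by an LFS-aware clone or by running git lfs pull. A hedged sketch (only the file name and the values above are taken from this commit) for checking a downloaded copy against the pointer:

    # Verify a locally fetched weights.th against the oid/size in the pointer.
    import hashlib, os

    EXPECTED_SHA256 = "e17080f0f5e046def78f1acca3623ec01daa842ca30f3e21fee3867a3bd91503"
    EXPECTED_SIZE = 393624561

    digest = hashlib.sha256()
    with open("weights.th", "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            digest.update(chunk)

    assert os.path.getsize("weights.th") == EXPECTED_SIZE
    assert digest.hexdigest() == EXPECTED_SHA256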