qgyd2021 committed on
Commit
eba4db9
1 Parent(s): 9bf0f66

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ weights.th filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_reader": {
3
+ "type": "text_classification_json_utf8",
4
+ "token_indexers": {
5
+ "tokens": {
6
+ "type": "single_id",
7
+ "namespace": "tokens",
8
+ "lowercase_tokens": true,
9
+ "token_min_padding_length": 5
10
+ }
11
+ },
12
+ "tokenizer": {
13
+ "type": "pretrained_transformer",
14
+ "model_name": "/data/tianxing/PycharmProjects/AllenNLP/pretrained_models/huggingface/google-bert/bert-base-multilingual-uncased"
15
+ },
16
+ "max_sequence_length": 256
17
+ },
18
+ "train_data_path": "train.json",
19
+ "validation_data_path": "valid.json",
20
+ "vocabulary": {
21
+ "directory_path": "vocabulary"
22
+ },
23
+ "model": {
24
+ "type": "basic_classifier",
25
+ "text_field_embedder": {
26
+ "token_embedders": {
27
+ "tokens": {
28
+ "type": "embedding",
29
+ "num_embeddings": 105880,
30
+ "embedding_dim": 64
31
+ }
32
+ }
33
+ },
34
+ "seq2seq_encoder": {
35
+ "type": "pass_through",
36
+ "input_dim": 64
37
+ },
38
+ "seq2vec_encoder": {
39
+ "type": "bag_of_embeddings",
40
+ "embedding_dim": 64
41
+ }
42
+ },
43
+ "data_loader": {
44
+ "type": "multiprocess",
45
+ "batch_size": 64,
46
+ "shuffle": true
47
+ },
48
+ "trainer": {
49
+ "type": "gradient_descent",
50
+ "cuda_device": -1,
51
+ "optimizer": {
52
+ "type": "bert_adam",
53
+ "lr": 5e-05,
54
+ "warmup": 0.1,
55
+ "t_total": 50000,
56
+ "schedule": "warmup_linear"
57
+ },
58
+ "checkpointer": {
59
+ "serialization_dir": "serialization_dir",
60
+ "keep_most_recent_by_count": 10
61
+ },
62
+ "patience": 5,
63
+ "validation_metric": "+accuracy",
64
+ "num_epochs": 100
65
+ }
66
+ }
vocabulary/.lock ADDED
File without changes
vocabulary/labels.txt ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ en
2
+ zh
3
+ ar
4
+ bg
5
+ cs
6
+ da
7
+ de
8
+ el
9
+ es
10
+ et
11
+ fi
12
+ fr
13
+ ga
14
+ hi
15
+ is
16
+ it
17
+ ja
18
+ ko
19
+ lt
20
+ lv
21
+ mt
22
+ nl
23
+ no
24
+ pl
25
+ pt
26
+ ro
27
+ ru
28
+ sk
29
+ sl
30
+ sw
31
+ sv
32
+ th
33
+ tn
34
+ tr
35
+ ts
36
+ ur
37
+ vi
38
+ tl
39
+ hr
40
+ eo
41
+ uk
42
+ mr
43
+ bn
44
+ af
45
+ zu
46
+ id
47
+ fo
48
+ bs
49
+ yo
50
+ hi_en
51
+ hu
52
+ gl
53
+ hy
vocabulary/non_padded_namespaces.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *tags
2
+ *labels
vocabulary/tokens.txt ADDED
The diff for this file is too large to render. See raw diff
 
weights.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e497bc5b8d85a72a9bf97634af0502c6af0c0a6bc792e4d237667a49a33583d8
3
+ size 27120547