SushantGautam committed on
Commit
9425561
1 Parent(s): 5df7fd1

Training in progress, step 500

Browse files
config.json CHANGED
@@ -1,28 +1,12 @@
1
  {
2
- "_name_or_path": "danielhou13/longformer-finetuned_papers",
3
  "architectures": [
4
- "LongformerForSequenceClassification"
5
  ],
6
- "attention_mode": "longformer",
7
  "attention_probs_dropout_prob": 0.1,
8
- "attention_window": [
9
- 512,
10
- 512,
11
- 512,
12
- 512,
13
- 512,
14
- 512,
15
- 512,
16
- 512,
17
- 512,
18
- 512,
19
- 512,
20
- 512
21
- ],
22
- "bos_token_id": 0,
23
- "classifier_dropout": null,
24
- "eos_token_id": 2,
25
- "gradient_checkpointing": false,
26
  "hidden_act": "gelu",
27
  "hidden_dropout_prob": 0.1,
28
  "hidden_size": 768,
@@ -30,25 +14,25 @@
30
  "0": 0,
31
  "1": 1
32
  },
33
- "ignore_attention_mask": false,
34
  "initializer_range": 0.02,
35
  "intermediate_size": 3072,
36
  "label2id": {
37
  "0": 0,
38
  "1": 1
39
  },
40
- "layer_norm_eps": 1e-05,
41
- "max_position_embeddings": 4098,
42
- "model_type": "longformer",
 
43
  "num_attention_heads": 12,
 
 
44
  "num_hidden_layers": 12,
45
- "pad_token_id": 1,
46
- "position_embedding_type": "absolute",
47
  "problem_type": "single_label_classification",
48
- "sep_token_id": 2,
49
  "torch_dtype": "float32",
50
  "transformers_version": "4.21.0.dev0",
51
- "type_vocab_size": 1,
52
- "use_cache": true,
53
- "vocab_size": 50265
54
  }
 
1
  {
2
+ "_name_or_path": "google/canine-c",
3
  "architectures": [
4
+ "CanineForSequenceClassification"
5
  ],
 
6
  "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 57344,
8
+ "downsampling_rate": 4,
9
+ "eos_token_id": 57345,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 768,
 
14
  "0": 0,
15
  "1": 1
16
  },
 
17
  "initializer_range": 0.02,
18
  "intermediate_size": 3072,
19
  "label2id": {
20
  "0": 0,
21
  "1": 1
22
  },
23
+ "layer_norm_eps": 1e-12,
24
+ "local_transformer_stride": 128,
25
+ "max_position_embeddings": 16384,
26
+ "model_type": "canine",
27
  "num_attention_heads": 12,
28
+ "num_hash_buckets": 16384,
29
+ "num_hash_functions": 8,
30
  "num_hidden_layers": 12,
31
+ "pad_token_id": 0,
 
32
  "problem_type": "single_label_classification",
 
33
  "torch_dtype": "float32",
34
  "transformers_version": "4.21.0.dev0",
35
+ "type_vocab_size": 16,
36
+ "upsampling_kernel_size": 4,
37
+ "use_cache": true
38
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f15b40c73a175c2a56798129b340dd32d71f87b64e59da6e7f6127bd649bffd
3
- size 594740970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9107be29657cd37a3ce5ccebb90ff9b8687149900e2dc29db09eee65c0f66146
3
+ size 528556002
special_tokens_map.json CHANGED
@@ -1,63 +1,44 @@
1
  {
2
- "additional_special_tokens": [
3
- "ar_AR",
4
- "cs_CZ",
5
- "de_DE",
6
- "en_XX",
7
- "es_XX",
8
- "et_EE",
9
- "fi_FI",
10
- "fr_XX",
11
- "gu_IN",
12
- "hi_IN",
13
- "it_IT",
14
- "ja_XX",
15
- "kk_KZ",
16
- "ko_KR",
17
- "lt_LT",
18
- "lv_LV",
19
- "my_MM",
20
- "ne_NP",
21
- "nl_XX",
22
- "ro_RO",
23
- "ru_RU",
24
- "si_LK",
25
- "tr_TR",
26
- "vi_VN",
27
- "zh_CN",
28
- "af_ZA",
29
- "az_AZ",
30
- "bn_IN",
31
- "fa_IR",
32
- "he_IL",
33
- "hr_HR",
34
- "id_ID",
35
- "ka_GE",
36
- "km_KH",
37
- "mk_MK",
38
- "ml_IN",
39
- "mn_MN",
40
- "mr_IN",
41
- "pl_PL",
42
- "ps_AF",
43
- "pt_XX",
44
- "sv_SE",
45
- "sw_KE",
46
- "ta_IN",
47
- "te_IN",
48
- "th_TH",
49
- "tl_XX",
50
- "uk_UA",
51
- "ur_PK",
52
- "xh_ZA",
53
- "gl_ES",
54
- "sl_SI"
55
- ],
56
- "bos_token": "<s>",
57
- "cls_token": "<s>",
58
- "eos_token": "</s>",
59
- "mask_token": "<mask>",
60
- "pad_token": "<pad>",
61
- "sep_token": "</s>",
62
- "unk_token": "<unk>"
63
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "\ue000",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "\ue000",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "\ue001",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "\ue003",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "\u0000",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "\ue001",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  }
tokenizer_config.json CHANGED
@@ -1,77 +1,55 @@
1
  {
2
- "additional_special_tokens": [
3
- "ar_AR",
4
- "cs_CZ",
5
- "de_DE",
6
- "en_XX",
7
- "es_XX",
8
- "et_EE",
9
- "fi_FI",
10
- "fr_XX",
11
- "gu_IN",
12
- "hi_IN",
13
- "it_IT",
14
- "ja_XX",
15
- "kk_KZ",
16
- "ko_KR",
17
- "lt_LT",
18
- "lv_LV",
19
- "my_MM",
20
- "ne_NP",
21
- "nl_XX",
22
- "ro_RO",
23
- "ru_RU",
24
- "si_LK",
25
- "tr_TR",
26
- "vi_VN",
27
- "zh_CN",
28
- "af_ZA",
29
- "az_AZ",
30
- "bn_IN",
31
- "fa_IR",
32
- "he_IL",
33
- "hr_HR",
34
- "id_ID",
35
- "ka_GE",
36
- "km_KH",
37
- "mk_MK",
38
- "ml_IN",
39
- "mn_MN",
40
- "mr_IN",
41
- "pl_PL",
42
- "ps_AF",
43
- "pt_XX",
44
- "sv_SE",
45
- "sw_KE",
46
- "ta_IN",
47
- "te_IN",
48
- "th_TH",
49
- "tl_XX",
50
- "uk_UA",
51
- "ur_PK",
52
- "xh_ZA",
53
- "gl_ES",
54
- "sl_SI"
55
- ],
56
- "bos_token": "<s>",
57
- "cls_token": "<s>",
58
- "eos_token": "</s>",
59
  "mask_token": {
60
  "__type": "AddedToken",
61
- "content": "<mask>",
62
  "lstrip": true,
63
  "normalized": true,
64
  "rstrip": false,
65
  "single_word": false
66
  },
67
- "model_max_length": 1024,
68
- "name_or_path": "facebook/mbart-large-50",
69
- "pad_token": "<pad>",
70
- "sep_token": "</s>",
71
- "sp_model_kwargs": {},
72
- "special_tokens_map_file": "/home/suraj/projects/mbart-50/hf_models/mbart-50-large/special_tokens_map.json",
73
- "src_lang": null,
74
- "tgt_lang": null,
75
- "tokenizer_class": "MBart50Tokenizer",
76
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
77
  }
 
1
  {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "\ue000",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "cls_token": {
12
+ "__type": "AddedToken",
13
+ "content": "\ue000",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "__type": "AddedToken",
21
+ "content": "\ue001",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  "mask_token": {
28
  "__type": "AddedToken",
29
+ "content": "\ue003",
30
  "lstrip": true,
31
  "normalized": true,
32
  "rstrip": false,
33
  "single_word": false
34
  },
35
+ "model_max_length": 2048,
36
+ "name_or_path": "google/canine-c",
37
+ "pad_token": {
38
+ "__type": "AddedToken",
39
+ "content": "\u0000",
40
+ "lstrip": false,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ "sep_token": {
46
+ "__type": "AddedToken",
47
+ "content": "\ue001",
48
+ "lstrip": false,
49
+ "normalized": true,
50
+ "rstrip": false,
51
+ "single_word": false
52
+ },
53
+ "special_tokens_map_file": "/home/sushant/.cache/huggingface/transformers/f8be97736b4765e3e8d559b4e7d4f1f531b1621150e5344b600684a27bc84e38.ab71f530366fe02e2834427e7b90198bfd0d573bc4279bfafdb2b95fe2b46dde",
54
+ "tokenizer_class": "CanineTokenizer"
55
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cdd9f11ec0611e7c1667b6b3576b8b5182e15a55bb5d5d4e75a66f5ae39651d
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a187b567c6f1b2020a1b7ff81e62e96bb9090afda3d0d367ed2f2a016358a10
3
  size 3311