Training in progress, step 500

Files changed (5) hide show

config.json CHANGED Viewed

@@ -1,28 +1,12 @@
 {
-  "_name_or_path": "danielhou13/longformer-finetuned_papers",
   "architectures": [
-    "LongformerForSequenceClassification"
   ],
-  "attention_mode": "longformer",
   "attention_probs_dropout_prob": 0.1,
-  "attention_window": [
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512
-  ],
-  "bos_token_id": 0,
-  "classifier_dropout": null,
-  "eos_token_id": 2,
-  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
@@ -30,25 +14,25 @@
     "0": 0,
     "1": 1
   },
-  "ignore_attention_mask": false,
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
     "0": 0,
     "1": 1
   },
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 4098,
-  "model_type": "longformer",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
-  "pad_token_id": 1,
-  "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
-  "sep_token_id": 2,
   "torch_dtype": "float32",
   "transformers_version": "4.21.0.dev0",
-  "type_vocab_size": 1,
-  "use_cache": true,
-  "vocab_size": 50265
 }

 {
+  "_name_or_path": "google/canine-c",
   "architectures": [
+    "CanineForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 57344,
+  "downsampling_rate": 4,
+  "eos_token_id": 57345,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
     "0": 0,
     "1": 1
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
     "0": 0,
     "1": 1
   },
+  "layer_norm_eps": 1e-12,
+  "local_transformer_stride": 128,
+  "max_position_embeddings": 16384,
+  "model_type": "canine",
   "num_attention_heads": 12,
+  "num_hash_buckets": 16384,
+  "num_hash_functions": 8,
   "num_hidden_layers": 12,
+  "pad_token_id": 0,
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.21.0.dev0",
+  "type_vocab_size": 16,
+  "upsampling_kernel_size": 4,
+  "use_cache": true
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f15b40c73a175c2a56798129b340dd32d71f87b64e59da6e7f6127bd649bffd
-size 594740970

 version https://git-lfs.github.com/spec/v1
+oid sha256:9107be29657cd37a3ce5ccebb90ff9b8687149900e2dc29db09eee65c0f66146
+size 528556002

special_tokens_map.json CHANGED Viewed

@@ -1,63 +1,44 @@
 {
-  "additional_special_tokens": [
-    "ar_AR",
-    "cs_CZ",
-    "de_DE",
-    "en_XX",
-    "es_XX",
-    "et_EE",
-    "fi_FI",
-    "fr_XX",
-    "gu_IN",
-    "hi_IN",
-    "it_IT",
-    "ja_XX",
-    "kk_KZ",
-    "ko_KR",
-    "lt_LT",
-    "lv_LV",
-    "my_MM",
-    "ne_NP",
-    "nl_XX",
-    "ro_RO",
-    "ru_RU",
-    "si_LK",
-    "tr_TR",
-    "vi_VN",
-    "zh_CN",
-    "af_ZA",
-    "az_AZ",
-    "bn_IN",
-    "fa_IR",
-    "he_IL",
-    "hr_HR",
-    "id_ID",
-    "ka_GE",
-    "km_KH",
-    "mk_MK",
-    "ml_IN",
-    "mn_MN",
-    "mr_IN",
-    "pl_PL",
-    "ps_AF",
-    "pt_XX",
-    "sv_SE",
-    "sw_KE",
-    "ta_IN",
-    "te_IN",
-    "th_TH",
-    "tl_XX",
-    "uk_UA",
-    "ur_PK",
-    "xh_ZA",
-    "gl_ES",
-    "sl_SI"
-  ],
-  "bos_token": "<s>",
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "mask_token": "<mask>",
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
-  "unk_token": "<unk>"
 }

 {
+  "bos_token": {
+    "content": "\ue000",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "\ue000",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "\ue001",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "\ue003",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "\u0000",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "\ue001",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
 }

tokenizer_config.json CHANGED Viewed

@@ -1,77 +1,55 @@
 {
-  "additional_special_tokens": [
-    "ar_AR",
-    "cs_CZ",
-    "de_DE",
-    "en_XX",
-    "es_XX",
-    "et_EE",
-    "fi_FI",
-    "fr_XX",
-    "gu_IN",
-    "hi_IN",
-    "it_IT",
-    "ja_XX",
-    "kk_KZ",
-    "ko_KR",
-    "lt_LT",
-    "lv_LV",
-    "my_MM",
-    "ne_NP",
-    "nl_XX",
-    "ro_RO",
-    "ru_RU",
-    "si_LK",
-    "tr_TR",
-    "vi_VN",
-    "zh_CN",
-    "af_ZA",
-    "az_AZ",
-    "bn_IN",
-    "fa_IR",
-    "he_IL",
-    "hr_HR",
-    "id_ID",
-    "ka_GE",
-    "km_KH",
-    "mk_MK",
-    "ml_IN",
-    "mn_MN",
-    "mr_IN",
-    "pl_PL",
-    "ps_AF",
-    "pt_XX",
-    "sv_SE",
-    "sw_KE",
-    "ta_IN",
-    "te_IN",
-    "th_TH",
-    "tl_XX",
-    "uk_UA",
-    "ur_PK",
-    "xh_ZA",
-    "gl_ES",
-    "sl_SI"
-  ],
-  "bos_token": "<s>",
-  "cls_token": "<s>",
-  "eos_token": "</s>",
   "mask_token": {
     "__type": "AddedToken",
-    "content": "<mask>",
     "lstrip": true,
     "normalized": true,
     "rstrip": false,
     "single_word": false
   },
-  "model_max_length": 1024,
-  "name_or_path": "facebook/mbart-large-50",
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
-  "sp_model_kwargs": {},
-  "special_tokens_map_file": "/home/suraj/projects/mbart-50/hf_models/mbart-50-large/special_tokens_map.json",
-  "src_lang": null,
-  "tgt_lang": null,
-  "tokenizer_class": "MBart50Tokenizer",
-  "unk_token": "<unk>"
 }

 {
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "\ue000",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "__type": "AddedToken",
+    "content": "\ue000",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "\ue001",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "mask_token": {
     "__type": "AddedToken",
+    "content": "\ue003",
     "lstrip": true,
     "normalized": true,
     "rstrip": false,
     "single_word": false
   },
+  "model_max_length": 2048,
+  "name_or_path": "google/canine-c",
+  "pad_token": {
+    "__type": "AddedToken",
+    "content": "\u0000",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "__type": "AddedToken",
+    "content": "\ue001",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "special_tokens_map_file": "/home/sushant/.cache/huggingface/transformers/f8be97736b4765e3e8d559b4e7d4f1f531b1621150e5344b600684a27bc84e38.ab71f530366fe02e2834427e7b90198bfd0d573bc4279bfafdb2b95fe2b46dde",
+  "tokenizer_class": "CanineTokenizer"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1cdd9f11ec0611e7c1667b6b3576b8b5182e15a55bb5d5d4e75a66f5ae39651d
 size 3311

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a187b567c6f1b2020a1b7ff81e62e96bb9090afda3d0d367ed2f2a016358a10
 size 3311