Training in progress, epoch 1

Browse files

Files changed (6) hide show

.gitattributes +1 -0
config.json +16 -65
model.safetensors +2 -2
tokenizer.json +0 -0
tokenizer_config.json +6 -12
training_args.bin +1 -1

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

config.json CHANGED Viewed

@@ -1,90 +1,41 @@
 {
   "architectures": [
-    "ModernBertForSequenceClassification"
   ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
   "bos_token_id": 0,
-  "classifier_activation": "silu",
-  "classifier_bias": false,
-  "classifier_dropout": 0.0,
-  "classifier_pooling": "mean",
-  "cls_token_id": 0,
-  "decoder_bias": true,
-  "deterministic_flash_attn": false,
   "dtype": "float32",
-  "embedding_dropout": 0.0,
   "eos_token_id": 2,
-  "global_attn_every_n_layers": 3,
-  "gradient_checkpointing": false,
-  "hidden_activation": "gelu",
-  "hidden_size": 768,
   "id2label": {
     "0": "NEGATIVE",
     "1": "NEUTRAL",
     "2": "POSITIVE"
   },
-  "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
-  "intermediate_size": 1152,
   "label2id": {
     "NEGATIVE": 0,
     "NEUTRAL": 1,
     "POSITIVE": 2
   },
   "layer_norm_eps": 1e-05,
-  "layer_types": [
-    "full_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "full_attention"
-  ],
-  "local_attention": 128,
-  "max_position_embeddings": 8192,
-  "mlp_bias": false,
-  "mlp_dropout": 0.0,
-  "model_type": "modernbert",
-  "norm_bias": false,
-  "norm_eps": 1e-05,
-  "num_attention_heads": 12,
-  "num_hidden_layers": 22,
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
-  "repad_logits_with_grad": false,
-  "rope_parameters": {
-    "full_attention": {
-      "rope_theta": 160000.0,
-      "rope_type": "default"
-    },
-    "sliding_attention": {
-      "rope_theta": 10000.0,
-      "rope_type": "default"
-    }
-  },
-  "sep_token_id": 2,
-  "sparse_pred_ignore_index": -100,
-  "sparse_prediction": false,
   "tie_word_embeddings": true,
   "transformers_version": "5.5.4",
   "use_cache": false,
-  "vocab_size": 51200
 }

 {
+  "add_cross_attention": false,
   "architectures": [
+    "XLMRobertaForSequenceClassification"
   ],
+  "attention_probs_dropout_prob": 0.1,
   "bos_token_id": 0,
+  "classifier_dropout": null,
   "dtype": "float32",
   "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
   "id2label": {
     "0": "NEGATIVE",
     "1": "NEUTRAL",
     "2": "POSITIVE"
   },
   "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "is_decoder": false,
   "label2id": {
     "NEGATIVE": 0,
     "NEUTRAL": 1,
     "POSITIVE": 2
   },
   "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "xlm-roberta",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "output_past": true,
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "tie_word_embeddings": true,
   "transformers_version": "5.5.4",
+  "type_vocab_size": 1,
   "use_cache": false,
+  "vocab_size": 250002
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:535cef9551eae79acca4525cb70345793784fb4aaf9e623d888738bf2b0cbe3b
-size 600998764

 version https://git-lfs.github.com/spec/v1
+oid sha256:e757200496170c39eb7e357e50e6d7e57978ddd2995d9dfd1b964b8516e49e03
+size 2239622724

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -2,20 +2,14 @@
   "add_prefix_space": true,
   "backend": "tokenizers",
   "bos_token": "<s>",
-  "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "is_local": false,
-  "legacy": true,
   "mask_token": "<mask>",
-  "model_input_names": [
-    "input_ids",
-    "attention_mask"
-  ],
-  "model_max_length": 8192,
   "pad_token": "<pad>",
-  "sp_model_kwargs": {},
-  "spaces_between_special_tokens": false,
-  "tokenizer_class": "TokenizersBackend",
-  "unk_token": "<unk>",
-  "use_default_system_prompt": false
 }

   "add_prefix_space": true,
   "backend": "tokenizers",
   "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
   "eos_token": "</s>",
   "is_local": false,
   "mask_token": "<mask>",
+  "model_max_length": 512,
   "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "XLMRobertaTokenizer",
+  "unk_token": "<unk>"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4164dc7f7296b1c51edd8abe3c3ef3a1b2c72a72e649ffa5e0529c159aeffa92
 size 5201

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b05e912d15c955433d3af27a1771d4c9d03ddfa3ea5eb910da66d87e0bdb35d
 size 5201