End of training

Browse files

Files changed (7) hide show

README.md +111 -0
adapter_config.json +31 -0
adapter_model.safetensors +3 -0
special_tokens_map.json +30 -0
tokenizer.json +0 -0
tokenizer_config.json +52 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,111 @@

+---
+library_name: peft
+tags:
+- generated_from_trainer
+base_model: mahdibaghbanzadeh/seqsight_4096_512_27M
+metrics:
+- accuracy
+model-index:
+- name: GUE_EMP_H4-seqsight_4096_512_27M-L32_f
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# GUE_EMP_H4-seqsight_4096_512_27M-L32_f
+This model is a fine-tuned version of [mahdibaghbanzadeh/seqsight_4096_512_27M](https://huggingface.co/mahdibaghbanzadeh/seqsight_4096_512_27M) on the [mahdibaghbanzadeh/GUE_EMP_H4](https://huggingface.co/datasets/mahdibaghbanzadeh/GUE_EMP_H4) dataset.
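+It achieves the following results on the evaluation set (note: these values do not match any logged step in the training-results table of this card, whose final row at step 10000 reports a validation loss of 0.7773; they presumably come from a separate evaluation run or checkpoint — to be confirmed by the author):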
+- Loss: 0.2596
+- F1 Score: 0.8990
+- Accuracy: 0.8994
+## Model description
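+A LoRA adapter (PEFT) for sequence classification (`SEQ_CLS`) on top of [mahdibaghbanzadeh/seqsight_4096_512_27M](https://huggingface.co/mahdibaghbanzadeh/seqsight_4096_512_27M). The adapter uses rank `r=32`, `lora_alpha=8`, and `lora_dropout=0.1`, applied to the `query_proj` and `value_proj` modules; the `classifier` and `pooler` modules are stored in full alongside the adapter weights (`modules_to_save`). Further details about the base model are not recorded in this card.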
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
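+Fine-tuned and evaluated on the [mahdibaghbanzadeh/GUE_EMP_H4](https://huggingface.co/datasets/mahdibaghbanzadeh/GUE_EMP_H4) dataset. Split sizes and preprocessing are not recorded in this card; more information needed.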
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0005
+- train_batch_size: 128
+- eval_batch_size: 128
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- training_steps: 10000
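+### Training results
+Metrics were logged every 200 steps. Validation loss is lowest at step 600 (0.2689, with the best logged F1 of 0.9007) and then rises to 0.7773 by step 10000 while training loss continues to fall to roughly 0.01, which indicates that the later checkpoints are overfit to the training data.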
+| Training Loss | Epoch  | Step  | Validation Loss | F1 Score | Accuracy |
+|:-------------:|:------:|:-----:|:---------------:|:--------:|:--------:|
+| 0.3344        | 2.17   | 200   | 0.2833          | 0.8947   | 0.8946   |
+| 0.2613        | 4.35   | 400   | 0.2697          | 0.8952   | 0.8953   |
+| 0.2448        | 6.52   | 600   | 0.2689          | 0.9007   | 0.9008   |
+| 0.2336        | 8.7    | 800   | 0.2780          | 0.8913   | 0.8912   |
+| 0.2122        | 10.87  | 1000  | 0.2770          | 0.8940   | 0.8939   |
+| 0.205         | 13.04  | 1200  | 0.2818          | 0.8968   | 0.8966   |
+| 0.186         | 15.22  | 1400  | 0.2895          | 0.8941   | 0.8939   |
+| 0.1726        | 17.39  | 1600  | 0.3137          | 0.8874   | 0.8871   |
+| 0.1593        | 19.57  | 1800  | 0.3108          | 0.8898   | 0.8898   |
+| 0.1454        | 21.74  | 2000  | 0.3295          | 0.8798   | 0.8795   |
+| 0.1317        | 23.91  | 2200  | 0.3456          | 0.8848   | 0.8850   |
+| 0.1247        | 26.09  | 2400  | 0.3373          | 0.8849   | 0.8850   |
+| 0.1073        | 28.26  | 2600  | 0.3978          | 0.8842   | 0.8843   |
+| 0.0975        | 30.43  | 2800  | 0.4058          | 0.8789   | 0.8789   |
+| 0.0828        | 32.61  | 3000  | 0.4454          | 0.8718   | 0.8720   |
+| 0.0786        | 34.78  | 3200  | 0.4245          | 0.8897   | 0.8898   |
+| 0.0722        | 36.96  | 3400  | 0.4648          | 0.8799   | 0.8802   |
+| 0.0607        | 39.13  | 3600  | 0.5033          | 0.8738   | 0.8741   |
+| 0.0591        | 41.3   | 3800  | 0.4646          | 0.8830   | 0.8830   |
+| 0.053         | 43.48  | 4000  | 0.5155          | 0.8723   | 0.8720   |
+| 0.048         | 45.65  | 4200  | 0.5738          | 0.8689   | 0.8693   |
+| 0.0458        | 47.83  | 4400  | 0.5701          | 0.8768   | 0.8768   |
+| 0.042         | 50.0   | 4600  | 0.5922          | 0.8682   | 0.8686   |
+| 0.039         | 52.17  | 4800  | 0.6313          | 0.8734   | 0.8734   |
+| 0.0365        | 54.35  | 5000  | 0.6028          | 0.8801   | 0.8802   |
+| 0.0328        | 56.52  | 5200  | 0.6634          | 0.8709   | 0.8706   |
+| 0.0332        | 58.7   | 5400  | 0.6220          | 0.8747   | 0.8747   |
+| 0.0279        | 60.87  | 5600  | 0.6763          | 0.8703   | 0.8700   |
+| 0.0316        | 63.04  | 5800  | 0.6680          | 0.8689   | 0.8686   |
+| 0.0272        | 65.22  | 6000  | 0.6361          | 0.8774   | 0.8775   |
+| 0.0237        | 67.39  | 6200  | 0.6719          | 0.8734   | 0.8734   |
+| 0.0284        | 69.57  | 6400  | 0.6502          | 0.8774   | 0.8775   |
+| 0.0238        | 71.74  | 6600  | 0.7002          | 0.8786   | 0.8789   |
+| 0.0219        | 73.91  | 6800  | 0.6923          | 0.8781   | 0.8782   |
+| 0.0184        | 76.09  | 7000  | 0.7053          | 0.8795   | 0.8795   |
+| 0.0192        | 78.26  | 7200  | 0.7043          | 0.8857   | 0.8857   |
+| 0.0204        | 80.43  | 7400  | 0.7248          | 0.8830   | 0.8830   |
+| 0.0202        | 82.61  | 7600  | 0.7226          | 0.8764   | 0.8768   |
+| 0.0199        | 84.78  | 7800  | 0.7160          | 0.8884   | 0.8884   |
+| 0.016         | 86.96  | 8000  | 0.7167          | 0.8822   | 0.8823   |
+| 0.0167        | 89.13  | 8200  | 0.7441          | 0.8788   | 0.8789   |
+| 0.0153        | 91.3   | 8400  | 0.7368          | 0.8781   | 0.8782   |
+| 0.0139        | 93.48  | 8600  | 0.7587          | 0.8808   | 0.8809   |
+| 0.0138        | 95.65  | 8800  | 0.7746          | 0.8761   | 0.8761   |
+| 0.0144        | 97.83  | 9000  | 0.7587          | 0.8836   | 0.8836   |
+| 0.0139        | 100.0  | 9200  | 0.7791          | 0.8823   | 0.8823   |
+| 0.015         | 102.17 | 9400  | 0.7806          | 0.8809   | 0.8809   |
+| 0.0126        | 104.35 | 9600  | 0.7763          | 0.8795   | 0.8795   |
+| 0.0115        | 106.52 | 9800  | 0.7799          | 0.8808   | 0.8809   |
+| 0.0142        | 108.7  | 10000 | 0.7773          | 0.8788   | 0.8789   |
+### Framework versions
+- PEFT 0.9.0
+- Transformers 4.38.2
+- Pytorch 2.2.0+cu121
+- Datasets 2.17.1
+- Tokenizers 0.15.2

adapter_config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mahdibaghbanzadeh/seqsight_4096_512_27M",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8.0,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "pooler"
+  ],
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_proj",
+    "value_proj"
+  ],
+  "task_type": "SEQ_CLS",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:877b48c2e6f7e307469121ac81068b7ddb9b797cd2b7e7f15472da6f8fa7e494
+size 3157040

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "cls_token": {
+    "content": "<cls>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<sos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<sos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<eos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<cls>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<cls>",
+  "eos_token": "<eos>",
+  "mask_token": "<sos>",
+  "max_length": 512,
+  "model_max_length": 512,
+  "pad_to_multiple_of": null,
+  "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding": "max_length",
+  "padding_side": "right",
+  "stride": 0,
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation": true,
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9798b1bd01da728cb00b425ea49782f16f5730863e732b96b3910698dd6c6da2
+size 4920