Upload 15 files
- README.md +57 -0
- added_tokens.json +9 -0
- all_results.json +16 -0
- config.json +39 -0
- eval_results.json +10 -0
- generation_config.json +6 -0
- merges.txt +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +60 -0
- tokenizer.json +0 -0
- tokenizer_config.json +82 -0
- train_results.json +9 -0
- trainer_state.json +133 -0
- training_args.bin +3 -0
- vocab.json +0 -0
README.md
ADDED
@@ -0,0 +1,57 @@
---
license: apache-2.0
base_model: nferruz/ProtGPT2
tags:
- generated_from_trainer
metrics:
- accuracy
model-index:
- name: model_output
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# model_output

This model is a fine-tuned version of [nferruz/ProtGPT2](https://huggingface.co/nferruz/ProtGPT2) on an unknown dataset.
It achieves the following results on the evaluation set:
- Loss: 14.2921
- Accuracy: 0.2671

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 0.001
- train_batch_size: 8
- eval_batch_size: 8
- seed: 42
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- num_epochs: 200.0

### Training results

### Framework versions

- Transformers 4.41.0.dev0
- Pytorch 2.3.0+cu121
- Datasets 2.19.1
- Tokenizers 0.19.1
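A minimal loading sketch with `transformers`, assuming the files in this commit sit in a local `model_output/` directory (a Hub repo id would work the same way); the `<startoftext>SEQUENCE:` prompt is inferred from the added special tokens below, not documented usage:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

path = "model_output"  # hypothetical local directory holding these files
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForCausalLM.from_pretrained(path)  # GPT2LMHeadModel per config.json

# Prompt format inferred from the added special tokens; an unverified assumption.
inputs = tokenizer("<startoftext>SEQUENCE:", return_tensors="pt")
outputs = model.generate(
    **inputs,
    do_sample=True,          # matches task_specific_params in config.json
    max_length=50,
    pad_token_id=tokenizer.pad_token_id,
)
print(tokenizer.decode(outputs[0]))
```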
added_tokens.json
ADDED
@@ -0,0 +1,9 @@
{
  "<PAD>": 50263,
  "<endoftext>": 50262,
  "<startoftext>": 50261,
  "LABEL:": 50258,
  "NEGATIVE": 50260,
  "POSITIVE": 50259,
  "SEQUENCE:": 50257
}
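The token names suggest a `SEQUENCE: ... LABEL: POSITIVE|NEGATIVE` prompt scheme; that reading is inferred from the names alone. A quick check that the tokenizer resolves each control token to the IDs above (same hypothetical path as before):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("model_output")  # hypothetical path
for tok in ["SEQUENCE:", "LABEL:", "POSITIVE", "NEGATIVE",
            "<startoftext>", "<endoftext>", "<PAD>"]:
    print(tok, tokenizer.convert_tokens_to_ids(tok))
# Expected IDs: 50257, 50258, 50259, 50260, 50261, 50262, 50263
```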
all_results.json
ADDED
@@ -0,0 +1,16 @@
{
  "epoch": 200.0,
  "eval_accuracy": 0.26709025741283804,
  "eval_loss": 14.292082786560059,
  "eval_runtime": 4.0475,
  "eval_samples": 30,
  "eval_samples_per_second": 7.412,
  "eval_steps_per_second": 0.988,
  "perplexity": 1610544.3518588347,
  "total_flos": 1.175134666752e+17,
  "train_loss": 0.14310694140546462,
  "train_runtime": 11074.5138,
  "train_samples": 135,
  "train_samples_per_second": 2.438,
  "train_steps_per_second": 0.614
}
config.json
ADDED
@@ -0,0 +1,39 @@
{
  "_name_or_path": "nferruz/ProtGPT2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 0,
  "embd_pdrop": 0.1,
  "eos_token_id": 0,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 1280,
  "n_head": 20,
  "n_inner": null,
  "n_layer": 36,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.41.0.dev0",
  "use_cache": true,
  "vocab_size": 50264
}
eval_results.json
ADDED
@@ -0,0 +1,10 @@
{
  "epoch": 200.0,
  "eval_accuracy": 0.26709025741283804,
  "eval_loss": 14.292082786560059,
  "eval_runtime": 4.0475,
  "eval_samples": 30,
  "eval_samples_per_second": 7.412,
  "eval_steps_per_second": 0.988,
  "perplexity": 1610544.3518588347
}
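The reported perplexity matches `exp(eval_loss)`, the usual derivation in the Transformers example scripts:

```python
import math

eval_loss = 14.292082786560059
print(math.exp(eval_loss))  # ~1610544.35, matching "perplexity" above
```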
generation_config.json
ADDED
@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "bos_token_id": 0,
  "eos_token_id": 0,
  "transformers_version": "4.41.0.dev0"
}
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d26a7841afbb9a07d9389e7a7a5ec820f904a9930b8db11d9d4c92b4003aadbe
size 3096298970
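The LFS size is consistent with a float32 dump of the architecture in config.json; a rough sketch of the count (embeddings plus about 12·n_embd² weights per block, ignoring biases and layer norms):

```python
n_embd, n_layer, vocab_size, n_positions = 1280, 36, 50264, 1024

params = (vocab_size * n_embd          # token embeddings (tied with the LM head)
          + n_positions * n_embd       # position embeddings
          + n_layer * 12 * n_embd**2)  # attention + MLP weights per block
print(f"{params:,} params, ~{params * 4 / 1e9:.2f} GB in float32")
# ~773M params, ~3.09 GB: close to the 3,096,298,970-byte file above
```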
special_tokens_map.json
ADDED
@@ -0,0 +1,60 @@
{
  "additional_special_tokens": [
    {
      "content": "SEQUENCE:",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    },
    {
      "content": "LABEL:",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    },
    {
      "content": "POSITIVE",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    },
    {
      "content": "NEGATIVE",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    }
  ],
  "bos_token": {
    "content": "<startoftext>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<endoftext>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<PAD>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,82 @@
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50257": {
      "content": "SEQUENCE:",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50258": {
      "content": "LABEL:",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50259": {
      "content": "POSITIVE",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50260": {
      "content": "NEGATIVE",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50261": {
      "content": "<startoftext>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50262": {
      "content": "<endoftext>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50263": {
      "content": "<PAD>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "SEQUENCE:",
    "LABEL:",
    "POSITIVE",
    "NEGATIVE"
  ],
  "bos_token": "<startoftext>",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<endoftext>",
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<PAD>",
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>"
}
train_results.json
ADDED
@@ -0,0 +1,9 @@
{
  "epoch": 200.0,
  "total_flos": 1.175134666752e+17,
  "train_loss": 0.14310694140546462,
  "train_runtime": 11074.5138,
  "train_samples": 135,
  "train_samples_per_second": 2.438,
  "train_steps_per_second": 0.614
}
trainer_state.json
ADDED
@@ -0,0 +1,133 @@
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 200.0,
  "eval_steps": 500,
  "global_step": 6800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 14.705882352941176,
      "grad_norm": 0.8522269129753113,
      "learning_rate": 0.0009264705882352942,
      "loss": 1.7538,
      "step": 500
    },
    {
      "epoch": 29.41176470588235,
      "grad_norm": 0.24410583078861237,
      "learning_rate": 0.0008529411764705882,
      "loss": 0.0574,
      "step": 1000
    },
    {
      "epoch": 44.11764705882353,
      "grad_norm": 0.32126477360725403,
      "learning_rate": 0.0007794117647058824,
      "loss": 0.0246,
      "step": 1500
    },
    {
      "epoch": 58.8235294117647,
      "grad_norm": 0.1633734405040741,
      "learning_rate": 0.0007058823529411765,
      "loss": 0.0177,
      "step": 2000
    },
    {
      "epoch": 73.52941176470588,
      "grad_norm": 1.6365108489990234,
      "learning_rate": 0.0006323529411764706,
      "loss": 0.0407,
      "step": 2500
    },
    {
      "epoch": 88.23529411764706,
      "grad_norm": 0.11134446412324905,
      "learning_rate": 0.0005588235294117647,
      "loss": 0.0197,
      "step": 3000
    },
    {
      "epoch": 102.94117647058823,
      "grad_norm": 1.3109087944030762,
      "learning_rate": 0.0004852941176470588,
      "loss": 0.0146,
      "step": 3500
    },
    {
      "epoch": 117.6470588235294,
      "grad_norm": 0.1169649288058281,
      "learning_rate": 0.0004117647058823529,
      "loss": 0.0043,
      "step": 4000
    },
    {
      "epoch": 132.35294117647058,
      "grad_norm": 0.061263132840394974,
      "learning_rate": 0.0003382352941176471,
      "loss": 0.0044,
      "step": 4500
    },
    {
      "epoch": 147.05882352941177,
      "grad_norm": 0.019590700045228004,
      "learning_rate": 0.0002647058823529412,
      "loss": 0.0025,
      "step": 5000
    },
    {
      "epoch": 161.76470588235293,
      "grad_norm": 0.011288847774267197,
      "learning_rate": 0.00019117647058823528,
      "loss": 0.0024,
      "step": 5500
    },
    {
      "epoch": 176.47058823529412,
      "grad_norm": 0.011768895201385021,
      "learning_rate": 0.00011764705882352942,
      "loss": 0.0017,
      "step": 6000
    },
    {
      "epoch": 191.1764705882353,
      "grad_norm": 0.010613554157316685,
      "learning_rate": 4.411764705882353e-05,
      "loss": 0.0015,
      "step": 6500
    },
    {
      "epoch": 200.0,
      "step": 6800,
      "total_flos": 1.175134666752e+17,
      "train_loss": 0.14310694140546462,
      "train_runtime": 11074.5138,
      "train_samples_per_second": 2.438,
      "train_steps_per_second": 0.614
    }
  ],
  "logging_steps": 500,
  "max_steps": 6800,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 200,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.175134666752e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
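Two consistency checks on the state above: with the recorded train_batch_size of 4 and the 135 training samples from train_results.json, each epoch is ceil(135/4) = 34 optimizer steps, so 200 epochs give exactly the 6800 global steps; and the logged learning rates follow the linear decay from the 0.001 peak:

```python
import math

steps_per_epoch = math.ceil(135 / 4)   # 135 train samples, batch size 4
print(steps_per_epoch * 200)           # 6800, matching global_step/max_steps

lr0 = 0.001
print(lr0 * (1 - 500 / 6800))          # 0.000926..., matching the step-500 log entry
```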
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:431a1bfed34c87857239222334d4e4599fd4c55d91edf03879fa59d9ee1f3a8f
size 5112
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff