Training in progress, step 25, checkpoint

Browse files

Files changed (10) hide show

last-checkpoint/README.md +1 -1
last-checkpoint/adapter_config.json +4 -4
last-checkpoint/adapter_model.safetensors +2 -2
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/special_tokens_map.json +0 -10
last-checkpoint/tokenizer.json +2 -65
last-checkpoint/tokenizer.model +2 -2
last-checkpoint/tokenizer_config.json +3 -49
last-checkpoint/trainer_state.json +51 -51
last-checkpoint/training_args.bin +1 -1

last-checkpoint/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 library_name: peft
-base_model: defog/sqlcoder-7b-2
 ---
 # Model Card for Model ID

 ---
 library_name: peft
+base_model: meta-llama/Llama-2-13b-hf
 ---
 # Model Card for Model ID

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "defog/sqlcoder-7b-2",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -9,7 +9,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 32,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
@@ -20,9 +20,9 @@
   "revision": null,
   "target_modules": [
     "v_proj",
-    "q_proj",
     "k_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 64,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "revision": null,
   "target_modules": [
     "v_proj",
     "k_proj",
+    "o_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f567278abac0a182c45835aeeebcf74ff1ae9af34c0dbacb7ab3fe490aa5b1e
-size 536906096

 version https://git-lfs.github.com/spec/v1
+oid sha256:335ff3ab02635313a29bd60a29e36f5945243b709320d83cfa5ece896a4ce0b6
+size 838904832

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2673747580f82b94b8d87d9b609fdfb003c7ce6c8cf6a1642e31c0f18ac6409d
-size 1073950458

 version https://git-lfs.github.com/spec/v1
+oid sha256:611c64f6ffdb40b6b7f76b9a41a0c02b3b92b97b992a64dacf57259d7b9ca2a2
+size 1677982394

last-checkpoint/special_tokens_map.json CHANGED Viewed

@@ -1,14 +1,4 @@
 {
-  "additional_special_tokens": [
-    "▁<PRE>",
-    "▁<MID>",
-    "▁<SUF>",
-    "▁<EOT>",
-    "▁<PRE>",
-    "▁<MID>",
-    "▁<SUF>",
-    "▁<EOT>"
-  ],
   "bos_token": {
     "content": "<s>",
     "lstrip": false,

 {
   "bos_token": {
     "content": "<s>",
     "lstrip": false,

last-checkpoint/tokenizer.json CHANGED Viewed

@@ -34,42 +34,6 @@
       "rstrip": false,
       "normalized": false,
       "special": true
-    },
-    {
-      "id": 32007,
-      "content": "▁<PRE>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
-    },
-    {
-      "id": 32008,
-      "content": "▁<SUF>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
-    },
-    {
-      "id": 32009,
-      "content": "▁<MID>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
-    },
-    {
-      "id": 32010,
-      "content": "▁<EOT>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
     }
   ],
   "normalizer": {
@@ -32175,23 +32139,7 @@
       "왕": 31996,
       "收": 31997,
       "弘": 31998,
-      "给": 31999,
-      "▁<SU": 32000,
-      "▁<SUF": 32001,
-      "▁<PRE": 32002,
-      "▁<M": 32003,
-      "▁<MID": 32004,
-      "▁<E": 32005,
-      "▁<EOT": 32006,
-      "▁<PRE>": 32007,
-      "▁<SUF>": 32008,
-      "▁<MID>": 32009,
-      "▁<EOT>": 32010,
-      "▁<EOT><EOT>": 32011,
-      "▁<EOT><EOT><EOT>": 32012,
-      "▁<EOT><EOT><EOT><EOT>": 32013,
-      "▁<EOT><EOT><EOT><EOT><EOT>": 32014,
-      "▁<EOT><EOT><EOT><EOT><EOT><EOT>": 32015
     },
     "merges": [
       "▁ t",
@@ -93442,18 +93390,7 @@
       "▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
       "▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
       "▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
-      "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
-      "▁< SU",
-      "▁<SU F",
-      "▁< PRE",
-      "▁< M",
-      "▁<M ID",
-      "▁< E",
-      "▁<E OT",
-      "▁<PRE >",
-      "▁<SUF >",
-      "▁<MID >",
-      "▁<EOT >"
     ]
   }
 }

       "rstrip": false,
       "normalized": false,
       "special": true
     }
   ],
   "normalizer": {
       "왕": 31996,
       "收": 31997,
       "弘": 31998,
+      "给": 31999
     },
     "merges": [
       "▁ t",
       "▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
       "▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
       "▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
+      "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
     ]
   }
 }

last-checkpoint/tokenizer.model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
-size 500058

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

last-checkpoint/tokenizer_config.json CHANGED Viewed

@@ -25,63 +25,17 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "32007": {
-      "content": "▁<PRE>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "32008": {
-      "content": "▁<SUF>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "32009": {
-      "content": "▁<MID>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "32010": {
-      "content": "▁<EOT>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
-  "additional_special_tokens": [
-    "▁<PRE>",
-    "▁<MID>",
-    "▁<SUF>",
-    "▁<EOT>",
-    "▁<PRE>",
-    "▁<MID>",
-    "▁<SUF>",
-    "▁<EOT>"
-  ],
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "eot_token": "▁<EOT>",
-  "fill_token": "<FILL_ME>",
-  "legacy": null,
-  "middle_token": "▁<MID>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "</s>",
-  "prefix_token": "▁<PRE>",
   "sp_model_kwargs": {},
-  "suffix_token": "▁<SUF>",
-  "tokenizer_class": "CodeLlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
 }

       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "legacy": false,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "</s>",
+  "padding_side": "right",
   "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
 }

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -10,177 +10,177 @@
   "log_history": [
     {
       "epoch": 0.0,
-      "grad_norm": Infinity,
       "learning_rate": 0.0002,
-      "loss": 2.4627,
       "step": 1
     },
     {
       "epoch": 0.0,
-      "grad_norm": 948.3303833007812,
       "learning_rate": 0.0002,
-      "loss": 6.0035,
       "step": 2
     },
     {
       "epoch": 0.0,
-      "grad_norm": 0.2631005644798279,
       "learning_rate": 0.0002,
-      "loss": 3.0899,
       "step": 3
     },
     {
       "epoch": 0.0,
-      "grad_norm": 16874.787109375,
       "learning_rate": 0.0002,
-      "loss": 2.9749,
       "step": 4
     },
     {
       "epoch": 0.0,
-      "grad_norm": 24137.34375,
       "learning_rate": 0.0002,
-      "loss": 3.0415,
       "step": 5
     },
     {
       "epoch": 0.0,
-      "grad_norm": 0.28247156739234924,
       "learning_rate": 0.0002,
-      "loss": 2.993,
       "step": 6
     },
     {
       "epoch": 0.0,
-      "grad_norm": 0.3185359835624695,
       "learning_rate": 0.0002,
-      "loss": 2.9267,
       "step": 7
     },
     {
       "epoch": 0.0,
-      "grad_norm": 0.3125191032886505,
       "learning_rate": 0.0002,
-      "loss": 2.7621,
       "step": 8
     },
     {
       "epoch": 0.0,
-      "grad_norm": 0.31877008080482483,
       "learning_rate": 0.0002,
-      "loss": 2.6476,
       "step": 9
     },
     {
       "epoch": 0.0,
-      "grad_norm": 0.290170818567276,
       "learning_rate": 0.0002,
-      "loss": 2.4098,
       "step": 10
     },
     {
       "epoch": 0.0,
-      "grad_norm": 0.3100622594356537,
       "learning_rate": 0.0002,
-      "loss": 2.3601,
       "step": 11
     },
     {
       "epoch": 0.0,
-      "grad_norm": 0.3061903417110443,
       "learning_rate": 0.0002,
-      "loss": 2.2337,
       "step": 12
     },
     {
       "epoch": 0.0,
-      "grad_norm": 0.35913383960723877,
       "learning_rate": 0.0002,
-      "loss": 2.041,
       "step": 13
     },
     {
       "epoch": 0.0,
-      "grad_norm": 0.35995352268218994,
       "learning_rate": 0.0002,
-      "loss": 1.896,
       "step": 14
     },
     {
       "epoch": 0.01,
-      "grad_norm": 0.35563966631889343,
       "learning_rate": 0.0002,
-      "loss": 1.7044,
       "step": 15
     },
     {
       "epoch": 0.01,
-      "grad_norm": 0.3549964129924774,
       "learning_rate": 0.0002,
-      "loss": 1.5553,
       "step": 16
     },
     {
       "epoch": 0.01,
-      "grad_norm": 0.3693196177482605,
       "learning_rate": 0.0002,
-      "loss": 1.4171,
       "step": 17
     },
     {
       "epoch": 0.01,
-      "grad_norm": 0.3318246304988861,
       "learning_rate": 0.0002,
-      "loss": 1.186,
       "step": 18
     },
     {
       "epoch": 0.01,
-      "grad_norm": 0.2706567645072937,
       "learning_rate": 0.0002,
-      "loss": 1.1127,
       "step": 19
     },
     {
       "epoch": 0.01,
-      "grad_norm": 0.24653750658035278,
       "learning_rate": 0.0002,
-      "loss": 1.0762,
       "step": 20
     },
     {
       "epoch": 0.01,
-      "grad_norm": 0.16432078182697296,
       "learning_rate": 0.0002,
-      "loss": 0.9719,
       "step": 21
     },
     {
       "epoch": 0.01,
-      "grad_norm": 0.17397165298461914,
       "learning_rate": 0.0002,
-      "loss": 0.9167,
       "step": 22
     },
     {
       "epoch": 0.01,
-      "grad_norm": 0.23421818017959595,
       "learning_rate": 0.0002,
-      "loss": 0.8786,
       "step": 23
     },
     {
       "epoch": 0.01,
-      "grad_norm": 0.19150808453559875,
       "learning_rate": 0.0002,
-      "loss": 0.89,
       "step": 24
     },
     {
       "epoch": 0.01,
-      "grad_norm": 755059.0,
       "learning_rate": 0.0002,
-      "loss": 0.8755,
       "step": 25
     }
   ],
@@ -189,7 +189,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 25,
-  "total_flos": 3544369882398720.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "log_history": [
     {
       "epoch": 0.0,
+      "grad_norm": 0.21934866905212402,
       "learning_rate": 0.0002,
+      "loss": 2.1352,
       "step": 1
     },
     {
       "epoch": 0.0,
+      "grad_norm": 0.37619397044181824,
       "learning_rate": 0.0002,
+      "loss": 2.3002,
       "step": 2
     },
     {
       "epoch": 0.0,
+      "grad_norm": 0.43210744857788086,
       "learning_rate": 0.0002,
+      "loss": 2.041,
       "step": 3
     },
     {
       "epoch": 0.0,
+      "grad_norm": 0.4717111885547638,
       "learning_rate": 0.0002,
+      "loss": 1.839,
       "step": 4
     },
     {
       "epoch": 0.0,
+      "grad_norm": 0.6455919146537781,
       "learning_rate": 0.0002,
+      "loss": 1.5939,
       "step": 5
     },
     {
       "epoch": 0.0,
+      "grad_norm": 1.2801408767700195,
       "learning_rate": 0.0002,
+      "loss": 1.327,
       "step": 6
     },
     {
       "epoch": 0.0,
+      "grad_norm": 0.9770981669425964,
       "learning_rate": 0.0002,
+      "loss": 1.1536,
       "step": 7
     },
     {
       "epoch": 0.0,
+      "grad_norm": 1.177263617515564,
       "learning_rate": 0.0002,
+      "loss": 0.9881,
       "step": 8
     },
     {
       "epoch": 0.0,
+      "grad_norm": 0.6201061606407166,
       "learning_rate": 0.0002,
+      "loss": 0.8609,
       "step": 9
     },
     {
       "epoch": 0.0,
+      "grad_norm": 1.45395827293396,
       "learning_rate": 0.0002,
+      "loss": 0.8477,
       "step": 10
     },
     {
       "epoch": 0.0,
+      "grad_norm": 1.0724296569824219,
       "learning_rate": 0.0002,
+      "loss": 0.7573,
       "step": 11
     },
     {
       "epoch": 0.0,
+      "grad_norm": 0.9028312563896179,
       "learning_rate": 0.0002,
+      "loss": 0.7258,
       "step": 12
     },
     {
       "epoch": 0.0,
+      "grad_norm": 0.8523911237716675,
       "learning_rate": 0.0002,
+      "loss": 0.7513,
       "step": 13
     },
     {
       "epoch": 0.0,
+      "grad_norm": 0.6326367855072021,
       "learning_rate": 0.0002,
+      "loss": 0.7186,
       "step": 14
     },
     {
       "epoch": 0.01,
+      "grad_norm": 0.39301833510398865,
       "learning_rate": 0.0002,
+      "loss": 0.706,
       "step": 15
     },
     {
       "epoch": 0.01,
+      "grad_norm": 0.5725602507591248,
       "learning_rate": 0.0002,
+      "loss": 0.6406,
       "step": 16
     },
     {
       "epoch": 0.01,
+      "grad_norm": 0.6625002026557922,
       "learning_rate": 0.0002,
+      "loss": 0.6349,
       "step": 17
     },
     {
       "epoch": 0.01,
+      "grad_norm": 0.19412539899349213,
       "learning_rate": 0.0002,
+      "loss": 0.6134,
       "step": 18
     },
     {
       "epoch": 0.01,
+      "grad_norm": 0.34864893555641174,
       "learning_rate": 0.0002,
+      "loss": 0.6381,
       "step": 19
     },
     {
       "epoch": 0.01,
+      "grad_norm": 0.30731046199798584,
       "learning_rate": 0.0002,
+      "loss": 0.6553,
       "step": 20
     },
     {
       "epoch": 0.01,
+      "grad_norm": 0.1636987179517746,
       "learning_rate": 0.0002,
+      "loss": 0.5984,
       "step": 21
     },
     {
       "epoch": 0.01,
+      "grad_norm": 0.1931622475385666,
       "learning_rate": 0.0002,
+      "loss": 0.5821,
       "step": 22
     },
     {
       "epoch": 0.01,
+      "grad_norm": 0.32078325748443604,
       "learning_rate": 0.0002,
+      "loss": 0.586,
       "step": 23
     },
     {
       "epoch": 0.01,
+      "grad_norm": 0.17550581693649292,
       "learning_rate": 0.0002,
+      "loss": 0.5315,
       "step": 24
     },
     {
       "epoch": 0.01,
+      "grad_norm": 0.22394584119319916,
       "learning_rate": 0.0002,
+      "loss": 0.5415,
       "step": 25
     }
   ],
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 25,
+  "total_flos": 6828259445391360.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6bd37a7bed4b29db3131947c878454b234a832069e5051a4e513a682b0b6b1f6
 size 4856

 version https://git-lfs.github.com/spec/v1
+oid sha256:229196146870c6d4028c2c7478b67450b5d84079445d42ab8c78010679048a83
 size 4856