Training in progress, step 20, checkpoint

Browse files

Files changed (11) hide show

last-checkpoint/README.md +1 -1
last-checkpoint/adapter_config.json +10 -2
last-checkpoint/adapter_model.safetensors +2 -2
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/tokenizer.json +2 -2
last-checkpoint/tokenizer.model +3 -0
last-checkpoint/tokenizer_config.json +7 -11
last-checkpoint/trainer_state.json +8 -141
last-checkpoint/training_args.bin +1 -1

last-checkpoint/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 library_name: peft
-base_model: bigscience/bloomz-1b1
 ---
 # Model Card for Model ID

 ---
 library_name: peft
+base_model: JackFram/llama-160m
 ---
 # Model Card for Model ID

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "bigscience/bloomz-1b1",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -20,7 +20,15 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "query_key_value"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "JackFram/llama-160m",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "up_proj",
+    "embed_tokens",
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "lm_head",
+    "q_proj",
+    "o_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1801d110451c0363b6c96a6beb709896d1e97932e109f064917b003009fc7531
-size 4725640

 version https://git-lfs.github.com/spec/v1
+oid sha256:ec6a12bd652862c2785d139bd15d9582616e16bfdf59174630b76670fc9a963e
+size 205511192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dad47e1bd71fe5d6ce8ab2ddc71e1f433caf30cc0edbeaa8bee27bcbcb73645c
-size 2423802

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4a9363a2a673c3fe1089907dfeb0e8180df9ee7402d1e8ff77d6dd7b4aa5650
+size 4644026

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd42e404833ad689f8f0afea3e690a640621e31bc600a66bf369e5fbdf9e45d8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed0777ad4342578dcc46ca5f5a6d5fcf48dd8094a61fe1c87b41cb2f83ff1c6a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:092d025c9fcbd51424bdca7f48bfd52fefca1fab60b5376be0369f85dfec9dd1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:064312e6375f494574ea3d2e75c4bb8dc97a4b36316db34a10f092589094ee40
 size 1064

last-checkpoint/tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3fe0e32d5e685050c0787e7fa781db4fc73bddea1b6da68659010dedc06005b3
-size 14500638

 version https://git-lfs.github.com/spec/v1
+oid sha256:054b87d156d39458c2c9bc37f19d8dc373128f5545d309d1e58c83187d68113a
+size 1842934

last-checkpoint/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

last-checkpoint/tokenizer_config.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
-  "add_prefix_space": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -24,22 +25,17 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "3": {
-      "content": "<pad>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "</s>",
   "padding_side": "right",
-  "tokenizer_class": "BloomTokenizer",
-  "unk_token": "<unk>"
 }

 {
+  "add_bos_token": true,
+  "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "legacy": false,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "</s>",
   "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
 }

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,161 +1,28 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.00031130100472399273,
   "eval_steps": 500,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.0,
-      "grad_norm": 2.555440902709961,
-      "learning_rate": 3.3333333333333333e-06,
-      "loss": 3.6397,
       "step": 20
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 2.218903064727783,
-      "learning_rate": 6.666666666666667e-06,
-      "loss": 3.6917,
-      "step": 40
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 0.9262466430664062,
-      "learning_rate": 1e-05,
-      "loss": 3.5828,
-      "step": 60
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 2.782036542892456,
-      "learning_rate": 1.3166666666666665e-05,
-      "loss": 3.5865,
-      "step": 80
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 1.9482054710388184,
-      "learning_rate": 1.65e-05,
-      "loss": 3.3337,
-      "step": 100
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 4.047863006591797,
-      "learning_rate": 1.9833333333333335e-05,
-      "loss": 3.1903,
-      "step": 120
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 3.08722186088562,
-      "learning_rate": 2.3166666666666666e-05,
-      "loss": 3.5379,
-      "step": 140
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 3.540940046310425,
-      "learning_rate": 2.6500000000000004e-05,
-      "loss": 3.16,
-      "step": 160
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 5.391817092895508,
-      "learning_rate": 2.9833333333333335e-05,
-      "loss": 3.2489,
-      "step": 180
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 5.890682220458984,
-      "learning_rate": 3.316666666666667e-05,
-      "loss": 3.0499,
-      "step": 200
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 6.314597129821777,
-      "learning_rate": 3.65e-05,
-      "loss": 2.8568,
-      "step": 220
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 1.0859078168869019,
-      "learning_rate": 3.983333333333333e-05,
-      "loss": 2.8566,
-      "step": 240
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 4.688353538513184,
-      "learning_rate": 4.316666666666667e-05,
-      "loss": 3.0079,
-      "step": 260
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 4.502331256866455,
-      "learning_rate": 4.6500000000000005e-05,
-      "loss": 2.6839,
-      "step": 280
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 8.951983451843262,
-      "learning_rate": 4.9833333333333336e-05,
-      "loss": 2.7932,
-      "step": 300
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 4.788575172424316,
-      "learning_rate": 4.9999526661182696e-05,
-      "loss": 2.9341,
-      "step": 320
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 7.716049671173096,
-      "learning_rate": 4.999800570348766e-05,
-      "loss": 2.5987,
-      "step": 340
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 4.9223952293396,
-      "learning_rate": 4.9995435879539254e-05,
-      "loss": 2.7863,
-      "step": 360
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 7.647037506103516,
-      "learning_rate": 4.999181729716214e-05,
-      "loss": 2.6197,
-      "step": 380
-    },
-    {
-      "epoch": 0.0,
-      "grad_norm": 1.073474407196045,
-      "learning_rate": 4.998715010818479e-05,
-      "loss": 2.6627,
-      "step": 400
     }
   ],
   "logging_steps": 20,
-  "max_steps": 10000,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 20,
-  "total_flos": 1824387808739328.0,
-  "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0001245204018895971,
   "eval_steps": 500,
+  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.0,
+      "grad_norm": NaN,
+      "learning_rate": 9.999999999999999e-06,
+      "loss": 4.7178,
       "step": 20
     }
   ],
   "logging_steps": 20,
+  "max_steps": 2000,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 20,
+  "total_flos": 297330828518400.0,
+  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da5d41afb0ddac16c18350b7a14dbb4a1e14941c08946cda5f16f71c0aaf525c
 size 4984

 version https://git-lfs.github.com/spec/v1
+oid sha256:70e3a7379ab98397e1c0492218b73c68c0db563ddcb245966b7fd947f7930183
 size 4984