Training in progress, step 3000

Browse files

Files changed (9) hide show

added_tokens.json +5 -0
config.json +28 -0
merges.txt +0 -0
model.safetensors +3 -0
special_tokens_map.json +20 -0
tokenizer_config.json +44 -0
trainer_log.jsonl +12 -0
training_args.bin +3 -0
vocab.json +0 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644
+}

config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "_name_or_path": "MathSymbol/BasicSFT_1.8_Pretrain_Lightning",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 5504,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 21,
+  "model_type": "qwen2",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 16,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.0",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bcb9e249858be8cd1761d787a5eab4c07eeae570ff35e838e331a6da03f1806b
+size 3673690696

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{% set system_message = 'You are a Math Teacher.Your goal is to understand a math word problem. Then recognize and distinguish which problem it is and then define the variables (if needed) and formulate the problem as it kind then transform it to Symbolic Form.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message + '\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Question: ' + content + ' \\n Answer: ' }}{% elif message['role'] == 'assistant' %}{{ content + '<|endoftext|>' + '\\n' }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 32768,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

trainer_log.jsonl ADDED Viewed

	@@ -0,0 +1,12 @@

+{"current_steps": 300, "total_steps": 9120, "loss": 0.7143, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3e-05, "epoch": 0.03289383514706285, "percentage": 3.29, "elapsed_time": "0:11:46", "remaining_time": "5:46:15"}
+{"current_steps": 600, "total_steps": 9120, "loss": 0.2219, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998339850669331e-05, "epoch": 0.0657876702941257, "percentage": 6.58, "elapsed_time": "0:21:56", "remaining_time": "5:11:39"}
+{"current_steps": 900, "total_steps": 9120, "loss": 0.2074, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9734816848192624e-05, "epoch": 0.09868150544118856, "percentage": 9.87, "elapsed_time": "0:33:43", "remaining_time": "5:08:02"}
+{"current_steps": 1200, "total_steps": 9120, "loss": 0.1906, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9190839785031474e-05, "epoch": 0.1315753405882514, "percentage": 13.16, "elapsed_time": "0:43:50", "remaining_time": "4:49:22"}
+{"current_steps": 1500, "total_steps": 9120, "loss": 0.1923, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.835796376008569e-05, "epoch": 0.16446917573531428, "percentage": 16.45, "elapsed_time": "0:55:43", "remaining_time": "4:43:07"}
+{"current_steps": 1500, "total_steps": 9120, "loss": null, "eval_loss": 0.1849033087491989, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.16446917573531428, "percentage": 16.45, "elapsed_time": "0:55:43", "remaining_time": "4:43:07"}
+{"current_steps": 1800, "total_steps": 9120, "loss": 0.1839, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7246135390382216e-05, "epoch": 0.19736301088237712, "percentage": 19.74, "elapsed_time": "1:07:10", "remaining_time": "4:33:08"}
+{"current_steps": 2100, "total_steps": 9120, "loss": 0.1938, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.586863267968384e-05, "epoch": 0.23025684602944, "percentage": 23.03, "elapsed_time": "1:19:01", "remaining_time": "4:24:08"}
+{"current_steps": 2400, "total_steps": 9120, "loss": 0.1863, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4241906446007296e-05, "epoch": 0.2631506811765028, "percentage": 26.32, "elapsed_time": "1:29:09", "remaining_time": "4:09:38"}
+{"current_steps": 2700, "total_steps": 9120, "loss": 0.1797, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.238538385782601e-05, "epoch": 0.2960445163235657, "percentage": 29.61, "elapsed_time": "1:40:55", "remaining_time": "3:59:59"}
+{"current_steps": 3000, "total_steps": 9120, "loss": 0.176, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.032123642522486e-05, "epoch": 0.32893835147062855, "percentage": 32.89, "elapsed_time": "1:51:07", "remaining_time": "3:46:41"}
+{"current_steps": 3000, "total_steps": 9120, "loss": null, "eval_loss": 0.1760552078485489, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.32893835147062855, "percentage": 32.89, "elapsed_time": "1:51:07", "remaining_time": "3:46:41"}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e87d1dec697e26f8674bebb378e7add472d9862f152df6bbd1c35b83737657ec
+size 5176

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff