ChatGLM-Bert-VITS2

Paused

App Files Files Community

kevinwang676 commited on Nov 29, 2023

Commit

390c787

•

1 Parent(s): 97813ba

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

all_results.json +8 -0
checkpoint-100/config.json +47 -0
checkpoint-100/generation_config.json +6 -0
checkpoint-100/optimizer.pt +3 -0
checkpoint-100/pytorch_model.bin +3 -0
checkpoint-100/rng_state.pth +3 -0
checkpoint-100/scheduler.pt +3 -0
checkpoint-100/special_tokens_map.json +1 -0
checkpoint-100/tokenizer.model +3 -0
checkpoint-100/tokenizer_config.json +14 -0
checkpoint-100/trainer_state.json +76 -0
checkpoint-100/training_args.bin +3 -0
checkpoint-200/config.json +47 -0
checkpoint-200/generation_config.json +6 -0
checkpoint-200/optimizer.pt +3 -0
checkpoint-200/pytorch_model.bin +3 -0
checkpoint-200/rng_state.pth +3 -0
checkpoint-200/scheduler.pt +3 -0
checkpoint-200/special_tokens_map.json +1 -0
checkpoint-200/tokenizer.model +3 -0
checkpoint-200/tokenizer_config.json +14 -0
checkpoint-200/trainer_state.json +136 -0
checkpoint-200/training_args.bin +3 -0
checkpoint-300/config.json +47 -0
checkpoint-300/generation_config.json +6 -0
checkpoint-300/optimizer.pt +3 -0
checkpoint-300/pytorch_model.bin +3 -0
checkpoint-300/rng_state.pth +3 -0
checkpoint-300/scheduler.pt +3 -0
checkpoint-300/special_tokens_map.json +1 -0
checkpoint-300/tokenizer.model +3 -0
checkpoint-300/tokenizer_config.json +14 -0
checkpoint-300/trainer_state.json +196 -0
checkpoint-300/training_args.bin +3 -0
checkpoint-400/config.json +47 -0
checkpoint-400/generation_config.json +6 -0
checkpoint-400/optimizer.pt +3 -0
checkpoint-400/pytorch_model.bin +3 -0
checkpoint-400/rng_state.pth +3 -0
checkpoint-400/scheduler.pt +3 -0
checkpoint-400/special_tokens_map.json +1 -0
checkpoint-400/tokenizer.model +3 -0
checkpoint-400/tokenizer_config.json +14 -0
checkpoint-400/trainer_state.json +256 -0
checkpoint-400/training_args.bin +3 -0
checkpoint-500/config.json +47 -0
checkpoint-500/generation_config.json +6 -0
checkpoint-500/optimizer.pt +3 -0
checkpoint-500/pytorch_model.bin +3 -0
checkpoint-500/rng_state.pth +3 -0

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 97.96,
+    "train_loss": 0.302445507645607,
+    "train_runtime": 8265.7464,
+    "train_samples": 98,
+    "train_samples_per_second": 1.161,
+    "train_steps_per_second": 0.073
+}

checkpoint-100/config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "_name_or_path": "chatglm2-6b",
+  "add_bias_linear": false,
+  "add_qkv_bias": true,
+  "apply_query_key_layer_scaling": true,
+  "apply_residual_connection_post_layernorm": false,
+  "architectures": [
+    "ChatGLMForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "auto_map": {
+    "AutoConfig": "configuration_chatglm.ChatGLMConfig",
+    "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForCausalLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSequenceClassification": "modeling_chatglm.ChatGLMForSequenceClassification"
+  },
+  "bias_dropout_fusion": true,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "ffn_hidden_size": 13696,
+  "fp32_residual_connection": false,
+  "hidden_dropout": 0.0,
+  "hidden_size": 4096,
+  "kv_channels": 128,
+  "layernorm_epsilon": 1e-05,
+  "model_type": "chatglm",
+  "multi_query_attention": true,
+  "multi_query_group_num": 2,
+  "num_attention_heads": 32,
+  "num_layers": 28,
+  "original_rope": true,
+  "pad_token_id": 0,
+  "padded_vocab_size": 65024,
+  "post_layer_norm": true,
+  "pre_seq_len": 128,
+  "prefix_projection": false,
+  "quantization_bit": 0,
+  "rmsnorm": true,
+  "seq_length": 32768,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.30.2",
+  "use_cache": true,
+  "vocab_size": 65024
+}

checkpoint-100/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.30.2"
+}

checkpoint-100/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5b9322b7c791be0283cae33d01cb2e6c40786a9c9fab7fc421715ba39faa314
+size 14681892

checkpoint-100/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:119f3aee6155af1456cc129b0eab064a91fd3a95f864e8b1a4985d7e10381988
+size 7341306

checkpoint-100/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:723f496b9d8d8e776f11531f4652ca1ce47b825b86325d6f5aba8841ca36f1a0
+size 14244

checkpoint-100/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e97f84b2e9cf03e34106548e8fd72d9181e088fcbe9b5747b6e8466de9610724
+size 1064

checkpoint-100/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {}

checkpoint-100/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7dc4c393423b76e4373e5157ddc34803a0189ba96b21ddbb40269d31468a6f2
+size 1018370

checkpoint-100/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "auto_map": {
+    "AutoTokenizer": [
+      "tokenization_chatglm.ChatGLMTokenizer",
+      null
+    ]
+  },
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "padding_side": "left",
+  "remove_space": false,
+  "tokenizer_class": "ChatGLMTokenizer"
+}

checkpoint-100/trainer_state.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 16.3265306122449,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.63,
+      "learning_rate": 0.009833333333333333,
+      "loss": 2.53,
+      "step": 10
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 0.009666666666666667,
+      "loss": 2.0016,
+      "step": 20
+    },
+    {
+      "epoch": 4.9,
+      "learning_rate": 0.0095,
+      "loss": 1.7775,
+      "step": 30
+    },
+    {
+      "epoch": 6.53,
+      "learning_rate": 0.009333333333333334,
+      "loss": 1.6576,
+      "step": 40
+    },
+    {
+      "epoch": 8.16,
+      "learning_rate": 0.009166666666666667,
+      "loss": 1.5048,
+      "step": 50
+    },
+    {
+      "epoch": 9.8,
+      "learning_rate": 0.009000000000000001,
+      "loss": 1.3572,
+      "step": 60
+    },
+    {
+      "epoch": 11.43,
+      "learning_rate": 0.008833333333333334,
+      "loss": 1.2067,
+      "step": 70
+    },
+    {
+      "epoch": 13.06,
+      "learning_rate": 0.008666666666666668,
+      "loss": 1.0777,
+      "step": 80
+    },
+    {
+      "epoch": 14.69,
+      "learning_rate": 0.0085,
+      "loss": 0.9188,
+      "step": 90
+    },
+    {
+      "epoch": 16.33,
+      "learning_rate": 0.008333333333333333,
+      "loss": 0.7241,
+      "step": 100
+    }
+  ],
+  "max_steps": 600,
+  "num_train_epochs": 100,
+  "total_flos": 1.1757481562734592e+17,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-100/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df0a343e1f2ccb38a19082ba999546089030c0e15418471a24d346cbb68fa7af
+size 4472

checkpoint-200/config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "_name_or_path": "chatglm2-6b",
+  "add_bias_linear": false,
+  "add_qkv_bias": true,
+  "apply_query_key_layer_scaling": true,
+  "apply_residual_connection_post_layernorm": false,
+  "architectures": [
+    "ChatGLMForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "auto_map": {
+    "AutoConfig": "configuration_chatglm.ChatGLMConfig",
+    "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForCausalLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSequenceClassification": "modeling_chatglm.ChatGLMForSequenceClassification"
+  },
+  "bias_dropout_fusion": true,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "ffn_hidden_size": 13696,
+  "fp32_residual_connection": false,
+  "hidden_dropout": 0.0,
+  "hidden_size": 4096,
+  "kv_channels": 128,
+  "layernorm_epsilon": 1e-05,
+  "model_type": "chatglm",
+  "multi_query_attention": true,
+  "multi_query_group_num": 2,
+  "num_attention_heads": 32,
+  "num_layers": 28,
+  "original_rope": true,
+  "pad_token_id": 0,
+  "padded_vocab_size": 65024,
+  "post_layer_norm": true,
+  "pre_seq_len": 128,
+  "prefix_projection": false,
+  "quantization_bit": 0,
+  "rmsnorm": true,
+  "seq_length": 32768,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.30.2",
+  "use_cache": true,
+  "vocab_size": 65024
+}

checkpoint-200/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.30.2"
+}

checkpoint-200/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:933b7b82708ba6a23d949d7b05fcb8644b9ab8b06ecf625f35c30aeba85b3ba2
+size 14681892

checkpoint-200/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:54e939cf8e3ee1c58646595ea0e7748202c1e1b85f82aeb536a388bbe8d36e86
+size 7341306

checkpoint-200/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51523eedac643c13a3a71297ac9e347331249d1d4cc19f9738a182bae3585fb2
+size 14244

checkpoint-200/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3db1c4819d8e7a76f34cf5f8f4aa0bf9497992cd0862dbd9ba3fc68b9886b79e
+size 1064

checkpoint-200/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {}

checkpoint-200/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7dc4c393423b76e4373e5157ddc34803a0189ba96b21ddbb40269d31468a6f2
+size 1018370

checkpoint-200/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "auto_map": {
+    "AutoTokenizer": [
+      "tokenization_chatglm.ChatGLMTokenizer",
+      null
+    ]
+  },
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "padding_side": "left",
+  "remove_space": false,
+  "tokenizer_class": "ChatGLMTokenizer"
+}

checkpoint-200/trainer_state.json ADDED Viewed

	@@ -0,0 +1,136 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 32.6530612244898,
+  "global_step": 200,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.63,
+      "learning_rate": 0.009833333333333333,
+      "loss": 2.53,
+      "step": 10
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 0.009666666666666667,
+      "loss": 2.0016,
+      "step": 20
+    },
+    {
+      "epoch": 4.9,
+      "learning_rate": 0.0095,
+      "loss": 1.7775,
+      "step": 30
+    },
+    {
+      "epoch": 6.53,
+      "learning_rate": 0.009333333333333334,
+      "loss": 1.6576,
+      "step": 40
+    },
+    {
+      "epoch": 8.16,
+      "learning_rate": 0.009166666666666667,
+      "loss": 1.5048,
+      "step": 50
+    },
+    {
+      "epoch": 9.8,
+      "learning_rate": 0.009000000000000001,
+      "loss": 1.3572,
+      "step": 60
+    },
+    {
+      "epoch": 11.43,
+      "learning_rate": 0.008833333333333334,
+      "loss": 1.2067,
+      "step": 70
+    },
+    {
+      "epoch": 13.06,
+      "learning_rate": 0.008666666666666668,
+      "loss": 1.0777,
+      "step": 80
+    },
+    {
+      "epoch": 14.69,
+      "learning_rate": 0.0085,
+      "loss": 0.9188,
+      "step": 90
+    },
+    {
+      "epoch": 16.33,
+      "learning_rate": 0.008333333333333333,
+      "loss": 0.7241,
+      "step": 100
+    },
+    {
+      "epoch": 17.96,
+      "learning_rate": 0.008166666666666666,
+      "loss": 0.5775,
+      "step": 110
+    },
+    {
+      "epoch": 19.59,
+      "learning_rate": 0.008,
+      "loss": 0.4235,
+      "step": 120
+    },
+    {
+      "epoch": 21.22,
+      "learning_rate": 0.007833333333333333,
+      "loss": 0.3182,
+      "step": 130
+    },
+    {
+      "epoch": 22.86,
+      "learning_rate": 0.007666666666666667,
+      "loss": 0.2155,
+      "step": 140
+    },
+    {
+      "epoch": 24.49,
+      "learning_rate": 0.0075,
+      "loss": 0.1633,
+      "step": 150
+    },
+    {
+      "epoch": 26.12,
+      "learning_rate": 0.007333333333333333,
+      "loss": 0.1234,
+      "step": 160
+    },
+    {
+      "epoch": 27.76,
+      "learning_rate": 0.007166666666666667,
+      "loss": 0.0911,
+      "step": 170
+    },
+    {
+      "epoch": 29.39,
+      "learning_rate": 0.006999999999999999,
+      "loss": 0.0738,
+      "step": 180
+    },
+    {
+      "epoch": 31.02,
+      "learning_rate": 0.006833333333333334,
+      "loss": 0.0673,
+      "step": 190
+    },
+    {
+      "epoch": 32.65,
+      "learning_rate": 0.006666666666666666,
+      "loss": 0.0544,
+      "step": 200
+    }
+  ],
+  "max_steps": 600,
+  "num_train_epochs": 100,
+  "total_flos": 2.3514963125469184e+17,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-200/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df0a343e1f2ccb38a19082ba999546089030c0e15418471a24d346cbb68fa7af
+size 4472

checkpoint-300/config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "_name_or_path": "chatglm2-6b",
+  "add_bias_linear": false,
+  "add_qkv_bias": true,
+  "apply_query_key_layer_scaling": true,
+  "apply_residual_connection_post_layernorm": false,
+  "architectures": [
+    "ChatGLMForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "auto_map": {
+    "AutoConfig": "configuration_chatglm.ChatGLMConfig",
+    "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForCausalLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSequenceClassification": "modeling_chatglm.ChatGLMForSequenceClassification"
+  },
+  "bias_dropout_fusion": true,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "ffn_hidden_size": 13696,
+  "fp32_residual_connection": false,
+  "hidden_dropout": 0.0,
+  "hidden_size": 4096,
+  "kv_channels": 128,
+  "layernorm_epsilon": 1e-05,
+  "model_type": "chatglm",
+  "multi_query_attention": true,
+  "multi_query_group_num": 2,
+  "num_attention_heads": 32,
+  "num_layers": 28,
+  "original_rope": true,
+  "pad_token_id": 0,
+  "padded_vocab_size": 65024,
+  "post_layer_norm": true,
+  "pre_seq_len": 128,
+  "prefix_projection": false,
+  "quantization_bit": 0,
+  "rmsnorm": true,
+  "seq_length": 32768,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.30.2",
+  "use_cache": true,
+  "vocab_size": 65024
+}

checkpoint-300/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.30.2"
+}

checkpoint-300/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb80ab0b61a192373221d205400431f1f9db5591d3be1fcdb9051924f1b410d2
+size 14681892

checkpoint-300/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:187b159480029f605a8ec08a6da076afe43110d3c1ae18d10931f2ac9e5793ec
+size 7341306

checkpoint-300/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c5035452976c183913e118a486015c4dbd9cf61159f30c79ac9dd02dbf2cd81c
+size 14244

checkpoint-300/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b5ff897392aa57ce97759b435acfdb4ee39aef21d4a4a68095c3294c513f6c0
+size 1064

checkpoint-300/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {}

checkpoint-300/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7dc4c393423b76e4373e5157ddc34803a0189ba96b21ddbb40269d31468a6f2
+size 1018370

checkpoint-300/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "auto_map": {
+    "AutoTokenizer": [
+      "tokenization_chatglm.ChatGLMTokenizer",
+      null
+    ]
+  },
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "padding_side": "left",
+  "remove_space": false,
+  "tokenizer_class": "ChatGLMTokenizer"
+}

checkpoint-300/trainer_state.json ADDED Viewed

	@@ -0,0 +1,196 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 48.97959183673469,
+  "global_step": 300,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.63,
+      "learning_rate": 0.009833333333333333,
+      "loss": 2.53,
+      "step": 10
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 0.009666666666666667,
+      "loss": 2.0016,
+      "step": 20
+    },
+    {
+      "epoch": 4.9,
+      "learning_rate": 0.0095,
+      "loss": 1.7775,
+      "step": 30
+    },
+    {
+      "epoch": 6.53,
+      "learning_rate": 0.009333333333333334,
+      "loss": 1.6576,
+      "step": 40
+    },
+    {
+      "epoch": 8.16,
+      "learning_rate": 0.009166666666666667,
+      "loss": 1.5048,
+      "step": 50
+    },
+    {
+      "epoch": 9.8,
+      "learning_rate": 0.009000000000000001,
+      "loss": 1.3572,
+      "step": 60
+    },
+    {
+      "epoch": 11.43,
+      "learning_rate": 0.008833333333333334,
+      "loss": 1.2067,
+      "step": 70
+    },
+    {
+      "epoch": 13.06,
+      "learning_rate": 0.008666666666666668,
+      "loss": 1.0777,
+      "step": 80
+    },
+    {
+      "epoch": 14.69,
+      "learning_rate": 0.0085,
+      "loss": 0.9188,
+      "step": 90
+    },
+    {
+      "epoch": 16.33,
+      "learning_rate": 0.008333333333333333,
+      "loss": 0.7241,
+      "step": 100
+    },
+    {
+      "epoch": 17.96,
+      "learning_rate": 0.008166666666666666,
+      "loss": 0.5775,
+      "step": 110
+    },
+    {
+      "epoch": 19.59,
+      "learning_rate": 0.008,
+      "loss": 0.4235,
+      "step": 120
+    },
+    {
+      "epoch": 21.22,
+      "learning_rate": 0.007833333333333333,
+      "loss": 0.3182,
+      "step": 130
+    },
+    {
+      "epoch": 22.86,
+      "learning_rate": 0.007666666666666667,
+      "loss": 0.2155,
+      "step": 140
+    },
+    {
+      "epoch": 24.49,
+      "learning_rate": 0.0075,
+      "loss": 0.1633,
+      "step": 150
+    },
+    {
+      "epoch": 26.12,
+      "learning_rate": 0.007333333333333333,
+      "loss": 0.1234,
+      "step": 160
+    },
+    {
+      "epoch": 27.76,
+      "learning_rate": 0.007166666666666667,
+      "loss": 0.0911,
+      "step": 170
+    },
+    {
+      "epoch": 29.39,
+      "learning_rate": 0.006999999999999999,
+      "loss": 0.0738,
+      "step": 180
+    },
+    {
+      "epoch": 31.02,
+      "learning_rate": 0.006833333333333334,
+      "loss": 0.0673,
+      "step": 190
+    },
+    {
+      "epoch": 32.65,
+      "learning_rate": 0.006666666666666666,
+      "loss": 0.0544,
+      "step": 200
+    },
+    {
+      "epoch": 34.29,
+      "learning_rate": 0.006500000000000001,
+      "loss": 0.0492,
+      "step": 210
+    },
+    {
+      "epoch": 35.92,
+      "learning_rate": 0.006333333333333333,
+      "loss": 0.0458,
+      "step": 220
+    },
+    {
+      "epoch": 37.55,
+      "learning_rate": 0.0061666666666666675,
+      "loss": 0.0434,
+      "step": 230
+    },
+    {
+      "epoch": 39.18,
+      "learning_rate": 0.006,
+      "loss": 0.0387,
+      "step": 240
+    },
+    {
+      "epoch": 40.82,
+      "learning_rate": 0.005833333333333334,
+      "loss": 0.0375,
+      "step": 250
+    },
+    {
+      "epoch": 42.45,
+      "learning_rate": 0.005666666666666666,
+      "loss": 0.0363,
+      "step": 260
+    },
+    {
+      "epoch": 44.08,
+      "learning_rate": 0.0055000000000000005,
+      "loss": 0.0347,
+      "step": 270
+    },
+    {
+      "epoch": 45.71,
+      "learning_rate": 0.005333333333333333,
+      "loss": 0.0341,
+      "step": 280
+    },
+    {
+      "epoch": 47.35,
+      "learning_rate": 0.0051666666666666675,
+      "loss": 0.0327,
+      "step": 290
+    },
+    {
+      "epoch": 48.98,
+      "learning_rate": 0.005,
+      "loss": 0.0307,
+      "step": 300
+    }
+  ],
+  "max_steps": 600,
+  "num_train_epochs": 100,
+  "total_flos": 3.5272444688203776e+17,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-300/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df0a343e1f2ccb38a19082ba999546089030c0e15418471a24d346cbb68fa7af
+size 4472

checkpoint-400/config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "_name_or_path": "chatglm2-6b",
+  "add_bias_linear": false,
+  "add_qkv_bias": true,
+  "apply_query_key_layer_scaling": true,
+  "apply_residual_connection_post_layernorm": false,
+  "architectures": [
+    "ChatGLMForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "auto_map": {
+    "AutoConfig": "configuration_chatglm.ChatGLMConfig",
+    "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForCausalLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSequenceClassification": "modeling_chatglm.ChatGLMForSequenceClassification"
+  },
+  "bias_dropout_fusion": true,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "ffn_hidden_size": 13696,
+  "fp32_residual_connection": false,
+  "hidden_dropout": 0.0,
+  "hidden_size": 4096,
+  "kv_channels": 128,
+  "layernorm_epsilon": 1e-05,
+  "model_type": "chatglm",
+  "multi_query_attention": true,
+  "multi_query_group_num": 2,
+  "num_attention_heads": 32,
+  "num_layers": 28,
+  "original_rope": true,
+  "pad_token_id": 0,
+  "padded_vocab_size": 65024,
+  "post_layer_norm": true,
+  "pre_seq_len": 128,
+  "prefix_projection": false,
+  "quantization_bit": 0,
+  "rmsnorm": true,
+  "seq_length": 32768,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.30.2",
+  "use_cache": true,
+  "vocab_size": 65024
+}

checkpoint-400/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.30.2"
+}

checkpoint-400/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ddda63cbe968668b459a73f0a54c34fc36c007f9f202063794ded2a8814a37a
+size 14681892

checkpoint-400/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b954c8f23337c53ad1c86bafb2969338878db3b96c2bc2459aa04e1198a2141
+size 7341306

checkpoint-400/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:11204a688e287bc0c7409fba921f7fd490e9471d91d738932d045851e4742a4e
+size 14244

checkpoint-400/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c32c17fb8a573adc159285286f456bfb53c7e2d80664d0c2cce541b6013ed8d7
+size 1064

checkpoint-400/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {}

checkpoint-400/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7dc4c393423b76e4373e5157ddc34803a0189ba96b21ddbb40269d31468a6f2
+size 1018370

checkpoint-400/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "auto_map": {
+    "AutoTokenizer": [
+      "tokenization_chatglm.ChatGLMTokenizer",
+      null
+    ]
+  },
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "padding_side": "left",
+  "remove_space": false,
+  "tokenizer_class": "ChatGLMTokenizer"
+}

checkpoint-400/trainer_state.json ADDED Viewed

	@@ -0,0 +1,256 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 65.3061224489796,
+  "global_step": 400,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.63,
+      "learning_rate": 0.009833333333333333,
+      "loss": 2.53,
+      "step": 10
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 0.009666666666666667,
+      "loss": 2.0016,
+      "step": 20
+    },
+    {
+      "epoch": 4.9,
+      "learning_rate": 0.0095,
+      "loss": 1.7775,
+      "step": 30
+    },
+    {
+      "epoch": 6.53,
+      "learning_rate": 0.009333333333333334,
+      "loss": 1.6576,
+      "step": 40
+    },
+    {
+      "epoch": 8.16,
+      "learning_rate": 0.009166666666666667,
+      "loss": 1.5048,
+      "step": 50
+    },
+    {
+      "epoch": 9.8,
+      "learning_rate": 0.009000000000000001,
+      "loss": 1.3572,
+      "step": 60
+    },
+    {
+      "epoch": 11.43,
+      "learning_rate": 0.008833333333333334,
+      "loss": 1.2067,
+      "step": 70
+    },
+    {
+      "epoch": 13.06,
+      "learning_rate": 0.008666666666666668,
+      "loss": 1.0777,
+      "step": 80
+    },
+    {
+      "epoch": 14.69,
+      "learning_rate": 0.0085,
+      "loss": 0.9188,
+      "step": 90
+    },
+    {
+      "epoch": 16.33,
+      "learning_rate": 0.008333333333333333,
+      "loss": 0.7241,
+      "step": 100
+    },
+    {
+      "epoch": 17.96,
+      "learning_rate": 0.008166666666666666,
+      "loss": 0.5775,
+      "step": 110
+    },
+    {
+      "epoch": 19.59,
+      "learning_rate": 0.008,
+      "loss": 0.4235,
+      "step": 120
+    },
+    {
+      "epoch": 21.22,
+      "learning_rate": 0.007833333333333333,
+      "loss": 0.3182,
+      "step": 130
+    },
+    {
+      "epoch": 22.86,
+      "learning_rate": 0.007666666666666667,
+      "loss": 0.2155,
+      "step": 140
+    },
+    {
+      "epoch": 24.49,
+      "learning_rate": 0.0075,
+      "loss": 0.1633,
+      "step": 150
+    },
+    {
+      "epoch": 26.12,
+      "learning_rate": 0.007333333333333333,
+      "loss": 0.1234,
+      "step": 160
+    },
+    {
+      "epoch": 27.76,
+      "learning_rate": 0.007166666666666667,
+      "loss": 0.0911,
+      "step": 170
+    },
+    {
+      "epoch": 29.39,
+      "learning_rate": 0.006999999999999999,
+      "loss": 0.0738,
+      "step": 180
+    },
+    {
+      "epoch": 31.02,
+      "learning_rate": 0.006833333333333334,
+      "loss": 0.0673,
+      "step": 190
+    },
+    {
+      "epoch": 32.65,
+      "learning_rate": 0.006666666666666666,
+      "loss": 0.0544,
+      "step": 200
+    },
+    {
+      "epoch": 34.29,
+      "learning_rate": 0.006500000000000001,
+      "loss": 0.0492,
+      "step": 210
+    },
+    {
+      "epoch": 35.92,
+      "learning_rate": 0.006333333333333333,
+      "loss": 0.0458,
+      "step": 220
+    },
+    {
+      "epoch": 37.55,
+      "learning_rate": 0.0061666666666666675,
+      "loss": 0.0434,
+      "step": 230
+    },
+    {
+      "epoch": 39.18,
+      "learning_rate": 0.006,
+      "loss": 0.0387,
+      "step": 240
+    },
+    {
+      "epoch": 40.82,
+      "learning_rate": 0.005833333333333334,
+      "loss": 0.0375,
+      "step": 250
+    },
+    {
+      "epoch": 42.45,
+      "learning_rate": 0.005666666666666666,
+      "loss": 0.0363,
+      "step": 260
+    },
+    {
+      "epoch": 44.08,
+      "learning_rate": 0.0055000000000000005,
+      "loss": 0.0347,
+      "step": 270
+    },
+    {
+      "epoch": 45.71,
+      "learning_rate": 0.005333333333333333,
+      "loss": 0.0341,
+      "step": 280
+    },
+    {
+      "epoch": 47.35,
+      "learning_rate": 0.0051666666666666675,
+      "loss": 0.0327,
+      "step": 290
+    },
+    {
+      "epoch": 48.98,
+      "learning_rate": 0.005,
+      "loss": 0.0307,
+      "step": 300
+    },
+    {
+      "epoch": 50.61,
+      "learning_rate": 0.004833333333333334,
+      "loss": 0.031,
+      "step": 310
+    },
+    {
+      "epoch": 52.24,
+      "learning_rate": 0.004666666666666667,
+      "loss": 0.0312,
+      "step": 320
+    },
+    {
+      "epoch": 53.88,
+      "learning_rate": 0.0045000000000000005,
+      "loss": 0.033,
+      "step": 330
+    },
+    {
+      "epoch": 55.51,
+      "learning_rate": 0.004333333333333334,
+      "loss": 0.0294,
+      "step": 340
+    },
+    {
+      "epoch": 57.14,
+      "learning_rate": 0.004166666666666667,
+      "loss": 0.0308,
+      "step": 350
+    },
+    {
+      "epoch": 58.78,
+      "learning_rate": 0.004,
+      "loss": 0.0301,
+      "step": 360
+    },
+    {
+      "epoch": 60.41,
+      "learning_rate": 0.0038333333333333336,
+      "loss": 0.0292,
+      "step": 370
+    },
+    {
+      "epoch": 62.04,
+      "learning_rate": 0.0036666666666666666,
+      "loss": 0.0316,
+      "step": 380
+    },
+    {
+      "epoch": 63.67,
+      "learning_rate": 0.0034999999999999996,
+      "loss": 0.0302,
+      "step": 390
+    },
+    {
+      "epoch": 65.31,
+      "learning_rate": 0.003333333333333333,
+      "loss": 0.0295,
+      "step": 400
+    }
+  ],
+  "max_steps": 600,
+  "num_train_epochs": 100,
+  "total_flos": 4.702992625093837e+17,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-400/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df0a343e1f2ccb38a19082ba999546089030c0e15418471a24d346cbb68fa7af
+size 4472

checkpoint-500/config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "_name_or_path": "chatglm2-6b",
+  "add_bias_linear": false,
+  "add_qkv_bias": true,
+  "apply_query_key_layer_scaling": true,
+  "apply_residual_connection_post_layernorm": false,
+  "architectures": [
+    "ChatGLMForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "auto_map": {
+    "AutoConfig": "configuration_chatglm.ChatGLMConfig",
+    "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForCausalLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSequenceClassification": "modeling_chatglm.ChatGLMForSequenceClassification"
+  },
+  "bias_dropout_fusion": true,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "ffn_hidden_size": 13696,
+  "fp32_residual_connection": false,
+  "hidden_dropout": 0.0,
+  "hidden_size": 4096,
+  "kv_channels": 128,
+  "layernorm_epsilon": 1e-05,
+  "model_type": "chatglm",
+  "multi_query_attention": true,
+  "multi_query_group_num": 2,
+  "num_attention_heads": 32,
+  "num_layers": 28,
+  "original_rope": true,
+  "pad_token_id": 0,
+  "padded_vocab_size": 65024,
+  "post_layer_norm": true,
+  "pre_seq_len": 128,
+  "prefix_projection": false,
+  "quantization_bit": 0,
+  "rmsnorm": true,
+  "seq_length": 32768,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.30.2",
+  "use_cache": true,
+  "vocab_size": 65024
+}

checkpoint-500/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.30.2"
+}

checkpoint-500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9df4c877e409ae83e7bc7c0f1205d623699a931f44d97cbd852d2946c9fa1c96
+size 14681892

checkpoint-500/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:61a87e680f1db9957f77578eb4f8c6df8112d5951619472ae6cfe33f88f3f54e
+size 7341306

checkpoint-500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e88ac4017435c2ca3872f675a493a2f3116de05fe3fa16f5cc26289716e59698
+size 14244